├── .dockerignore ├── .github ├── release.yml └── workflows │ ├── ci.yaml │ ├── release.yaml │ └── tagpr.yaml ├── .gitignore ├── .tagpr ├── Dockerfile ├── LICENSE ├── Makefile ├── PROJECT ├── README.md ├── api └── v1alpha1 │ ├── groupversion_info.go │ ├── nodedisruptionbudget_types.go │ ├── nodeoperation_types.go │ ├── nodeoperationtemplate_types.go │ ├── noderemediation_types.go │ ├── noderemediationtemplate_types.go │ └── zz_generated.deepcopy.go ├── cmd └── kube-node-status │ └── main.go ├── config ├── crd │ ├── bases │ │ ├── nodeops.k8s.preferred.jp_nodedisruptionbudgets.yaml │ │ ├── nodeops.k8s.preferred.jp_nodeoperations.yaml │ │ ├── nodeops.k8s.preferred.jp_nodeoperationtemplates.yaml │ │ ├── nodeops.k8s.preferred.jp_noderemediations.yaml │ │ └── nodeops.k8s.preferred.jp_noderemediationtemplates.yaml │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── patches │ │ ├── cainjection_in_nodedisruptionbudgets.yaml │ │ ├── cainjection_in_nodeoperations.yaml │ │ ├── cainjection_in_nodeoperationtemplates.yaml │ │ ├── cainjection_in_noderemediations.yaml │ │ ├── cainjection_in_noderemediationtemplates.yaml │ │ ├── webhook_in_nodedisruptionbudgets.yaml │ │ ├── webhook_in_nodeoperations.yaml │ │ ├── webhook_in_nodeoperationtemplates.yaml │ │ ├── webhook_in_noderemediations.yaml │ │ └── webhook_in_noderemediationtemplates.yaml ├── default │ ├── kustomization.yaml │ ├── manager_auth_proxy_patch.yaml │ └── manager_config_patch.yaml ├── kind │ ├── config.yaml │ └── test.yaml ├── manager │ ├── controller_manager_config.yaml │ ├── kustomization.yaml │ └── manager.yaml ├── prometheus │ ├── kustomization.yaml │ └── monitor.yaml ├── rbac │ ├── auth_proxy_client_clusterrole.yaml │ ├── auth_proxy_role.yaml │ ├── auth_proxy_role_binding.yaml │ ├── auth_proxy_service.yaml │ ├── kustomization.yaml │ ├── leader_election_role.yaml │ ├── leader_election_role_binding.yaml │ ├── nodedisruptionbudget_editor_role.yaml │ ├── nodedisruptionbudget_viewer_role.yaml │ ├── 
nodeoperation_editor_role.yaml │ ├── nodeoperation_viewer_role.yaml │ ├── nodeoperationtemplate_editor_role.yaml │ ├── nodeoperationtemplate_viewer_role.yaml │ ├── noderemediation_editor_role.yaml │ ├── noderemediation_viewer_role.yaml │ ├── noderemediationtemplate_editor_role.yaml │ ├── noderemediationtemplate_viewer_role.yaml │ ├── role.yaml │ ├── role_binding.yaml │ └── service_account.yaml └── samples │ ├── nodeops_v1alpha1_nodedisruptionbudget.yaml │ ├── nodeops_v1alpha1_nodeoperation.yaml │ ├── nodeops_v1alpha1_nodeoperationtemplate.yaml │ ├── nodeops_v1alpha1_noderemediation.yaml │ └── nodeops_v1alpha1_noderemediationtemplate.yaml ├── controllers ├── eviction_stragegy.go ├── nodedisruptionbudget_controller.go ├── nodeoperation_controller.go ├── nodeoperation_controller_test.go ├── nodeoperationtemplate_controller.go ├── noderemediation_controller.go ├── noderemediationtemplate_controller.go └── suite_test.go ├── doc └── images │ ├── nodeoperation.png │ └── noderemediationrule.png ├── e2e └── e2e_test.go ├── go.mod ├── go.sum ├── hack └── boilerplate.go.txt ├── main.go └── tutorial ├── README.md ├── kind.yaml ├── nodedisruptionbudget-tutorial1.yaml ├── nodeoperation-tutorial1.yaml ├── nodeoperation-tutorial2.yaml ├── nodeoperation-tutorial3.yaml ├── nodeoperationtemplate-tutorial1.yaml └── noderemediationtemplate-tutorial1.yaml /.dockerignore: -------------------------------------------------------------------------------- 1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file 2 | # Ignore build and test binaries. 3 | bin/ 4 | testbin/ 5 | -------------------------------------------------------------------------------- /.github/release.yml: -------------------------------------------------------------------------------- 1 | # 2 | # This is a configuration for automatic changelog generation 3 | # tagpr respects this. 
4 | # ref: https://docs.github.com/en/repositories/releasing-projects-on-github/automatically-generated-release-notes 5 | # 6 | changelog: 7 | exclude: 8 | labels: 9 | - release-note/skip 10 | - tagpr 11 | categories: 12 | - title: '💣 Breaking Changes' 13 | labels: 14 | - 'release-note/breaking-change' 15 | - title: '🚀 Features' 16 | labels: 17 | - 'release-note/feature' 18 | - title: '🐛 Bug Fixes' 19 | labels: 20 | - 'release-note/bugfix' 21 | - title: '📜 Documentation' 22 | labels: 23 | - 'release-note/document' 24 | - title: '🧰 Maintenance' 25 | labels: 26 | - 'release-note/chore' 27 | - title: '🔬 Other Changes' 28 | labels: 29 | - "*" 30 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | paths-ignore: ['**.md'] 7 | pull_request: 8 | types: [opened, synchronize] 9 | paths-ignore: ['**.md'] 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-go@v5 17 | with: 18 | go-version-file: go.mod 19 | - run: make test 20 | 21 | docker-build: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v4 25 | - run: make docker-build 26 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | # tagged manually 5 | push: 6 | tags: ["v*"] 7 | # dispatch from tagpr.yaml workflow 8 | workflow_dispatch: 9 | 10 | jobs: 11 | run: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - name: Login to GitHub Container Registry 16 | uses: docker/login-action@v3 17 | with: 18 | registry: ghcr.io 19 | username: ${{ github.actor }} 20 | password: ${{ secrets.GITHUB_TOKEN }} 21 | - run: make docker-build 22 | - 
run: make docker-push 23 | -------------------------------------------------------------------------------- /.github/workflows/tagpr.yaml: -------------------------------------------------------------------------------- 1 | name: tagpr 2 | 3 | on: 4 | push: 5 | branches: ["master"] 6 | 7 | jobs: 8 | tagpr: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - id: tagpr 13 | name: Tagpr 14 | uses: Songmu/tagpr@v1 15 | env: 16 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | - name: Trigger Release Workflow(only when tagged) 19 | uses: actions/github-script@v7 20 | if: "steps.tagpr.outputs.tag != ''" 21 | with: 22 | script: | 23 | github.rest.actions.createWorkflowDispatch({ 24 | owner: context.repo.owner, 25 | repo: context.repo.repo, 26 | workflow_id: 'release.yaml', 27 | ref: "refs/tags/${{ steps.tagpr.outputs.tag }}", 28 | }) 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Kubernetes Generated files - skip generated files, except for vendored files 17 | 18 | !vendor/**/zz_generated.* 19 | 20 | # editor and IDE paraphernalia 21 | .idea 22 | *.swp 23 | *.swo 24 | *~ 25 | 26 | .idea 27 | .vscode 28 | 29 | _tmp 30 | tmp 31 | -------------------------------------------------------------------------------- /.tagpr: -------------------------------------------------------------------------------- 1 | # config file for the tagpr in git config format 2 | # The tagpr generates the initial configuration, which you can rewrite to suit your environment. 3 | # CONFIGURATIONS: 4 | # tagpr.releaseBranch 5 | # Generally, it is "main." 
It is the branch for releases. The tagpr tracks this branch, 6 | # creates or updates a pull request as a release candidate, or tags when they are merged. 7 | # 8 | # tagpr.versionFile 9 | # Versioning file containing the semantic version needed to be updated at release. 10 | # It will be synchronized with the "git tag". 11 | # Often this is a meta-information file such as gemspec, setup.cfg, package.json, etc. 12 | # Sometimes the source code file, such as version.go or Bar.pm, is used. 13 | # If you do not want to use versioning files but only git tags, specify the "-" string here. 14 | # You can specify multiple version files by comma separated strings. 15 | # 16 | # tagpr.vPrefix 17 | # Flag whether or not v-prefix is added to semver when git tagging. (e.g. v1.2.3 if true) 18 | # This is only a tagging convention, not how it is described in the version file. 19 | # 20 | # tagpr.changelog (Optional) 21 | # Flag whether or not changelog is added or changed during the release. 22 | # 23 | # tagpr.command (Optional) 24 | # Command to change files just before release. 25 | # 26 | # tagpr.template (Optional) 27 | # Pull request template in go template format 28 | # 29 | # tagpr.release (Optional) 30 | # GitHub Release creation behavior after tagging [true, draft, false] 31 | # If this value is not set, the release is to be created. 32 | [tagpr] 33 | vPrefix = true 34 | releaseBranch = master 35 | versionFile = - 36 | changelog = false 37 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM golang:1.23 AS base 2 | 3 | WORKDIR /workspace 4 | # Copy the Go Modules manifests 5 | COPY go.* .
6 | # cache deps before building and copying source so that we don't need to re-download as much 7 | # and so that source changes don't invalidate our downloaded layer 8 | RUN --mount=type=cache,target=/go/pkg/mod \ 9 | go mod download 10 | 11 | FROM golangci/golangci-lint:v1.64 AS lint-base 12 | FROM base AS lint 13 | RUN --mount=target=. \ 14 | --mount=from=lint-base,src=/usr/bin/golangci-lint,target=/usr/bin/golangci-lint \ 15 | --mount=type=cache,target=/go/pkg/mod \ 16 | --mount=type=cache,target=/root/.cache/go-build \ 17 | --mount=type=cache,target=/root/.cache/golangci-lint \ 18 | golangci-lint run --timeout 10m0s ./... 19 | 20 | FROM base AS build 21 | # Build the manager binary 22 | RUN --mount=target=. \ 23 | --mount=type=cache,target=/go/pkg/mod \ 24 | --mount=type=cache,target=/root/.cache/go-build \ 25 | CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -a -o /out/manager main.go 26 | 27 | # Use distroless as minimal base image to package the manager binary 28 | # Refer to https://github.com/GoogleContainerTools/distroless for more details 29 | FROM gcr.io/distroless/static:nonroot 30 | WORKDIR / 31 | COPY --from=build /out/manager . 32 | 33 | ENTRYPOINT ["/manager"] 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 
15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2020 Preferred Networks, Inc. 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER_BUILD ?= DOCKER_BUILDKIT=1 docker build --progress plain 2 | 3 | # Image URL to use all building/pushing image targets 4 | TAG := $(shell git describe --tags --always --dirty) 5 | IMG ?= ghcr.io/pfnet-research/node-operation-controller:$(TAG) 6 | 7 | # ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary. 8 | ENVTEST_K8S_VERSION = 1.30 9 | 10 | # Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set) 11 | ifeq (,$(shell go env GOBIN)) 12 | GOBIN=$(shell go env GOPATH)/bin 13 | else 14 | GOBIN=$(shell go env GOBIN) 15 | endif 16 | 17 | # Setting SHELL to bash allows bash commands to be executed by recipes. 18 | # This is a requirement for 'setup-envtest.sh' in the test target. 19 | # Options are set to exit when a recipe line exits non-zero or a piped command fails. 
20 | SHELL = /usr/bin/env bash -o pipefail 21 | .SHELLFLAGS = -ec 22 | 23 | .PHONY: all 24 | all: build 25 | 26 | ##@ General 27 | 28 | # The help target prints out all targets with their descriptions organized 29 | # beneath their categories. The categories are represented by '##@' and the 30 | # target descriptions by '##'. The awk commands is responsible for reading the 31 | # entire set of makefiles included in this invocation, looking for lines of the 32 | # file as xyz: ## something, and then pretty-format the target and help. Then, 33 | # if there's a line with ##@ something, that gets pretty-printed as a category. 34 | # More info on the usage of ANSI control characters for terminal formatting: 35 | # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_parameters 36 | # More info on the awk command: 37 | # http://linuxcommand.org/lc3_adv_awk.php 38 | 39 | .PHONY: help 40 | help: ## Display this help. 41 | @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf " \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST) 42 | 43 | ##@ Tools 44 | PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST)))) 45 | 46 | CONTROLLER_GEN := $(CURDIR)/bin/controller-gen 47 | CONTROLLER_GEN_VERSION ?= v0.16.5 48 | $(CONTROLLER_GEN): ## Download controller-gen locally if necessary. 49 | GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_GEN_VERSION) 50 | 51 | KUSTOMIZE := $(CURDIR)/bin/kustomize 52 | KUSTOMIZE_VERSION ?= v4.5.7 53 | $(KUSTOMIZE): ## Download kustomize locally if necessary. 54 | GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/kustomize/kustomize/v4@$(KUSTOMIZE_VERSION) 55 | 56 | ENVTEST := $(CURDIR)/bin/setup-envtest 57 | $(ENVTEST): ## Download envtest-setup locally if necessary. 
58 | GOBIN=$(PROJECT_DIR)/bin go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest 59 | 60 | KUBECTL := $(CURDIR)/bin/kubectl 61 | KUBECTL_VERSION ?= v1.30.11 62 | $(KUBECTL): ## Download kubectl locally if necessary. 63 | curl -Lo $(PROJECT_DIR)/bin/kubectl "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" 64 | chmod +x $(PROJECT_DIR)/bin/kubectl 65 | 66 | KIND := $(CURDIR)/bin/kind 67 | KIND_VERSION ?= v0.25.0 68 | $(KIND): ## Download kind locally if necessary. 69 | curl -Lo $(PROJECT_DIR)/bin/kind "https://kind.sigs.k8s.io/dl/${KIND_VERSION}/kind-linux-amd64" 70 | chmod +x $(PROJECT_DIR)/bin/kind 71 | 72 | ##@ Development 73 | 74 | .PHONY: manifests 75 | manifests: $(CONTROLLER_GEN) ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. 76 | $(CONTROLLER_GEN) rbac:roleName=manager-role crd:generateEmbeddedObjectMeta=true webhook paths="./..." output:crd:artifacts:config=config/crd/bases 77 | 78 | .PHONY: generate 79 | generate: $(CONTROLLER_GEN) ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. 80 | $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..." 81 | 82 | .PHONY: fmt 83 | fmt: ## Run go fmt against code. 84 | go fmt ./... 85 | 86 | .PHONY: vet 87 | vet: ## Run go vet against code. 88 | go vet ./... 89 | 90 | .PHONY: lint 91 | lint: ## Run golangci-lint against code. 92 | $(DOCKER_BUILD) --target lint . 93 | 94 | KUBECONFIG := $(CURDIR)/tmp/node-operation-controller-test.kubeconfig.yaml 95 | 96 | .PHONY: test 97 | test: manifests generate fmt vet lint $(ENVTEST) kind-for-test ## Run tests. 98 | KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) -p path)" KUBECONFIG=$(KUBECONFIG) go test ./... -coverprofile cover.out 99 | 100 | test-focus: generate fmt vet manifests kind-for-test 101 | ginkgo -focus "${FOCUS}" ./... 
102 | 103 | kind-for-test: $(KIND) $(KUBECTL) 104 | $(KIND) delete cluster --name=node-operation-controller-test || true 105 | $(KIND) create cluster --name=node-operation-controller-test --config=config/kind/test.yaml --kubeconfig=$(KUBECONFIG) 106 | KUBECONFIG=$(KUBECONFIG) $(KUBECTL) delete deploy -n kube-system coredns 107 | KUBECONFIG=$(KUBECONFIG) $(KUBECTL) delete deploy -n local-path-storage local-path-provisioner 108 | 109 | ##@ Build 110 | 111 | .PHONY: build 112 | build: generate fmt vet ## Build manager binary. 113 | go build -o bin/manager main.go 114 | 115 | .PHONY: run 116 | run: manifests generate fmt vet ## Run a controller from your host. 117 | go run ./main.go 118 | 119 | .PHONY: docker-build 120 | docker-build: ## Build docker image with the manager. 121 | $(DOCKER_BUILD) -t ${IMG} . 122 | 123 | .PHONY: docker-push 124 | docker-push: ## Push docker image with the manager. 125 | docker push ${IMG} 126 | 127 | ##@ Deployment 128 | 129 | ifndef ignore-not-found 130 | ignore-not-found = false 131 | endif 132 | 133 | .PHONY: install 134 | install: manifests $(KUSTOMIZE) $(KUBECTL) ## Install CRDs into the K8s cluster specified in ~/.kube/config. 135 | $(KUSTOMIZE) build config/crd | $(KUBECTL) apply -f - 136 | 137 | .PHONY: uninstall 138 | uninstall: manifests $(KUSTOMIZE) $(KUBECTL) ## Uninstall CRDs from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion. 139 | $(KUSTOMIZE) build config/crd | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - 140 | 141 | .PHONY: deploy 142 | deploy: manifests $(KUSTOMIZE) $(KUBECTL) ## Deploy controller to the K8s cluster specified in ~/.kube/config. 143 | cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG} 144 | $(KUSTOMIZE) build config/default | $(KUBECTL) apply -f - 145 | 146 | .PHONY: undeploy 147 | undeploy: $(KUSTOMIZE) $(KUBECTL) ## Undeploy controller from the K8s cluster specified in ~/.kube/config. 
Call with ignore-not-found=true to ignore resource not found errors during deletion. 148 | $(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f - 149 | 150 | .PHONY: clean 151 | clean: 152 | rm -rf $(PROJECT_DIR)/bin 153 | -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | domain: k8s.preferred.jp 2 | layout: 3 | - go.kubebuilder.io/v3 4 | projectName: kubebuilder-v3 5 | repo: github.com/pfnet-research/node-operation-controller 6 | resources: 7 | - api: 8 | crdVersion: v1 9 | namespaced: true 10 | controller: true 11 | domain: k8s.preferred.jp 12 | group: nodeops 13 | kind: NodeOperation 14 | path: github.com/pfnet-research/node-operation-controller/api/v1alpha1 15 | version: v1alpha1 16 | - api: 17 | crdVersion: v1 18 | namespaced: true 19 | controller: true 20 | domain: k8s.preferred.jp 21 | group: nodeops 22 | kind: NodeDisruptionBudget 23 | path: github.com/pfnet-research/node-operation-controller/api/v1alpha1 24 | version: v1alpha1 25 | - api: 26 | crdVersion: v1 27 | namespaced: true 28 | controller: true 29 | domain: k8s.preferred.jp 30 | group: nodeops 31 | kind: NodeOperationTemplate 32 | path: github.com/pfnet-research/node-operation-controller/api/v1alpha1 33 | version: v1alpha1 34 | - api: 35 | crdVersion: v1 36 | namespaced: true 37 | controller: true 38 | domain: k8s.preferred.jp 39 | group: nodeops 40 | kind: NodeRemediationTemplate 41 | path: github.com/pfnet-research/node-operation-controller/api/v1alpha1 42 | version: v1alpha1 43 | - api: 44 | crdVersion: v1 45 | namespaced: true 46 | controller: true 47 | domain: k8s.preferred.jp 48 | group: nodeops 49 | kind: NodeRemediation 50 | path: github.com/pfnet-research/node-operation-controller/api/v1alpha1 51 | version: v1alpha1 52 | version: "3" 53 | -------------------------------------------------------------------------------- 
/README.md: -------------------------------------------------------------------------------- 1 | # Node Operation Controller 2 | 3 | This is a Kubernetes controller for an automated Node operation. In general, if we perform a Node operation that affects running Pods, we need to do the following steps: 4 | 5 | 1. Make the Node unschedulable. 6 | 1. Evict running Pods in the Node and wait for all running Pods to be evicted. 7 | 1. Perform the operation. 8 | 1. Make the Node schedulable. 9 | 10 | Node operation controller automates these steps. In addition, this controller: 11 | 12 | - watches NodeCondition and performs an arbitrary operation 13 | - takes care of the count of unavailable Nodes due to the operation 14 | 15 | ## Table of contents 16 | 17 | 18 | 19 | - [Node Operation Controller](#node-operation-controller) 20 | - [Table of contents](#table-of-contents) 21 | - [How it works](#how-it-works) 22 | - [NodeOperation and NodeDisruptionBudget](#nodeoperation-and-nodedisruptionbudget) 23 | - [NodeRemediation](#noderemediation) 24 | - [Custom Resources](#custom-resources) 25 | - [NodeOperation](#nodeoperation) 26 | - [`evictionStrategy`](#evictionstrategy) 27 | - [`nodeDisruptionBudgetSelector`](#nodedisruptionbudgetselector) 28 | - [`skipWaitingForEviction`](#skipwaitingforeviction) 29 | - [NodeDisruptionBudget](#nodedisruptionbudget) 30 | - [`maxUnavailable` and `minAvailable`](#maxunavailable-and-minavailable) 31 | - [`taintTargets`](#tainttargets) 32 | - [NodeRemediation](#noderemediation-1) 33 | - [NodeRemediationTemplate](#noderemediationtemplate) 34 | - [How to release](#how-to-release) 35 | 36 | 37 | 38 | ## How it works 39 | 40 | ### NodeOperation and NodeDisruptionBudget 41 | 42 | 1. When a NodeOperation resource is created, go to the next step 43 | 1. Confirm the NodeOperation does not violate NodeDisruptionBudgets. 44 | - If it violates NodeDisruptionBudgets, wait for other NodeOperations to finish. 45 | 1. Taint the target Node specified in NodeOperation.
46 | - The Taint is `nodeops.k8s.preferred.jp/operating=:NoSchedule` 47 | 1. Evict all running Pods in the Node. 48 | - By default, this uses Pod eviction API. You can control eviction by PodDisruptionBudget. 49 | - This behavior can be configured by `evictionStrategy` option of NodeOperation. 50 | 1. After eviction, run a Job configured in the NodeOperation 51 | - The Pod created by the Job has `nodeops.k8s.preferred.jp/nodename` annotation which indicates the target Node. 52 | 1. Wait for the Job to reach the Completed or Failed phase. 53 | 1. Untaint the Node. 54 | 55 | ![](doc/images/nodeoperation.png) 56 | 57 | ### NodeRemediation 58 | 59 | Most operations teams have their own secret sauce for daily operations. This means that typical node failures can be cured by common recipes shared among the team. `NodeRemediation`, `NodeRemediationTemplate` and `NodeOperationTemplate` enable us to automate the common operation for known node issues. 60 | 61 | - `NodeOperationTemplate` represents a template of common node operation. 62 | - `NodeRemediation` defines 63 | - target node to apply the remediation, 64 | - known failure by Node `conditions`, and 65 | - corresponding `nodeOperationTemplate` to fix the failure. 66 | - `NodeRemediationTemplate` defines 67 | - target nodes to apply the remediation by `nodeSelector` and 68 | - a template of `NodeRemediation`. 69 | 70 | Node operation controller watches nodes, and if it detects a failure that matches some `NodeRemediation`, it automatically creates a `NodeOperation` from the specified `NodeOperationTemplate`.
71 | 72 | ![](doc/images/noderemediationrule.png) 73 | 74 | ## Custom Resources 75 | 76 | ### NodeOperation 77 | 78 | ```yaml 79 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 80 | kind: NodeOperation 81 | metadata: 82 | name: example 83 | spec: 84 | nodeName: "" 85 | jobTemplate: 86 | metadata: 87 | namespace: default 88 | spec: # batchv1.JobSpec 89 | template: 90 | spec: 91 | containers: 92 | - name: operation 93 | image: busybox 94 | command: ["sh", "-c", "echo Do some operation for $TARGET_NODE && sleep 60 && echo Done"] 95 | env: 96 | - name: TARGET_NODE 97 | valueFrom: 98 | fieldRef: 99 | fieldPath: "metadata.annotations['nodeops.k8s.preferred.jp/nodename']" 100 | restartPolicy: Never 101 | 102 | evictionStrategy: Evict # optional 103 | nodeDisruptionBudgetSelector: {} # optional 104 | skipWaitingForEviction: false # optional 105 | ``` 106 | 107 | #### `evictionStrategy` 108 | 109 | This controller has some ways to evict Pods: 110 | 111 | - `evictionStrategy: Evict`: This strategy tries to evict Pods by Pod eviction API and it respects PodDisruptionBudget. 112 | - `evictionStrategy: Delete`: This strategy tries to evict Pods by deleting Pods. 113 | - `evictionStrategy: ForceDelete`: This strategy tries to evict Pods by deleting Pods forcibly. 114 | - `evictionStrategy: None`: This strategy does not evict Pods and it just waits for all Pods to finish. 115 | 116 | #### `nodeDisruptionBudgetSelector` 117 | 118 | By default, a NodeOperation respects all NodeDisruptionBudgets (NDB) but in some cases, some NDBs need to be ignored. (e.g. urgent operations) 119 | If nodeDisruptionBudgetSelector is set, only NDBs whose labels match the nodeDisruptionBudgetSelector will be respected. 120 | 121 | #### `skipWaitingForEviction` 122 | 123 | By default, a NodeOperation waits for all Pods to be drained by the eviction. 124 | If skipWaitingForEviction is true, a NodeOperation skips waiting for the eviction to finish. It means that a NodeOperation ignores Pods that have not been drained.
125 | 126 | ### NodeDisruptionBudget 127 | 128 | ```yaml 129 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 130 | kind: NodeDisruptionBudget 131 | metadata: 132 | name: example 133 | spec: 134 | selector: # nodeSelector for Nodes that this NodeDisruptionBudget affects 135 | nodeLabelKey: nodeLabelValue 136 | maxUnavailable: 1 # optional 137 | minAvailable: 1 # optional 138 | taintTargets: [] # optional 139 | ``` 140 | 141 | #### `maxUnavailable` and `minAvailable` 142 | 143 | - `minAvailable`: minimum number of available Nodes 144 | - `maxUnavailable`: maximum number of unavailable Nodes 145 | 146 | #### `taintTargets` 147 | 148 | By default, this controller treats Nodes with a specific taint as "unavailable". The taint is `nodeops.k8s.preferred.jp/operating=:NoSchedule` and it is added to Nodes while this controller is processing NodeOperations. 149 | In addition to the default taint, Nodes with taints which match `taintTargets` are "unavailable". 150 | 151 | ```yaml 152 | taintTargets: 153 | - key: 'k1' 154 | operator: 'Equal' 155 | value: 'v1' 156 | effect: 'NoSchedule' 157 | ``` 158 | 159 | For instance, if the above `taintTargets` are set, Nodes with `k1=v1:NoSchedule` taint are "unavailable". 160 | 161 | ### NodeRemediation 162 | 163 | A NodeRemediation watches the conditions of a Node and creates a NodeOperation from a NodeOperationTemplate to remediate the condition.
164 | 165 | ```yaml 166 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 167 | kind: NodeOperationTemplate 168 | metadata: 169 | name: optemplate1 170 | spec: 171 | template: 172 | metadata: {} 173 | spec: # NodeOperationSpec 174 | jobTemplate: 175 | metadata: 176 | namespace: default 177 | spec: # batchv1.JobSpec 178 | template: 179 | spec: 180 | containers: 181 | - name: operation 182 | image: busybox 183 | command: ["echo", "Do some operation here"] 184 | restartPolicy: Never 185 | ``` 186 | 187 | ```yaml 188 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 189 | kind: NodeRemediation 190 | metadata: 191 | name: remediation1 192 | spec: 193 | nodeName: node1 194 | nodeOperationTemplateName: 'optemplate1' 195 | rule: 196 | conditions: 197 | - type: PIDPressure 198 | status: "True" 199 | - type: OtherCondition 200 | status: "Unknown" 201 | ``` 202 | 203 | ### NodeRemediationTemplate 204 | 205 | A NodeRemediationTemplate creates a NodeRemediation for each Node filtered by nodeSelector. 206 | 207 | ```yaml 208 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 209 | kind: NodeRemediationTemplate 210 | metadata: 211 | name: remediationtemplate1 212 | spec: 213 | nodeSelector: 214 | 'kubernetes.io/os': 'linux' 215 | template: 216 | spec: 217 | nodeOperationTemplateName: 'optemplate1' 218 | rule: 219 | conditions: 220 | - type: PIDPressure 221 | status: "True" 222 | - type: OtherCondition 223 | status: "Unknown" 224 | ``` 225 | 226 | ## How to Release 227 | 228 | The release process is fully automated by [tagpr](https://github.com/Songmu/tagpr). To release, just merge [the latest release PR](https://github.com/pfnet-research/node-operation-controller/pulls?q=is:pr+is:open+label:tagpr). 229 | -------------------------------------------------------------------------------- /api/v1alpha1/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021.
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the nodeops v1alpha1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=nodeops.k8s.preferred.jp 20 | package v1alpha1 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "nodeops.k8s.preferred.jp", Version: "v1alpha1"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 35 | AddToScheme = SchemeBuilder.AddToScheme 36 | ) 37 | -------------------------------------------------------------------------------- /api/v1alpha1/nodedisruptionbudget_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | corev1 "k8s.io/api/core/v1" 21 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 | ) 23 | 24 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 25 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 26 | 27 | // NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget 28 | type NodeDisruptionBudgetSpec struct { 29 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 30 | // Important: Run "make" to regenerate code after modifying this file 31 | 32 | Selector map[string]string `json:"selector"` 33 | MaxUnavailable *uint64 `json:"maxUnavailable,omitempty"` 34 | MinAvailable *uint64 `json:"minAvailable,omitempty"` 35 | // TaintTargets defines taints by which nodes are determined as unavailable. Default taints added by this controller are implicitly added to TaintTargets. 
36 | TaintTargets []TaintTarget `json:"taintTargets,omitempty"` 37 | } 38 | 39 | type TaintTargetOperator string 40 | 41 | const ( 42 | TaintTargetOpExists TaintTargetOperator = "Exists" 43 | TaintTargetOpEqual TaintTargetOperator = "Equal" 44 | ) 45 | 46 | type TaintTarget struct { 47 | Key string `json:"key,omitempty"` 48 | Operator TaintTargetOperator `json:"operator,omitempty"` 49 | Value string `json:"value,omitempty"` 50 | Effect corev1.TaintEffect `json:"effect,omitempty"` 51 | } 52 | 53 | // NodeDisruptionBudgetStatus defines the observed state of NodeDisruptionBudget 54 | type NodeDisruptionBudgetStatus struct { 55 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 56 | // Important: Run "make" to regenerate code after modifying this file 57 | } 58 | 59 | //+kubebuilder:object:root=true 60 | //+kubebuilder:resource:scope=Cluster 61 | //+kubebuilder:printcolumn:name="MaxUnavailable",type=integer,JSONPath=`.spec.maxUnavailable` 62 | //+kubebuilder:printcolumn:name="MinAvailable",type=integer,JSONPath=`.spec.minAvailable` 63 | 64 | // NodeDisruptionBudget is the Schema for the nodedisruptionbudgets API 65 | type NodeDisruptionBudget struct { 66 | metav1.TypeMeta `json:",inline"` 67 | metav1.ObjectMeta `json:"metadata,omitempty"` 68 | 69 | Spec NodeDisruptionBudgetSpec `json:"spec,omitempty"` 70 | Status NodeDisruptionBudgetStatus `json:"status,omitempty"` 71 | } 72 | 73 | //+kubebuilder:object:root=true 74 | 75 | // NodeDisruptionBudgetList contains a list of NodeDisruptionBudget 76 | type NodeDisruptionBudgetList struct { 77 | metav1.TypeMeta `json:",inline"` 78 | metav1.ListMeta `json:"metadata,omitempty"` 79 | Items []NodeDisruptionBudget `json:"items"` 80 | } 81 | 82 | func init() { 83 | SchemeBuilder.Register(&NodeDisruptionBudget{}, &NodeDisruptionBudgetList{}) 84 | } 85 | 86 | func (t *TaintTarget) IsTarget(taint *corev1.Taint) bool { 87 | if len(t.Effect) > 0 && t.Effect != taint.Effect { 88 | return false 89 | } 90 | 91 | if 
len(t.Key) > 0 && t.Key != taint.Key { 92 | return false 93 | } 94 | 95 | switch t.Operator { 96 | // empty operator means Equal 97 | case "", TaintTargetOpEqual: 98 | return t.Value == taint.Value 99 | case TaintTargetOpExists: 100 | return true 101 | default: 102 | return false 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /api/v1alpha1/nodeoperation_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | batchv1 "k8s.io/api/batch/v1" 21 | corev1 "k8s.io/api/core/v1" 22 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 23 | ) 24 | 25 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 26 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
27 | 28 | type NodeOperationPhase string 29 | type NodeOperationEvictionStrategy string 30 | 31 | const ( 32 | NodeOperationPhasePending NodeOperationPhase = "Pending" 33 | NodeOperationPhaseDraining NodeOperationPhase = "Draining" 34 | NodeOperationPhaseDrained NodeOperationPhase = "Drained" 35 | NodeOperationPhaseJobCreating NodeOperationPhase = "JobCreating" 36 | NodeOperationPhaseRunning NodeOperationPhase = "Running" 37 | NodeOperationPhaseCompleted NodeOperationPhase = "Completed" 38 | NodeOperationPhaseFailed NodeOperationPhase = "Failed" 39 | 40 | NodeOperationEvictionStrategyEvict NodeOperationEvictionStrategy = "Evict" 41 | NodeOperationEvictionStrategyDelete NodeOperationEvictionStrategy = "Delete" 42 | NodeOperationEvictionStrategyForceDelete NodeOperationEvictionStrategy = "ForceDelete" 43 | NodeOperationEvictionStrategyNone NodeOperationEvictionStrategy = "None" 44 | ) 45 | 46 | type NodeOperationSpecTemplate struct { 47 | // EvictionStrategy defines how to evict pods before performing the node operation. 
48 | // The value must be one of Evict, Delete, ForceDelete, None (default=Evict) 49 | // TODO(everpeace): add default markers in the future for CRD 50 | // ref: https://github.com/kubernetes-sigs/controller-tools/issues/250 51 | // +kubebuilder:validation:Enum=Evict;Delete;ForceDelete;None 52 | EvictionStrategy NodeOperationEvictionStrategy `json:"evictionStrategy,omitempty"` 53 | SkipWaitingForEviction bool `json:"skipWaitingForEviction,omitempty"` 54 | NodeDisruptionBudgetSelector map[string]string `json:"nodeDisruptionBudgetSelector,omitempty"` 55 | JobTemplate JobTemplateSpec `json:"jobTemplate"` 56 | } 57 | 58 | // NodeOperationSpec defines the desired state of NodeOperation 59 | type NodeOperationSpec struct { 60 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 61 | // Important: Run "make" to regenerate code after modifying this file 62 | 63 | NodeName string `json:"nodeName"` 64 | NodeOperationSpecTemplate `json:",inline"` 65 | } 66 | 67 | type JobTemplateSpec struct { 68 | Metadata metav1.ObjectMeta `json:"metadata"` 69 | Spec batchv1.JobSpec `json:"spec"` 70 | } 71 | 72 | // NodeOperationStatus defines the observed state of NodeOperation 73 | type NodeOperationStatus struct { 74 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 75 | // Important: Run "make" to regenerate code after modifying this file 76 | Phase NodeOperationPhase `json:"phase"` 77 | Reason string `json:"reason"` 78 | JobNamespace string `json:"jobNamespace"` // Deprecated 79 | JobName string `json:"jobName"` // Deprecated 80 | JobReference corev1.ObjectReference `json:"jobReference,omitempty"` 81 | } 82 | 83 | //+kubebuilder:object:root=true 84 | //+kubebuilder:resource:scope=Cluster 85 | //+kubebuilder:printcolumn:name="NodeName",type=string,JSONPath=`.spec.nodeName` 86 | //+kubebuilder:printcolumn:name="Phase",type=string,JSONPath=`.status.phase` 87 | //+kubebuilder:printcolumn:name="Job 
Namespace",type=string,JSONPath=`.status.jobReference.namespace`,priority=1 88 | //+kubebuilder:printcolumn:name="Job Name",type=string,JSONPath=`.status.jobReference.name`,priority=1 89 | //+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` 90 | 91 | // NodeOperation is the Schema for the nodeoperations API 92 | type NodeOperation struct { 93 | metav1.TypeMeta `json:",inline"` 94 | metav1.ObjectMeta `json:"metadata,omitempty"` 95 | 96 | Spec NodeOperationSpec `json:"spec,omitempty"` 97 | Status NodeOperationStatus `json:"status,omitempty"` 98 | } 99 | 100 | //+kubebuilder:object:root=true 101 | 102 | // NodeOperationList contains a list of NodeOperation 103 | type NodeOperationList struct { 104 | metav1.TypeMeta `json:",inline"` 105 | metav1.ListMeta `json:"metadata,omitempty"` 106 | Items []NodeOperation `json:"items"` 107 | } 108 | 109 | func (o *NodeOperation) NodeRemediationName() string { 110 | for _, owner := range o.OwnerReferences { 111 | if owner.APIVersion == GroupVersion.String() && owner.Kind == "NodeRemediation" { 112 | return owner.Name 113 | } 114 | } 115 | return "" 116 | } 117 | 118 | func init() { 119 | SchemeBuilder.Register(&NodeOperation{}, &NodeOperationList{}) 120 | } 121 | -------------------------------------------------------------------------------- /api/v1alpha1/nodeoperationtemplate_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 24 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 25 | 26 | // NodeOperationTemplateSpec defines the desired state of NodeOperationTemplate 27 | type NodeOperationTemplateSpec struct { 28 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 29 | // Important: Run "make" to regenerate code after modifying this file 30 | 31 | Template NodeOperationTemplateTemplateSpec `json:"template"` 32 | } 33 | 34 | type NodeOperationTemplateTemplateSpec struct { 35 | Metadata metav1.ObjectMeta `json:"metadata"` 36 | Spec NodeOperationSpecTemplate `json:"spec"` 37 | } 38 | 39 | // NodeOperationTemplateStatus defines the observed state of NodeOperationTemplate 40 | type NodeOperationTemplateStatus struct { 41 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 42 | // Important: Run "make" to regenerate code after modifying this file 43 | } 44 | 45 | //+kubebuilder:object:root=true 46 | //+kubebuilder:resource:scope=Cluster 47 | 48 | // NodeOperationTemplate is the Schema for the nodeoperationtemplates API 49 | type NodeOperationTemplate struct { 50 | metav1.TypeMeta `json:",inline"` 51 | metav1.ObjectMeta `json:"metadata,omitempty"` 52 | 53 | Spec NodeOperationTemplateSpec `json:"spec,omitempty"` 54 | Status NodeOperationTemplateStatus `json:"status,omitempty"` 55 | } 56 | 57 | //+kubebuilder:object:root=true 58 | 59 | // NodeOperationTemplateList contains a list of NodeOperationTemplate 60 | type NodeOperationTemplateList struct { 61 | metav1.TypeMeta `json:",inline"` 62 | metav1.ListMeta `json:"metadata,omitempty"` 63 | Items []NodeOperationTemplate `json:"items"` 64 | } 65 | 66 | func init() { 67 | 
SchemeBuilder.Register(&NodeOperationTemplate{}, &NodeOperationTemplateList{}) 68 | } 69 | -------------------------------------------------------------------------------- /api/v1alpha1/noderemediation_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | corev1 "k8s.io/api/core/v1" 21 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 22 | ) 23 | 24 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 25 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 
26 | 27 | type NodeRemediationSpecTemplate struct { 28 | Rule NodeRemediationRule `json:"rule"` 29 | NodeOperationTemplateName string `json:"nodeOperationTemplateName"` 30 | } 31 | 32 | // NodeRemediationSpec defines the desired state of NodeRemediation 33 | type NodeRemediationSpec struct { 34 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 35 | // Important: Run "make" to regenerate code after modifying this file 36 | 37 | NodeRemediationSpecTemplate `json:",inline"` 38 | NodeName string `json:"nodeName"` 39 | } 40 | 41 | type NodeRemediationRule struct { 42 | Conditions []NodeConditionMatcher `json:"conditions"` 43 | } 44 | 45 | type NodeConditionMatcher struct { 46 | Type corev1.NodeConditionType `json:"type"` 47 | Status corev1.ConditionStatus `json:"status"` 48 | } 49 | 50 | type NodeStatus string 51 | 52 | const ( 53 | NodeStatusUnknown NodeStatus = "" 54 | NodeStatusOK NodeStatus = "OK" 55 | NodeStatusBad NodeStatus = "Bad" 56 | ) 57 | 58 | // NodeRemediationStatus defines the observed state of NodeRemediation 59 | type NodeRemediationStatus struct { 60 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 61 | // Important: Run "make" to regenerate code after modifying this file 62 | ActiveNodeOperation corev1.ObjectReference `json:"activeNodeOperation,omitempty"` 63 | // OperationsCount is num of NodeOperations executed by the NodeRemediation. Once the Node is remediated, this count will be reset to 0. 64 | OperationsCount int64 `json:"operationsCount"` 65 | // NodeStatus represents whether Node should be remediated or not. 
66 | NodeStatus NodeStatus `json:"nodeStatus"` 67 | } 68 | 69 | //+kubebuilder:object:root=true 70 | //+kubebuilder:resource:scope=Cluster 71 | //+kubebuilder:subresource:status 72 | 73 | // NodeRemediation is the Schema for the noderemediations API 74 | type NodeRemediation struct { 75 | metav1.TypeMeta `json:",inline"` 76 | metav1.ObjectMeta `json:"metadata,omitempty"` 77 | 78 | Spec NodeRemediationSpec `json:"spec,omitempty"` 79 | Status NodeRemediationStatus `json:"status,omitempty"` 80 | } 81 | 82 | //+kubebuilder:object:root=true 83 | 84 | // NodeRemediationList contains a list of NodeRemediation 85 | type NodeRemediationList struct { 86 | metav1.TypeMeta `json:",inline"` 87 | metav1.ListMeta `json:"metadata,omitempty"` 88 | Items []NodeRemediation `json:"items"` 89 | } 90 | 91 | func init() { 92 | SchemeBuilder.Register(&NodeRemediation{}, &NodeRemediationList{}) 93 | } 94 | 95 | func (r *NodeRemediation) CompareNodeCondition(conditions []corev1.NodeCondition) NodeStatus { 96 | matchersLoop: 97 | for _, matcher := range r.Spec.Rule.Conditions { 98 | for _, cond := range conditions { 99 | if cond.Type == matcher.Type { 100 | switch cond.Status { 101 | case matcher.Status: 102 | continue matchersLoop 103 | case corev1.ConditionUnknown: 104 | return NodeStatusUnknown 105 | } 106 | } 107 | } 108 | return NodeStatusOK 109 | } 110 | return NodeStatusBad 111 | } 112 | -------------------------------------------------------------------------------- /api/v1alpha1/noderemediationtemplate_types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package v1alpha1 18 | 19 | import ( 20 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 21 | ) 22 | 23 | // EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! 24 | // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. 25 | 26 | // NodeRemediationTemplateSpec defines the desired state of NodeRemediationTemplate 27 | type NodeRemediationTemplateSpec struct { 28 | // INSERT ADDITIONAL SPEC FIELDS - desired state of cluster 29 | // Important: Run "make" to regenerate code after modifying this file 30 | 31 | NodeSelector map[string]string `json:"nodeSelector"` 32 | Template NodeRemediationTemplateTemplateSpec `json:"template"` 33 | } 34 | 35 | type NodeRemediationTemplateTemplateSpec struct { 36 | Metadata metav1.ObjectMeta `json:"metadata,omitempty"` 37 | Spec NodeRemediationSpecTemplate `json:"spec"` 38 | } 39 | 40 | // NodeRemediationTemplateStatus defines the observed state of NodeRemediationTemplate 41 | type NodeRemediationTemplateStatus struct { 42 | // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 43 | // Important: Run "make" to regenerate code after modifying this file 44 | } 45 | 46 | //+kubebuilder:object:root=true 47 | //+kubebuilder:resource:scope=Cluster 48 | 49 | // NodeRemediationTemplate is the Schema for the noderemediationtemplates API 50 | type NodeRemediationTemplate struct { 51 | metav1.TypeMeta `json:",inline"` 52 | metav1.ObjectMeta `json:"metadata,omitempty"` 53 | 54 | Spec NodeRemediationTemplateSpec `json:"spec,omitempty"` 55 | 
Status NodeRemediationTemplateStatus `json:"status,omitempty"` 56 | } 57 | 58 | //+kubebuilder:object:root=true 59 | 60 | // NodeRemediationTemplateList contains a list of NodeRemediationTemplate 61 | type NodeRemediationTemplateList struct { 62 | metav1.TypeMeta `json:",inline"` 63 | metav1.ListMeta `json:"metadata,omitempty"` 64 | Items []NodeRemediationTemplate `json:"items"` 65 | } 66 | 67 | func init() { 68 | SchemeBuilder.Register(&NodeRemediationTemplate{}, &NodeRemediationTemplateList{}) 69 | } 70 | -------------------------------------------------------------------------------- /api/v1alpha1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- 1 | //go:build !ignore_autogenerated 2 | 3 | /* 4 | Copyright 2021. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | // Code generated by controller-gen. DO NOT EDIT. 20 | 21 | package v1alpha1 22 | 23 | import ( 24 | runtime "k8s.io/apimachinery/pkg/runtime" 25 | ) 26 | 27 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 28 | func (in *JobTemplateSpec) DeepCopyInto(out *JobTemplateSpec) { 29 | *out = *in 30 | in.Metadata.DeepCopyInto(&out.Metadata) 31 | in.Spec.DeepCopyInto(&out.Spec) 32 | } 33 | 34 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new JobTemplateSpec. 
35 | func (in *JobTemplateSpec) DeepCopy() *JobTemplateSpec { 36 | if in == nil { 37 | return nil 38 | } 39 | out := new(JobTemplateSpec) 40 | in.DeepCopyInto(out) 41 | return out 42 | } 43 | 44 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 45 | func (in *NodeConditionMatcher) DeepCopyInto(out *NodeConditionMatcher) { 46 | *out = *in 47 | } 48 | 49 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeConditionMatcher. 50 | func (in *NodeConditionMatcher) DeepCopy() *NodeConditionMatcher { 51 | if in == nil { 52 | return nil 53 | } 54 | out := new(NodeConditionMatcher) 55 | in.DeepCopyInto(out) 56 | return out 57 | } 58 | 59 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 60 | func (in *NodeDisruptionBudget) DeepCopyInto(out *NodeDisruptionBudget) { 61 | *out = *in 62 | out.TypeMeta = in.TypeMeta 63 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 64 | in.Spec.DeepCopyInto(&out.Spec) 65 | out.Status = in.Status 66 | } 67 | 68 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDisruptionBudget. 69 | func (in *NodeDisruptionBudget) DeepCopy() *NodeDisruptionBudget { 70 | if in == nil { 71 | return nil 72 | } 73 | out := new(NodeDisruptionBudget) 74 | in.DeepCopyInto(out) 75 | return out 76 | } 77 | 78 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 79 | func (in *NodeDisruptionBudget) DeepCopyObject() runtime.Object { 80 | if c := in.DeepCopy(); c != nil { 81 | return c 82 | } 83 | return nil 84 | } 85 | 86 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
87 | func (in *NodeDisruptionBudgetList) DeepCopyInto(out *NodeDisruptionBudgetList) { 88 | *out = *in 89 | out.TypeMeta = in.TypeMeta 90 | in.ListMeta.DeepCopyInto(&out.ListMeta) 91 | if in.Items != nil { 92 | in, out := &in.Items, &out.Items 93 | *out = make([]NodeDisruptionBudget, len(*in)) 94 | for i := range *in { 95 | (*in)[i].DeepCopyInto(&(*out)[i]) 96 | } 97 | } 98 | } 99 | 100 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDisruptionBudgetList. 101 | func (in *NodeDisruptionBudgetList) DeepCopy() *NodeDisruptionBudgetList { 102 | if in == nil { 103 | return nil 104 | } 105 | out := new(NodeDisruptionBudgetList) 106 | in.DeepCopyInto(out) 107 | return out 108 | } 109 | 110 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 111 | func (in *NodeDisruptionBudgetList) DeepCopyObject() runtime.Object { 112 | if c := in.DeepCopy(); c != nil { 113 | return c 114 | } 115 | return nil 116 | } 117 | 118 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
119 | func (in *NodeDisruptionBudgetSpec) DeepCopyInto(out *NodeDisruptionBudgetSpec) { 120 | *out = *in 121 | if in.Selector != nil { 122 | in, out := &in.Selector, &out.Selector 123 | *out = make(map[string]string, len(*in)) 124 | for key, val := range *in { 125 | (*out)[key] = val 126 | } 127 | } 128 | if in.MaxUnavailable != nil { 129 | in, out := &in.MaxUnavailable, &out.MaxUnavailable 130 | *out = new(uint64) 131 | **out = **in 132 | } 133 | if in.MinAvailable != nil { 134 | in, out := &in.MinAvailable, &out.MinAvailable 135 | *out = new(uint64) 136 | **out = **in 137 | } 138 | if in.TaintTargets != nil { 139 | in, out := &in.TaintTargets, &out.TaintTargets 140 | *out = make([]TaintTarget, len(*in)) 141 | copy(*out, *in) 142 | } 143 | } 144 | 145 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDisruptionBudgetSpec. 146 | func (in *NodeDisruptionBudgetSpec) DeepCopy() *NodeDisruptionBudgetSpec { 147 | if in == nil { 148 | return nil 149 | } 150 | out := new(NodeDisruptionBudgetSpec) 151 | in.DeepCopyInto(out) 152 | return out 153 | } 154 | 155 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 156 | func (in *NodeDisruptionBudgetStatus) DeepCopyInto(out *NodeDisruptionBudgetStatus) { 157 | *out = *in 158 | } 159 | 160 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeDisruptionBudgetStatus. 161 | func (in *NodeDisruptionBudgetStatus) DeepCopy() *NodeDisruptionBudgetStatus { 162 | if in == nil { 163 | return nil 164 | } 165 | out := new(NodeDisruptionBudgetStatus) 166 | in.DeepCopyInto(out) 167 | return out 168 | } 169 | 170 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
171 | func (in *NodeOperation) DeepCopyInto(out *NodeOperation) { 172 | *out = *in 173 | out.TypeMeta = in.TypeMeta 174 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 175 | in.Spec.DeepCopyInto(&out.Spec) 176 | out.Status = in.Status 177 | } 178 | 179 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperation. 180 | func (in *NodeOperation) DeepCopy() *NodeOperation { 181 | if in == nil { 182 | return nil 183 | } 184 | out := new(NodeOperation) 185 | in.DeepCopyInto(out) 186 | return out 187 | } 188 | 189 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 190 | func (in *NodeOperation) DeepCopyObject() runtime.Object { 191 | if c := in.DeepCopy(); c != nil { 192 | return c 193 | } 194 | return nil 195 | } 196 | 197 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 198 | func (in *NodeOperationList) DeepCopyInto(out *NodeOperationList) { 199 | *out = *in 200 | out.TypeMeta = in.TypeMeta 201 | in.ListMeta.DeepCopyInto(&out.ListMeta) 202 | if in.Items != nil { 203 | in, out := &in.Items, &out.Items 204 | *out = make([]NodeOperation, len(*in)) 205 | for i := range *in { 206 | (*in)[i].DeepCopyInto(&(*out)[i]) 207 | } 208 | } 209 | } 210 | 211 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationList. 212 | func (in *NodeOperationList) DeepCopy() *NodeOperationList { 213 | if in == nil { 214 | return nil 215 | } 216 | out := new(NodeOperationList) 217 | in.DeepCopyInto(out) 218 | return out 219 | } 220 | 221 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
222 | func (in *NodeOperationList) DeepCopyObject() runtime.Object { 223 | if c := in.DeepCopy(); c != nil { 224 | return c 225 | } 226 | return nil 227 | } 228 | 229 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 230 | func (in *NodeOperationSpec) DeepCopyInto(out *NodeOperationSpec) { 231 | *out = *in 232 | in.NodeOperationSpecTemplate.DeepCopyInto(&out.NodeOperationSpecTemplate) 233 | } 234 | 235 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationSpec. 236 | func (in *NodeOperationSpec) DeepCopy() *NodeOperationSpec { 237 | if in == nil { 238 | return nil 239 | } 240 | out := new(NodeOperationSpec) 241 | in.DeepCopyInto(out) 242 | return out 243 | } 244 | 245 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 246 | func (in *NodeOperationSpecTemplate) DeepCopyInto(out *NodeOperationSpecTemplate) { 247 | *out = *in 248 | if in.NodeDisruptionBudgetSelector != nil { 249 | in, out := &in.NodeDisruptionBudgetSelector, &out.NodeDisruptionBudgetSelector 250 | *out = make(map[string]string, len(*in)) 251 | for key, val := range *in { 252 | (*out)[key] = val 253 | } 254 | } 255 | in.JobTemplate.DeepCopyInto(&out.JobTemplate) 256 | } 257 | 258 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationSpecTemplate. 259 | func (in *NodeOperationSpecTemplate) DeepCopy() *NodeOperationSpecTemplate { 260 | if in == nil { 261 | return nil 262 | } 263 | out := new(NodeOperationSpecTemplate) 264 | in.DeepCopyInto(out) 265 | return out 266 | } 267 | 268 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
269 | func (in *NodeOperationStatus) DeepCopyInto(out *NodeOperationStatus) { 270 | *out = *in 271 | out.JobReference = in.JobReference 272 | } 273 | 274 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationStatus. 275 | func (in *NodeOperationStatus) DeepCopy() *NodeOperationStatus { 276 | if in == nil { 277 | return nil 278 | } 279 | out := new(NodeOperationStatus) 280 | in.DeepCopyInto(out) 281 | return out 282 | } 283 | 284 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 285 | func (in *NodeOperationTemplate) DeepCopyInto(out *NodeOperationTemplate) { 286 | *out = *in 287 | out.TypeMeta = in.TypeMeta 288 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 289 | in.Spec.DeepCopyInto(&out.Spec) 290 | out.Status = in.Status 291 | } 292 | 293 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationTemplate. 294 | func (in *NodeOperationTemplate) DeepCopy() *NodeOperationTemplate { 295 | if in == nil { 296 | return nil 297 | } 298 | out := new(NodeOperationTemplate) 299 | in.DeepCopyInto(out) 300 | return out 301 | } 302 | 303 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 304 | func (in *NodeOperationTemplate) DeepCopyObject() runtime.Object { 305 | if c := in.DeepCopy(); c != nil { 306 | return c 307 | } 308 | return nil 309 | } 310 | 311 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
312 | func (in *NodeOperationTemplateList) DeepCopyInto(out *NodeOperationTemplateList) { 313 | *out = *in 314 | out.TypeMeta = in.TypeMeta 315 | in.ListMeta.DeepCopyInto(&out.ListMeta) 316 | if in.Items != nil { 317 | in, out := &in.Items, &out.Items 318 | *out = make([]NodeOperationTemplate, len(*in)) 319 | for i := range *in { 320 | (*in)[i].DeepCopyInto(&(*out)[i]) 321 | } 322 | } 323 | } 324 | 325 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationTemplateList. 326 | func (in *NodeOperationTemplateList) DeepCopy() *NodeOperationTemplateList { 327 | if in == nil { 328 | return nil 329 | } 330 | out := new(NodeOperationTemplateList) 331 | in.DeepCopyInto(out) 332 | return out 333 | } 334 | 335 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 336 | func (in *NodeOperationTemplateList) DeepCopyObject() runtime.Object { 337 | if c := in.DeepCopy(); c != nil { 338 | return c 339 | } 340 | return nil 341 | } 342 | 343 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 344 | func (in *NodeOperationTemplateSpec) DeepCopyInto(out *NodeOperationTemplateSpec) { 345 | *out = *in 346 | in.Template.DeepCopyInto(&out.Template) 347 | } 348 | 349 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationTemplateSpec. 350 | func (in *NodeOperationTemplateSpec) DeepCopy() *NodeOperationTemplateSpec { 351 | if in == nil { 352 | return nil 353 | } 354 | out := new(NodeOperationTemplateSpec) 355 | in.DeepCopyInto(out) 356 | return out 357 | } 358 | 359 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
360 | func (in *NodeOperationTemplateStatus) DeepCopyInto(out *NodeOperationTemplateStatus) { 361 | *out = *in 362 | } 363 | 364 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationTemplateStatus. 365 | func (in *NodeOperationTemplateStatus) DeepCopy() *NodeOperationTemplateStatus { 366 | if in == nil { 367 | return nil 368 | } 369 | out := new(NodeOperationTemplateStatus) 370 | in.DeepCopyInto(out) 371 | return out 372 | } 373 | 374 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 375 | func (in *NodeOperationTemplateTemplateSpec) DeepCopyInto(out *NodeOperationTemplateTemplateSpec) { 376 | *out = *in 377 | in.Metadata.DeepCopyInto(&out.Metadata) 378 | in.Spec.DeepCopyInto(&out.Spec) 379 | } 380 | 381 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeOperationTemplateTemplateSpec. 382 | func (in *NodeOperationTemplateTemplateSpec) DeepCopy() *NodeOperationTemplateTemplateSpec { 383 | if in == nil { 384 | return nil 385 | } 386 | out := new(NodeOperationTemplateTemplateSpec) 387 | in.DeepCopyInto(out) 388 | return out 389 | } 390 | 391 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 392 | func (in *NodeRemediation) DeepCopyInto(out *NodeRemediation) { 393 | *out = *in 394 | out.TypeMeta = in.TypeMeta 395 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 396 | in.Spec.DeepCopyInto(&out.Spec) 397 | out.Status = in.Status 398 | } 399 | 400 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediation. 
401 | func (in *NodeRemediation) DeepCopy() *NodeRemediation { 402 | if in == nil { 403 | return nil 404 | } 405 | out := new(NodeRemediation) 406 | in.DeepCopyInto(out) 407 | return out 408 | } 409 | 410 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 411 | func (in *NodeRemediation) DeepCopyObject() runtime.Object { 412 | if c := in.DeepCopy(); c != nil { 413 | return c 414 | } 415 | return nil 416 | } 417 | 418 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 419 | func (in *NodeRemediationList) DeepCopyInto(out *NodeRemediationList) { 420 | *out = *in 421 | out.TypeMeta = in.TypeMeta 422 | in.ListMeta.DeepCopyInto(&out.ListMeta) 423 | if in.Items != nil { 424 | in, out := &in.Items, &out.Items 425 | *out = make([]NodeRemediation, len(*in)) 426 | for i := range *in { 427 | (*in)[i].DeepCopyInto(&(*out)[i]) 428 | } 429 | } 430 | } 431 | 432 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationList. 433 | func (in *NodeRemediationList) DeepCopy() *NodeRemediationList { 434 | if in == nil { 435 | return nil 436 | } 437 | out := new(NodeRemediationList) 438 | in.DeepCopyInto(out) 439 | return out 440 | } 441 | 442 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 443 | func (in *NodeRemediationList) DeepCopyObject() runtime.Object { 444 | if c := in.DeepCopy(); c != nil { 445 | return c 446 | } 447 | return nil 448 | } 449 | 450 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
451 | func (in *NodeRemediationRule) DeepCopyInto(out *NodeRemediationRule) { 452 | *out = *in 453 | if in.Conditions != nil { 454 | in, out := &in.Conditions, &out.Conditions 455 | *out = make([]NodeConditionMatcher, len(*in)) 456 | copy(*out, *in) 457 | } 458 | } 459 | 460 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationRule. 461 | func (in *NodeRemediationRule) DeepCopy() *NodeRemediationRule { 462 | if in == nil { 463 | return nil 464 | } 465 | out := new(NodeRemediationRule) 466 | in.DeepCopyInto(out) 467 | return out 468 | } 469 | 470 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 471 | func (in *NodeRemediationSpec) DeepCopyInto(out *NodeRemediationSpec) { 472 | *out = *in 473 | in.NodeRemediationSpecTemplate.DeepCopyInto(&out.NodeRemediationSpecTemplate) 474 | } 475 | 476 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationSpec. 477 | func (in *NodeRemediationSpec) DeepCopy() *NodeRemediationSpec { 478 | if in == nil { 479 | return nil 480 | } 481 | out := new(NodeRemediationSpec) 482 | in.DeepCopyInto(out) 483 | return out 484 | } 485 | 486 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 487 | func (in *NodeRemediationSpecTemplate) DeepCopyInto(out *NodeRemediationSpecTemplate) { 488 | *out = *in 489 | in.Rule.DeepCopyInto(&out.Rule) 490 | } 491 | 492 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationSpecTemplate. 493 | func (in *NodeRemediationSpecTemplate) DeepCopy() *NodeRemediationSpecTemplate { 494 | if in == nil { 495 | return nil 496 | } 497 | out := new(NodeRemediationSpecTemplate) 498 | in.DeepCopyInto(out) 499 | return out 500 | } 501 | 502 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. 
in must be non-nil. 503 | func (in *NodeRemediationStatus) DeepCopyInto(out *NodeRemediationStatus) { 504 | *out = *in 505 | out.ActiveNodeOperation = in.ActiveNodeOperation 506 | } 507 | 508 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationStatus. 509 | func (in *NodeRemediationStatus) DeepCopy() *NodeRemediationStatus { 510 | if in == nil { 511 | return nil 512 | } 513 | out := new(NodeRemediationStatus) 514 | in.DeepCopyInto(out) 515 | return out 516 | } 517 | 518 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 519 | func (in *NodeRemediationTemplate) DeepCopyInto(out *NodeRemediationTemplate) { 520 | *out = *in 521 | out.TypeMeta = in.TypeMeta 522 | in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) 523 | in.Spec.DeepCopyInto(&out.Spec) 524 | out.Status = in.Status 525 | } 526 | 527 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationTemplate. 528 | func (in *NodeRemediationTemplate) DeepCopy() *NodeRemediationTemplate { 529 | if in == nil { 530 | return nil 531 | } 532 | out := new(NodeRemediationTemplate) 533 | in.DeepCopyInto(out) 534 | return out 535 | } 536 | 537 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 538 | func (in *NodeRemediationTemplate) DeepCopyObject() runtime.Object { 539 | if c := in.DeepCopy(); c != nil { 540 | return c 541 | } 542 | return nil 543 | } 544 | 545 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
546 | func (in *NodeRemediationTemplateList) DeepCopyInto(out *NodeRemediationTemplateList) { 547 | *out = *in 548 | out.TypeMeta = in.TypeMeta 549 | in.ListMeta.DeepCopyInto(&out.ListMeta) 550 | if in.Items != nil { 551 | in, out := &in.Items, &out.Items 552 | *out = make([]NodeRemediationTemplate, len(*in)) 553 | for i := range *in { 554 | (*in)[i].DeepCopyInto(&(*out)[i]) 555 | } 556 | } 557 | } 558 | 559 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationTemplateList. 560 | func (in *NodeRemediationTemplateList) DeepCopy() *NodeRemediationTemplateList { 561 | if in == nil { 562 | return nil 563 | } 564 | out := new(NodeRemediationTemplateList) 565 | in.DeepCopyInto(out) 566 | return out 567 | } 568 | 569 | // DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 570 | func (in *NodeRemediationTemplateList) DeepCopyObject() runtime.Object { 571 | if c := in.DeepCopy(); c != nil { 572 | return c 573 | } 574 | return nil 575 | } 576 | 577 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 578 | func (in *NodeRemediationTemplateSpec) DeepCopyInto(out *NodeRemediationTemplateSpec) { 579 | *out = *in 580 | if in.NodeSelector != nil { 581 | in, out := &in.NodeSelector, &out.NodeSelector 582 | *out = make(map[string]string, len(*in)) 583 | for key, val := range *in { 584 | (*out)[key] = val 585 | } 586 | } 587 | in.Template.DeepCopyInto(&out.Template) 588 | } 589 | 590 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationTemplateSpec. 
591 | func (in *NodeRemediationTemplateSpec) DeepCopy() *NodeRemediationTemplateSpec { 592 | if in == nil { 593 | return nil 594 | } 595 | out := new(NodeRemediationTemplateSpec) 596 | in.DeepCopyInto(out) 597 | return out 598 | } 599 | 600 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 601 | func (in *NodeRemediationTemplateStatus) DeepCopyInto(out *NodeRemediationTemplateStatus) { 602 | *out = *in 603 | } 604 | 605 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationTemplateStatus. 606 | func (in *NodeRemediationTemplateStatus) DeepCopy() *NodeRemediationTemplateStatus { 607 | if in == nil { 608 | return nil 609 | } 610 | out := new(NodeRemediationTemplateStatus) 611 | in.DeepCopyInto(out) 612 | return out 613 | } 614 | 615 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 616 | func (in *NodeRemediationTemplateTemplateSpec) DeepCopyInto(out *NodeRemediationTemplateTemplateSpec) { 617 | *out = *in 618 | in.Metadata.DeepCopyInto(&out.Metadata) 619 | in.Spec.DeepCopyInto(&out.Spec) 620 | } 621 | 622 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeRemediationTemplateTemplateSpec. 623 | func (in *NodeRemediationTemplateTemplateSpec) DeepCopy() *NodeRemediationTemplateTemplateSpec { 624 | if in == nil { 625 | return nil 626 | } 627 | out := new(NodeRemediationTemplateTemplateSpec) 628 | in.DeepCopyInto(out) 629 | return out 630 | } 631 | 632 | // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 633 | func (in *TaintTarget) DeepCopyInto(out *TaintTarget) { 634 | *out = *in 635 | } 636 | 637 | // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TaintTarget. 
638 | func (in *TaintTarget) DeepCopy() *TaintTarget { 639 | if in == nil { 640 | return nil 641 | } 642 | out := new(TaintTarget) 643 | in.DeepCopyInto(out) 644 | return out 645 | } 646 | -------------------------------------------------------------------------------- /cmd/kube-node-status/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "flag" 7 | "os" 8 | "strings" 9 | 10 | corev1 "k8s.io/api/core/v1" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | "k8s.io/client-go/kubernetes" 13 | _ "k8s.io/client-go/plugin/pkg/client/auth" 14 | "k8s.io/client-go/tools/clientcmd" 15 | ) 16 | 17 | func main() { 18 | ctx := context.Background() 19 | nodename := flag.String("nodename", "", "") 20 | addConditionsFlag := flag.String("add-conditions", "[]", "") 21 | removeConditionsFlag := flag.String("remove-condition-types", "", "comma separated") 22 | flag.Parse() 23 | 24 | // use the current context in kubeconfig 25 | config, err := clientcmd.BuildConfigFromFlags("", os.Getenv("KUBECONFIG")) 26 | if err != nil { 27 | panic(err.Error()) 28 | } 29 | 30 | // create the clientset 31 | clientset, err := kubernetes.NewForConfig(config) 32 | if err != nil { 33 | panic(err.Error()) 34 | } 35 | 36 | node, err := clientset.CoreV1().Nodes().Get(ctx, *nodename, metav1.GetOptions{}) 37 | if err != nil { 38 | panic(err.Error()) 39 | } 40 | 41 | var addConditions []corev1.NodeCondition 42 | if err := json.Unmarshal([]byte(*addConditionsFlag), &addConditions); err != nil { 43 | panic(err.Error()) 44 | } 45 | 46 | node.Status.Conditions = append(node.Status.Conditions, addConditions...) 
47 | node.Status.Conditions = removeConditions(node.Status.Conditions, strings.Split(*removeConditionsFlag, ",")) 48 | 49 | if _, err := clientset.CoreV1().Nodes().UpdateStatus(ctx, node, metav1.UpdateOptions{}); err != nil { 50 | panic(err.Error()) 51 | } 52 | } 53 | 54 | func removeConditions(conditions []corev1.NodeCondition, strtypes []string) []corev1.NodeCondition { 55 | types := make([]corev1.NodeConditionType, len(strtypes)) 56 | for i, t := range strtypes { 57 | types[i] = corev1.NodeConditionType(t) 58 | } 59 | 60 | var filtered []corev1.NodeCondition 61 | nextCondition: 62 | for _, c := range conditions { 63 | for _, t := range types { 64 | if c.Type == t { 65 | continue nextCondition 66 | } 67 | } 68 | filtered = append(filtered, c) 69 | } 70 | return filtered 71 | } 72 | -------------------------------------------------------------------------------- /config/crd/bases/nodeops.k8s.preferred.jp_nodedisruptionbudgets.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | controller-gen.kubebuilder.io/version: v0.17.2 7 | name: nodedisruptionbudgets.nodeops.k8s.preferred.jp 8 | spec: 9 | group: nodeops.k8s.preferred.jp 10 | names: 11 | kind: NodeDisruptionBudget 12 | listKind: NodeDisruptionBudgetList 13 | plural: nodedisruptionbudgets 14 | singular: nodedisruptionbudget 15 | scope: Cluster 16 | versions: 17 | - additionalPrinterColumns: 18 | - jsonPath: .spec.maxUnavailable 19 | name: MaxUnavailable 20 | type: integer 21 | - jsonPath: .spec.minAvailable 22 | name: MinAvailable 23 | type: integer 24 | name: v1alpha1 25 | schema: 26 | openAPIV3Schema: 27 | description: NodeDisruptionBudget is the Schema for the nodedisruptionbudgets 28 | API 29 | properties: 30 | apiVersion: 31 | description: |- 32 | APIVersion defines the versioned schema of this representation of an object. 
33 | Servers should convert recognized schemas to the latest internal value, and 34 | may reject unrecognized values. 35 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources 36 | type: string 37 | kind: 38 | description: |- 39 | Kind is a string value representing the REST resource this object represents. 40 | Servers may infer this from the endpoint the client submits requests to. 41 | Cannot be updated. 42 | In CamelCase. 43 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds 44 | type: string 45 | metadata: 46 | type: object 47 | spec: 48 | description: NodeDisruptionBudgetSpec defines the desired state of NodeDisruptionBudget 49 | properties: 50 | maxUnavailable: 51 | format: int64 52 | type: integer 53 | minAvailable: 54 | format: int64 55 | type: integer 56 | selector: 57 | additionalProperties: 58 | type: string 59 | type: object 60 | taintTargets: 61 | description: TaintTargets defines taints by which nodes are determined 62 | as unavailable. Default taints added by this controller are implicitly 63 | added to TaintTargets. 
64 | items: 65 | properties: 66 | effect: 67 | type: string 68 | key: 69 | type: string 70 | operator: 71 | type: string 72 | value: 73 | type: string 74 | type: object 75 | type: array 76 | required: 77 | - selector 78 | type: object 79 | status: 80 | description: NodeDisruptionBudgetStatus defines the observed state of 81 | NodeDisruptionBudget 82 | type: object 83 | type: object 84 | served: true 85 | storage: true 86 | subresources: {} 87 | -------------------------------------------------------------------------------- /config/crd/bases/nodeops.k8s.preferred.jp_noderemediations.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | controller-gen.kubebuilder.io/version: v0.17.2 7 | name: noderemediations.nodeops.k8s.preferred.jp 8 | spec: 9 | group: nodeops.k8s.preferred.jp 10 | names: 11 | kind: NodeRemediation 12 | listKind: NodeRemediationList 13 | plural: noderemediations 14 | singular: noderemediation 15 | scope: Cluster 16 | versions: 17 | - name: v1alpha1 18 | schema: 19 | openAPIV3Schema: 20 | description: NodeRemediation is the Schema for the noderemediations API 21 | properties: 22 | apiVersion: 23 | description: |- 24 | APIVersion defines the versioned schema of this representation of an object. 25 | Servers should convert recognized schemas to the latest internal value, and 26 | may reject unrecognized values. 27 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources 28 | type: string 29 | kind: 30 | description: |- 31 | Kind is a string value representing the REST resource this object represents. 32 | Servers may infer this from the endpoint the client submits requests to. 33 | Cannot be updated. 34 | In CamelCase. 
35 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds 36 | type: string 37 | metadata: 38 | type: object 39 | spec: 40 | description: NodeRemediationSpec defines the desired state of NodeRemediation 41 | properties: 42 | nodeName: 43 | type: string 44 | nodeOperationTemplateName: 45 | type: string 46 | rule: 47 | properties: 48 | conditions: 49 | items: 50 | properties: 51 | status: 52 | type: string 53 | type: 54 | type: string 55 | required: 56 | - status 57 | - type 58 | type: object 59 | type: array 60 | required: 61 | - conditions 62 | type: object 63 | required: 64 | - nodeName 65 | - nodeOperationTemplateName 66 | - rule 67 | type: object 68 | status: 69 | description: NodeRemediationStatus defines the observed state of NodeRemediation 70 | properties: 71 | activeNodeOperation: 72 | description: |- 73 | INSERT ADDITIONAL STATUS FIELD - define observed state of cluster 74 | Important: Run "make" to regenerate code after modifying this file 75 | properties: 76 | apiVersion: 77 | description: API version of the referent. 78 | type: string 79 | fieldPath: 80 | description: |- 81 | If referring to a piece of an object instead of an entire object, this string 82 | should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 83 | For example, if the object reference is to a container within a pod, this would take on a value like: 84 | "spec.containers{name}" (where "name" refers to the name of the container that triggered 85 | the event) or if no container name is specified "spec.containers[2]" (container with 86 | index 2 in this pod). This syntax is chosen only to have some well-defined way of 87 | referencing a part of an object. 88 | type: string 89 | kind: 90 | description: |- 91 | Kind of the referent. 
92 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds 93 | type: string 94 | name: 95 | description: |- 96 | Name of the referent. 97 | More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names 98 | type: string 99 | namespace: 100 | description: |- 101 | Namespace of the referent. 102 | More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ 103 | type: string 104 | resourceVersion: 105 | description: |- 106 | Specific resourceVersion to which this reference is made, if any. 107 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency 108 | type: string 109 | uid: 110 | description: |- 111 | UID of the referent. 112 | More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids 113 | type: string 114 | type: object 115 | x-kubernetes-map-type: atomic 116 | nodeStatus: 117 | description: NodeStatus represents whether Node should be remediated 118 | or not. 119 | type: string 120 | operationsCount: 121 | description: OperationsCount is num of NodeOperations executed by 122 | the NodeRemediation. Once the Node is remediated, this count will 123 | be reset to 0. 
124 | format: int64 125 | type: integer 126 | required: 127 | - nodeStatus 128 | - operationsCount 129 | type: object 130 | type: object 131 | served: true 132 | storage: true 133 | subresources: 134 | status: {} 135 | -------------------------------------------------------------------------------- /config/crd/bases/nodeops.k8s.preferred.jp_noderemediationtemplates.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | controller-gen.kubebuilder.io/version: v0.17.2 7 | name: noderemediationtemplates.nodeops.k8s.preferred.jp 8 | spec: 9 | group: nodeops.k8s.preferred.jp 10 | names: 11 | kind: NodeRemediationTemplate 12 | listKind: NodeRemediationTemplateList 13 | plural: noderemediationtemplates 14 | singular: noderemediationtemplate 15 | scope: Cluster 16 | versions: 17 | - name: v1alpha1 18 | schema: 19 | openAPIV3Schema: 20 | description: NodeRemediationTemplate is the Schema for the noderemediationtemplates 21 | API 22 | properties: 23 | apiVersion: 24 | description: |- 25 | APIVersion defines the versioned schema of this representation of an object. 26 | Servers should convert recognized schemas to the latest internal value, and 27 | may reject unrecognized values. 28 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources 29 | type: string 30 | kind: 31 | description: |- 32 | Kind is a string value representing the REST resource this object represents. 33 | Servers may infer this from the endpoint the client submits requests to. 34 | Cannot be updated. 35 | In CamelCase. 
36 | More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds 37 | type: string 38 | metadata: 39 | type: object 40 | spec: 41 | description: NodeRemediationTemplateSpec defines the desired state of 42 | NodeRemediationTemplate 43 | properties: 44 | nodeSelector: 45 | additionalProperties: 46 | type: string 47 | type: object 48 | template: 49 | properties: 50 | metadata: 51 | properties: 52 | annotations: 53 | additionalProperties: 54 | type: string 55 | type: object 56 | finalizers: 57 | items: 58 | type: string 59 | type: array 60 | labels: 61 | additionalProperties: 62 | type: string 63 | type: object 64 | name: 65 | type: string 66 | namespace: 67 | type: string 68 | type: object 69 | spec: 70 | properties: 71 | nodeOperationTemplateName: 72 | type: string 73 | rule: 74 | properties: 75 | conditions: 76 | items: 77 | properties: 78 | status: 79 | type: string 80 | type: 81 | type: string 82 | required: 83 | - status 84 | - type 85 | type: object 86 | type: array 87 | required: 88 | - conditions 89 | type: object 90 | required: 91 | - nodeOperationTemplateName 92 | - rule 93 | type: object 94 | required: 95 | - spec 96 | type: object 97 | required: 98 | - nodeSelector 99 | - template 100 | type: object 101 | status: 102 | description: NodeRemediationTemplateStatus defines the observed state 103 | of NodeRemediationTemplate 104 | type: object 105 | type: object 106 | served: true 107 | storage: true 108 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # This kustomization.yaml is not intended to be run by itself, 2 | # since it depends on service name and namespace that are out of this kustomize package. 
3 | # It should be run by config/default 4 | resources: 5 | - bases/nodeops.k8s.preferred.jp_nodeoperations.yaml 6 | - bases/nodeops.k8s.preferred.jp_nodedisruptionbudgets.yaml 7 | - bases/nodeops.k8s.preferred.jp_nodeoperationtemplates.yaml 8 | - bases/nodeops.k8s.preferred.jp_noderemediationtemplates.yaml 9 | - bases/nodeops.k8s.preferred.jp_noderemediations.yaml 10 | #+kubebuilder:scaffold:crdkustomizeresource 11 | 12 | patchesStrategicMerge: 13 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 14 | # patches here are for enabling the conversion webhook for each CRD 15 | #- patches/webhook_in_nodeoperations.yaml 16 | #- patches/webhook_in_nodedisruptionbudgets.yaml 17 | #- patches/webhook_in_nodeoperationtemplates.yaml 18 | #- patches/webhook_in_noderemediationtemplates.yaml 19 | #- patches/webhook_in_noderemediations.yaml 20 | #+kubebuilder:scaffold:crdkustomizewebhookpatch 21 | 22 | # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. 23 | # patches here are for enabling the CA injection for each CRD 24 | #- patches/cainjection_in_nodeoperations.yaml 25 | #- patches/cainjection_in_nodedisruptionbudgets.yaml 26 | #- patches/cainjection_in_nodeoperationtemplates.yaml 27 | #- patches/cainjection_in_noderemediationtemplates.yaml 28 | #- patches/cainjection_in_noderemediations.yaml 29 | #+kubebuilder:scaffold:crdkustomizecainjectionpatch 30 | 31 | # the following config is for teaching kustomize how to do kustomization for CRDs. 
32 | configurations: 33 | - kustomizeconfig.yaml 34 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | version: v1 8 | group: apiextensions.k8s.io 9 | path: spec/conversion/webhook/clientConfig/service/name 10 | 11 | namespace: 12 | - kind: CustomResourceDefinition 13 | version: v1 14 | group: apiextensions.k8s.io 15 | path: spec/conversion/webhook/clientConfig/service/namespace 16 | create: false 17 | 18 | varReference: 19 | - path: metadata/annotations 20 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_nodedisruptionbudgets.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 7 | name: nodedisruptionbudgets.nodeops.k8s.preferred.jp 8 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_nodeoperations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 7 | name: nodeoperations.nodeops.k8s.preferred.jp 8 | 
-------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_nodeoperationtemplates.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 7 | name: nodeoperationtemplates.nodeops.k8s.preferred.jp 8 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_noderemediations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 7 | name: noderemediations.nodeops.k8s.preferred.jp 8 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_noderemediationtemplates.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | annotations: 6 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 7 | name: noderemediationtemplates.nodeops.k8s.preferred.jp 8 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_nodedisruptionbudgets.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables a conversion webhook for the CRD 2 | apiVersion: apiextensions.k8s.io/v1 
3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: nodedisruptionbudgets.nodeops.k8s.preferred.jp 6 | spec: 7 | conversion: 8 | strategy: Webhook 9 | webhook: 10 | clientConfig: 11 | service: 12 | namespace: system 13 | name: webhook-service 14 | path: /convert 15 | conversionReviewVersions: 16 | - v1 17 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_nodeoperations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables a conversion webhook for the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: nodeoperations.nodeops.k8s.preferred.jp 6 | spec: 7 | conversion: 8 | strategy: Webhook 9 | webhook: 10 | clientConfig: 11 | service: 12 | namespace: system 13 | name: webhook-service 14 | path: /convert 15 | conversionReviewVersions: 16 | - v1 17 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_nodeoperationtemplates.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables a conversion webhook for the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: nodeoperationtemplates.nodeops.k8s.preferred.jp 6 | spec: 7 | conversion: 8 | strategy: Webhook 9 | webhook: 10 | clientConfig: 11 | service: 12 | namespace: system 13 | name: webhook-service 14 | path: /convert 15 | conversionReviewVersions: 16 | - v1 17 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_noderemediations.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables a conversion webhook for the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: 
noderemediations.nodeops.k8s.preferred.jp 6 | spec: 7 | conversion: 8 | strategy: Webhook 9 | webhook: 10 | clientConfig: 11 | service: 12 | namespace: system 13 | name: webhook-service 14 | path: /convert 15 | conversionReviewVersions: 16 | - v1 17 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_noderemediationtemplates.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables a conversion webhook for the CRD 2 | apiVersion: apiextensions.k8s.io/v1 3 | kind: CustomResourceDefinition 4 | metadata: 5 | name: noderemediationtemplates.nodeops.k8s.preferred.jp 6 | spec: 7 | conversion: 8 | strategy: Webhook 9 | webhook: 10 | clientConfig: 11 | service: 12 | namespace: system 13 | name: webhook-service 14 | path: /convert 15 | conversionReviewVersions: 16 | - v1 17 | -------------------------------------------------------------------------------- /config/default/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # Adds namespace to all resources. 2 | namespace: node-operation-controller-system 3 | 4 | # Value of this field is prepended to the 5 | # names of all resources, e.g. a deployment named 6 | # "wordpress" becomes "alices-wordpress". 7 | # Note that it should also match with the prefix (text before '-') of the namespace 8 | # field above. 9 | namePrefix: node-operation-controller- 10 | 11 | # Labels to add to all resources and selectors. 12 | #commonLabels: 13 | # someName: someValue 14 | 15 | bases: 16 | - ../crd 17 | - ../rbac 18 | - ../manager 19 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 20 | # crd/kustomization.yaml 21 | #- ../webhook 22 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. 
23 | #- ../certmanager 24 | # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. 25 | #- ../prometheus 26 | 27 | patchesStrategicMerge: 28 | # Protect the /metrics endpoint by putting it behind auth. 29 | # If you want your controller-manager to expose the /metrics 30 | # endpoint w/o any authn/z, please comment the following line. 31 | - manager_auth_proxy_patch.yaml 32 | 33 | # Mount the controller config file for loading manager configurations 34 | # through a ComponentConfig type 35 | #- manager_config_patch.yaml 36 | 37 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in 38 | # crd/kustomization.yaml 39 | #- manager_webhook_patch.yaml 40 | 41 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 42 | # Uncomment 'CERTMANAGER' sections in crd/kustomization.yaml to enable the CA injection in the admission webhooks. 43 | # 'CERTMANAGER' needs to be enabled to use ca injection 44 | #- webhookcainjection_patch.yaml 45 | 46 | # the following config is for teaching kustomize how to do var substitution 47 | vars: 48 | # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. 
49 | #- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR 50 | # objref: 51 | # kind: Certificate 52 | # group: cert-manager.io 53 | # version: v1 54 | # name: serving-cert # this name should match the one in certificate.yaml 55 | # fieldref: 56 | # fieldpath: metadata.namespace 57 | #- name: CERTIFICATE_NAME 58 | # objref: 59 | # kind: Certificate 60 | # group: cert-manager.io 61 | # version: v1 62 | # name: serving-cert # this name should match the one in certificate.yaml 63 | #- name: SERVICE_NAMESPACE # namespace of the service 64 | # objref: 65 | # kind: Service 66 | # version: v1 67 | # name: webhook-service 68 | # fieldref: 69 | # fieldpath: metadata.namespace 70 | #- name: SERVICE_NAME 71 | # objref: 72 | # kind: Service 73 | # version: v1 74 | # name: webhook-service 75 | -------------------------------------------------------------------------------- /config/default/manager_auth_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch injects a sidecar container, which is an HTTP proxy for the 2 | # controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: controller-manager 7 | namespace: system 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: kube-rbac-proxy 13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.13.0 14 | args: 15 | - "--secure-listen-address=0.0.0.0:8443" 16 | - "--upstream=http://127.0.0.1:8080/" 17 | - "--logtostderr=true" 18 | - "--v=10" 19 | ports: 20 | - containerPort: 8443 21 | protocol: TCP 22 | name: https 23 | - name: manager 24 | args: 25 | - "--health-probe-bind-address=:8081" 26 | - "--metrics-bind-address=127.0.0.1:8080" 27 | - "--leader-elect" 28 | -------------------------------------------------------------------------------- /config/default/manager_config_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - "--config=controller_manager_config.yaml" 13 | volumeMounts: 14 | - name: manager-config 15 | mountPath: /controller_manager_config.yaml 16 | subPath: controller_manager_config.yaml 17 | volumes: 18 | - name: manager-config 19 | configMap: 20 | name: manager-config 21 | -------------------------------------------------------------------------------- /config/kind/config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | name: node-operation-controller 4 | nodes: 5 | - role: control-plane 6 | - role: worker 7 | - role: worker 8 | -------------------------------------------------------------------------------- /config/kind/test.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | nodes: 4 | - role: control-plane 5 | - role: worker 6 | 
-------------------------------------------------------------------------------- /config/manager/controller_manager_config.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: controller-runtime.sigs.k8s.io/v1alpha1 2 | kind: ControllerManagerConfig 3 | health: 4 | healthProbeBindAddress: :8081 5 | metrics: 6 | bindAddress: 127.0.0.1:8080 7 | webhook: 8 | port: 9443 9 | leaderElection: 10 | leaderElect: true 11 | resourceName: 869fe74b.k8s.preferred.jp 12 | -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | 4 | generatorOptions: 5 | disableNameSuffixHash: true 6 | 7 | configMapGenerator: 8 | - name: manager-config 9 | files: 10 | - controller_manager_config.yaml 11 | -------------------------------------------------------------------------------- /config/manager/manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: system 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: controller-manager 12 | namespace: system 13 | labels: 14 | control-plane: controller-manager 15 | spec: 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | replicas: 1 20 | template: 21 | metadata: 22 | annotations: 23 | kubectl.kubernetes.io/default-container: manager 24 | labels: 25 | control-plane: controller-manager 26 | spec: 27 | securityContext: 28 | runAsNonRoot: true 29 | containers: 30 | - command: 31 | - /manager 32 | args: 33 | - --leader-elect 34 | image: controller:latest 35 | name: manager 36 | securityContext: 37 | allowPrivilegeEscalation: false 38 | livenessProbe: 39 | httpGet: 40 | path: /healthz 41 | port: 8081 42 | initialDelaySeconds: 15 43 
| periodSeconds: 20 44 | readinessProbe: 45 | httpGet: 46 | path: /readyz 47 | port: 8081 48 | initialDelaySeconds: 5 49 | periodSeconds: 10 50 | # TODO(user): Configure the resources accordingly based on the project requirements. 51 | # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ 52 | resources: 53 | limits: 54 | cpu: 500m 55 | memory: 128Mi 56 | requests: 57 | cpu: 10m 58 | memory: 64Mi 59 | serviceAccountName: controller-manager 60 | terminationGracePeriodSeconds: 10 61 | -------------------------------------------------------------------------------- /config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Prometheus Monitor Service (Metrics) 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | labels: 7 | control-plane: controller-manager 8 | name: controller-manager-metrics-monitor 9 | namespace: system 10 | spec: 11 | endpoints: 12 | - path: /metrics 13 | port: https 14 | scheme: https 15 | bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token 16 | tlsConfig: 17 | insecureSkipVerify: true 18 | selector: 19 | matchLabels: 20 | control-plane: controller-manager 21 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_client_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: metrics-reader 5 | rules: 6 | - nonResourceURLs: 7 | - "/metrics" 8 | verbs: 9 | - get 10 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: 7 | - authentication.k8s.io 8 | resources: 9 | - tokenreviews 10 | verbs: 11 | - create 12 | - apiGroups: 13 | - authorization.k8s.io 14 | resources: 15 | - subjectaccessreviews 16 | verbs: 17 | - create 18 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: controller-manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: controller-manager-metrics-service 7 | namespace: system 8 | spec: 9 | ports: 10 | - name: https 11 | port: 8443 12 | protocol: TCP 13 | targetPort: https 14 | selector: 15 | control-plane: controller-manager 16 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | # All RBAC will be applied under this service account in 3 | # the deployment namespace. You may comment out this resource 4 | # if your manager will use a service account that exists at 5 | # runtime. Be sure to update RoleBinding and ClusterRoleBinding 6 | # subjects if changing service account names. 
7 | - service_account.yaml 8 | - role.yaml 9 | - role_binding.yaml 10 | - leader_election_role.yaml 11 | - leader_election_role_binding.yaml 12 | # Comment the following 4 lines if you want to disable 13 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 14 | # which protects your /metrics endpoint. 15 | - auth_proxy_service.yaml 16 | - auth_proxy_role.yaml 17 | - auth_proxy_role_binding.yaml 18 | - auth_proxy_client_clusterrole.yaml 19 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - configmaps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - create 16 | - update 17 | - patch 18 | - delete 19 | - apiGroups: 20 | - coordination.k8s.io 21 | resources: 22 | - leases 23 | verbs: 24 | - get 25 | - list 26 | - watch 27 | - create 28 | - update 29 | - patch 30 | - delete 31 | - apiGroups: 32 | - "" 33 | resources: 34 | - events 35 | verbs: 36 | - create 37 | - patch 38 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: controller-manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/nodedisruptionbudget_editor_role.yaml: -------------------------------------------------------------------------------- 1 
| # permissions for end users to edit nodedisruptionbudgets. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodedisruptionbudget-editor-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodedisruptionbudgets 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - nodeops.k8s.preferred.jp 21 | resources: 22 | - nodedisruptionbudgets/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/nodedisruptionbudget_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view nodedisruptionbudgets. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodedisruptionbudget-viewer-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodedisruptionbudgets 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - nodeops.k8s.preferred.jp 17 | resources: 18 | - nodedisruptionbudgets/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/nodeoperation_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit nodeoperations. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodeoperation-editor-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodeoperations 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - nodeops.k8s.preferred.jp 21 | resources: 22 | - nodeoperations/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/nodeoperation_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view nodeoperations. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodeoperation-viewer-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodeoperations 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - nodeops.k8s.preferred.jp 17 | resources: 18 | - nodeoperations/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/nodeoperationtemplate_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit nodeoperationtemplates. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodeoperationtemplate-editor-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodeoperationtemplates 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - nodeops.k8s.preferred.jp 21 | resources: 22 | - nodeoperationtemplates/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/nodeoperationtemplate_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view nodeoperationtemplates. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: nodeoperationtemplate-viewer-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - nodeoperationtemplates 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - nodeops.k8s.preferred.jp 17 | resources: 18 | - nodeoperationtemplates/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/noderemediation_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit noderemediations. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: noderemediation-editor-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - noderemediations 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - nodeops.k8s.preferred.jp 21 | resources: 22 | - noderemediations/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/noderemediation_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view noderemediations. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: noderemediation-viewer-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - noderemediations 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - nodeops.k8s.preferred.jp 17 | resources: 18 | - noderemediations/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/noderemediationtemplate_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit noderemediationtemplates. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: noderemediationtemplate-editor-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - noderemediationtemplates 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - nodeops.k8s.preferred.jp 21 | resources: 22 | - noderemediationtemplates/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/noderemediationtemplate_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view noderemediationtemplates. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: noderemediationtemplate-viewer-role 6 | rules: 7 | - apiGroups: 8 | - nodeops.k8s.preferred.jp 9 | resources: 10 | - noderemediationtemplates 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - nodeops.k8s.preferred.jp 17 | resources: 18 | - noderemediationtemplates/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: manager-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - events 11 | - nodes 12 | - pods/eviction 13 | verbs: 14 | - create 15 | - delete 16 | - get 17 | - list 18 | - patch 19 | - update 20 | - watch 21 | - apiGroups: 22 | - "" 23 | resources: 24 | - nodes/status 25 | verbs: 26 | - get 27 | - patch 28 | - update 29 | - apiGroups: 30 | - "" 31 | resources: 32 | - pods 33 | verbs: 34 | - delete 35 | - get 36 | - list 37 | - watch 38 | - apiGroups: 39 | - "" 40 | resources: 41 | - pods/status 42 | verbs: 43 | - get 44 | - 
apiGroups: 45 | - batch 46 | resources: 47 | - jobs 48 | verbs: 49 | - create 50 | - delete 51 | - get 52 | - list 53 | - patch 54 | - update 55 | - watch 56 | - apiGroups: 57 | - batch 58 | resources: 59 | - jobs/status 60 | verbs: 61 | - get 62 | - apiGroups: 63 | - nodeops.k8s.preferred.jp 64 | resources: 65 | - nodedisruptionbudgets 66 | - nodeoperations 67 | - nodeoperationtemplates 68 | - noderemediations 69 | - noderemediationtemplates 70 | verbs: 71 | - create 72 | - delete 73 | - get 74 | - list 75 | - patch 76 | - update 77 | - watch 78 | - apiGroups: 79 | - nodeops.k8s.preferred.jp 80 | resources: 81 | - nodedisruptionbudgets/finalizers 82 | - nodeoperations/finalizers 83 | - nodeoperationtemplates/finalizers 84 | - noderemediations/finalizers 85 | - noderemediationtemplates/finalizers 86 | verbs: 87 | - update 88 | - apiGroups: 89 | - nodeops.k8s.preferred.jp 90 | resources: 91 | - nodedisruptionbudgets/status 92 | - nodeoperations/status 93 | - nodeoperationtemplates/status 94 | - noderemediations/status 95 | - noderemediationtemplates/status 96 | verbs: 97 | - get 98 | - patch 99 | - update 100 | -------------------------------------------------------------------------------- /config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: controller-manager 12 | namespace: system 13 | -------------------------------------------------------------------------------- /config/rbac/service_account.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | 
-------------------------------------------------------------------------------- /config/samples/nodeops_v1alpha1_nodedisruptionbudget.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeDisruptionBudget 3 | metadata: 4 | name: nodedisruptionbudget-sample 5 | spec: 6 | selector: 7 | kubernetes.io/hostname: node-operation-controller-worker 8 | maxUnavailable: 1 9 | -------------------------------------------------------------------------------- /config/samples/nodeops_v1alpha1_nodeoperation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperation 3 | metadata: 4 | name: nodeoperation-sample 5 | spec: 6 | nodeName: node-operation-controller-worker 7 | jobTemplate: 8 | metadata: 9 | namespace: default 10 | spec: 11 | template: 12 | spec: 13 | containers: 14 | - name: pi 15 | image: perl 16 | command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] 17 | restartPolicy: Never 18 | -------------------------------------------------------------------------------- /config/samples/nodeops_v1alpha1_nodeoperationtemplate.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperationTemplate 3 | metadata: 4 | name: nodeoperationtemplate-sample 5 | spec: 6 | template: 7 | metadata: 8 | labels: 9 | sample: '' 10 | spec: 11 | nodeName: node-operation-controller-worker 12 | jobSpec: 13 | template: 14 | spec: 15 | containers: 16 | - name: pi 17 | image: perl 18 | command: ["perl", "-Mbignum=bpi", "-wle", "print bpi(2000)"] 19 | restartPolicy: Never 20 | -------------------------------------------------------------------------------- /config/samples/nodeops_v1alpha1_noderemediation.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeRemediation 3 | metadata: 4 | name: noderemediation-sample 5 | spec: 6 | # TODO(user): Add fields here 7 | -------------------------------------------------------------------------------- /config/samples/nodeops_v1alpha1_noderemediationtemplate.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeRemediationTemplate 3 | metadata: 4 | name: noderemediationtemplate-sample 5 | spec: 6 | nodeSelector: 7 | 'kubernetes.io/os': 'linux' 8 | template: 9 | nodeOperationTemplateName: 'example1' 10 | rule: 11 | conditions: 12 | - type: 'type1' 13 | status: 'status2' 14 | -------------------------------------------------------------------------------- /controllers/eviction_stragegy.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "context" 5 | 6 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 7 | corev1 "k8s.io/api/core/v1" 8 | policyv1beta1 "k8s.io/api/policy/v1beta1" 9 | "k8s.io/apimachinery/pkg/api/errors" 10 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 11 | "k8s.io/client-go/kubernetes" 12 | "k8s.io/client-go/tools/record" 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | "sigs.k8s.io/controller-runtime/pkg/log" 15 | ) 16 | 17 | type evictionStrategyProcessor struct { 18 | client client.Client 19 | clientset *kubernetes.Clientset 20 | eventRecorder record.EventRecorder 21 | } 22 | 23 | func newEvictionStrategyProcessor(client client.Client, clientset *kubernetes.Clientset, eventRecorder record.EventRecorder) *evictionStrategyProcessor { 24 | return &evictionStrategyProcessor{client: client, clientset: clientset, eventRecorder: eventRecorder} 25 | } 26 | 27 | // do performs the eviction strategy specified in nodeOp against pods and returns whether all the pods have been drained, or an error 28 | func (s
*evictionStrategyProcessor) do(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 29 | logger := log.FromContext(ctx) 30 | if len(pods) == 0 { 31 | return true, nil 32 | } 33 | switch nodeOp.Spec.EvictionStrategy { 34 | case nodeopsv1alpha1.NodeOperationEvictionStrategyDelete: 35 | return s.processDelete(ctx, pods, nodeOp) 36 | case nodeopsv1alpha1.NodeOperationEvictionStrategyForceDelete: 37 | return s.processForceDelete(ctx, pods, nodeOp) 38 | case nodeopsv1alpha1.NodeOperationEvictionStrategyNone: 39 | return s.processNone(ctx, pods, nodeOp) 40 | case nodeopsv1alpha1.NodeOperationEvictionStrategyEvict: 41 | return s.processEvict(ctx, pods, nodeOp) 42 | default: 43 | logger.Info("EvictionStrategy seems empty. Falling back to 'Evict' EvictionStrategy", "strategy", nodeOp.Spec.EvictionStrategy, "nodeoperation", nodeOp.Name) 44 | return s.processEvict(ctx, pods, nodeOp) 45 | } 46 | } 47 | 48 | func (s *evictionStrategyProcessor) processEvict(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 49 | logger := log.FromContext(ctx) 50 | logger.Info("Performing EvictionStrategy", "strategy", nodeopsv1alpha1.NodeOperationEvictionStrategyEvict, "nodeoperation", nodeOp.Name) 51 | 52 | for _, pod := range pods { 53 | eviction := &policyv1beta1.Eviction{ 54 | ObjectMeta: metav1.ObjectMeta{ 55 | Name: pod.Name, 56 | Namespace: pod.Namespace, 57 | }, 58 | } 59 | 60 | logger.Info("Evicting a Pod", "namespace", pod.Namespace, "name", pod.Name, "nodeName", pod.Spec.NodeName) 61 | if err := s.clientset.CoreV1().Pods(pod.Namespace).Evict(ctx, eviction); err != nil { 62 | if errors.IsTooManyRequests(err) { 63 | logger.Info("Cannot do a Pod due to PDB", "namespace", pod.Namespace, "name", pod.Name, "nodeName", pod.Spec.NodeName) 64 | continue 65 | } 66 | return false, err 67 | } 68 | s.eventRecorder.Eventf(&pod, corev1.EventTypeNormal, "Evicted", `Node Operation "%s" evicted a Pod`, nodeOp.Name) 69 
| } 70 | 71 | // returning false here to check no pods exists in next reconciliation round. 72 | return false, nil 73 | } 74 | 75 | func (s *evictionStrategyProcessor) processDelete(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 76 | logger := log.FromContext(ctx) 77 | logger.Info("Performing EvictionStrategy", "strategy", nodeopsv1alpha1.NodeOperationEvictionStrategyDelete, "nodeoperation", nodeOp.Name) 78 | return s.deletePods(ctx, pods, nodeOp, false) 79 | } 80 | 81 | func (s *evictionStrategyProcessor) processForceDelete(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 82 | logger := log.FromContext(ctx) 83 | logger.Info("Performing EvictionStrategy", "strategy", nodeopsv1alpha1.NodeOperationEvictionStrategyForceDelete, "nodeoperation", nodeOp.Name) 84 | return s.deletePods(ctx, pods, nodeOp, true) 85 | } 86 | 87 | func (s *evictionStrategyProcessor) deletePods(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation, force bool) (bool, error) { 88 | logger := log.FromContext(ctx) 89 | opts := []client.DeleteOption{} 90 | if force { 91 | opts = append(opts, client.GracePeriodSeconds(0)) 92 | } 93 | 94 | for _, pod := range pods { 95 | logger.Info("Deleting a Pod", "namespace", pod.Namespace, "name", pod.Name, "nodeName", pod.Spec.NodeName) 96 | if err := s.client.Delete(context.Background(), &pod, opts...); err != nil { 97 | if errors.IsNotFound(err) { 98 | logger.Info("Pod Not found. 
Skip deletion", "namespace", pod.Namespace, "name", pod.Name, "nodeName", pod.Spec.NodeName) 99 | continue 100 | } 101 | logger.Error(err, "Couldn't Delete Pod", "namespace", pod.Namespace, "name", pod.Name, "nodeName", pod.Spec.NodeName) 102 | return false, err 103 | } 104 | if force { 105 | s.eventRecorder.Eventf(&pod, corev1.EventTypeNormal, "ForceDeleted", `Node Operation "%s" force deleted a Pod`, nodeOp.Name) 106 | } else { 107 | s.eventRecorder.Eventf(&pod, corev1.EventTypeNormal, "Deleted", `Node Operation "%s" deleted a Pod`, nodeOp.Name) 108 | } 109 | } 110 | 111 | // returning false here to check no pods exists in next reconciliation round. 112 | return false, nil 113 | } 114 | 115 | func (s *evictionStrategyProcessor) processNone(ctx context.Context, pods []corev1.Pod, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 116 | logger := log.FromContext(ctx) 117 | logger.Info("Performing EvictionStrategy", "strategy", nodeopsv1alpha1.NodeOperationEvictionStrategyNone, "nodeoperation", nodeOp.Name) 118 | 119 | logger.Info("'None' EvictionStrategy performs nothing.") 120 | 121 | // returning false here to check no pods exists in next reconciliation round. 122 | return false, nil 123 | } 124 | -------------------------------------------------------------------------------- /controllers/nodedisruptionbudget_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | 22 | "k8s.io/apimachinery/pkg/runtime" 23 | ctrl "sigs.k8s.io/controller-runtime" 24 | "sigs.k8s.io/controller-runtime/pkg/client" 25 | "sigs.k8s.io/controller-runtime/pkg/log" 26 | 27 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 28 | ) 29 | 30 | // NodeDisruptionBudgetReconciler reconciles a NodeDisruptionBudget object 31 | type NodeDisruptionBudgetReconciler struct { 32 | client.Client 33 | Scheme *runtime.Scheme 34 | } 35 | 36 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodedisruptionbudgets,verbs=get;list;watch;create;update;patch;delete 37 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodedisruptionbudgets/status,verbs=get;update;patch 38 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodedisruptionbudgets/finalizers,verbs=update 39 | 40 | // Reconcile is part of the main kubernetes reconciliation loop which aims to 41 | // move the current state of the cluster closer to the desired state. 42 | // TODO(user): Modify the Reconcile function to compare the state specified by 43 | // the NodeDisruptionBudget object against the actual cluster state, and then 44 | // perform operations to make the cluster state reflect the state specified by 45 | // the user. 46 | // 47 | // For more details, check Reconcile and its Result here: 48 | // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile 49 | func (r *NodeDisruptionBudgetReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 50 | _ = log.FromContext(ctx) 51 | 52 | // TODO(user): your logic here 53 | 54 | return ctrl.Result{}, nil 55 | } 56 | 57 | // SetupWithManager sets up the controller with the Manager. 
58 | func (r *NodeDisruptionBudgetReconciler) SetupWithManager(mgr ctrl.Manager) error { 59 | return ctrl.NewControllerManagedBy(mgr). 60 | For(&nodeopsv1alpha1.NodeDisruptionBudget{}). 61 | Complete(r) 62 | } 63 | -------------------------------------------------------------------------------- /controllers/nodeoperation_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | "net/http" 23 | "sync" 24 | "time" 25 | 26 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 27 | batchv1 "k8s.io/api/batch/v1" 28 | corev1 "k8s.io/api/core/v1" 29 | "k8s.io/apimachinery/pkg/api/errors" 30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 | "k8s.io/apimachinery/pkg/labels" 32 | "k8s.io/apimachinery/pkg/runtime" 33 | "k8s.io/client-go/kubernetes" 34 | "k8s.io/client-go/tools/record" 35 | "k8s.io/client-go/tools/reference" 36 | ctrl "sigs.k8s.io/controller-runtime" 37 | "sigs.k8s.io/controller-runtime/pkg/client" 38 | "sigs.k8s.io/controller-runtime/pkg/log" 39 | ) 40 | 41 | var controllerTaint = corev1.Taint{ 42 | Key: "nodeops.k8s.preferred.jp/operating", 43 | Effect: "NoSchedule", 44 | Value: "", 45 | } 46 | 47 | const jobOwnerKey = ".metadata.controller" 48 | const eventSourceName = "node-operation-controller" 49 | 50 | // NodeOperationReconciler reconciles a NodeOperation object 51 | type NodeOperationReconciler struct { 52 | client.Client 53 | Scheme *runtime.Scheme 54 | 55 | DrainInterval time.Duration 56 | NDBRetryInterval time.Duration 57 | 58 | clientset *kubernetes.Clientset 59 | mutex sync.Mutex 60 | eventRecorder record.EventRecorder 61 | evictionStrategyProcessor *evictionStrategyProcessor 62 | } 63 | 64 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperations,verbs=get;list;watch;create;update;patch;delete 65 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperations/status,verbs=get;update;patch 66 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperations/finalizers,verbs=update 67 | // +kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodedisruptionbudgets,verbs=get;list;watch 68 | // +kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodedisruptionbudgets/status,verbs=get 69 | // 
+kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete 70 | // +kubebuilder:rbac:groups=batch,resources=jobs/status,verbs=get 71 | // +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;create;update;patch;delete 72 | // +kubebuilder:rbac:groups="",resources=nodes/status,verbs=get;update;patch 73 | // +kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;delete 74 | // +kubebuilder:rbac:groups="",resources=pods/status,verbs=get 75 | // +kubebuilder:rbac:groups="",resources=pods/eviction,verbs=get;list;watch;create;update;patch;delete 76 | // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch;delete 77 | 78 | // Reconcile is part of the main kubernetes reconciliation loop which aims to 79 | // move the current state of the cluster closer to the desired state. 80 | // TODO(user): Modify the Reconcile function to compare the state specified by 81 | // the NodeOperation object against the actual cluster state, and then 82 | // perform operations to make the cluster state reflect the state specified by 83 | // the user. 
84 | // 85 | // For more details, check Reconcile and its Result here: 86 | // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile 87 | func (r *NodeOperationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 88 | logger := log.FromContext(ctx) 89 | 90 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 91 | if err := r.Get(ctx, req.NamespacedName, nodeOp); err != nil { 92 | sterr, ok := err.(*errors.StatusError) 93 | if ok && sterr.Status().Code == 404 { 94 | if err := r.removeTaints(ctx); err != nil { 95 | return ctrl.Result{}, err 96 | } 97 | return ctrl.Result{}, nil 98 | } 99 | return ctrl.Result{}, err 100 | } 101 | 102 | var result ctrl.Result 103 | var err error 104 | 105 | prevPhase := nodeOp.Status.Phase 106 | switch nodeOp.Status.Phase { 107 | case "": 108 | nodeOp.Status.Reason = "NodeOperation is created" 109 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhasePending 110 | if err := r.Update(ctx, nodeOp); err != nil { 111 | return ctrl.Result{}, err 112 | } 113 | case nodeopsv1alpha1.NodeOperationPhasePending: 114 | result, err = r.reconcilePending(ctx, nodeOp) 115 | case nodeopsv1alpha1.NodeOperationPhaseDraining: 116 | result, err = r.reconcileDraining(ctx, nodeOp) 117 | case nodeopsv1alpha1.NodeOperationPhaseDrained: 118 | result, err = r.reconcileDrained(ctx, nodeOp) 119 | case nodeopsv1alpha1.NodeOperationPhaseJobCreating: 120 | result, err = r.reconcileJobCreating(ctx, nodeOp) 121 | case nodeopsv1alpha1.NodeOperationPhaseRunning: 122 | result, err = r.reconcileRunning(ctx, nodeOp) 123 | default: 124 | return ctrl.Result{}, nil 125 | } 126 | logger.Info("phase changed", "name", nodeOp.Name, "from", prevPhase, "to", nodeOp.Status.Phase) 127 | 128 | return result, err 129 | } 130 | 131 | func (r *NodeOperationReconciler) removeTaints(ctx context.Context) error { 132 | nodeOpList := nodeopsv1alpha1.NodeOperationList{} 133 | if err := r.List(ctx, &nodeOpList); err != nil { 134 | return err 135 | } 
136 | 137 | activeNodeNames := map[string]struct{}{} 138 | for _, op := range nodeOpList.Items { 139 | phase := op.Status.Phase 140 | if phase == nodeopsv1alpha1.NodeOperationPhaseDrained || 141 | phase == nodeopsv1alpha1.NodeOperationPhaseDraining || 142 | phase == nodeopsv1alpha1.NodeOperationPhaseRunning { 143 | activeNodeNames[op.Spec.NodeName] = struct{}{} 144 | } 145 | } 146 | 147 | nodeList := corev1.NodeList{} 148 | if err := r.List(ctx, &nodeList); err != nil { 149 | return err 150 | } 151 | 152 | findTaint := func(node corev1.Node) bool { 153 | for _, taint := range node.Spec.Taints { 154 | if isControllerTaint(taint) { 155 | return true 156 | } 157 | } 158 | return false 159 | } 160 | 161 | for _, node := range nodeList.Items { 162 | if !findTaint(node) { 163 | continue 164 | } 165 | if _, active := activeNodeNames[node.Name]; active { 166 | continue 167 | } 168 | 169 | var taints []corev1.Taint 170 | for _, taint := range node.Spec.Taints { 171 | if isControllerTaint(taint) { 172 | continue 173 | } 174 | taints = append(taints, taint) 175 | } 176 | node.Spec.Taints = taints 177 | 178 | if err := r.Update(ctx, &node); err != nil { 179 | return err 180 | } 181 | } 182 | 183 | return nil 184 | } 185 | 186 | func (r *NodeOperationReconciler) reconcilePending(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (ctrl.Result, error) { 187 | r.mutex.Lock() 188 | defer r.mutex.Unlock() 189 | 190 | nodeList := &corev1.NodeList{} 191 | if err := r.List(ctx, nodeList); err != nil { 192 | return ctrl.Result{}, err 193 | } 194 | 195 | node := &corev1.Node{} 196 | if err := r.Get(ctx, client.ObjectKey{Namespace: "", Name: nodeOp.Spec.NodeName}, node); err != nil { 197 | return ctrl.Result{}, err 198 | } 199 | 200 | for _, taint := range node.Spec.Taints { 201 | if isControllerTaint(taint) { 202 | // This avoids multiple NodeOperations for the same Node to run simultaneously 203 | nodeOp.Status.Reason = "Another NodeOperation for the same node is running" 204 | 
return ctrl.Result{}, r.Update(ctx, nodeOp) 205 | } 206 | } 207 | 208 | violate, err := r.doesViolateNDB(ctx, nodeOp) 209 | if err != nil { 210 | return ctrl.Result{}, err 211 | } 212 | if violate { 213 | nodeOp.Status.Reason = "Due to NodeDisruptionBudget violation" 214 | if err := r.Update(ctx, nodeOp); err != nil { 215 | return ctrl.Result{}, err 216 | } 217 | return ctrl.Result{Requeue: true, RequeueAfter: r.NDBRetryInterval}, nil 218 | } 219 | 220 | // Taint the node 221 | if err := r.taintNode(node); err != nil { 222 | return ctrl.Result{}, err 223 | } 224 | r.eventRecorder.Eventf(nodeOp, "Normal", "TaintNode", `Tainted a Node "%s"`, node.Name) 225 | 226 | nodeOp.Status.Reason = "" 227 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseDraining 228 | if err := r.Update(ctx, nodeOp); err != nil { 229 | return ctrl.Result{}, err 230 | } 231 | 232 | r.eventRecorder.Event(nodeOp, "Normal", "Draining", "Start to drain Pods") 233 | 234 | return ctrl.Result{}, nil 235 | } 236 | 237 | func (r *NodeOperationReconciler) reconcileDraining(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (ctrl.Result, error) { 238 | // Try to drain Pods 239 | drained, err := r.drain(ctx, nodeOp) 240 | if err != nil { 241 | return ctrl.Result{}, err 242 | } 243 | 244 | if nodeOp.Spec.SkipWaitingForEviction { 245 | nodeOp.Status.Reason = "WaitingForEvictionSkipped" 246 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseDrained 247 | if err := r.Update(ctx, nodeOp); err != nil { 248 | return ctrl.Result{}, err 249 | } 250 | 251 | r.eventRecorder.Event(nodeOp, "Normal", "Drained", "Skipped waiting for pods eviction") 252 | 253 | return ctrl.Result{}, nil 254 | } 255 | 256 | if !drained { 257 | return ctrl.Result{Requeue: true, RequeueAfter: r.DrainInterval}, nil 258 | } 259 | 260 | nodeOp.Status.Reason = "" 261 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseDrained 262 | if err := r.Update(ctx, nodeOp); err != nil { 263 | return ctrl.Result{}, err 264 | } 
265 | 266 | r.eventRecorder.Event(nodeOp, "Normal", "Drained", "All Pods are drained") 267 | 268 | return ctrl.Result{}, nil 269 | } 270 | 271 | func (r *NodeOperationReconciler) reconcileDrained(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (ctrl.Result, error) { 272 | logger := log.FromContext(ctx) 273 | var childJobs batchv1.JobList 274 | if err := r.List(ctx, &childJobs, client.MatchingFields{jobOwnerKey: nodeOp.Name}); err != nil { 275 | return ctrl.Result{}, err 276 | } 277 | 278 | var job *batchv1.Job 279 | if len(childJobs.Items) == 0 { 280 | // Run a Job 281 | metadata := nodeOp.Spec.JobTemplate.Metadata.DeepCopy() 282 | if metadata.Name == "" && metadata.GenerateName == "" { 283 | metadata.GenerateName = fmt.Sprintf("nodeops-%s-", nodeOp.Name) 284 | } 285 | 286 | spec := nodeOp.Spec.JobTemplate.Spec.DeepCopy() 287 | if spec.Template.ObjectMeta.Annotations == nil { 288 | spec.Template.ObjectMeta.Annotations = map[string]string{} 289 | } 290 | spec.Template.ObjectMeta.Annotations["nodeops.k8s.preferred.jp/nodename"] = nodeOp.Spec.NodeName 291 | 292 | job = &batchv1.Job{ 293 | ObjectMeta: *metadata, 294 | Spec: *spec, 295 | } 296 | logger.Info("Creating a Job", "job", job) 297 | if err := ctrl.SetControllerReference(nodeOp, job, r.Scheme); err != nil { 298 | return ctrl.Result{}, err 299 | } 300 | if err := r.Create(ctx, job); err != nil { 301 | return ctrl.Result{}, err 302 | } 303 | r.eventRecorder.Eventf(nodeOp, "Normal", "CreatedJob", `Created Job "%s" in "%s"`, job.Name, job.Namespace) 304 | } else if len(childJobs.Items) == 1 { 305 | job = &childJobs.Items[0] 306 | } else { 307 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseFailed 308 | nodeOp.Status.Reason = "more than 1 Job for this controller are found" 309 | if err := r.Update(ctx, nodeOp); err != nil { 310 | return ctrl.Result{}, err 311 | } 312 | return ctrl.Result{}, nil 313 | } 314 | 315 | ref, err := reference.GetReference(r.Scheme, job) 316 | if err != nil { 317 | 
return ctrl.Result{}, err 318 | } 319 | nodeOp.Status.JobReference = *ref 320 | nodeOp.Status.Reason = "" 321 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseJobCreating 322 | if err := r.Update(ctx, nodeOp); err != nil { 323 | return ctrl.Result{}, err 324 | } 325 | 326 | return ctrl.Result{}, nil 327 | } 328 | 329 | func (r *NodeOperationReconciler) reconcileJobCreating(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (ctrl.Result, error) { 330 | job := batchv1.Job{} 331 | ref := nodeOp.Status.JobReference 332 | if err := r.Get(ctx, client.ObjectKey{Namespace: ref.Namespace, Name: ref.Name}, &job); err != nil { 333 | sterr, ok := err.(*errors.StatusError) 334 | if ok && sterr.Status().Code == http.StatusNotFound { 335 | return ctrl.Result{}, nil 336 | } 337 | return ctrl.Result{}, err 338 | } 339 | 340 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseRunning 341 | if err := r.Update(ctx, nodeOp); err != nil { 342 | return ctrl.Result{}, err 343 | } 344 | 345 | return ctrl.Result{}, nil 346 | } 347 | 348 | func (r *NodeOperationReconciler) reconcileRunning(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (ctrl.Result, error) { 349 | job := batchv1.Job{} 350 | ref := nodeOp.Status.JobReference 351 | 352 | if err := r.Get(ctx, client.ObjectKey{Namespace: ref.Namespace, Name: ref.Name}, &job); err != nil { 353 | sterr, ok := err.(*errors.StatusError) 354 | if !ok || sterr.Status().Code != http.StatusNotFound { 355 | return ctrl.Result{}, err 356 | } 357 | // Job not found 358 | nodeOp.Status.Reason = "Job has been deleted" 359 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseFailed 360 | } else { 361 | _, condition := isJobFinished(&job) 362 | 363 | switch condition { 364 | case "": 365 | return ctrl.Result{}, nil 366 | case batchv1.JobFailed: 367 | r.eventRecorder.Eventf(nodeOp, "Normal", "JobFinished", `Job "%s" in "%s" has failed`, job.Name, job.Namespace) 368 | nodeOp.Status.Reason = "Job has failed" 369 | 
nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseFailed 370 | case batchv1.JobComplete: 371 | r.eventRecorder.Eventf(nodeOp, "Normal", "JobFinished", `Job "%s" in "%s" has completed`, job.Name, job.Namespace) 372 | 373 | if remediationName := nodeOp.NodeRemediationName(); remediationName != "" { 374 | var remediation nodeopsv1alpha1.NodeRemediation 375 | if err := r.Get(ctx, client.ObjectKey{Namespace: nodeOp.Namespace, Name: remediationName}, &remediation); err != nil { 376 | return ctrl.Result{}, err 377 | } 378 | 379 | if remediation.Status.NodeStatus != nodeopsv1alpha1.NodeStatusOK { 380 | r.eventRecorder.Eventf(nodeOp, corev1.EventTypeNormal, "JobCompletedButNotRemediated", `Job "%s" in "%s" has completed but the Node is not remediated yet.`, job.Name, job.Namespace) 381 | return ctrl.Result{}, nil 382 | } 383 | } 384 | 385 | nodeOp.Status.Reason = "Job has completed" 386 | nodeOp.Status.Phase = nodeopsv1alpha1.NodeOperationPhaseCompleted 387 | } 388 | } 389 | 390 | // untaint the Node 391 | node := &corev1.Node{} 392 | if err := r.Get(ctx, client.ObjectKey{Namespace: "", Name: nodeOp.Spec.NodeName}, node); err != nil { 393 | return ctrl.Result{}, err 394 | } 395 | // After untainting, other NodeOperations for the node can proceed from Pending phase. 396 | if err := r.untaintNode(node); err != nil { 397 | return ctrl.Result{Requeue: true}, nil 398 | } 399 | r.eventRecorder.Eventf(nodeOp, "Normal", "UntaintNode", `Untainted a Node "%s"`, node.Name) 400 | 401 | // update after untaint 402 | if err := r.Update(ctx, nodeOp); err != nil { 403 | return ctrl.Result{}, err 404 | } 405 | 406 | return ctrl.Result{}, nil 407 | } 408 | 409 | // SetupWithManager sets up the controller with the Manager. 
410 | func (r *NodeOperationReconciler) SetupWithManager(mgr ctrl.Manager) error { 411 | r.clientset = kubernetes.NewForConfigOrDie(mgr.GetConfig()) 412 | r.mutex = sync.Mutex{} 413 | // create index for NodeOperation name 414 | if err := mgr.GetFieldIndexer().IndexField(context.Background(), &batchv1.Job{}, jobOwnerKey, func(rawObj client.Object) []string { 415 | job := rawObj.(*batchv1.Job) 416 | owner := metav1.GetControllerOf(job) 417 | if owner == nil { 418 | return nil 419 | } 420 | if owner.APIVersion != nodeopsv1alpha1.GroupVersion.String() || owner.Kind != "NodeOperation" { 421 | return nil 422 | } 423 | return []string{owner.Name} 424 | }); err != nil { 425 | return err 426 | } 427 | 428 | r.eventRecorder = mgr.GetEventRecorderFor(eventSourceName) 429 | r.evictionStrategyProcessor = newEvictionStrategyProcessor(r.Client, r.clientset, r.eventRecorder) 430 | 431 | return ctrl.NewControllerManagedBy(mgr). 432 | For(&nodeopsv1alpha1.NodeOperation{}). 433 | Owns(&batchv1.Job{}). 434 | Complete(r) 435 | } 436 | 437 | // drain try to perform drain pods in the node of nodeOp and returns drained or not, or error. 
438 | func (r *NodeOperationReconciler) drain(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 439 | podList := &corev1.PodList{} 440 | if err := r.List(ctx, podList); err != nil { 441 | return false, err 442 | } 443 | 444 | pods := []corev1.Pod{} 445 | for _, pod := range podList.Items { 446 | if pod.Status.Phase != corev1.PodRunning { 447 | continue 448 | } 449 | if pod.Spec.NodeName != nodeOp.Spec.NodeName { 450 | continue 451 | } 452 | if _, ok := pod.Annotations["kubernetes.io/config.mirror"]; ok { // mirror Pod 453 | continue 454 | } 455 | daemonSet := false 456 | for _, ref := range pod.OwnerReferences { 457 | if ref.Kind == "DaemonSet" { 458 | daemonSet = true 459 | break 460 | } 461 | } 462 | if daemonSet { 463 | continue 464 | } 465 | 466 | pods = append(pods, pod) 467 | } 468 | 469 | return r.evictionStrategyProcessor.do(ctx, pods, nodeOp) 470 | } 471 | 472 | func (r *NodeOperationReconciler) taintNode(node *corev1.Node) error { 473 | ctx := context.Background() 474 | for _, t := range node.Spec.Taints { 475 | if isControllerTaint(t) { 476 | return nil 477 | } 478 | } 479 | 480 | node.Spec.Taints = append(node.Spec.Taints, controllerTaint) 481 | if err := r.Update(ctx, node); err != nil { 482 | return err 483 | } 484 | return nil 485 | } 486 | 487 | func (r *NodeOperationReconciler) untaintNode(node *corev1.Node) error { 488 | ctx := context.Background() 489 | taints := []corev1.Taint{} 490 | for _, t := range node.Spec.Taints { 491 | if isControllerTaint(t) { 492 | continue 493 | } 494 | taints = append(taints, t) 495 | } 496 | node.Spec.Taints = taints 497 | if err := r.Update(ctx, node); err != nil { 498 | return err 499 | } 500 | return nil 501 | } 502 | 503 | func (r *NodeOperationReconciler) doesViolateNDB(ctx context.Context, nodeOp *nodeopsv1alpha1.NodeOperation) (bool, error) { 504 | nodeList := &corev1.NodeList{} 505 | if err := r.List(ctx, nodeList); err != nil { 506 | return true, err 507 | } 508 | 509 | ndbList := 
&nodeopsv1alpha1.NodeDisruptionBudgetList{} 510 | if err := r.List(ctx, ndbList); err != nil { 511 | return true, err 512 | } 513 | 514 | return doesViolateNDB(nodeOp, ndbList.Items, nodeList.Items), nil 515 | } 516 | 517 | func doesViolateNDB(nodeOp *nodeopsv1alpha1.NodeOperation, ndbs []nodeopsv1alpha1.NodeDisruptionBudget, nodes []corev1.Node) bool { 518 | for _, ndb := range ndbs { 519 | if !labels.SelectorFromSet(nodeOp.Spec.NodeDisruptionBudgetSelector).Matches(labels.Set(ndb.Labels)) { 520 | continue 521 | } 522 | 523 | taintTargets := ndb.Spec.TaintTargets 524 | taintTargets = append(taintTargets, nodeopsv1alpha1.TaintTarget{ 525 | Key: controllerTaint.Key, 526 | Effect: controllerTaint.Effect, 527 | Operator: nodeopsv1alpha1.TaintTargetOpExists, 528 | }) 529 | 530 | var unavailableCount uint64 531 | selectedNodeNames := map[string]struct{}{} 532 | 533 | nextNode: 534 | for _, n := range nodes { 535 | for k, v := range ndb.Spec.Selector { 536 | if n.Labels[k] != v { 537 | continue nextNode 538 | } 539 | } 540 | selectedNodeNames[n.Name] = struct{}{} 541 | 542 | nextTaint: 543 | for _, taint := range n.Spec.Taints { 544 | isTarget := false 545 | for _, target := range taintTargets { 546 | if target.IsTarget(&taint) { 547 | isTarget = true 548 | break 549 | } 550 | } 551 | if !isTarget { 552 | continue nextTaint 553 | } 554 | 555 | unavailableCount++ 556 | continue nextNode 557 | } 558 | } 559 | 560 | availableCount := uint64(len(selectedNodeNames)) - unavailableCount 561 | 562 | if ndb.Spec.MaxUnavailable != nil && *ndb.Spec.MaxUnavailable <= unavailableCount { 563 | return true 564 | } 565 | 566 | if ndb.Spec.MinAvailable != nil && availableCount <= *ndb.Spec.MinAvailable { 567 | return true 568 | } 569 | } 570 | 571 | return false 572 | } 573 | 574 | func isJobFinished(job *batchv1.Job) (bool, batchv1.JobConditionType) { 575 | for _, c := range job.Status.Conditions { 576 | if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == 
corev1.ConditionTrue { 577 | return true, c.Type 578 | } 579 | } 580 | 581 | return false, "" 582 | } 583 | 584 | func isControllerTaint(taint corev1.Taint) bool { 585 | return taint == controllerTaint 586 | } 587 | -------------------------------------------------------------------------------- /controllers/nodeoperation_controller_test.go: -------------------------------------------------------------------------------- 1 | package controllers 2 | 3 | import ( 4 | "testing" 5 | 6 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 7 | "github.com/stretchr/testify/assert" 8 | corev1 "k8s.io/api/core/v1" 9 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 10 | ) 11 | 12 | func TestDoesViolateNDB(t *testing.T) { 13 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 14 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 15 | {}, 16 | } 17 | nodes := []corev1.Node{ 18 | {}, 19 | } 20 | 21 | assert.False(t, doesViolateNDB(nodeOp, ndbs, nodes)) 22 | } 23 | 24 | func TestDoesViolateNDBWithMinAvailableViolation(t *testing.T) { 25 | minAvailable := uint64(1) 26 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 27 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 28 | { 29 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 30 | MinAvailable: &minAvailable, 31 | }, 32 | }, 33 | } 34 | nodes := []corev1.Node{ 35 | {}, 36 | } 37 | 38 | assert.True(t, doesViolateNDB(nodeOp, ndbs, nodes)) 39 | } 40 | 41 | func TestDoesViolateNDBWithMinAvailableNoViolation(t *testing.T) { 42 | minAvailable := uint64(0) 43 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 44 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 45 | { 46 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 47 | MinAvailable: &minAvailable, 48 | }, 49 | }, 50 | } 51 | nodes := []corev1.Node{ 52 | {}, 53 | } 54 | 55 | assert.False(t, doesViolateNDB(nodeOp, ndbs, nodes)) 56 | } 57 | 58 | func TestDoesViolateNDBWithMaxUnavailableViolation(t *testing.T) { 59 | n := uint64(0) 60 | nodeOp := 
&nodeopsv1alpha1.NodeOperation{} 61 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 62 | { 63 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 64 | MaxUnavailable: &n, 65 | }, 66 | }, 67 | } 68 | nodes := []corev1.Node{ 69 | {}, 70 | } 71 | 72 | assert.True(t, doesViolateNDB(nodeOp, ndbs, nodes)) 73 | } 74 | 75 | func TestDoesViolateNDBWithMaxUnavailableNoViolation(t *testing.T) { 76 | n := uint64(1) 77 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 78 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 79 | { 80 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 81 | MaxUnavailable: &n, 82 | }, 83 | }, 84 | } 85 | nodes := []corev1.Node{ 86 | {}, 87 | } 88 | 89 | assert.False(t, doesViolateNDB(nodeOp, ndbs, nodes)) 90 | } 91 | 92 | func TestDoesViolateNDBWithNodeSelectorNoViolation(t *testing.T) { 93 | n := uint64(0) 94 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 95 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 96 | { 97 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 98 | Selector: map[string]string{ 99 | "k1": "v1", 100 | }, 101 | MinAvailable: &n, 102 | }, 103 | }, 104 | } 105 | nodes := []corev1.Node{ 106 | { 107 | ObjectMeta: metav1.ObjectMeta{ 108 | Labels: map[string]string{ 109 | "k1": "v1", 110 | }, 111 | }, 112 | }, 113 | { 114 | ObjectMeta: metav1.ObjectMeta{ 115 | Labels: map[string]string{ 116 | "k1": "v2", 117 | }, 118 | }, 119 | }, 120 | } 121 | 122 | assert.False(t, doesViolateNDB(nodeOp, ndbs, nodes)) 123 | } 124 | 125 | func TestDoesViolateNDBWithNodeSelectorViolation(t *testing.T) { 126 | n := uint64(1) 127 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 128 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 129 | { 130 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 131 | Selector: map[string]string{ 132 | "k1": "v1", 133 | }, 134 | MinAvailable: &n, 135 | }, 136 | }, 137 | } 138 | nodes := []corev1.Node{ 139 | { 140 | ObjectMeta: metav1.ObjectMeta{ 141 | Labels: map[string]string{ 142 | "k1": "v1", 143 | }, 144 | }, 145 | }, 146 | { 147 | 
ObjectMeta: metav1.ObjectMeta{ 148 | Labels: map[string]string{ 149 | "k1": "v2", 150 | }, 151 | }, 152 | }, 153 | } 154 | 155 | assert.True(t, doesViolateNDB(nodeOp, ndbs, nodes)) 156 | } 157 | 158 | func TestDoesViolateNDBWithNDBSelectorViolation(t *testing.T) { 159 | n := uint64(1) 160 | nodeOp := &nodeopsv1alpha1.NodeOperation{ 161 | Spec: nodeopsv1alpha1.NodeOperationSpec{ 162 | NodeOperationSpecTemplate: nodeopsv1alpha1.NodeOperationSpecTemplate{ 163 | NodeDisruptionBudgetSelector: map[string]string{ 164 | "k1": "v1", 165 | }, 166 | }, 167 | }, 168 | } 169 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 170 | { 171 | ObjectMeta: metav1.ObjectMeta{ 172 | Labels: map[string]string{ 173 | "k1": "v1", 174 | }, 175 | }, 176 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 177 | MinAvailable: &n, 178 | }, 179 | }, 180 | } 181 | nodes := []corev1.Node{ 182 | {}, 183 | } 184 | 185 | assert.True(t, doesViolateNDB(nodeOp, ndbs, nodes)) 186 | } 187 | 188 | func TestDoesViolateNDBWithNDBSelectorNoViolation(t *testing.T) { 189 | n := uint64(1) 190 | nodeOp := &nodeopsv1alpha1.NodeOperation{ 191 | Spec: nodeopsv1alpha1.NodeOperationSpec{ 192 | NodeOperationSpecTemplate: nodeopsv1alpha1.NodeOperationSpecTemplate{ 193 | NodeDisruptionBudgetSelector: map[string]string{ 194 | "k1": "v1", 195 | }, 196 | }, 197 | }, 198 | } 199 | ndbs := []nodeopsv1alpha1.NodeDisruptionBudget{ 200 | { 201 | ObjectMeta: metav1.ObjectMeta{ 202 | Labels: map[string]string{ 203 | "k1": "v2", 204 | }, 205 | }, 206 | Spec: nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 207 | MinAvailable: &n, 208 | }, 209 | }, 210 | } 211 | nodes := []corev1.Node{ 212 | {}, 213 | } 214 | 215 | assert.False(t, doesViolateNDB(nodeOp, ndbs, nodes)) 216 | } 217 | 218 | func TestDoesViolateNDBWithTaintTargets(t *testing.T) { 219 | nodeOp := &nodeopsv1alpha1.NodeOperation{} 220 | buildNDBs := func(n uint64) []nodeopsv1alpha1.NodeDisruptionBudget { 221 | return []nodeopsv1alpha1.NodeDisruptionBudget{ 222 | { 223 | Spec: 
nodeopsv1alpha1.NodeDisruptionBudgetSpec{ 224 | TaintTargets: []nodeopsv1alpha1.TaintTarget{ 225 | { 226 | Key: "k1", 227 | Effect: corev1.TaintEffectNoSchedule, 228 | Operator: nodeopsv1alpha1.TaintTargetOpExists, 229 | }, 230 | }, 231 | MaxUnavailable: &n, 232 | }, 233 | }, 234 | } 235 | } 236 | nodes := []corev1.Node{ 237 | { 238 | Spec: corev1.NodeSpec{ 239 | Taints: []corev1.Taint{ 240 | controllerTaint, 241 | }, 242 | }, 243 | }, 244 | { 245 | Spec: corev1.NodeSpec{ 246 | Taints: []corev1.Taint{ 247 | { 248 | Key: "k1", 249 | Effect: corev1.TaintEffectNoSchedule, 250 | }, 251 | }, 252 | }, 253 | }, 254 | { 255 | Spec: corev1.NodeSpec{ 256 | Taints: []corev1.Taint{ 257 | { 258 | Key: "k2", 259 | Effect: corev1.TaintEffectNoSchedule, 260 | }, 261 | }, 262 | }, 263 | }, 264 | } 265 | 266 | assert.True(t, doesViolateNDB(nodeOp, buildNDBs(2), nodes)) 267 | assert.False(t, doesViolateNDB(nodeOp, buildNDBs(3), nodes)) 268 | } 269 | -------------------------------------------------------------------------------- /controllers/nodeoperationtemplate_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | 22 | "k8s.io/apimachinery/pkg/runtime" 23 | ctrl "sigs.k8s.io/controller-runtime" 24 | "sigs.k8s.io/controller-runtime/pkg/client" 25 | "sigs.k8s.io/controller-runtime/pkg/log" 26 | 27 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 28 | ) 29 | 30 | // NodeOperationTemplateReconciler reconciles a NodeOperationTemplate object 31 | type NodeOperationTemplateReconciler struct { 32 | client.Client 33 | Scheme *runtime.Scheme 34 | } 35 | 36 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperationtemplates,verbs=get;list;watch;create;update;patch;delete 37 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperationtemplates/status,verbs=get;update;patch 38 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=nodeoperationtemplates/finalizers,verbs=update 39 | 40 | // Reconcile is part of the main kubernetes reconciliation loop which aims to 41 | // move the current state of the cluster closer to the desired state. 42 | // TODO(user): Modify the Reconcile function to compare the state specified by 43 | // the NodeOperationTemplate object against the actual cluster state, and then 44 | // perform operations to make the cluster state reflect the state specified by 45 | // the user. 46 | // 47 | // For more details, check Reconcile and its Result here: 48 | // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile 49 | func (r *NodeOperationTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 50 | _ = log.FromContext(ctx) 51 | 52 | // TODO(user): your logic here 53 | 54 | return ctrl.Result{}, nil 55 | } 56 | 57 | // SetupWithManager sets up the controller with the Manager. 58 | func (r *NodeOperationTemplateReconciler) SetupWithManager(mgr ctrl.Manager) error { 59 | return ctrl.NewControllerManagedBy(mgr). 
60 | For(&nodeopsv1alpha1.NodeOperationTemplate{}). 61 | Complete(r) 62 | } 63 | -------------------------------------------------------------------------------- /controllers/noderemediation_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | 23 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 24 | corev1 "k8s.io/api/core/v1" 25 | apierrors "k8s.io/apimachinery/pkg/api/errors" 26 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 27 | "k8s.io/apimachinery/pkg/runtime" 28 | "k8s.io/apimachinery/pkg/types" 29 | "k8s.io/client-go/tools/record" 30 | "k8s.io/client-go/tools/reference" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | "sigs.k8s.io/controller-runtime/pkg/client" 33 | "sigs.k8s.io/controller-runtime/pkg/handler" 34 | "sigs.k8s.io/controller-runtime/pkg/log" 35 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 36 | ) 37 | 38 | var operationRemediationOwnerKey = "operationRemediationOwner" 39 | 40 | // NodeRemediationReconciler reconciles a NodeRemediation object 41 | type NodeRemediationReconciler struct { 42 | client.Client 43 | Scheme *runtime.Scheme 44 | 45 | eventRecorder record.EventRecorder 46 | } 47 | 48 | 
//+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediations,verbs=get;list;watch;create;update;patch;delete 49 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediations/status,verbs=get;update;patch 50 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediations/finalizers,verbs=update 51 | 52 | // Reconcile is part of the main kubernetes reconciliation loop which aims to 53 | // move the current state of the cluster closer to the desired state. 54 | // TODO(user): Modify the Reconcile function to compare the state specified by 55 | // the NodeRemediation object against the actual cluster state, and then 56 | // perform operations to make the cluster state reflect the state specified by 57 | // the user. 58 | // 59 | // For more details, check Reconcile and its Result here: 60 | // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile 61 | func (r *NodeRemediationReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 62 | _ = log.FromContext(ctx) 63 | var err error 64 | 65 | var remediation nodeopsv1alpha1.NodeRemediation 66 | if err := r.Get(ctx, req.NamespacedName, &remediation); err != nil { 67 | return ctrl.Result{}, client.IgnoreNotFound(err) 68 | } 69 | 70 | var node corev1.Node 71 | if err := r.Get(ctx, types.NamespacedName{Name: remediation.Spec.NodeName}, &node); err != nil { 72 | return ctrl.Result{}, err 73 | } 74 | 75 | nodeStatus := remediation.CompareNodeCondition(node.Status.Conditions) 76 | if nodeStatus != remediation.Status.NodeStatus { 77 | remediation.Status.NodeStatus = nodeStatus 78 | if err := r.Status().Update(ctx, &remediation); err != nil { 79 | return ctrl.Result{}, err 80 | } 81 | } 82 | 83 | var childOps nodeopsv1alpha1.NodeOperationList 84 | if err := r.List(ctx, &childOps, client.MatchingFields{operationRemediationOwnerKey: remediation.Name}); err != nil { 85 | return ctrl.Result{}, err 86 | } 87 | 88 | var activeOp 
*nodeopsv1alpha1.NodeOperation 89 | for _, op := range childOps.Items { 90 | if op.Status.Phase == nodeopsv1alpha1.NodeOperationPhaseCompleted || 91 | op.Status.Phase == nodeopsv1alpha1.NodeOperationPhaseFailed { 92 | continue 93 | } 94 | activeOp = &op 95 | break 96 | } 97 | 98 | var ref *corev1.ObjectReference 99 | if activeOp == nil { 100 | ref = &corev1.ObjectReference{} 101 | } else { 102 | ref, err = reference.GetReference(r.Scheme, activeOp) 103 | if err != nil { 104 | return ctrl.Result{}, err 105 | } 106 | } 107 | 108 | remediation.Status.ActiveNodeOperation = *ref 109 | if err := r.Status().Update(ctx, &remediation); err != nil { 110 | return ctrl.Result{}, err 111 | } 112 | 113 | // Check node condition 114 | switch remediation.Status.NodeStatus { 115 | case nodeopsv1alpha1.NodeStatusUnknown: 116 | r.eventRecorder.Eventf(&remediation, corev1.EventTypeNormal, "UnknownNodeStatus", "Because at least one Node condition is unknown status, remediation process is skipped") 117 | return ctrl.Result{}, nil 118 | case nodeopsv1alpha1.NodeStatusOK: 119 | // reset OperationsCount 120 | remediation.Status.OperationsCount = 0 121 | if err := r.Status().Update(ctx, &remediation); err != nil { 122 | return ctrl.Result{}, err 123 | } 124 | 125 | if ref := remediation.Status.ActiveNodeOperation; ref.Name != "" { 126 | // active operation exists 127 | var nodeOp nodeopsv1alpha1.NodeOperation 128 | if err := r.Get(ctx, client.ObjectKey{Namespace: ref.Namespace, Name: ref.Name}, &nodeOp); apierrors.IsNotFound(err) { 129 | // Do nothing 130 | } else if err != nil { 131 | return ctrl.Result{}, err 132 | } else { 133 | if err := r.Delete(ctx, &nodeOp); err != nil { 134 | return ctrl.Result{}, err 135 | } 136 | 137 | r.eventRecorder.Eventf(&remediation, corev1.EventTypeNormal, "DeleteNodeOperation", `Deleted NodeOperation %s because the Node is remediated`, nodeOp.Name) 138 | } 139 | 140 | remediation.Status.ActiveNodeOperation = corev1.ObjectReference{} 141 | if err := 
r.Status().Update(ctx, &remediation); err != nil { 142 | return ctrl.Result{}, err 143 | } 144 | } 145 | 146 | return ctrl.Result{}, nil 147 | } 148 | 149 | if remediation.Status.ActiveNodeOperation.Name != "" { 150 | // active operation exists 151 | return ctrl.Result{}, nil 152 | } 153 | 154 | // Avoid to create too many NodeOperations 155 | if 0 < remediation.Status.OperationsCount { 156 | // TODO: backoff feature. We can calculate the next backoff-ed trial timestamp from the counter value and the latest child NodeOperation completion timestamp 157 | r.eventRecorder.Eventf(&remediation, corev1.EventTypeNormal, "NodeIsNotRemediated", `Though a NodeOperation has finished, the Node is not remediated. Skipping to create a NodeOperation.`) 158 | return ctrl.Result{}, nil 159 | } 160 | 161 | // Create nodeOperation 162 | var nodeOpTemplate nodeopsv1alpha1.NodeOperationTemplate 163 | if err := r.Get(ctx, types.NamespacedName{Name: remediation.Spec.NodeOperationTemplateName}, &nodeOpTemplate); err != nil { 164 | return ctrl.Result{}, err 165 | } 166 | 167 | opMeta := nodeOpTemplate.Spec.Template.Metadata.DeepCopy() 168 | if opMeta.Name == "" && opMeta.GenerateName == "" { 169 | opMeta.GenerateName = fmt.Sprintf("%s-", remediation.Name) 170 | } 171 | if opMeta.Labels == nil { 172 | opMeta.Labels = map[string]string{} 173 | } 174 | 175 | op := nodeopsv1alpha1.NodeOperation{ 176 | ObjectMeta: *opMeta, 177 | Spec: nodeopsv1alpha1.NodeOperationSpec{ 178 | NodeName: node.Name, 179 | NodeOperationSpecTemplate: nodeOpTemplate.Spec.Template.Spec, 180 | }, 181 | } 182 | if err := ctrl.SetControllerReference(&remediation, &op, r.Scheme); err != nil { 183 | return ctrl.Result{}, err 184 | } 185 | 186 | if err := r.Create(ctx, &op); err != nil { 187 | return ctrl.Result{}, err 188 | } 189 | r.eventRecorder.Eventf(&remediation, corev1.EventTypeNormal, "CreatedNodeOperation", `Created a NodeOperation "%s"`, op.Name) 190 | 191 | // Update reference to NodeOperation 192 | ref, err = 
reference.GetReference(r.Scheme, &op) 193 | if err != nil { 194 | return ctrl.Result{}, err 195 | } 196 | remediation.Status.ActiveNodeOperation = *ref 197 | remediation.Status.OperationsCount++ 198 | if err := r.Status().Update(ctx, &remediation); err != nil { 199 | return ctrl.Result{}, err 200 | } 201 | 202 | return ctrl.Result{}, nil 203 | } 204 | 205 | // SetupWithManager sets up the controller with the Manager. 206 | func (r *NodeRemediationReconciler) SetupWithManager(mgr ctrl.Manager) error { 207 | logger := ctrl.Log.WithName("NodeRemediationControllerSetup") 208 | ctx := context.Background() 209 | 210 | r.eventRecorder = mgr.GetEventRecorderFor("node-operation-controller") 211 | 212 | if err := mgr.GetFieldIndexer().IndexField(ctx, &nodeopsv1alpha1.NodeOperation{}, operationRemediationOwnerKey, func(rawObj client.Object) []string { 213 | op := rawObj.(*nodeopsv1alpha1.NodeOperation) 214 | owner := metav1.GetControllerOf(op) 215 | if owner == nil { 216 | return nil 217 | } 218 | if owner.APIVersion != nodeopsv1alpha1GVStr || owner.Kind != "NodeRemediation" { 219 | return nil 220 | } 221 | return []string{owner.Name} 222 | }); err != nil { 223 | return err 224 | } 225 | 226 | nodeMapFn := func(ctx context.Context, a client.Object) []reconcile.Request { 227 | nodeName := a.GetName() 228 | 229 | remediations := &nodeopsv1alpha1.NodeRemediationList{} 230 | // TODO: use MatchingFields 231 | if err := r.List(ctx, remediations); err != nil { 232 | logger.Info("Failed to list NodeRemediations") 233 | return []reconcile.Request{} 234 | } 235 | 236 | var requests []reconcile.Request 237 | for _, remediation := range remediations.Items { 238 | if remediation.Spec.NodeName == nodeName { 239 | requests = append(requests, reconcile.Request{ 240 | NamespacedName: types.NamespacedName{ 241 | Name: remediation.Name, 242 | }, 243 | }) 244 | } 245 | } 246 | 247 | return requests 248 | } 249 | 250 | return ctrl.NewControllerManagedBy(mgr). 
251 | For(&nodeopsv1alpha1.NodeRemediation{}). 252 | Owns(&nodeopsv1alpha1.NodeOperation{}). 253 | Watches(&corev1.Node{}, handler.EnqueueRequestsFromMapFunc(nodeMapFn)). 254 | Complete(r) 255 | } 256 | -------------------------------------------------------------------------------- /controllers/noderemediationtemplate_controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package controllers 18 | 19 | import ( 20 | "context" 21 | "fmt" 22 | 23 | corev1 "k8s.io/api/core/v1" 24 | "k8s.io/apimachinery/pkg/api/errors" 25 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 | "k8s.io/apimachinery/pkg/labels" 27 | "k8s.io/apimachinery/pkg/runtime" 28 | "k8s.io/apimachinery/pkg/types" 29 | "k8s.io/client-go/tools/record" 30 | ctrl "sigs.k8s.io/controller-runtime" 31 | "sigs.k8s.io/controller-runtime/pkg/client" 32 | "sigs.k8s.io/controller-runtime/pkg/handler" 33 | "sigs.k8s.io/controller-runtime/pkg/log" 34 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 35 | 36 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 37 | ) 38 | 39 | var ( 40 | remediationOwnerKey = "ownerNodeRemediationTemplate" 41 | nodeopsv1alpha1GVStr = nodeopsv1alpha1.GroupVersion.String() 42 | ) 43 | 44 | // NodeRemediationTemplateReconciler reconciles a NodeRemediationTemplate object 45 | type NodeRemediationTemplateReconciler struct { 46 | client.Client 47 | Scheme *runtime.Scheme 48 | 49 | eventRecorder record.EventRecorder 50 | } 51 | 52 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediationtemplates,verbs=get;list;watch;create;update;patch;delete 53 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediationtemplates/status,verbs=get;update;patch 54 | //+kubebuilder:rbac:groups=nodeops.k8s.preferred.jp,resources=noderemediationtemplates/finalizers,verbs=update 55 | 56 | // Reconcile is part of the main kubernetes reconciliation loop which aims to 57 | // move the current state of the cluster closer to the desired state. 58 | // TODO(user): Modify the Reconcile function to compare the state specified by 59 | // the NodeRemediationTemplate object against the actual cluster state, and then 60 | // perform operations to make the cluster state reflect the state specified by 61 | // the user. 
62 | // 63 | // For more details, check Reconcile and its Result here: 64 | // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.10.0/pkg/reconcile 65 | func (r *NodeRemediationTemplateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 66 | _ = log.FromContext(ctx) 67 | 68 | var template nodeopsv1alpha1.NodeRemediationTemplate 69 | if err := r.Get(ctx, req.NamespacedName, &template); err != nil { 70 | sterr, ok := err.(*errors.StatusError) 71 | if ok && sterr.Status().Code == 404 { 72 | return ctrl.Result{}, nil 73 | } 74 | return ctrl.Result{}, err 75 | } 76 | 77 | var childRemediations nodeopsv1alpha1.NodeRemediationList 78 | if err := r.List(ctx, &childRemediations, client.MatchingFields{remediationOwnerKey: template.Name}); err != nil { 79 | return ctrl.Result{}, err 80 | } 81 | 82 | childRemediationByNodeName := map[string]*nodeopsv1alpha1.NodeRemediation{} 83 | for _, remediation := range childRemediations.Items { 84 | childRemediationByNodeName[remediation.Spec.NodeName] = &remediation 85 | } 86 | 87 | var nodes corev1.NodeList 88 | if err := r.List(ctx, &nodes); err != nil { 89 | return ctrl.Result{}, err 90 | } 91 | 92 | nodeSelector := labels.SelectorFromSet(template.Spec.NodeSelector) 93 | for _, node := range nodes.Items { 94 | if !nodeSelector.Matches(labels.Set(node.Labels)) { 95 | continue 96 | } 97 | 98 | if childRemediation, ok := childRemediationByNodeName[node.Name]; ok { 99 | // update if remediation exists 100 | 101 | // update labels 102 | if childRemediation.ObjectMeta.Labels == nil { 103 | childRemediation.ObjectMeta.Labels = map[string]string{} 104 | } 105 | for k, v := range template.Spec.Template.Metadata.Labels { 106 | childRemediation.ObjectMeta.Labels[k] = v 107 | } 108 | 109 | // update annotations 110 | if childRemediation.ObjectMeta.Annotations == nil { 111 | childRemediation.ObjectMeta.Annotations = map[string]string{} 112 | } 113 | for k, v := range template.Spec.Template.Metadata.Annotations 
{ 114 | childRemediation.ObjectMeta.Annotations[k] = v 115 | } 116 | 117 | childRemediation.Spec.NodeRemediationSpecTemplate = template.Spec.Template.Spec 118 | if err := r.Update(ctx, childRemediation); err != nil { 119 | return ctrl.Result{}, nil 120 | } 121 | } else { 122 | // new remediation 123 | meta := template.Spec.Template.Metadata.DeepCopy() 124 | if meta.Name == "" && meta.GenerateName == "" { 125 | meta.GenerateName = fmt.Sprintf("%s-%s-", template.Name, node.Name) 126 | } 127 | remediation := nodeopsv1alpha1.NodeRemediation{ 128 | ObjectMeta: *meta, 129 | Spec: nodeopsv1alpha1.NodeRemediationSpec{ 130 | NodeRemediationSpecTemplate: template.Spec.Template.Spec, 131 | NodeName: node.Name, 132 | }, 133 | } 134 | 135 | if err := ctrl.SetControllerReference(&template, &remediation, r.Scheme); err != nil { 136 | return ctrl.Result{}, err 137 | } 138 | 139 | if err := r.Create(ctx, &remediation); err != nil { 140 | return ctrl.Result{}, err 141 | } 142 | 143 | r.eventRecorder.Eventf(&template, corev1.EventTypeNormal, "CreatedRemediation", `Created a NodeRemediation "%s"`, remediation.Name) 144 | } 145 | 146 | delete(childRemediationByNodeName, node.Name) 147 | } 148 | 149 | for _, remediation := range childRemediationByNodeName { 150 | if err := r.Delete(ctx, remediation); err != nil { 151 | return ctrl.Result{}, nil 152 | } 153 | } 154 | 155 | return ctrl.Result{}, nil 156 | } 157 | 158 | // SetupWithManager sets up the controller with the Manager. 
//
// It wires the event recorder, registers a field index that maps each
// NodeRemediation to the name of its controlling NodeRemediationTemplate, and
// builds a controller that reconciles on changes to templates, to the
// NodeRemediations they own, and to Nodes (mapped to every template whose
// Spec.NodeSelector matches the Node's labels).
func (r *NodeRemediationTemplateReconciler) SetupWithManager(mgr ctrl.Manager) error {
	// Named after this controller so its log lines are not attributed to the
	// NodeRemediation controller's setup.
	logger := ctrl.Log.WithName("NodeRemediationTemplateControllerSetup")
	ctx := context.Background()

	r.eventRecorder = mgr.GetEventRecorderFor("node-operation-controller")

	// Index NodeRemediations by the name of their controlling template;
	// objects with no controller, or with a controller of another kind or API
	// version, are left out of the index.
	if err := mgr.GetFieldIndexer().IndexField(ctx, &nodeopsv1alpha1.NodeRemediation{}, remediationOwnerKey, func(rawObj client.Object) []string {
		remediation := rawObj.(*nodeopsv1alpha1.NodeRemediation)
		owner := metav1.GetControllerOf(remediation)
		if owner == nil {
			return nil
		}
		if owner.APIVersion != nodeopsv1alpha1GVStr || owner.Kind != "NodeRemediationTemplate" {
			return nil
		}
		return []string{owner.Name}
	}); err != nil {
		return err
	}

	// nodeMapFn translates a Node event into reconcile requests for the
	// templates that select that Node.
	nodeMapFn := func(ctx context.Context, a client.Object) []reconcile.Request {
		templates := &nodeopsv1alpha1.NodeRemediationTemplateList{}
		if err := r.List(ctx, templates); err != nil {
			// Keep the cause in the log; a map function has no way to return it.
			logger.Error(err, "Failed to list NodeRemediationTemplates")
			return []reconcile.Request{}
		}

		nodeLabels := a.GetLabels()
		var requests []reconcile.Request

		for _, template := range templates.Items {
			if !labels.SelectorFromSet(template.Spec.NodeSelector).Matches(labels.Set(nodeLabels)) {
				continue
			}
			requests = append(requests, reconcile.Request{
				NamespacedName: types.NamespacedName{
					Name: template.Name,
				},
			})
		}

		return requests
	}

	return ctrl.NewControllerManagedBy(mgr).
		For(&nodeopsv1alpha1.NodeRemediationTemplate{}).
		Owns(&nodeopsv1alpha1.NodeRemediation{}).
		Watches(&corev1.Node{}, handler.EnqueueRequestsFromMapFunc(nodeMapFn)).
		Complete(r)
}
210 | -------------------------------------------------------------------------------- /doc/images/nodeoperation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfnet-research/node-operation-controller/387882401c144640b8044e3d78e0549cbf3b0950/doc/images/nodeoperation.png -------------------------------------------------------------------------------- /doc/images/noderemediationrule.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pfnet-research/node-operation-controller/387882401c144640b8044e3d78e0549cbf3b0950/doc/images/noderemediationrule.png -------------------------------------------------------------------------------- /e2e/e2e_test.go: -------------------------------------------------------------------------------- 1 | package e2e 2 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/pfnet-research/node-operation-controller 2 | 3 | go 1.23.8 4 | 5 | require ( 6 | github.com/google/go-cmp v0.7.0 7 | github.com/onsi/ginkgo/v2 v2.22.0 8 | github.com/onsi/gomega v1.36.1 9 | github.com/stretchr/testify v1.10.0 10 | k8s.io/api v0.31.7 11 | k8s.io/apimachinery v0.31.7 12 | k8s.io/client-go v0.31.7 13 | sigs.k8s.io/controller-runtime v0.19.7 14 | ) 15 | 16 | require ( 17 | github.com/beorn7/perks v1.0.1 // indirect 18 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 19 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 20 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 21 | github.com/evanphx/json-patch/v5 v5.9.0 // indirect 22 | github.com/fsnotify/fsnotify v1.7.0 // indirect 23 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 24 | github.com/go-logr/logr v1.4.2 // indirect 25 | github.com/go-logr/zapr v1.3.0 // indirect 26
| github.com/go-openapi/jsonpointer v0.19.6 // indirect 27 | github.com/go-openapi/jsonreference v0.20.2 // indirect 28 | github.com/go-openapi/swag v0.22.4 // indirect 29 | github.com/go-task/slim-sprig/v3 v3.0.0 // indirect 30 | github.com/gogo/protobuf v1.3.2 // indirect 31 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 32 | github.com/golang/protobuf v1.5.4 // indirect 33 | github.com/google/gnostic-models v0.6.8 // indirect 34 | github.com/google/gofuzz v1.2.0 // indirect 35 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect 36 | github.com/google/uuid v1.6.0 // indirect 37 | github.com/imdario/mergo v0.3.6 // indirect 38 | github.com/josharian/intern v1.0.0 // indirect 39 | github.com/json-iterator/go v1.1.12 // indirect 40 | github.com/mailru/easyjson v0.7.7 // indirect 41 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 42 | github.com/modern-go/reflect2 v1.0.2 // indirect 43 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 44 | github.com/pkg/errors v0.9.1 // indirect 45 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 46 | github.com/prometheus/client_golang v1.19.1 // indirect 47 | github.com/prometheus/client_model v0.6.1 // indirect 48 | github.com/prometheus/common v0.55.0 // indirect 49 | github.com/prometheus/procfs v0.15.1 // indirect 50 | github.com/spf13/pflag v1.0.5 // indirect 51 | github.com/x448/float16 v0.8.4 // indirect 52 | go.uber.org/multierr v1.11.0 // indirect 53 | go.uber.org/zap v1.26.0 // indirect 54 | golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc // indirect 55 | golang.org/x/net v0.30.0 // indirect 56 | golang.org/x/oauth2 v0.21.0 // indirect 57 | golang.org/x/sys v0.26.0 // indirect 58 | golang.org/x/term v0.25.0 // indirect 59 | golang.org/x/text v0.19.0 // indirect 60 | golang.org/x/time v0.3.0 // indirect 61 | golang.org/x/tools v0.26.0 // indirect 62 | 
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 63 | google.golang.org/protobuf v1.35.1 // indirect 64 | gopkg.in/inf.v0 v0.9.1 // indirect 65 | gopkg.in/yaml.v2 v2.4.0 // indirect 66 | gopkg.in/yaml.v3 v3.0.1 // indirect 67 | k8s.io/apiextensions-apiserver v0.31.0 // indirect 68 | k8s.io/klog/v2 v2.130.1 // indirect 69 | k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect 70 | k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect 71 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 72 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 73 | sigs.k8s.io/yaml v1.4.0 // indirect 74 | ) 75 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 2 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 3 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 4 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 5 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 6 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 7 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 8 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= 9 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= 11 | github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 12 | github.com/evanphx/json-patch v0.5.2 h1:xVCHIVMUu1wtM/VkR9jVZ45N3FhZfYMMYGorLCR8P3k= 13 | github.com/evanphx/json-patch 
v0.5.2/go.mod h1:ZWS5hhDbVDyob71nXKNL0+PWn6ToqBHMikGIFbs31qQ= 14 | github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= 15 | github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= 16 | github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= 17 | github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= 18 | github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= 19 | github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= 20 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 21 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 22 | github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= 23 | github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= 24 | github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= 25 | github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= 26 | github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= 27 | github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= 28 | github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= 29 | github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= 30 | github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= 31 | github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= 32 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 33 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 34 | github.com/gogo/protobuf v1.3.2/go.mod 
h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 35 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= 36 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 37 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 38 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 39 | github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= 40 | github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= 41 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 42 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 43 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 44 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 45 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 46 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 47 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= 48 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= 49 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 50 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 51 | github.com/imdario/mergo v0.3.6 h1:xTNEAn+kxVO7dTZGu0CegyqKZmoWFI0rF8UxjlB2d28= 52 | github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA= 53 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 54 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 55 | github.com/json-iterator/go 
v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 56 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 57 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 58 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 59 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 60 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 61 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 62 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 63 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 64 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 65 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 66 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 67 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 68 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 69 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 70 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 71 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 72 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 73 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 74 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 75 | github.com/onsi/ginkgo/v2 v2.22.0 
h1:Yed107/8DjTr0lKCNt7Dn8yQ6ybuDRQoMGrNFKzMfHg= 76 | github.com/onsi/ginkgo/v2 v2.22.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= 77 | github.com/onsi/gomega v1.36.1 h1:bJDPBO7ibjxcbHMgSCoo4Yj18UWbKDlLwX1x9sybDcw= 78 | github.com/onsi/gomega v1.36.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= 79 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 80 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 81 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 82 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= 83 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 84 | github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE= 85 | github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho= 86 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= 87 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 88 | github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= 89 | github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= 90 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 91 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= 92 | github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= 93 | github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= 94 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 95 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 96 | github.com/stretchr/objx 
v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 97 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 98 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 99 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 100 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 101 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 102 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 103 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 104 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 105 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= 106 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= 107 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 108 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 109 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 110 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 111 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 112 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 113 | go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= 114 | go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= 115 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 116 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 117 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod 
h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 118 | golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc h1:mCRnTeVUjcrhlRmO0VK8a6k6Rrf6TF9htwo2pJVSjIU= 119 | golang.org/x/exp v0.0.0-20230515195305-f3d0a9c9a5cc/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w= 120 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 121 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 122 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 123 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 124 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 125 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 126 | golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= 127 | golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= 128 | golang.org/x/oauth2 v0.21.0 h1:tsimM75w1tF/uws5rbeHzIWxEqElMehnc+iW793zsZs= 129 | golang.org/x/oauth2 v0.21.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= 130 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 131 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 132 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 133 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 134 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 135 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 136 | golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= 137 | 
golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 138 | golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= 139 | golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= 140 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 141 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 142 | golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= 143 | golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= 144 | golang.org/x/time v0.3.0 h1:rg5rLMjNzMS1RkNLzCG38eapWhnYLFYXDXj2gOlr8j4= 145 | golang.org/x/time v0.3.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= 146 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 147 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 148 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 149 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 150 | golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= 151 | golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= 152 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 153 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 154 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 155 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 156 | gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= 157 | gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod 
h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= 158 | google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= 159 | google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 160 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 161 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 162 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 163 | gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= 164 | gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= 165 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 166 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 167 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 168 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 169 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 170 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 171 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 172 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 173 | k8s.io/api v0.31.7 h1:wSo59nXpVXmaB6hgNVJCrdnKtyYoutIgpNNBbROBd2U= 174 | k8s.io/api v0.31.7/go.mod h1:vLUha4nXRUGtQdayzsmjur0lQApK/sJSxyR/fwuujcU= 175 | k8s.io/apiextensions-apiserver v0.31.0 h1:fZgCVhGwsclj3qCw1buVXCV6khjRzKC5eCFt24kyLSk= 176 | k8s.io/apiextensions-apiserver v0.31.0/go.mod h1:b9aMDEYaEe5sdK+1T0KU78ApR/5ZVp4i56VacZYEHxk= 177 | k8s.io/apimachinery v0.31.7 h1:fpV8yLerIZFAkj0of66+i1ArPv/Btf9KO6Aulng7RRw= 178 | k8s.io/apimachinery v0.31.7/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= 179 | k8s.io/client-go v0.31.7 
h1:2+LFJc6Xw6rhmpDbN1NSmhoFLWBh62cPG/P+IfaTSGY= 180 | k8s.io/client-go v0.31.7/go.mod h1:hrrMorBQ17LqzoKIxKg5cSWvmWl94EwA/MUF0Mkf+Zw= 181 | k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= 182 | k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= 183 | k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= 184 | k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= 185 | k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= 186 | k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 187 | sigs.k8s.io/controller-runtime v0.19.7 h1:DLABZfMr20A+AwCZOHhcbcu+TqBXnJZaVBri9K3EO48= 188 | sigs.k8s.io/controller-runtime v0.19.7/go.mod h1:iRmWllt8IlaLjvTTDLhRBXIEtkCK6hwVBJJsYS9Ajf4= 189 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= 190 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= 191 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= 192 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 193 | sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 194 | sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= 195 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2021. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "flag" 21 | "os" 22 | "time" 23 | 24 | // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) 25 | // to ensure that exec-entrypoint and run can make use of them. 
26 | _ "k8s.io/client-go/plugin/pkg/client/auth" 27 | 28 | "k8s.io/apimachinery/pkg/runtime" 29 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 30 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 31 | ctrl "sigs.k8s.io/controller-runtime" 32 | "sigs.k8s.io/controller-runtime/pkg/healthz" 33 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 34 | "sigs.k8s.io/controller-runtime/pkg/metrics/server" 35 | 36 | nodeopsv1alpha1 "github.com/pfnet-research/node-operation-controller/api/v1alpha1" 37 | "github.com/pfnet-research/node-operation-controller/controllers" 38 | //+kubebuilder:scaffold:imports 39 | ) 40 | 41 | var ( 42 | scheme = runtime.NewScheme() 43 | setupLog = ctrl.Log.WithName("setup") 44 | ) 45 | 46 | func init() { 47 | utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 48 | 49 | utilruntime.Must(nodeopsv1alpha1.AddToScheme(scheme)) 50 | //+kubebuilder:scaffold:scheme 51 | } 52 | 53 | func main() { 54 | var metricsAddr string 55 | var enableLeaderElection bool 56 | var probeAddr string 57 | flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") 58 | flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") 59 | flag.BoolVar(&enableLeaderElection, "leader-elect", false, 60 | "Enable leader election for controller manager. 
"+ 61 | "Enabling this will ensure there is only one active controller manager.") 62 | opts := zap.Options{ 63 | Development: true, 64 | } 65 | opts.BindFlags(flag.CommandLine) 66 | flag.Parse() 67 | 68 | ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) 69 | 70 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 71 | Scheme: scheme, 72 | Metrics: server.Options{ 73 | BindAddress: metricsAddr, 74 | }, 75 | HealthProbeBindAddress: probeAddr, 76 | LeaderElection: enableLeaderElection, 77 | LeaderElectionID: "869fe74b.k8s.preferred.jp", 78 | }) 79 | if err != nil { 80 | setupLog.Error(err, "unable to start manager") 81 | os.Exit(1) 82 | } 83 | 84 | if err = (&controllers.NodeOperationReconciler{ 85 | Client: mgr.GetClient(), 86 | Scheme: mgr.GetScheme(), 87 | DrainInterval: time.Minute, 88 | NDBRetryInterval: time.Minute, 89 | }).SetupWithManager(mgr); err != nil { 90 | setupLog.Error(err, "unable to create controller", "controller", "NodeOperation") 91 | os.Exit(1) 92 | } 93 | if err = (&controllers.NodeDisruptionBudgetReconciler{ 94 | Client: mgr.GetClient(), 95 | Scheme: mgr.GetScheme(), 96 | }).SetupWithManager(mgr); err != nil { 97 | setupLog.Error(err, "unable to create controller", "controller", "NodeDisruptionBudget") 98 | os.Exit(1) 99 | } 100 | if err = (&controllers.NodeOperationTemplateReconciler{ 101 | Client: mgr.GetClient(), 102 | Scheme: mgr.GetScheme(), 103 | }).SetupWithManager(mgr); err != nil { 104 | setupLog.Error(err, "unable to create controller", "controller", "NodeOperationTemplate") 105 | os.Exit(1) 106 | } 107 | if err = (&controllers.NodeRemediationTemplateReconciler{ 108 | Client: mgr.GetClient(), 109 | Scheme: mgr.GetScheme(), 110 | }).SetupWithManager(mgr); err != nil { 111 | setupLog.Error(err, "unable to create controller", "controller", "NodeRemediationTemplate") 112 | os.Exit(1) 113 | } 114 | if err = (&controllers.NodeRemediationReconciler{ 115 | Client: mgr.GetClient(), 116 | Scheme: mgr.GetScheme(), 117 | 
}).SetupWithManager(mgr); err != nil { 118 | setupLog.Error(err, "unable to create controller", "controller", "NodeRemediation") 119 | os.Exit(1) 120 | } 121 | //+kubebuilder:scaffold:builder 122 | 123 | if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { 124 | setupLog.Error(err, "unable to set up health check") 125 | os.Exit(1) 126 | } 127 | if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { 128 | setupLog.Error(err, "unable to set up ready check") 129 | os.Exit(1) 130 | } 131 | 132 | setupLog.Info("starting manager") 133 | if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 134 | setupLog.Error(err, "problem running manager") 135 | os.Exit(1) 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /tutorial/README.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | 3 | ## Create a kind cluster 4 | 5 | ``` 6 | $ kind create cluster --config tutorial/kind.yaml --name nodeops-tutorial 7 | ``` 8 | 9 | ``` 10 | $ kubectl get node 11 | NAME STATUS ROLES AGE VERSION 12 | nodeops-tutorial-control-plane Ready control-plane 59s v1.24.0 13 | nodeops-tutorial-worker Ready <none> 23s v1.24.0 14 | nodeops-tutorial-worker2 Ready <none> 23s v1.24.0 15 | ``` 16 | 17 | ## Deploy a controller 18 | 19 | ``` 20 | $ make install 21 | $ make run 22 | ``` 23 | 24 | ## Create a first NodeOperation 25 | 26 | Open another terminal (keep `make run` running): 27 | 28 | ``` 29 | $ cat tutorial/nodeoperation-tutorial1.yaml 30 | $ kubectl apply -f tutorial/nodeoperation-tutorial1.yaml 31 | ``` 32 | 33 | ## Create a NodeDisruptionBudget (NDB) 34 | 35 | ``` 36 | $ cat tutorial/nodedisruptionbudget-tutorial1.yaml 37 | $ kubectl apply -f tutorial/nodedisruptionbudget-tutorial1.yaml 38 | ``` 39 | 40 | ## Create NodeOperations and see how the NDB works 41 | 42 | ``` 43 | $ cat tutorial/nodeoperation-tutorial2.yaml 44 | $ cat tutorial/nodeoperation-tutorial3.yaml 45 | 46 | $ kubectl
apply -f tutorial/nodeoperation-tutorial2.yaml 47 | $ kubectl apply -f tutorial/nodeoperation-tutorial3.yaml 48 | 49 | $ kubectl get nodeoperation 50 | NAME NODENAME PHASE AGE 51 | tutorial1 nodeops-tutorial-worker Completed 10m 52 | tutorial2 nodeops-tutorial-worker Running 14s 53 | tutorial3 nodeops-tutorial-worker2 Pending 11s 54 | 55 | $ kubectl get nodeoperation 56 | NAME NODENAME PHASE AGE 57 | tutorial1 nodeops-tutorial-worker Completed 12m 58 | tutorial2 nodeops-tutorial-worker Completed 2m37s 59 | tutorial3 nodeops-tutorial-worker2 Running 2m34s 60 | 61 | $ kubectl get nodeoperation 62 | NAME NODENAME PHASE AGE 63 | tutorial1 nodeops-tutorial-worker Completed 15m 64 | tutorial2 nodeops-tutorial-worker Completed 5m7s 65 | tutorial3 nodeops-tutorial-worker2 Completed 5m4s 66 | ``` 67 | 68 | ## Create NodeRemediationTemplate 69 | 70 | ``` 71 | $ kubectl apply -f tutorial/nodeoperationtemplate-tutorial1.yaml 72 | $ kubectl apply -f tutorial/noderemediationtemplate-tutorial1.yaml 73 | ``` 74 | 75 | ``` 76 | $ kubectl label node nodeops-tutorial-worker 'auto-remediation=' 77 | ``` 78 | 79 | ``` 80 | $ kubectl proxy --port=8090 & 81 | $ curl -H 'content-type: application/json-patch+json' -d '[{"op": "add", "path": "/status/conditions", "value": [{"status": "True", "type": "Tutorial"}] }]' -XPATCH 'localhost:8090/api/v1/nodes/nodeops-tutorial-worker/status' 82 | ``` 83 | 84 | ``` 85 | $ kubectl get nodeoperation 86 | NAME NODENAME PHASE AGE 87 | tutorial1 nodeops-tutorial-worker Completed 20m 88 | tutorial1-nodeops-tutorial-worker-fpmg7-zs42p nodeops-tutorial-worker Completed 51s 89 | tutorial2 nodeops-tutorial-worker Completed 10m 90 | tutorial3 nodeops-tutorial-worker2 Completed 10m 91 | ``` 92 | 93 | ## Clean up 94 | 95 | ``` 96 | $ kind delete cluster --name=nodeops-tutorial 97 | ``` 98 | -------------------------------------------------------------------------------- /tutorial/kind.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: kind.x-k8s.io/v1alpha4 2 | kind: Cluster 3 | nodes: 4 | - role: control-plane 5 | - role: worker 6 | - role: worker 7 | -------------------------------------------------------------------------------- /tutorial/nodedisruptionbudget-tutorial1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeDisruptionBudget 3 | metadata: 4 | name: tutorial1 5 | spec: 6 | selector: {} 7 | maxUnavailable: 1 8 | -------------------------------------------------------------------------------- /tutorial/nodeoperation-tutorial1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperation 3 | metadata: 4 | name: tutorial1 5 | spec: 6 | nodeName: "nodeops-tutorial-worker" 7 | jobTemplate: 8 | metadata: 9 | namespace: default 10 | spec: 11 | template: 12 | spec: 13 | containers: 14 | - name: operation 15 | image: busybox 16 | command: ["sh", "-c", "echo Do some operation for $TARGET_NODE && sleep 60 && echo Done"] 17 | env: 18 | - name: TARGET_NODE 19 | valueFrom: 20 | fieldRef: 21 | fieldPath: "metadata.annotations['nodeops.k8s.preferred.jp/nodename']" 22 | restartPolicy: Never 23 | -------------------------------------------------------------------------------- /tutorial/nodeoperation-tutorial2.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperation 3 | metadata: 4 | name: tutorial2 5 | spec: 6 | nodeName: "nodeops-tutorial-worker" 7 | jobTemplate: 8 | metadata: 9 | namespace: default 10 | spec: 11 | template: 12 | spec: 13 | containers: 14 | - name: operation 15 | image: busybox 16 | command: ["sh", "-c", "sleep 60 && echo done"] 17 | restartPolicy: Never 18 | 
-------------------------------------------------------------------------------- /tutorial/nodeoperation-tutorial3.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperation 3 | metadata: 4 | name: tutorial3 5 | spec: 6 | nodeName: "nodeops-tutorial-worker2" 7 | jobTemplate: 8 | metadata: 9 | namespace: default 10 | spec: 11 | template: 12 | spec: 13 | containers: 14 | - name: operation 15 | image: busybox 16 | command: ["sh", "-c", "sleep 60 && echo done"] 17 | restartPolicy: Never 18 | -------------------------------------------------------------------------------- /tutorial/nodeoperationtemplate-tutorial1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeOperationTemplate 3 | metadata: 4 | name: tutorial1 5 | spec: 6 | template: 7 | metadata: {} 8 | spec: # NodeOperationSpec 9 | jobTemplate: 10 | metadata: 11 | namespace: default 12 | spec: # batchv1.JobSpec 13 | template: 14 | spec: 15 | containers: 16 | - name: operation 17 | image: busybox 18 | command: ["echo", "Do some operation here"] 19 | restartPolicy: Never 20 | -------------------------------------------------------------------------------- /tutorial/noderemediationtemplate-tutorial1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: nodeops.k8s.preferred.jp/v1alpha1 2 | kind: NodeRemediationTemplate 3 | metadata: 4 | name: tutorial1 5 | spec: 6 | nodeSelector: 7 | auto-remediation: '' 8 | template: 9 | spec: 10 | nodeOperationTemplateName: tutorial1 11 | rule: 12 | conditions: 13 | - type: Tutorial 14 | status: "True" 15 | --------------------------------------------------------------------------------