├── .github └── workflows │ ├── chart-test.yml │ ├── e2e.yml │ ├── govulncheck.yml │ ├── release-chart.yml │ ├── release.yaml │ ├── test.yml │ └── validate-license.yml ├── .gitignore ├── .golangci.yml ├── .goreleaser.yml ├── Dockerfile ├── Dockerfile.windows ├── LICENSE ├── Makefile ├── README.md ├── charts └── aks-node-termination-handler │ ├── Chart.yaml │ ├── templates │ ├── configmap.yaml │ ├── daemonset.yaml │ ├── networkpolicy.yaml │ └── rbac.yaml │ └── values.yaml ├── cmd └── main.go ├── codecov.yml ├── e2e ├── main_test.go └── testdata │ └── config_test.yaml ├── go.mod ├── go.sum ├── internal └── internal.go ├── mock └── mock.go ├── pkg ├── alert │ └── alert.go ├── api │ └── api.go ├── cache │ ├── cache.go │ └── cache_test.go ├── client │ └── client.go ├── config │ ├── config.go │ ├── config_test.go │ └── testdata │ │ ├── config_test.yaml │ │ └── config_yaml_fake.yaml ├── events │ ├── events.go │ └── events_test.go ├── logger │ ├── logger.go │ └── logger_test.go ├── metrics │ ├── metrics.go │ └── metrics_test.go ├── template │ ├── README.md │ ├── template.go │ ├── template_test.go │ └── testdata │ │ └── message.json ├── types │ ├── testdata │ │ └── ScheduledEventsType.json │ ├── types.go │ └── types_test.go ├── utils │ ├── utils.go │ └── utils_test.go ├── web │ └── web.go └── webhook │ ├── testdata │ └── WebhookTemplateFile.txt │ ├── webhook.go │ └── webhook_test.go └── scripts └── validate-license.sh /.github/workflows/chart-test.yml: -------------------------------------------------------------------------------- 1 | on: pull_request 2 | 3 | jobs: 4 | lint-test: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout 8 | uses: actions/checkout@v2 9 | with: 10 | fetch-depth: 0 11 | 12 | - name: Set up Helm 13 | uses: azure/setup-helm@v1 14 | with: 15 | version: v3.8.1 16 | 17 | - uses: actions/setup-python@v2 18 | with: 19 | python-version: 3.7 20 | 21 | - name: Set up chart-testing 22 | uses: helm/chart-testing-action@v2.2.1 23 | 24 | - name: Run 
chart-testing (lint) 25 | run: ct lint --target-branch main -------------------------------------------------------------------------------- /.github/workflows/e2e.yml: -------------------------------------------------------------------------------- 1 | on: pull_request 2 | 3 | jobs: 4 | e2e: 5 | runs-on: ubuntu-latest 6 | strategy: 7 | matrix: 8 | k3s_version: ["v1.26.11+k3s2","v1.27.8+k3s2","v1.28.4+k3s2","v1.29.2+k3s1"] 9 | steps: 10 | - name: Setup Kubernetes 11 | run: curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="${{ matrix.k3s_version }}" K3S_KUBECONFIG_MODE=777 sh - 12 | - name: Checkout 13 | uses: actions/checkout@v4 14 | - name: Setup Go 15 | uses: actions/setup-go@v4 16 | with: 17 | go-version: '1.23' 18 | - name: Run tests 19 | run: make e2e KUBECONFIG=/etc/rancher/k3s/k3s.yaml node=$(kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml get no --no-headers | awk '{print $1}' | tail -1) -------------------------------------------------------------------------------- /.github/workflows/govulncheck.yml: -------------------------------------------------------------------------------- 1 | on: pull_request 2 | 3 | jobs: 4 | govulncheck: 5 | name: test 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v2 9 | - uses: actions/setup-go@v2 10 | with: 11 | stable: 'false' 12 | go-version: '1.23' 13 | - run: go run golang.org/x/vuln/cmd/govulncheck@latest ./... 
-------------------------------------------------------------------------------- /.github/workflows/release-chart.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | 6 | jobs: 7 | release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 11 | uses: actions/checkout@v2 12 | with: 13 | fetch-depth: 0 14 | - name: Configure Git 15 | run: | 16 | git config user.name "${GITHUB_ACTOR}" 17 | git config user.email "${GITHUB_ACTOR}@users.noreply.github.com" 18 | - name: Install Helm 19 | uses: azure/setup-helm@v1 20 | with: 21 | version: v3.4.2 22 | - name: Run chart-releaser 23 | uses: helm/chart-releaser-action@v1.1.0 24 | env: 25 | CR_TOKEN: ${{ secrets.GITHUB_TOKEN }} 26 | CR_RELEASE_NAME_TEMPLATE: "helm-chart-{{ .Version }}" -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - v* 5 | 6 | permissions: 7 | contents: write 8 | 9 | env: 10 | IMAGE: paskalmaksim/aks-node-termination-handler:${{github.ref_name}} 11 | IMAGE_LATEST: paskalmaksim/aks-node-termination-handler:latest 12 | # IMAGE: paskalmaksim/aks-node-termination-handler:test-${{ github.run_id }} 13 | # IMAGE_LATEST: paskalmaksim/aks-node-termination-handler:test-latest 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | - name: Set up Go 24 | uses: actions/setup-go@v4 25 | with: 26 | go-version: '1.23' 27 | - name: Remove Git Tags with Charts 28 | run: git tag -d $(git tag -l "helm-chart-*") 29 | - name: Run GoReleaser 30 | uses: goreleaser/goreleaser-action@v5 31 | with: 32 | distribution: goreleaser 33 | version: latest 34 | # args: build --clean --skip=validate --snapshot 35 | args: release --clean 36 | env: 37 | GITHUB_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} 38 | - run: cp Dockerfile ./dist/aks-node-termination-handler_linux_amd64_v1/Dockerfile 39 | - run: cp Dockerfile ./dist/aks-node-termination-handler_linux_arm64/Dockerfile 40 | - run: cp Dockerfile.windows ./dist/aks-node-termination-handler_windows_amd64_v1/Dockerfile 41 | - run: tar -cvf release.tar ./dist 42 | - uses: actions/upload-artifact@v4 43 | with: 44 | name: release 45 | retention-days: 1 46 | path: release.tar 47 | 48 | publish-linux-amd64: 49 | runs-on: ubuntu-latest 50 | needs: build 51 | steps: 52 | - uses: docker/login-action@v3 53 | with: 54 | username: ${{ secrets.DOCKER_USERNAME }} 55 | password: ${{ secrets.DOCKER_PASSWORD }} 56 | - uses: actions/download-artifact@v4 57 | - run: tar xvf ./release/release.tar 58 | - run: "docker build --pull --push --platform linux/amd64 -t ${{ env.IMAGE }}-linux-amd64 ." 59 | working-directory: ./dist/aks-node-termination-handler_linux_amd64_v1 60 | 61 | publish-linux-arm64: 62 | runs-on: ubuntu-latest 63 | needs: build 64 | steps: 65 | - uses: docker/login-action@v3 66 | with: 67 | username: ${{ secrets.DOCKER_USERNAME }} 68 | password: ${{ secrets.DOCKER_PASSWORD }} 69 | - uses: docker/setup-qemu-action@v2 70 | - uses: docker/setup-buildx-action@v2 71 | - uses: actions/download-artifact@v4 72 | - run: tar xvf ./release/release.tar 73 | - run: "docker build --pull --push --platform linux/arm64 -t ${{ env.IMAGE }}-linux-arm64 ." 
74 | working-directory: ./dist/aks-node-termination-handler_linux_arm64 75 | 76 | publish-windows-amd64: 77 | runs-on: windows-latest 78 | strategy: 79 | matrix: 80 | windows-version: [ 'ltsc2019', 'ltsc2022' ] 81 | needs: build 82 | steps: 83 | - uses: docker/login-action@v3 84 | with: 85 | username: ${{ secrets.DOCKER_USERNAME }} 86 | password: ${{ secrets.DOCKER_PASSWORD }} 87 | - uses: actions/download-artifact@v4 88 | - run: tar xvf ./release/release.tar 89 | - run: "docker build --build-arg WINDOWS_VERSION=${{ matrix.windows-version }} --pull --platform windows/amd64 -t ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64 ." 90 | working-directory: ./dist/aks-node-termination-handler_windows_amd64_v1 91 | - run: docker push ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64 92 | 93 | publish-manifest: 94 | runs-on: ubuntu-latest 95 | needs: [publish-linux-amd64, publish-linux-arm64, publish-windows-amd64] 96 | steps: 97 | - uses: docker/login-action@v3 98 | with: 99 | username: ${{ secrets.DOCKER_USERNAME }} 100 | password: ${{ secrets.DOCKER_PASSWORD }} 101 | - run: docker manifest create ${{ env.IMAGE }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64 102 | - run: docker manifest push ${{ env.IMAGE }} 103 | - run: docker manifest create ${{ env.IMAGE_LATEST }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64 104 | - run: docker manifest push ${{ env.IMAGE_LATEST }} 105 | - run: docker manifest create ${{ env.IMAGE_LATEST }}-ltsc2019 ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2019-amd64 106 | - run: docker manifest push ${{ env.IMAGE_LATEST }}-ltsc2019 -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 
6 | 7 | jobs: 8 | test: 9 | name: test 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-go@v2 14 | with: 15 | stable: 'false' 16 | go-version: '1.23' 17 | - run: make test 18 | - name: Upload coverage to Codecov 19 | uses: codecov/codecov-action@v3 -------------------------------------------------------------------------------- /.github/workflows/validate-license.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | tags: 4 | - v* 5 | branches: 6 | - main 7 | pull_request: 8 | jobs: 9 | validate-license: 10 | name: validate-license 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - run: ./scripts/validate-license.sh -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | kubeconfig 2 | dist 3 | /aks-node-termination-handler 4 | simulateEviction 5 | coverage.out 6 | *.tmp -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | issues: 4 | fix: true 5 | linters-settings: 6 | lll: 7 | line-length: 200 8 | linters: 9 | enable-all: true 10 | disable: 11 | - deadcode # deprecated 12 | - exhaustivestruct # deprecated 13 | - golint # deprecated 14 | - ifshort # deprecated 15 | - interfacer # deprecated 16 | - maligned # deprecated 17 | - nosnakecase # deprecated 18 | - scopelint # deprecated 19 | - structcheck # deprecated 20 | - varcheck # deprecated 21 | - gochecknoglobals 22 | - exhaustivestruct 23 | - exhaustruct 24 | - varnamelen 25 | - musttag 26 | - depguard -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: 
aks-node-termination-handler 2 | release: 3 | footer: | 4 | ## Docker Images 5 | - `paskalmaksim/{{.ProjectName}}:latest` 6 | - `paskalmaksim/{{.ProjectName}}:{{ .Tag }}` 7 | builds: 8 | - dir: ./cmd/ 9 | env: 10 | - CGO_ENABLED=0 11 | flags: 12 | - -trimpath 13 | ldflags: 14 | - -s -w -X github.com/maksim-paskal/aks-node-termination-handler/pkg/config.gitVersion={{.Version}}-{{.ShortCommit}}-{{.Timestamp}} 15 | goos: 16 | - linux 17 | - windows 18 | goarch: 19 | - amd64 20 | - arm64 21 | checksum: 22 | name_template: 'checksums.txt' 23 | snapshot: 24 | name_template: "{{ .Tag }}-next" 25 | changelog: 26 | sort: asc 27 | filters: 28 | exclude: 29 | - '^docs:' 30 | - '^test:' 31 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine:latest 2 | 3 | WORKDIR /app/ 4 | 5 | COPY ./aks-node-termination-handler /app/aks-node-termination-handler 6 | 7 | RUN apk upgrade \ 8 | && addgroup -g 30523 -S app \ 9 | && adduser -u 30523 -D -S -G app app 10 | 11 | USER 30523 12 | 13 | ENTRYPOINT [ "/app/aks-node-termination-handler" ] -------------------------------------------------------------------------------- /Dockerfile.windows: -------------------------------------------------------------------------------- 1 | ARG WINDOWS_VERSION=ltsc2022 2 | 3 | FROM mcr.microsoft.com/windows/nanoserver:$WINDOWS_VERSION 4 | 5 | WORKDIR /app/ 6 | 7 | COPY ./aks-node-termination-handler.exe /app/aks-node-termination-handler.exe 8 | USER ContainerUser 9 | 10 | ENTRYPOINT [ "/app/aks-node-termination-handler.exe" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright paskal.maksim@gmail.com 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | KUBECONFIG=$(HOME)/.kube/azure-stage 2 | tag=dev 3 | image=paskalmaksim/aks-node-termination-handler:$(tag) 4 | telegramToken=1072104160:AAH2sFpHELeH5oxMmd-tsVjgTuzoYO6hSLM 5 | telegramChatID=-439460552 6 | node=`kubectl get no -lkubernetes.azure.com/scalesetpriority=spot | awk '{print $$1}' | tail -1` 7 | 8 | chart-lint: 9 | ct lint --all 10 | helm template ./charts/aks-node-termination-handler | kubectl apply --dry-run=client -f - 11 | 12 | build: 13 | git tag -d `git tag -l "helm-chart-*"` 14 | go run github.com/goreleaser/goreleaser@latest build --clean --skip=validate --snapshot 15 | mv ./dist/aks-node-termination-handler_linux_amd64_v1/aks-node-termination-handler aks-node-termination-handler 16 | docker build --pull --push . 
-t $(image) 17 | 18 | push: 19 | docker push $(image) 20 | 21 | deploy: 22 | helm uninstall aks-node-termination-handler --namespace kube-system || true 23 | helm upgrade aks-node-termination-handler \ 24 | --install \ 25 | --namespace kube-system \ 26 | ./charts/aks-node-termination-handler \ 27 | --set image=paskalmaksim/aks-node-termination-handler:dev \ 28 | --set imagePullPolicy=Always \ 29 | --set priorityClassName=system-node-critical \ 30 | --set args[0]=-telegram.token=${telegramToken} \ 31 | --set args[1]=-telegram.chatID=${telegramChatID} \ 32 | --set args[2]=-taint.node \ 33 | --set args[3]=-taint.effect=NoExecute \ 34 | --set args[4]=-podGracePeriodSeconds=30 \ 35 | 36 | clean: 37 | kubectl delete ns aks-node-termination-handler 38 | 39 | run: 40 | # https://t.me/joinchat/iaWV0bPT_Io5NGYy 41 | go run --race ./cmd \ 42 | -kubeconfig=${KUBECONFIG} \ 43 | -node=$(node) \ 44 | -log.level=DEBUG \ 45 | -log.pretty \ 46 | -taint.node \ 47 | -taint.effect=NoExecute \ 48 | -podGracePeriodSeconds=30 \ 49 | -gracePeriodSeconds=0 \ 50 | -endpoint=http://localhost:28080/pkg/types/testdata/ScheduledEventsType.json \ 51 | -webhook.url=http://localhost:9091/metrics/job/aks-node-termination-handler \ 52 | -webhook.template='node_termination_event{node="{{ .NodeName }}"} 1' \ 53 | -telegram.token=${telegramToken} \ 54 | -telegram.chatID=${telegramChatID} \ 55 | -web.address=127.0.0.1:17923 56 | 57 | run-mock: 58 | go run --race ./mock -address=127.0.0.1:28080 59 | 60 | test: 61 | ./scripts/validate-license.sh 62 | go mod tidy 63 | go fmt ./cmd/... ./pkg/... ./internal/... 64 | go vet ./cmd/... ./pkg/... ./internal/... 65 | go test --race -coverprofile coverage.out ./cmd/... ./pkg/... 
66 | go run github.com/golangci/golangci-lint/cmd/golangci-lint@latest run -v 67 | 68 | .PHONY: e2e 69 | e2e: 70 | go test -v -race ./e2e \ 71 | -kubeconfig=$(KUBECONFIG) \ 72 | -node=${node} \ 73 | -telegram.token=${telegramToken} \ 74 | -telegram.chatID=${telegramChatID} 75 | 76 | coverage: 77 | go tool cover -html=coverage.out 78 | 79 | test-release: # snapshot release dry run: nothing is published, ./dist is cleaned first 80 | go run github.com/goreleaser/goreleaser@latest release --snapshot --skip=publish --clean 81 | 82 | heap: 83 | go tool pprof -http=127.0.0.1:8080 http://localhost:17923/debug/pprof/heap 84 | 85 | upgrade: 86 | go get -v -u k8s.io/client-go@v0.21.11 87 | go get -v -u k8s.io/kubectl@v0.21.11 88 | go get -v -u k8s.io/api@v0.21.11 || true 89 | go get -v -u k8s.io/apimachinery@v0.21.11 90 | go mod tidy 91 | 92 | scan: # scan the built image and the rendered Helm chart with trivy 93 | @trivy image \ 94 | --ignore-unfixed --no-progress --severity HIGH,CRITICAL \ 95 | $(image) 96 | @helm template ./charts/aks-node-termination-handler > /tmp/aks-node-termination-handler.yaml 97 | @trivy config /tmp/aks-node-termination-handler.yaml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![codecov](https://codecov.io/gh/maksim-paskal/aks-node-termination-handler/graph/badge.svg?token=0Z0ENDM8VW) 2 | ![Docker Pulls](https://img.shields.io/docker/pulls/paskalmaksim/aks-node-termination-handler.svg) 3 | ![Licence](https://img.shields.io/github/license/maksim-paskal/aks-node-termination-handler.svg) 4 | 5 | # AKS Node Termination Handler 6 | 7 | Gracefully handle Azure Virtual Machines shutdown within Kubernetes 8 | 9 | ## Motivation 10 | 11 | This tool ensures that the Kubernetes cluster responds appropriately to events that can cause your Azure Virtual Machines to become unavailable, such as evictions of Azure Spot Virtual Machines or reboots.
If not handled, your application code may not stop gracefully, recovery to full availability may take longer, or work might accidentally be scheduled to nodes that are shutting down. This tool can also send Telegram, Slack or Webhook messages before Azure Virtual Machine evictions occur. 12 | 13 | Based on [Azure Scheduled Events](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events) and [Safely Drain a Node](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/) 14 | 15 | Supports Linux (amd64, arm64) and Windows 2022, 2019* (amd64) nodes. 16 | 17 | ## Create Azure Kubernetes Cluster 18 | 19 |
20 | Create basic AKS cluster with Azure CLI 21 | 22 | ```bash 23 | # https://learn.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-cli 24 | 25 | # Azure CLI version is 2.50.0 26 | az --version 27 | 28 | # Create resource group 29 | az group create \ 30 | --name test-aks-group-eastus \ 31 | --location eastus 32 | 33 | # Create the AKS cluster with a regular (non-Spot) node pool 34 | az aks create \ 35 | --resource-group test-aks-group-eastus \ 36 | --name MyManagedCluster \ 37 | --node-count 1 \ 38 | --node-vm-size Standard_DS2_v2 \ 39 | --enable-cluster-autoscaler \ 40 | --min-count 1 \ 41 | --max-count 3 42 | 43 | # Create Linux nodepool with Spot Virtual Machines and autoscaling 44 | az aks nodepool add \ 45 | --resource-group test-aks-group-eastus \ 46 | --cluster-name MyManagedCluster \ 47 | --name spotpool \ 48 | --priority Spot \ 49 | --eviction-policy Delete \ 50 | --spot-max-price -1 \ 51 | --enable-cluster-autoscaler \ 52 | --node-vm-size Standard_DS2_v2 \ 53 | --min-count 0 \ 54 | --max-count 10 55 | 56 | # Create Windows (Windows Server 2022) nodepool with Spot Virtual Machines and autoscaling 57 | az aks nodepool add \ 58 | --resource-group test-aks-group-eastus \ 59 | --cluster-name MyManagedCluster \ 60 | --os-type Windows \ 61 | --os-sku Windows2022 \ 62 | --priority Spot \ 63 | --eviction-policy Delete \ 64 | --spot-max-price -1 \ 65 | --enable-cluster-autoscaler \ 66 | --name spot01 \ 67 | --min-count 1 \ 68 | --max-count 3 69 | 70 | # Create Windows (Windows Server 2019) nodepool with Spot Virtual Machines and autoscaling 71 | az aks nodepool add \ 72 | --resource-group test-aks-group-eastus \ 73 | --cluster-name MyManagedCluster \ 74 | --os-type Windows \ 75 | --os-sku Windows2019 \ 76 | --priority Spot \ 77 | --eviction-policy Delete \ 78 | --spot-max-price -1 \ 79 | --enable-cluster-autoscaler \ 80 | --name spot2 \ 81 | --min-count 1 \ 82 | --max-count 3 83 | 84 | # Get config to connect to cluster 85 | az aks get-credentials \ 86 |
--resource-group test-aks-group-eastus \ 87 | --name MyManagedCluster 88 | ``` 89 | 90 |
91 | 92 | ## Installation 93 | 94 | ```bash 95 | helm repo add aks-node-termination-handler https://maksim-paskal.github.io/aks-node-termination-handler/ 96 | helm repo update 97 | 98 | helm upgrade aks-node-termination-handler \ 99 | --install \ 100 | --namespace kube-system \ 101 | aks-node-termination-handler/aks-node-termination-handler \ 102 | --set priorityClassName=system-node-critical 103 | ``` 104 | 105 | ## Send notification events 106 | 107 | You can compose your payload with markers that are described [here](pkg/template/README.md) 108 | 109 |
110 | Send Telegram notification 111 | 112 | ```bash 113 | helm upgrade aks-node-termination-handler \ 114 | --install \ 115 | --namespace kube-system \ 116 | aks-node-termination-handler/aks-node-termination-handler \ 117 | --set priorityClassName=system-node-critical \ 118 | --set 'args[0]=-telegram.token=' \ 119 | --set 'args[1]=-telegram.chatID=' 120 | ``` 121 |
122 | 123 |
124 | Send Slack notification 125 | 126 | ```bash 127 | # create payload file 128 | cat < 158 | 159 |
160 | Send Prometheus Pushgateway event 161 | 162 | ```bash 163 | cat < 188 | 189 |
190 | Use an HTTP proxy for making webhook requests 191 | 192 | Use the flag `-webhook.http-proxy=http://someproxy:3128` for making requests with a proxy. This flag can use HTTP or HTTPS addresses. You can also use basic auth. 193 | 194 | ```bash 195 | cat < 220 | 221 | ## Simulate eviction 222 | 223 | ### Using Azure CLI 224 | 225 | You need to install the [Azure Command-Line Interface](https://learn.microsoft.com/en-us/cli/azure/); you also need to set up [kubectl](https://learn.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-cli#connect-to-the-cluster) for your AKS cluster 226 | 227 | ```bash 228 | # Azure CLI version is 2.61.0 229 | az --version 230 | 231 | # Choose your AKS node to simulate eviction 232 | kubectl get no 233 | 234 | # Identify your node Azure ID 235 | # subscriptions/{}/resourceGroups/{}/providers/Microsoft.Compute/virtualMachineScaleSets/{}/virtualMachines/{} 236 | kubectl get no aks-nodename-to-simulate-eviction -o json | jq -r '.spec.providerID[9:]' 237 | 238 | # Append to your node Azure ID additional path /simulateEviction?api-version=2024-03-01 239 | # And execute this simulation with management.azure.com 240 | az rest --verbose -m post --header "Accept=application/json" -u "https://management.azure.com/{Azure ID}/simulateEviction?api-version=2024-03-01" 241 | ``` 242 | 243 | ### Using browser 244 | 245 | You can test with the [Simulate Eviction API](https://docs.microsoft.com/en-us/rest/api/compute/virtual-machines/simulate-eviction) and change the API endpoint to correspond to the `virtualMachineScaleSets` that are used in AKS. 246 | 247 | ```bash 248 | POST https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachineScaleSets/{vmScaleSetName}/virtualMachines/{instanceId}/simulateEviction?api-version=2021-11-01 249 | ``` 250 | 251 | ## Metrics 252 | 253 | The application exposes Prometheus metrics at the `/metrics` endpoint. 
Installing the latest chart will add annotations to the pods: 254 | 255 | ```yaml 256 | annotations: 257 | prometheus.io/port: "17923" 258 | prometheus.io/scrape: "true" 259 | ``` 260 | 261 | ## Windows 2019 support 262 | 263 | If your cluster has Linux and Windows 2019 nodes, you need to use another image: 264 | 265 | ```bash 266 | helm upgrade aks-node-termination-handler \ 267 | --install \ 268 | --namespace kube-system \ 269 | aks-node-termination-handler/aks-node-termination-handler \ 270 | --set priorityClassName=system-node-critical \ 271 | --set image=paskalmaksim/aks-node-termination-handler:latest-ltsc2019 272 | ``` 273 | 274 | If your cluster includes Linux, Windows 2022, and Windows 2019 nodes, you will need two separate Helm installations of `aks-node-termination-handler`, each with different values. 275 | 276 |
277 | linux-windows2022.values.yaml 278 | 279 | ```yaml 280 | priorityClassName: system-node-critical 281 | 282 | image: paskalmaksim/aks-node-termination-handler:latest 283 | 284 | affinity: 285 | nodeAffinity: 286 | requiredDuringSchedulingIgnoredDuringExecution: 287 | nodeSelectorTerms: 288 | - matchExpressions: 289 | - key: kubernetes.azure.com/os-sku 290 | operator: NotIn 291 | values: 292 | - Windows2019 293 | ``` 294 |
295 | 296 |
297 | linux-windows2019.values.yaml 298 | 299 | ```yaml 300 | priorityClassName: system-node-critical 301 | 302 | image: paskalmaksim/aks-node-termination-handler:latest-ltsc2019 303 | 304 | nodeSelector: 305 | kubernetes.azure.com/os-sku: Windows2019 306 | ``` 307 |
308 | 309 | ```bash 310 | # install aks-node-termination-handler for Linux and Windows 2022 nodes 311 | helm upgrade aks-node-termination-handler \ 312 | --install \ 313 | --namespace kube-system \ 314 | aks-node-termination-handler/aks-node-termination-handler \ 315 | --values=linux-windows2022.values.yaml 316 | 317 | # install aks-node-termination-handler for Windows 2019 nodes 318 | helm upgrade aks-node-termination-handler-windows-2019 \ 319 | --install \ 320 | --namespace kube-system \ 321 | aks-node-termination-handler/aks-node-termination-handler \ 322 | --values=linux-windows2019.values.yaml 323 | ``` 324 | 325 | ## Red Hat OpenShift support 326 | 327 | For OpenShift clusters that use Azure compute resources for their nodes, you must enable pod hostNetwork support because OpenShift networking has a [restriction](https://docs.openshift.com/container-platform/4.15/networking/understanding-networking.html) on using the Azure Metadata Service. 328 | 329 | This support can be enabled with `--set hostNetwork=true` 330 | 331 | ```bash 332 | helm upgrade aks-node-termination-handler \ 333 | --install \ 334 | --namespace kube-system \ 335 | aks-node-termination-handler/aks-node-termination-handler \ 336 | --set priorityClassName=system-node-critical \ 337 | --set hostNetwork=true 338 | ``` 339 | 340 | ## NetworkPolicy support 341 | 342 | To limit what the workload can communicate with, a NetworkPolicy can be added via `--set networkPolicy.enabled=true`. To only allow egress communication towards required endpoints, supply the control plane IP address via `--set networkPolicy.controlPlaneIP=10.11.12.13`. Additional egress rules can be added via `--set networkPolicy.additionalEgressRules=[]`; see the chart-provided `values.yaml` file for examples. 
343 | 344 | ```bash 345 | helm upgrade aks-node-termination-handler \ 346 | --install \ 347 | --namespace kube-system \ 348 | aks-node-termination-handler/aks-node-termination-handler \ 349 | --set networkPolicy.enabled=true \ 350 | --set networkPolicy.controlPlaneIP=10.11.12.2 351 | ``` 352 | -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | icon: https://helm.sh/img/helm.svg 3 | name: aks-node-termination-handler 4 | version: 1.1.7 5 | description: Gracefully handle Azure Virtual Machines shutdown within Kubernetes 6 | maintainers: 7 | - name: maksim-paskal # Maksim Paskal 8 | email: paskal.maksim@gmail.com 9 | -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.configMap.create }} 2 | apiVersion: v1 3 | kind: ConfigMap 4 | metadata: 5 | name: {{ tpl .Values.configMap.name . 
}} 6 | data: 7 | {{ toYaml .Values.configMap.data | indent 2 }} 8 | {{ end }} -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/templates/daemonset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: DaemonSet 3 | metadata: 4 | name: {{ .Release.Name }} 5 | labels: 6 | app: {{ .Release.Name }} 7 | spec: 8 | selector: 9 | matchLabels: 10 | app: {{ .Release.Name }} 11 | template: 12 | metadata: 13 | annotations: 14 | {{ if .Values.metrics.addAnnotations }} 15 | prometheus.io/port: "17923" 16 | prometheus.io/scrape: "true" 17 | {{ end }} 18 | {{ if .Values.annotations }} 19 | {{ toYaml .Values.annotations | indent 8 }} 20 | {{ end }} 21 | labels: 22 | app: {{ .Release.Name }} 23 | {{ if .Values.labels }} 24 | {{ toYaml .Values.labels | indent 8 }} 25 | {{ end }} 26 | spec: 27 | hostNetwork: {{ .Values.hostNetwork }} 28 | serviceAccount: {{ .Release.Name }} 29 | {{ if .Values.priorityClassName }} 30 | priorityClassName: {{ .Values.priorityClassName | quote }} 31 | {{ end }} 32 | {{- if .Values.tolerations }} 33 | tolerations: 34 | {{- toYaml .Values.tolerations | nindent 6 }} 35 | {{- end }} 36 | {{- if .Values.nodeSelector}} 37 | nodeSelector: 38 | {{- toYaml .Values.nodeSelector | nindent 8 }} 39 | {{- end }} 40 | {{- if .Values.affinity }} 41 | affinity: 42 | {{- toYaml .Values.affinity | nindent 8 }} 43 | {{- end }} 44 | volumes: 45 | - name: files 46 | configMap: 47 | name: {{ tpl .Values.configMap.name . }} 48 | {{ if .Values.extraVolumes }} 49 | {{ toYaml .Values.extraVolumes | indent 6 }} 50 | {{ end }} 51 | {{- if .Values.imagePullSecrets }} 52 | imagePullSecrets: 53 | {{- range .Values.imagePullSecrets }} 54 | - name: {{ . 
}} 55 | {{- end }} 56 | {{- end }} 57 | containers: 58 | - name: aks-node-termination-handler 59 | resources: 60 | {{ toYaml .Values.resources | indent 10 }} 61 | image: {{ .Values.image }} 62 | imagePullPolicy: {{ .Values.imagePullPolicy }} 63 | securityContext: 64 | {{ toYaml .Values.securityContext | indent 10 }} 65 | args: 66 | {{- range .Values.args }} 67 | - {{ . }} 68 | {{- end}} 69 | env: 70 | - name: MY_NODE_NAME 71 | valueFrom: 72 | fieldRef: 73 | fieldPath: spec.nodeName 74 | {{ if .Values.env }} 75 | {{ toYaml .Values.env | indent 8 }} 76 | {{ end }} 77 | livenessProbe: 78 | httpGet: 79 | path: /healthz 80 | port: http 81 | scheme: HTTP 82 | initialDelaySeconds: 30 83 | periodSeconds: 30 84 | timeoutSeconds: 5 85 | ports: 86 | - name: http 87 | containerPort: 17923 88 | protocol: TCP 89 | volumeMounts: 90 | - name: files 91 | mountPath: {{ .Values.configMap.mountPath }} 92 | readOnly: true 93 | {{ if .Values.extraVolumeMounts}} 94 | {{ toYaml .Values.extraVolumeMounts | indent 8 }} 95 | {{ end }} -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/templates/networkpolicy.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.networkPolicy.enabled }} 2 | apiVersion: networking.k8s.io/v1 3 | kind: NetworkPolicy 4 | metadata: 5 | name: {{ .Release.Name }} 6 | spec: 7 | egress: 8 | - ports: 9 | - port: 80 10 | protocol: TCP 11 | to: 12 | - ipBlock: 13 | cidr: 169.254.169.254/32 14 | - ports: 15 | - port: 443 16 | protocol: TCP 17 | {{- if .Values.networkPolicy.controlPlaneIP }} 18 | to: 19 | - ipBlock: 20 | cidr: {{ .Values.networkPolicy.controlPlaneIP }}/32 21 | {{- end }} 22 | {{- if .Values.networkPolicy.additionalEgressRules }} 23 | {{ toYaml .Values.networkPolicy.additionalEgressRules | indent 2 }} 24 | {{- end }} 25 | - ports: 26 | - port: 53 27 | protocol: UDP 28 | - port: 53 29 | protocol: TCP 30 | to: 31 | - namespaceSelector: {} 32 | 
podSelector: 33 | matchLabels: 34 | k8s-app: kube-dns 35 | ingress: 36 | - from: 37 | - namespaceSelector: {} 38 | ports: 39 | - port: 17923 40 | protocol: TCP 41 | podSelector: 42 | matchLabels: 43 | app: {{ .Release.Name }} 44 | policyTypes: 45 | - Ingress 46 | - Egress 47 | {{ end }} -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/templates/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | --- 7 | kind: ClusterRole 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | metadata: 10 | name: {{ .Release.Name }} 11 | rules: 12 | - apiGroups: 13 | - "" 14 | resources: 15 | - nodes 16 | verbs: 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - apiGroups: 22 | - "" 23 | resources: 24 | - pods 25 | verbs: 26 | - list 27 | - get 28 | - apiGroups: 29 | - "" 30 | resources: 31 | - pods/eviction 32 | verbs: 33 | - create 34 | - apiGroups: 35 | - extensions 36 | resources: 37 | - daemonsets 38 | verbs: 39 | - get 40 | - apiGroups: 41 | - apps 42 | resources: 43 | - daemonsets 44 | verbs: 45 | - get 46 | - apiGroups: 47 | - "" 48 | resources: 49 | - events 50 | verbs: 51 | - create 52 | --- 53 | kind: ClusterRoleBinding 54 | apiVersion: rbac.authorization.k8s.io/v1 55 | metadata: 56 | name: {{ .Release.Name }} 57 | subjects: 58 | - kind: ServiceAccount 59 | name: {{ .Release.Name }} 60 | namespace: {{ .Release.Namespace }} 61 | roleRef: 62 | kind: ClusterRole 63 | name: {{ .Release.Name }} 64 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /charts/aks-node-termination-handler/values.yaml: -------------------------------------------------------------------------------- 1 | image: paskalmaksim/aks-node-termination-handler:latest 2 | imagePullPolicy: Always 3 | 
imagePullSecrets: [] 4 | 5 | args: [] 6 | env: [] 7 | 8 | priorityClassName: "" 9 | annotations: {} 10 | labels: {} 11 | 12 | configMap: 13 | create: true 14 | name: "{{ .Release.Name }}-files" 15 | mountPath: /files 16 | data: {} 17 | # slack-payload.json: | 18 | # { 19 | # "channel": "#mychannel", 20 | # "username": "webhookbot", 21 | # "text": "This is message for {{ .NodeName }}, {{ .InstanceType }} from {{ .NodeRegion }}", 22 | # "icon_emoji": ":ghost:" 23 | # } 24 | # prometheus-pushgateway-payload.txt: | 25 | # node_termination_event{node="{{ .NodeName }}"} 1 26 | 27 | extraVolumes: [] 28 | extraVolumeMounts: [] 29 | 30 | networkPolicy: 31 | enabled: false 32 | # controlPlaneIP: "123.X.X.X" # If not provided, network policy will allow all access to port 443/tcp 33 | # additionalEgressRules: 34 | # - ports: 35 | # - port: 443 36 | # protocol: TCP 37 | # to: 38 | # - ipBlock: 39 | # cidr: 124.X.X.X/24 40 | 41 | metrics: 42 | addAnnotations: true 43 | 44 | hostNetwork: false 45 | 46 | securityContext: 47 | runAsNonRoot: true 48 | privileged: false 49 | readOnlyRootFilesystem: true 50 | allowPrivilegeEscalation: false 51 | capabilities: 52 | drop: 53 | - ALL 54 | windowsOptions: 55 | runAsUserName: "ContainerUser" 56 | seccompProfile: 57 | type: RuntimeDefault 58 | 59 | affinity: {} 60 | 61 | tolerations: 62 | - key: "kubernetes.azure.com/scalesetpriority" 63 | operator: "Equal" 64 | value: "spot" 65 | effect: "NoSchedule" 66 | 67 | nodeSelector: {} 68 | # if you want handle events only from spot instances 69 | # nodeSelector: 70 | # kubernetes.azure.com/scalesetpriority: spot 71 | 72 | resources: 73 | limits: 74 | memory: 100Mi 75 | requests: 76 | cpu: 20m 77 | memory: 100Mi 78 | -------------------------------------------------------------------------------- /cmd/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the 
"License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package main 14 | 15 | import ( 16 | "context" 17 | "flag" 18 | "fmt" 19 | "os" 20 | "os/signal" 21 | "syscall" 22 | "time" 23 | 24 | "github.com/maksim-paskal/aks-node-termination-handler/internal" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 26 | logrushooksentry "github.com/maksim-paskal/logrus-hook-sentry" 27 | log "github.com/sirupsen/logrus" 28 | ) 29 | 30 | var version = flag.Bool("version", false, "version") 31 | 32 | func main() { 33 | flag.Parse() 34 | 35 | if *version { 36 | fmt.Println(config.GetVersion()) //nolint:forbidigo 37 | os.Exit(0) 38 | } 39 | 40 | logLevel, err := log.ParseLevel(*config.Get().LogLevel) 41 | if err != nil { 42 | log.WithError(err).Fatal() 43 | } 44 | 45 | log.SetLevel(logLevel) 46 | log.SetReportCaller(true) 47 | 48 | if !*config.Get().LogPretty { 49 | log.SetFormatter(&log.JSONFormatter{}) 50 | } 51 | 52 | ctx, cancel := context.WithCancel(context.Background()) 53 | defer cancel() 54 | 55 | log.Infof("Starting %s...", config.GetVersion()) 56 | 57 | hook, err := logrushooksentry.NewHook(ctx, logrushooksentry.Options{ 58 | SentryDSN: *config.Get().SentryDSN, 59 | Release: config.GetVersion(), 60 | }) 61 | if err != nil { 62 | log.WithError(err).Error() 63 | } 64 | 65 | log.AddHook(hook) 66 | 67 | signalChanInterrupt := make(chan os.Signal, 1) 68 | signal.Notify(signalChanInterrupt, syscall.SIGINT, syscall.SIGTERM) 69 | 70 | log.RegisterExitHandler(func() { 71 | cancel() 72 | }) 73 | 74 | go func() { 
75 | select { 76 | case <-signalChanInterrupt: 77 | log.Error("Got interruption signal...") 78 | cancel() 79 | case <-ctx.Done(): 80 | } 81 | <-signalChanInterrupt 82 | os.Exit(1) 83 | }() 84 | 85 | if err := internal.Run(ctx); err != nil { 86 | log.WithError(err).Fatal() 87 | } 88 | 89 | <-ctx.Done() 90 | 91 | log.Infof("Waiting %s before shutdown...", config.Get().GracePeriod()) 92 | time.Sleep(config.Get().GracePeriod()) 93 | } 94 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | # ignore cmd/main.go because it's a main file 3 | - "cmd/main.go" 4 | # ignore because to test need active connection to the Telegram 5 | - "pkg/alert/alert.go" 6 | # ignore because to test need active connection to the kubernetes cluster 7 | - "pkg/web/web.go" 8 | - "pkg/api/api.go" 9 | - "pkg/client/client.go" -------------------------------------------------------------------------------- /e2e/main_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package main_test 14 | 15 | import ( 16 | "context" 17 | "encoding/json" 18 | "flag" 19 | "net/http" 20 | "net/http/httptest" 21 | "testing" 22 | 23 | "github.com/maksim-paskal/aks-node-termination-handler/internal" 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 27 | "github.com/pkg/errors" 28 | log "github.com/sirupsen/logrus" 29 | corev1 "k8s.io/api/core/v1" 30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 | ) 32 | 33 | const ( 34 | azureResourceName = "test-e2e-resource" 35 | eventID = "test-event-id" 36 | eventType = types.EventTypePreempt 37 | taintKey = "aks-node-termination-handler/preempt" 38 | taintEffect = corev1.TaintEffectNoSchedule 39 | ) 40 | 41 | func TestDrain(t *testing.T) { //nolint:funlen,cyclop 42 | t.Parallel() 43 | 44 | log.SetLevel(log.DebugLevel) 45 | log.SetReportCaller(true) 46 | 47 | handler := http.NewServeMux() 48 | handler.HandleFunc("/document", func(w http.ResponseWriter, _ *http.Request) { 49 | message, _ := json.Marshal(types.ScheduledEventsType{ 50 | DocumentIncarnation: 1, 51 | Events: []types.ScheduledEventsEvent{ 52 | { 53 | EventId: eventID, 54 | EventType: eventType, 55 | ResourceType: "resourceType", 56 | Resources: []string{azureResourceName}, 57 | }, 58 | }, 59 | }) 60 | 61 | w.WriteHeader(http.StatusOK) 62 | _, _ = w.Write(message) 63 | }) 64 | 65 | testServer := httptest.NewServer(handler) 66 | 67 | _ = flag.Set("config", "./testdata/config_test.yaml") 68 | _ = flag.Set("endpoint", testServer.URL+"/document") 69 | _ = flag.Set("resource.name", azureResourceName) 70 | 71 | flag.Parse() 72 | 73 | ctx := context.TODO() 74 | 75 | if err := internal.Run(ctx); err != nil { 76 | t.Fatal(err) 77 | } 78 | 79 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, *config.Get().NodeName, metav1.GetOptions{}) 80 | if err != nil { 81 | 
t.Fatal(err) 82 | } 83 | 84 | if !node.Spec.Unschedulable { 85 | t.Fatal("node must be unschedulable") 86 | } 87 | 88 | if len(node.Spec.Taints) == 0 { 89 | t.Fatal("node must have taints") 90 | } 91 | 92 | taintFound := false 93 | 94 | for _, taint := range node.Spec.Taints { 95 | if taint.Key == taintKey && taint.Value == eventID && taint.Effect == taintEffect { 96 | taintFound = true 97 | 98 | break 99 | } 100 | } 101 | 102 | if !taintFound { 103 | t.Fatal("taint not found") 104 | } 105 | 106 | if err := checkNodeEvent(ctx); err != nil { 107 | t.Fatal(err) 108 | } 109 | } 110 | 111 | func checkNodeEvent(ctx context.Context) error { //nolint:cyclop 112 | events, err := client.GetKubernetesClient().CoreV1().Events("").List(ctx, metav1.ListOptions{}) 113 | if err != nil { 114 | return errors.Wrap(err, "error in list events") 115 | } 116 | 117 | nodeName := *config.Get().NodeName 118 | eventMessageReceived := 0 119 | eventMessageBeforeListen := 0 120 | 121 | for _, event := range events.Items { 122 | if event.Source.Component != "aks-node-termination-handler" { 123 | continue 124 | } 125 | 126 | if event.InvolvedObject.Name != nodeName { 127 | continue 128 | } 129 | 130 | if event.Reason == eventType && event.Message == config.EventMessageReceived { 131 | eventMessageReceived++ 132 | } 133 | 134 | if event.Reason == "ReadEvents" && event.Message == config.EventMessageBeforeListen { 135 | eventMessageBeforeListen++ 136 | } 137 | } 138 | 139 | if eventMessageReceived == 0 { 140 | return errors.New("eventMessageReceived not found in events") 141 | } 142 | 143 | if eventMessageBeforeListen == 0 { 144 | return errors.New("eventMessageBeforeListen not found in events") 145 | } 146 | 147 | return nil 148 | } 149 | -------------------------------------------------------------------------------- /e2e/testdata/config_test.yaml: -------------------------------------------------------------------------------- 1 | taintnode: true 2 | tainteffect: NoSchedule 3 | 
podgraceperiodseconds: 30 4 | exitafternodedrain: true -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/maksim-paskal/aks-node-termination-handler 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.4 6 | 7 | require ( 8 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible 9 | github.com/google/uuid v1.6.0 10 | github.com/hashicorp/go-retryablehttp v0.7.7 11 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1 12 | github.com/pkg/errors v0.9.1 13 | github.com/prometheus/client_golang v1.20.5 14 | github.com/sirupsen/logrus v1.9.3 15 | github.com/stretchr/testify v1.10.0 16 | gopkg.in/yaml.v3 v3.0.1 17 | k8s.io/api v0.32.0 18 | k8s.io/apimachinery v0.32.0 19 | k8s.io/client-go v0.32.0 20 | k8s.io/kubectl v0.32.0 21 | ) 22 | 23 | require ( 24 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect 25 | github.com/MakeNowJust/heredoc v1.0.0 // indirect 26 | github.com/beorn7/perks v1.0.1 // indirect 27 | github.com/blang/semver/v4 v4.0.0 // indirect 28 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 29 | github.com/chai2010/gettext-go v1.0.3 // indirect 30 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect 31 | github.com/emicklei/go-restful/v3 v3.12.1 // indirect 32 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect 33 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect 34 | github.com/getsentry/sentry-go v0.30.0 // indirect 35 | github.com/go-errors/errors v1.5.1 // indirect 36 | github.com/go-logr/logr v1.4.2 // indirect 37 | github.com/go-openapi/jsonpointer v0.21.0 // indirect 38 | github.com/go-openapi/jsonreference v0.21.0 // indirect 39 | github.com/go-openapi/swag v0.23.0 // indirect 40 | github.com/gogo/protobuf v1.3.2 // indirect 41 | github.com/golang/protobuf v1.5.4 // indirect 42 | github.com/google/btree v1.1.3 // indirect 43 
| github.com/google/gnostic-models v0.6.9 // indirect 44 | github.com/google/go-cmp v0.6.0 // indirect 45 | github.com/google/gofuzz v1.2.0 // indirect 46 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect 47 | github.com/gorilla/websocket v1.5.3 // indirect 48 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect 49 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect 50 | github.com/inconshreveable/mousetrap v1.1.0 // indirect 51 | github.com/josharian/intern v1.0.0 // indirect 52 | github.com/json-iterator/go v1.1.12 // indirect 53 | github.com/klauspost/compress v1.17.11 // indirect 54 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect 55 | github.com/mailru/easyjson v0.9.0 // indirect 56 | github.com/mitchellh/go-wordwrap v1.0.1 // indirect 57 | github.com/moby/spdystream v0.5.0 // indirect 58 | github.com/moby/term v0.5.0 // indirect 59 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 60 | github.com/modern-go/reflect2 v1.0.2 // indirect 61 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect 62 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 63 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect 64 | github.com/peterbourgon/diskv v2.0.1+incompatible // indirect 65 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 66 | github.com/prometheus/client_model v0.6.1 // indirect 67 | github.com/prometheus/common v0.61.0 // indirect 68 | github.com/prometheus/procfs v0.15.1 // indirect 69 | github.com/russross/blackfriday/v2 v2.1.0 // indirect 70 | github.com/spf13/cobra v1.8.1 // indirect 71 | github.com/spf13/pflag v1.0.5 // indirect 72 | github.com/technoweenie/multipartstreamer v1.0.1 // indirect 73 | github.com/x448/float16 v0.8.4 // indirect 74 | github.com/xlab/treeprint v1.2.0 // indirect 75 | golang.org/x/net v0.33.0 // indirect 76 | 
golang.org/x/oauth2 v0.24.0 // indirect 77 | golang.org/x/sync v0.10.0 // indirect 78 | golang.org/x/sys v0.28.0 // indirect 79 | golang.org/x/term v0.27.0 // indirect 80 | golang.org/x/text v0.21.0 // indirect 81 | golang.org/x/time v0.8.0 // indirect 82 | google.golang.org/protobuf v1.36.0 // indirect 83 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect 84 | gopkg.in/inf.v0 v0.9.1 // indirect 85 | k8s.io/cli-runtime v0.32.0 // indirect 86 | k8s.io/component-base v0.32.0 // indirect 87 | k8s.io/klog/v2 v2.130.1 // indirect 88 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 // indirect 89 | k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect 90 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect 91 | sigs.k8s.io/kustomize/api v0.18.0 // indirect 92 | sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect 93 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect 94 | sigs.k8s.io/yaml v1.4.0 // indirect 95 | ) 96 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= 2 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= 3 | github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= 4 | github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= 5 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= 6 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= 7 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 8 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 9 | github.com/blang/semver/v4 v4.0.0 
h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= 10 | github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= 11 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= 12 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 13 | github.com/chai2010/gettext-go v1.0.3 h1:9liNh8t+u26xl5ddmWLmsOsdNLwkdRTg5AG+JnTiM80= 14 | github.com/chai2010/gettext-go v1.0.3/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= 15 | github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= 16 | github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= 17 | github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= 18 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 20 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= 21 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 22 | github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= 23 | github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 24 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4= 25 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc= 26 | github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= 27 | github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= 28 | github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= 29 | github.com/fxamacker/cbor/v2 v2.7.0/go.mod 
h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= 30 | github.com/getsentry/sentry-go v0.30.0 h1:lWUwDnY7sKHaVIoZ9wYqRHJ5iEmoc0pqcRqFkosKzBo= 31 | github.com/getsentry/sentry-go v0.30.0/go.mod h1:WU9B9/1/sHDqeV8T+3VwwbjeR5MSXs/6aqG3mqZrezA= 32 | github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk= 33 | github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= 34 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= 35 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 36 | github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= 37 | github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= 38 | github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= 39 | github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= 40 | github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= 41 | github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= 42 | github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= 43 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 44 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible h1:2cauKuaELYAEARXRkq2LrJ0yDDv1rW7+wrTEdVL3uaU= 45 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible/go.mod h1:qf9acutJ8cwBUhm1bqgz6Bei9/C/c93FPDljKWwsOgM= 46 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 47 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 48 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= 49 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= 50 | 
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg= 51 | github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4= 52 | github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw= 53 | github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw= 54 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 55 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 56 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 57 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 58 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 59 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 60 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo= 61 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= 62 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= 63 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= 64 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= 65 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 66 | github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg= 67 | github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= 68 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= 69 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= 70 | 
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= 71 | github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= 72 | github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= 73 | github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= 74 | github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= 75 | github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= 76 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 77 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 78 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 79 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 80 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 81 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 82 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 83 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 84 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= 85 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= 86 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 87 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 88 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 89 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 90 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= 91 | github.com/kylelemons/godebug 
v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= 92 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0= 93 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE= 94 | github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4= 95 | github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU= 96 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1 h1:9IQ8kn6XwZJ/yDjkIyTLAce7k78J3WfeZtjIh3jA/MY= 97 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1/go.mod h1:FpJn8dMDsuG8/lt65HQauZuXIiG2LqAYM+vbKV//Ga0= 98 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= 99 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= 100 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 101 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 102 | github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0= 103 | github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0= 104 | github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= 105 | github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= 106 | github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0= 107 | github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= 108 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 109 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 110 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 
111 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 112 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 113 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0= 114 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4= 115 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 116 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 117 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= 118 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= 119 | github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM= 120 | github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo= 121 | github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4= 122 | github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog= 123 | github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= 124 | github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= 125 | github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= 126 | github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= 127 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 128 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 129 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 130 | github.com/pmezard/go-difflib 
v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= 131 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 132 | github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= 133 | github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= 134 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= 135 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= 136 | github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ= 137 | github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s= 138 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= 139 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= 140 | github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= 141 | github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= 142 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= 143 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 144 | github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ= 145 | github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 146 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 147 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 148 | github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= 149 | github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= 150 | github.com/spf13/pflag v1.0.5 
h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 151 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 152 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 153 | github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= 154 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 155 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 156 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 157 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 158 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= 159 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 160 | github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM= 161 | github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog= 162 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= 163 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= 164 | github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= 165 | github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= 166 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 167 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 168 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 169 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 170 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 171 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod 
h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 172 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 173 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 174 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 175 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 176 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 177 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 178 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 179 | golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= 180 | golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= 181 | golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= 182 | golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= 183 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 184 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 185 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 186 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= 187 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 188 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 189 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 190 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 191 | golang.org/x/sys 
v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 192 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 193 | golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= 194 | golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 195 | golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= 196 | golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= 197 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 198 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 199 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= 200 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= 201 | golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= 202 | golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= 203 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 204 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 205 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 206 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 207 | golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= 208 | golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= 209 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 210 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 211 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod 
h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 212 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 213 | google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= 214 | google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= 215 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 216 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 217 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 218 | gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4= 219 | gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= 220 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 221 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 222 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 223 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 224 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 225 | k8s.io/api v0.32.0 h1:OL9JpbvAU5ny9ga2fb24X8H6xQlVp+aJMFlgtQjR9CE= 226 | k8s.io/api v0.32.0/go.mod h1:4LEwHZEf6Q/cG96F3dqR965sYOfmPM7rq81BLgsE0p0= 227 | k8s.io/apimachinery v0.32.0 h1:cFSE7N3rmEEtv4ei5X6DaJPHHX0C+upp+v5lVPiEwpg= 228 | k8s.io/apimachinery v0.32.0/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= 229 | k8s.io/cli-runtime v0.32.0 h1:dP+OZqs7zHPpGQMCGAhectbHU2SNCuZtIimRKTv2T1c= 230 | k8s.io/cli-runtime v0.32.0/go.mod h1:Mai8ht2+esoDRK5hr861KRy6z0zHsSTYttNVJXgP3YQ= 231 | k8s.io/client-go v0.32.0 h1:DimtMcnN/JIKZcrSrstiwvvZvLjG0aSxy8PxN8IChp8= 232 | k8s.io/client-go v0.32.0/go.mod h1:boDWvdM1Drk4NJj/VddSLnx59X3OPgwrOo0vGbtq9+8= 233 | 
k8s.io/component-base v0.32.0 h1:d6cWHZkCiiep41ObYQS6IcgzOUQUNpywm39KVYaUqzU= 234 | k8s.io/component-base v0.32.0/go.mod h1:JLG2W5TUxUu5uDyKiH2R/7NnxJo1HlPoRIIbVLkK5eM= 235 | k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= 236 | k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= 237 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 h1:hcha5B1kVACrLujCKLbr8XWMxCxzQx42DY8QKYJrDLg= 238 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7/go.mod h1:GewRfANuJ70iYzvn+i4lezLDAFzvjxZYK1gn1lWcfas= 239 | k8s.io/kubectl v0.32.0 h1:rpxl+ng9qeG79YA4Em9tLSfX0G8W0vfaiPVrc/WR7Xw= 240 | k8s.io/kubectl v0.32.0/go.mod h1:qIjSX+QgPQUgdy8ps6eKsYNF+YmFOAO3WygfucIqFiE= 241 | k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= 242 | k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 243 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= 244 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= 245 | sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo= 246 | sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U= 247 | sigs.k8s.io/kustomize/kyaml v0.18.1 h1:WvBo56Wzw3fjS+7vBjN6TeivvpbW9GmRaWZ9CIVmt4E= 248 | sigs.k8s.io/kustomize/kyaml v0.18.1/go.mod h1:C3L2BFVU1jgcddNBE1TxuVLgS46TjObMwW5FT9FcjYo= 249 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= 250 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= 251 | sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 252 | sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= 253 | -------------------------------------------------------------------------------- /internal/internal.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package internal 14 | 15 | import ( 16 | "context" 17 | 18 | "github.com/hashicorp/go-retryablehttp" 19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/alert" 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api" 21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache" 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client" 23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/events" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" 26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" 27 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 28 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/web" 29 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/webhook" 30 | "github.com/pkg/errors" 31 | log "github.com/sirupsen/logrus" 32 | ) 33 | 34 | func Run(ctx context.Context) error { 35 | err := config.Load() 36 | if err != nil { 37 | return errors.Wrap(err, "error in config load") 38 | } 39 | 40 | err = config.Check() 41 | if err != nil { 42 | return errors.Wrap(err, "error in config check") 43 | } 44 | 45 | log.Debugf("using config: %s", config.Get().String()) 46 | 47 | 
retryClient := retryablehttp.NewClient() 48 | retryClient.HTTPClient.Transport = metrics.NewInstrumenter("webhook"). 49 | WithProxy(*config.Get().WebhookProxy). 50 | WithInsecureSkipVerify(*config.Get().WebhookInsecure). 51 | InstrumentedRoundTripper() 52 | retryClient.RetryMax = *config.Get().WebhookRetries 53 | webhook.SetHTTPClient(retryClient) 54 | 55 | err = alert.Init() 56 | if err != nil { 57 | return errors.Wrap(err, "error in init alerts") 58 | } 59 | 60 | err = client.Init() 61 | if err != nil { 62 | return errors.Wrap(err, "error in init api") 63 | } 64 | 65 | go cache.SheduleCleaning(ctx) 66 | go web.Start(ctx) 67 | 68 | if err := startReadingEvents(ctx); err != nil { 69 | return errors.Wrap(err, "error in startReadingEvents") 70 | } 71 | 72 | return nil 73 | } 74 | 75 | func startReadingEvents(ctx context.Context) error { 76 | azureResource, err := api.GetAzureResourceName(ctx, *config.Get().NodeName) 77 | if err != nil { 78 | return errors.Wrap(err, "error in getting azure resource name") 79 | } 80 | 81 | eventReader := events.NewReader() 82 | eventReader.AzureResource = azureResource 83 | eventReader.Period = *config.Get().Period 84 | eventReader.Endpoint = *config.Get().Endpoint 85 | eventReader.RequestTimeout = *config.Get().RequestTimeout 86 | eventReader.NodeName = *config.Get().NodeName 87 | eventReader.BeforeReading = func(ctx context.Context) error { 88 | // add event to node 89 | if err := api.AddNodeEvent(ctx, "Info", "ReadEvents", config.EventMessageBeforeListen); err != nil { 90 | return errors.Wrap(err, "error in add node event") 91 | } 92 | 93 | return nil 94 | } 95 | 96 | eventReader.EventReceived = func(ctx context.Context, event types.ScheduledEventsEvent) (bool, error) { 97 | // add event to node 98 | if err := api.AddNodeEvent(ctx, "Warning", string(event.EventType), config.EventMessageReceived); err != nil { 99 | return false, errors.Wrap(err, "error in add node event") 100 | } 101 | 102 | // check if event is excludedm by default 
Freeze event is excluded 103 | if config.Get().IsExcludedEvent(event.EventType) { 104 | log.Infof("Excluded event %s by user config", event.EventType) 105 | 106 | return false, nil 107 | } 108 | 109 | // send event in separate goroutine 110 | go func() { 111 | if err := sendEvent(ctx, event); err != nil { 112 | log.WithError(err).Error("error in sendEvent") 113 | } 114 | }() 115 | 116 | // drain node 117 | if err := api.DrainNode(ctx, *config.Get().NodeName, string(event.EventType), event.EventId); err != nil { 118 | return false, errors.Wrap(err, "error in DrainNode") 119 | } 120 | 121 | return true, nil 122 | } 123 | 124 | // check for run in synchronous mode or not 125 | // synchronous mode is used for e2e tests 126 | if *config.Get().ExitAfterNodeDrain { 127 | eventReader.ReadEvents(ctx) 128 | } else { 129 | go eventReader.ReadEvents(ctx) 130 | } 131 | 132 | return nil 133 | } 134 | 135 | func sendEvent(ctx context.Context, event types.ScheduledEventsEvent) error { 136 | message, err := template.NewMessageType(ctx, *config.Get().NodeName, event) 137 | if err != nil { 138 | return errors.Wrap(err, "error in template.NewMessageType") 139 | } 140 | 141 | log.Infof("Message: %+v", message) 142 | 143 | message.Template = *config.Get().AlertMessage 144 | 145 | if err := alert.SendTelegram(message); err != nil { 146 | log.WithError(err).Error("error in alert.SendTelegram") 147 | } 148 | 149 | if err := webhook.SendWebHook(ctx, message); err != nil { 150 | log.WithError(err).Error("error in webhook.SendWebHook") 151 | } 152 | 153 | return nil 154 | } 155 | -------------------------------------------------------------------------------- /mock/mock.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package main 14 | 15 | import ( 16 | "flag" 17 | "fmt" 18 | "io" 19 | "net/http" 20 | "path/filepath" 21 | "strings" 22 | "time" 23 | 24 | log "github.com/sirupsen/logrus" 25 | ) 26 | 27 | func debugHandler(w http.ResponseWriter, r *http.Request) { 28 | // Create return string 29 | request := []string{} 30 | // Add the request string 31 | url := fmt.Sprintf("%v %v %v", r.Method, r.URL, r.Proto) 32 | request = append(request, url) 33 | // Add the host 34 | request = append(request, fmt.Sprintf("Host: %v", r.Host)) 35 | 36 | request = append(request, "--HEADERS--") 37 | // Loop through headers 38 | for name, headers := range r.Header { 39 | name = strings.ToLower(name) 40 | 41 | for _, h := range headers { 42 | request = append(request, fmt.Sprintf("%v: %v", name, h)) 43 | } 44 | } 45 | 46 | bodyBytes, err := io.ReadAll(r.Body) 47 | if err != nil { 48 | log.WithError(err).Fatal() 49 | } 50 | 51 | defer r.Body.Close() 52 | 53 | request = append(request, "--BODY--") 54 | request = append(request, string(bodyBytes)) 55 | 56 | _, _ = w.Write([]byte(strings.Join(request, "\n"))) 57 | } 58 | 59 | // simple server for test env. 
60 | func main() { 61 | address := flag.String("address", ":28080", "address") 62 | flag.Parse() 63 | 64 | http.HandleFunc("/debug", debugHandler) 65 | http.Handle("/", http.FileServer(http.Dir("."))) 66 | 67 | scheduledEventsType, err := filepath.Abs("pkg/types/testdata/ScheduledEventsType.json") 68 | if err != nil { 69 | log.WithError(err).Fatal() 70 | } 71 | 72 | log.Infof("edit %s file to test events", scheduledEventsType) 73 | 74 | const ( 75 | readTimeout = 5 * time.Second 76 | writeTimeout = 10 * time.Second 77 | ) 78 | 79 | server := &http.Server{ 80 | Addr: *address, 81 | ReadTimeout: readTimeout, 82 | WriteTimeout: writeTimeout, 83 | } 84 | 85 | log.Infof("Listen %s", server.Addr) 86 | 87 | err = server.ListenAndServe() 88 | if err != nil { 89 | log.WithError(err).Fatal() 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /pkg/alert/alert.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package alert 14 | 15 | import ( 16 | "strconv" 17 | 18 | tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api" 19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" 21 | "github.com/pkg/errors" 22 | log "github.com/sirupsen/logrus" 23 | ) 24 | 25 | var bot *tgbotapi.BotAPI 26 | 27 | func Init() error { 28 | if len(*config.Get().TelegramToken) == 0 { 29 | log.Warning("not sending Telegram message, no token") 30 | 31 | return nil 32 | } 33 | 34 | var err error 35 | 36 | bot, err = tgbotapi.NewBotAPI(*config.Get().TelegramToken) 37 | if err != nil { 38 | return errors.Wrap(err, "error in NewBotAPI") 39 | } 40 | 41 | log.Printf("Authorized on account %s", bot.Self.UserName) 42 | 43 | return nil 44 | } 45 | 46 | // healthcheck. 47 | func Ping() error { 48 | if len(*config.Get().TelegramToken) != 0 { 49 | if _, err := bot.GetMe(); err != nil { 50 | return errors.Wrap(err, "error in bot.GetMe") 51 | } 52 | } 53 | 54 | return nil 55 | } 56 | 57 | func SendTelegram(obj *template.MessageType) error { 58 | if len(*config.Get().TelegramToken) == 0 { 59 | return nil 60 | } 61 | 62 | messageText, err := template.Message(obj) 63 | if err != nil { 64 | return errors.Wrap(err, "error in template.Message") 65 | } 66 | 67 | chatID, err := strconv.Atoi(*config.Get().TelegramChatID) 68 | if err != nil { 69 | return errors.Wrap(err, "error converting chatID") 70 | } 71 | 72 | msg := tgbotapi.NewMessage(int64(chatID), messageText) 73 | 74 | result, err := bot.Send(msg) 75 | if err != nil { 76 | return errors.Wrap(err, "error in bot.Send") 77 | } 78 | 79 | log.Infof("Telegram MessageID=%d", result.MessageID) 80 | 81 | return nil 82 | } 83 | -------------------------------------------------------------------------------- /pkg/api/api.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under 
the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package api 14 | 15 | import ( 16 | "context" 17 | "fmt" 18 | "strings" 19 | 20 | "github.com/google/uuid" 21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client" 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/logger" 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 25 | "github.com/pkg/errors" 26 | log "github.com/sirupsen/logrus" 27 | corev1 "k8s.io/api/core/v1" 28 | apierrorrs "k8s.io/apimachinery/pkg/api/errors" 29 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 30 | "k8s.io/apimachinery/pkg/util/wait" 31 | "k8s.io/client-go/util/retry" 32 | "k8s.io/kubectl/pkg/drain" 33 | ) 34 | 35 | const taintKeyPrefix = "aks-node-termination-handler" 36 | 37 | func GetAzureResourceName(ctx context.Context, nodeName string) (string, error) { 38 | // return user defined resource name 39 | if len(*config.Get().ResourceName) > 0 { 40 | return *config.Get().ResourceName, nil 41 | } 42 | 43 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 44 | if err != nil { 45 | return "", errors.Wrap(err, "error in Clientset.CoreV1().Nodes().Get") 46 | } 47 | 48 | azureResourceName, err := types.NewAzureResource(node.Spec.ProviderID) 49 | if err != nil { 50 | return "", errors.Wrap(err, "error in types.NewAzureResource") 51 | } 52 | 53 | return 
azureResourceName.EventResourceName, nil 54 | } 55 | 56 | func DrainNode(ctx context.Context, nodeName string, eventType string, eventID string) error { //nolint:cyclop 57 | log.Infof("Draining node %s", nodeName) 58 | 59 | node, err := GetNode(ctx, nodeName) 60 | if err != nil { 61 | return errors.Wrap(err, "error in nodes.get") 62 | } 63 | 64 | if node.Spec.Unschedulable { 65 | log.Infof("Node %s is already Unschedulable", node.Name) 66 | 67 | return nil 68 | } 69 | 70 | // taint node before draining if effect is NoSchedule or TaintEffectPreferNoSchedule 71 | if *config.Get().TaintNode && *config.Get().TaintEffect != string(corev1.TaintEffectNoExecute) { 72 | err = addTaint(ctx, node, getTaintKey(eventType), eventID) 73 | if err != nil { 74 | return errors.Wrap(err, "failed to taint node") 75 | } 76 | } 77 | 78 | logger := &logger.KubectlLogger{} 79 | logger.Log = func(message string) { 80 | log.Info(message) 81 | } 82 | 83 | helper := &drain.Helper{ 84 | Ctx: ctx, 85 | Client: client.GetKubernetesClient(), 86 | Force: true, 87 | GracePeriodSeconds: *config.Get().PodGracePeriodSeconds, 88 | IgnoreAllDaemonSets: true, 89 | Out: logger, 90 | ErrOut: logger, 91 | DeleteEmptyDirData: true, 92 | Timeout: config.Get().NodeGracePeriod(), 93 | } 94 | 95 | if err := drain.RunCordonOrUncordon(helper, node, true); err != nil { 96 | return errors.Wrap(err, "error in drain.RunCordonOrUncordon") 97 | } 98 | 99 | if err := drain.RunNodeDrain(helper, node.Name); err != nil { 100 | return errors.Wrap(err, "error in drain.RunNodeDrain") 101 | } 102 | 103 | // taint node after draining if effect is TaintEffectNoExecute 104 | // this NoExecute taint effect will stop all daemonsents on the node that can not handle this effect 105 | if *config.Get().TaintNode && *config.Get().TaintEffect == string(corev1.TaintEffectNoExecute) { 106 | err = addTaint(ctx, node, getTaintKey(eventType), eventID) 107 | if err != nil { 108 | return errors.Wrap(err, "failed to taint node") 109 | } 110 | } 
111 | 112 | return nil 113 | } 114 | 115 | func getTaintKey(eventType string) string { 116 | return fmt.Sprintf("%s/%s", taintKeyPrefix, strings.ToLower(eventType)) 117 | } 118 | 119 | func addTaint(ctx context.Context, node *corev1.Node, taintKey string, taintValue string) error { 120 | log.Infof("Adding taint %s=%s on node %s", taintKey, taintValue, node.Name) 121 | 122 | freshNode := node.DeepCopy() 123 | 124 | var err error 125 | 126 | updateErr := wait.ExponentialBackoff(retry.DefaultBackoff, func() (bool, error) { 127 | if freshNode, err = client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, freshNode.Name, metav1.GetOptions{}); err != nil { 128 | nodeErr := errors.Wrapf(err, "failed to get node %s", freshNode.Name) 129 | log.Error(nodeErr) 130 | 131 | return false, nodeErr 132 | } 133 | 134 | err = updateNodeWith(ctx, taintKey, taintValue, freshNode) 135 | 136 | switch { 137 | case err == nil: 138 | return true, nil 139 | case apierrorrs.IsConflict(err): 140 | return false, nil 141 | case err != nil: 142 | return false, errors.Wrapf(err, "failed to taint node %s with key %s", freshNode.Name, taintKey) 143 | } 144 | 145 | return false, nil 146 | }) 147 | 148 | if updateErr != nil { 149 | return err 150 | } 151 | 152 | log.Warnf("Successfully added taint %s on node %s", taintKey, freshNode.Name) 153 | 154 | return nil 155 | } 156 | 157 | func updateNodeWith(ctx context.Context, taintKey string, taintValue string, node *corev1.Node) error { 158 | node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{ 159 | Key: taintKey, 160 | Value: taintValue, 161 | Effect: corev1.TaintEffect(*config.Get().TaintEffect), 162 | }) 163 | _, err := client.GetKubernetesClient().CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{}) 164 | 165 | return errors.Wrap(err, "failed to update node with taint") 166 | } 167 | 168 | func GetNode(ctx context.Context, nodeName string) (*corev1.Node, error) { 169 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, 
nodeName, metav1.GetOptions{}) 170 | if err != nil { 171 | return nil, errors.Wrap(err, "error in nodes.get") 172 | } 173 | 174 | return node, nil 175 | } 176 | 177 | func AddNodeEvent(ctx context.Context, eventType, eventReason, eventMessage string) error { 178 | message := &types.EventMessage{ 179 | Type: eventType, 180 | Reason: eventReason, 181 | Message: eventMessage, 182 | } 183 | 184 | return AddNodeEventMessage(ctx, message) 185 | } 186 | 187 | func AddNodeEventMessage(ctx context.Context, message *types.EventMessage) error { 188 | node, err := GetNode(ctx, *config.Get().NodeName) 189 | if err != nil { 190 | return errors.Wrap(err, "error in GetNode") 191 | } 192 | 193 | event := corev1.Event{ 194 | InvolvedObject: corev1.ObjectReference{ 195 | APIVersion: "v1", 196 | Kind: "Node", 197 | Name: node.Name, 198 | UID: node.UID, 199 | ResourceVersion: node.ResourceVersion, 200 | }, 201 | Count: 1, 202 | FirstTimestamp: metav1.Now(), 203 | LastTimestamp: metav1.Now(), 204 | ObjectMeta: metav1.ObjectMeta{ 205 | Name: fmt.Sprintf("%s.%s", *config.Get().NodeName, uuid.New().String()), 206 | }, 207 | Type: message.Type, 208 | Reason: message.Reason, 209 | Message: message.Message, 210 | Source: corev1.EventSource{ 211 | Component: "aks-node-termination-handler", 212 | }, 213 | } 214 | 215 | err = wait.ExponentialBackoff(retry.DefaultBackoff, func() (bool, error) { 216 | _, err = client.GetKubernetesClient().CoreV1().Events("default").Create(ctx, &event, metav1.CreateOptions{}) 217 | 218 | switch { 219 | case err == nil: 220 | return true, nil 221 | case apierrorrs.IsConflict(err): 222 | return false, nil 223 | case err != nil: 224 | return false, errors.Wrap(err, "failed to create event") 225 | } 226 | 227 | return false, nil 228 | }) 229 | if err != nil { 230 | return errors.Wrap(err, "failed to add event") 231 | } 232 | 233 | return nil 234 | } 235 | 236 | func GetNodeLabels(ctx context.Context, nodeName string) (map[string]string, error) { 237 | // this need for 
unit tests 238 | if nodeName == "!!invalid!!GetNodeLabels" { 239 | return nil, errors.New("invalid node name") 240 | } 241 | 242 | // this need for unit tests 243 | if client.GetKubernetesClient() == nil { 244 | return make(map[string]string), nil 245 | } 246 | 247 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{}) 248 | if err != nil { 249 | return nil, errors.Wrap(err, "error in nodes.get") 250 | } 251 | 252 | return node.Labels, nil 253 | } 254 | 255 | func GetNodePods(ctx context.Context, nodeName string) ([]string, error) { 256 | // this need for unit tests 257 | if nodeName == "!!invalid!!GetNodePods" { 258 | return nil, errors.New("invalid node name") 259 | } 260 | 261 | // this need for unit tests 262 | if client.GetKubernetesClient() == nil { 263 | return []string{}, nil 264 | } 265 | 266 | pods, err := client.GetKubernetesClient().CoreV1().Pods("").List(ctx, metav1.ListOptions{}) 267 | if err != nil { 268 | return nil, errors.Wrap(err, "error in pods.list") 269 | } 270 | 271 | result := make([]string, 0) 272 | 273 | for _, pod := range pods.Items { 274 | // ignore DaemonSet pods from pods list, because they are not affected by node termination 275 | if getPodReferenceKind(pod) == "DaemonSet" { 276 | continue 277 | } 278 | 279 | if pod.Spec.NodeName == nodeName { 280 | result = append(result, pod.Name) 281 | } 282 | } 283 | 284 | return result, nil 285 | } 286 | 287 | func getPodReferenceKind(pod corev1.Pod) string { 288 | for _, ownerReference := range pod.OwnerReferences { 289 | if len(ownerReference.Kind) > 0 { 290 | return ownerReference.Kind 291 | } 292 | } 293 | 294 | return "" 295 | } 296 | -------------------------------------------------------------------------------- /pkg/cache/cache.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use 
this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package cache 14 | 15 | import ( 16 | "context" 17 | "sync" 18 | "time" 19 | 20 | log "github.com/sirupsen/logrus" 21 | ) 22 | 23 | var data = sync.Map{} 24 | 25 | func Add(key string, ttl time.Duration) { 26 | data.Store(key, time.Now().Add(ttl)) 27 | } 28 | 29 | func HasKey(key string) bool { 30 | _, exists := data.Load(key) 31 | 32 | return exists 33 | } 34 | 35 | func SheduleCleaning(ctx context.Context) { 36 | for ctx.Err() == nil { 37 | data.Range(func(key, value interface{}) bool { 38 | expireTime, ok := value.(time.Time) 39 | 40 | if ok && expireTime.Before(time.Now()) { 41 | log.Infof("delete %s", key) 42 | 43 | data.Delete(key) 44 | } 45 | 46 | return true 47 | }) 48 | 49 | select { 50 | case <-time.After(time.Second): 51 | case <-ctx.Done(): 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /pkg/cache/cache_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package cache_test 14 | 15 | import ( 16 | "context" 17 | "testing" 18 | "time" 19 | 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache" 21 | ) 22 | 23 | func TestCache(t *testing.T) { 24 | t.Parallel() 25 | 26 | ctx, cancel := context.WithCancel(context.TODO()) 27 | defer cancel() 28 | 29 | go cache.SheduleCleaning(ctx) 30 | 31 | const ( 32 | test1sec = "test1sec" 33 | test3sec = "test3sec" 34 | ) 35 | 36 | cache.Add(test1sec, time.Second) 37 | cache.Add(test3sec, 0) 38 | cache.Add(test3sec, 3*time.Second) 39 | 40 | time.Sleep(2 * time.Second) 41 | 42 | if cache.HasKey(test1sec) { 43 | t.Fatalf("%s not expired", test1sec) 44 | } 45 | 46 | if !cache.HasKey(test3sec) { 47 | t.Fatalf("%s expired", test3sec) 48 | } 49 | 50 | time.Sleep(2 * time.Second) 51 | 52 | if cache.HasKey(test3sec) { 53 | t.Fatalf("%s expired", test3sec) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /pkg/client/client.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package client 14 | 15 | import ( 16 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 17 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" 18 | "github.com/pkg/errors" 19 | log "github.com/sirupsen/logrus" 20 | "k8s.io/client-go/kubernetes" 21 | "k8s.io/client-go/rest" 22 | "k8s.io/client-go/tools/clientcmd" 23 | k8sMetrics "k8s.io/client-go/tools/metrics" 24 | ) 25 | 26 | var ( 27 | clientset *kubernetes.Clientset 28 | restconfig *rest.Config 29 | ) 30 | 31 | func Init() error { 32 | var err error 33 | 34 | k8sMetrics.Register(k8sMetrics.RegisterOpts{ 35 | RequestResult: &metrics.KubernetesMetricsResult{}, 36 | RequestLatency: &metrics.KubernetesMetricsLatency{}, 37 | }) 38 | 39 | if len(*config.Get().KubeConfigFile) > 0 { 40 | restconfig, err = clientcmd.BuildConfigFromFlags("", *config.Get().KubeConfigFile) 41 | if err != nil { 42 | return errors.Wrap(err, "error in clientcmd.BuildConfigFromFlags") 43 | } 44 | } else { 45 | log.Info("No kubeconfig file use incluster") 46 | 47 | restconfig, err = rest.InClusterConfig() 48 | if err != nil { 49 | return errors.Wrap(err, "error in rest.InClusterConfig") 50 | } 51 | } 52 | 53 | clientset, err = kubernetes.NewForConfig(restconfig) 54 | if err != nil { 55 | log.WithError(err).Fatal() 56 | } 57 | 58 | return nil 59 | } 60 | 61 | func GetKubernetesClient() *kubernetes.Clientset { 62 | return clientset 63 | } 64 | -------------------------------------------------------------------------------- /pkg/config/config.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 
// Default values used for the command line flags declared in config below.
const (
	azureEndpoint                 = "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01"
	defaultAlertMessage           = "Draining node={{ .NodeName }}, type={{ .Event.EventType }}"
	defaultPeriod                 = 5 * time.Second
	defaultPodGracePeriodSeconds  = -1
	defaultNodeGracePeriodSeconds = 120
	defaultGracePeriodSecond      = 10
	defaultRequestTimeout         = 5 * time.Second
	defaultWebHookTimeout         = 30 * time.Second
)

// Exported message texts.
// NOTE(review): presumably used as Kubernetes event messages by callers outside this file - confirm.
const (
	EventMessageReceived     = "Azure API sended schedule event for this node"
	EventMessageBeforeListen = "Start to listen events from Azure API"
)

// Validation errors returned by Check.
var (
	errNoNode             = errors.New("no node name is defined, run with -node=test")
	errChatIDMustBeInt    = errors.New("TelegramChatID must be integer")
	errInvalidTaintEffect = errors.New("TaintEffect must be either NoSchedule, NoExecute or PreferNoSchedule")
)

// Type holds all application settings. Every field is a pointer because the
// values are bound to command line flags (see config below); Load can
// additionally overwrite them from a YAML file.
type Type struct {
	ConfigFile             *string
	LogPretty              *bool
	LogLevel               *string
	DevelopmentMode        *bool
	KubeConfigFile         *string
	Endpoint               *string
	NodeName               *string
	Period                 *time.Duration
	RequestTimeout         *time.Duration
	TelegramToken          *string
	TelegramChatID         *string
	AlertMessage           *string
	WebHookContentType     *string
	WebHookURL             *string
	WebHookTemplate        *string
	WebHookTemplateFile    *string
	WebHookMethod          *string
	WebHookTimeout         *time.Duration
	WebhookInsecure        *bool
	WebhookProxy           *string
	WebhookRetries         *int
	SentryDSN              *string
	WebHTTPAddress         *string
	TaintNode              *bool
	TaintEffect            *string
	PodGracePeriodSeconds  *int
	NodeGracePeriodSeconds *int
	GracePeriodSeconds     *int
	DrainOnFreezeEvent     *bool
	ResourceName           *string
	ExitAfterNodeDrain     *bool
}

// config is the package-wide configuration instance. Each field is registered
// as a flag on the default flag set at package initialization; several
// defaults are taken from environment variables. DevelopmentMode has no flag
// here and therefore stays nil unless set elsewhere.
var config = Type{
	ConfigFile:             flag.String("config", os.Getenv("CONFIG"), "config file"),
	LogLevel:               flag.String("log.level", "INFO", "log level"),
	LogPretty:              flag.Bool("log.pretty", false, "log in text"),
	KubeConfigFile:         flag.String("kubeconfig", "", "kubeconfig file"),
	Endpoint:               flag.String("endpoint", azureEndpoint, "scheduled-events endpoint"),
	NodeName:               flag.String("node", os.Getenv("MY_NODE_NAME"), "node to drain"),
	Period:                 flag.Duration("period", defaultPeriod, "period to scrape endpoint"),
	RequestTimeout:         flag.Duration("request.timeout", defaultRequestTimeout, "request timeout"),
	TelegramToken:          flag.String("telegram.token", os.Getenv("TELEGRAM_TOKEN"), "telegram token"),
	TelegramChatID:         flag.String("telegram.chatID", os.Getenv("TELEGRAM_CHATID"), "telegram chatID"),
	AlertMessage:           flag.String("alert.message", defaultAlertMessage, "default message"),
	WebHookMethod:          flag.String("webhook.method", "POST", "request method"),
	WebHookContentType:     flag.String("webhook.contentType", "application/json", "request content-type header"),
	WebHookURL:             flag.String("webhook.url", os.Getenv("WEBHOOK_URL"), "send alerts to webhook"),
	WebHookTimeout:         flag.Duration("webhook.timeout", defaultWebHookTimeout, "request timeout"),
	WebHookTemplate:        flag.String("webhook.template", os.Getenv("WEBHOOK_TEMPLATE"), "request body"),
	WebHookTemplateFile:    flag.String("webhook.template-file", os.Getenv("WEBHOOK_TEMPLATE_FILE"), "path to request body template file"),
	WebhookInsecure:        flag.Bool("webhook.insecureSkip", true, "skip tls verification for webhook"),
	WebhookProxy:           flag.String("webhook.http-proxy", os.Getenv("WEBHOOK_HTTP_PROXY"), "use http proxy for webhook"),
	WebhookRetries:         flag.Int("webhook.retries", 3, "number of retries for webhook"), //nolint:mnd
	SentryDSN:              flag.String("sentry.dsn", "", "sentry DSN"),
	WebHTTPAddress:         flag.String("web.address", ":17923", ""),
	TaintNode:              flag.Bool("taint.node", false, "Taint the node before cordon and draining"),
	TaintEffect:            flag.String("taint.effect", "NoSchedule", "Taint effect to set on the node"),
	PodGracePeriodSeconds:  flag.Int("podGracePeriodSeconds", defaultPodGracePeriodSeconds, "grace period is seconds for pods termination"),
	NodeGracePeriodSeconds: flag.Int("nodeGracePeriodSeconds", defaultNodeGracePeriodSeconds, "maximum time in seconds to drain the node"),
	GracePeriodSeconds:     flag.Int("gracePeriodSeconds", defaultGracePeriodSecond, "grace period is seconds for application termination"),
	DrainOnFreezeEvent:     flag.Bool("drainOnFreezeEvent", false, "drain node on freeze event"),
	ResourceName:           flag.String("resource.name", "", "Azure resource name to drain"),
	ExitAfterNodeDrain:     flag.Bool("exitAfterNodeDrain", false, "process will exit after node drain"),
}

// GracePeriod returns GracePeriodSeconds converted to a time.Duration.
func (t *Type) GracePeriod() time.Duration {
	return time.Duration(*t.GracePeriodSeconds) * time.Second
}

// NodeGracePeriod returns NodeGracePeriodSeconds converted to a time.Duration.
func (t *Type) NodeGracePeriod() time.Duration {
	return time.Duration(*t.NodeGracePeriodSeconds) * time.Second
}

// check is event is excluded from draining node.
126 | func (t *Type) IsExcludedEvent(e types.ScheduledEventsEventType) bool { 127 | if e == types.EventTypeFreeze && !*t.DrainOnFreezeEvent { 128 | return true 129 | } 130 | 131 | return false 132 | } 133 | 134 | func (t *Type) String() string { 135 | b, err := json.Marshal(t) 136 | if err != nil { 137 | return err.Error() 138 | } 139 | 140 | return string(b) 141 | } 142 | 143 | func Check() error { 144 | if len(*config.NodeName) == 0 { 145 | return errNoNode 146 | } 147 | 148 | if len(*config.TelegramChatID) > 0 { 149 | if _, err := strconv.Atoi(*config.TelegramChatID); err != nil { 150 | return errChatIDMustBeInt 151 | } 152 | } 153 | 154 | taintEffect := *config.TaintEffect 155 | if taintEffect != string(corev1.TaintEffectNoSchedule) && 156 | taintEffect != string(corev1.TaintEffectNoExecute) && 157 | taintEffect != string(corev1.TaintEffectPreferNoSchedule) { 158 | return errInvalidTaintEffect 159 | } 160 | 161 | return nil 162 | } 163 | 164 | func Get() *Type { 165 | return &config 166 | } 167 | 168 | func Set(specifiedConfig Type) { 169 | config = specifiedConfig 170 | } 171 | 172 | func Load() error { 173 | if len(*config.ConfigFile) == 0 { 174 | return nil 175 | } 176 | 177 | configByte, err := os.ReadFile(*config.ConfigFile) 178 | if err != nil { 179 | return errors.Wrap(err, "error in os.ReadFile") 180 | } 181 | 182 | err = yaml.Unmarshal(configByte, &config) 183 | if err != nil { 184 | return errors.Wrap(err, "error in yaml.Unmarshal") 185 | } 186 | 187 | return nil 188 | } 189 | 190 | var gitVersion = "dev" 191 | 192 | func GetVersion() string { 193 | return gitVersion 194 | } 195 | -------------------------------------------------------------------------------- /pkg/config/config_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package config_test 14 | 15 | import ( 16 | "testing" 17 | "time" 18 | 19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 21 | "github.com/stretchr/testify/assert" 22 | "github.com/stretchr/testify/require" 23 | ) 24 | 25 | //nolint:paralleltest 26 | func TestConfigDefaults(t *testing.T) { 27 | if err := config.Load(); err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | assert.Equal(t, "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01", *config.Get().Endpoint) 32 | } 33 | 34 | //nolint:paralleltest 35 | func TestValidConfigFile(t *testing.T) { 36 | configFile := "./testdata/config_test.yaml" 37 | testPeriod := 123 38 | 39 | newConfig := config.Type{ 40 | ConfigFile: &configFile, 41 | GracePeriodSeconds: &testPeriod, 42 | NodeGracePeriodSeconds: &testPeriod, 43 | } 44 | config.Set(newConfig) 45 | 46 | err := config.Load() 47 | require.NoError(t, err) 48 | 49 | assert.Equal(t, "/some/test/path", *config.Get().KubeConfigFile) 50 | assert.Equal(t, time.Duration(testPeriod)*time.Second, config.Get().GracePeriod()) 51 | assert.Equal(t, time.Duration(testPeriod)*time.Second, config.Get().NodeGracePeriod()) 52 | assert.Contains(t, config.Get().String(), "123") 53 | } 54 | 55 | //nolint:paralleltest 56 | func TestInvalidConfigFile(t *testing.T) { 57 | configFile := "testdata/config_yaml_fake.yaml" 58 | newConfig := config.Type{ConfigFile: &configFile} 59 | config.Set(newConfig) 60 | 61 | err := config.Load() 62 | require.Error(t, err) 63 | } 
64 | 65 | //nolint:paralleltest 66 | func TestNotFoundConfigFile(t *testing.T) { 67 | configFile := "testdata/fake.yaml" 68 | newConfig := config.Type{ConfigFile: &configFile} 69 | config.Set(newConfig) 70 | 71 | err := config.Load() 72 | require.Error(t, err) 73 | } 74 | 75 | //nolint:paralleltest 76 | func TestVersion(t *testing.T) { 77 | if config.GetVersion() != "dev" { 78 | t.Fatal("version is not dev") 79 | } 80 | } 81 | 82 | //nolint:paralleltest,funlen 83 | func TestConfig(t *testing.T) { 84 | testCases := []struct { 85 | taintEffect string 86 | nodeName string 87 | telegramID string 88 | err bool 89 | testName string 90 | }{ 91 | { 92 | testName: "noSchedule", 93 | taintEffect: "NoSchedule", 94 | telegramID: "1", 95 | nodeName: "validNode", 96 | err: false, 97 | }, 98 | { 99 | testName: "noExecute", 100 | taintEffect: "NoExecute", 101 | nodeName: "validNode", 102 | telegramID: "1", 103 | err: false, 104 | }, 105 | { 106 | testName: "preferNoSchedule", 107 | taintEffect: "PreferNoSchedule", 108 | nodeName: "validNode", 109 | telegramID: "1", 110 | err: false, 111 | }, 112 | { 113 | testName: "invalidNodeName", 114 | taintEffect: "NoSchedule", 115 | nodeName: "", 116 | telegramID: "1", 117 | err: true, 118 | }, 119 | { 120 | testName: "InvalidTelegramId", 121 | taintEffect: "NoSchedule", 122 | nodeName: "validNode", 123 | telegramID: "invalidTelegramId", 124 | err: true, 125 | }, 126 | { 127 | testName: "InvalidNodeName", 128 | taintEffect: "NoSchedule", 129 | nodeName: "", 130 | telegramID: "1", 131 | err: true, 132 | }, 133 | { 134 | testName: "InvalidTaintEffect", 135 | taintEffect: "InvalidTaintEffect", 136 | nodeName: "validNode", 137 | telegramID: "1", 138 | err: true, 139 | }, 140 | } 141 | 142 | for i := range testCases { 143 | t.Run(testCases[i].testName, func(t *testing.T) { 144 | newConfig := config.Type{ 145 | TaintEffect: &testCases[i].taintEffect, 146 | NodeName: &testCases[i].nodeName, 147 | TelegramChatID: &testCases[i].telegramID, 148 | } 
149 | config.Set(newConfig) 150 | err := config.Check() 151 | 152 | if testCases[i].err { 153 | require.Error(t, err) 154 | } else { 155 | require.NoError(t, err) 156 | } 157 | }) 158 | } 159 | } 160 | 161 | func TestIsExcludedEvent(t *testing.T) { 162 | t.Parallel() 163 | 164 | trueValue := true 165 | falseValue := false 166 | 167 | testConfigValid := config.Type{ 168 | DrainOnFreezeEvent: &falseValue, 169 | } 170 | 171 | // test DrainOnFreezeEvent logic 172 | testConfigValid.DrainOnFreezeEvent = &falseValue 173 | if b := testConfigValid.IsExcludedEvent(types.EventTypeFreeze); b != true { 174 | t.Fatal("when DrainOnFreezeEvent is false, IsExcludedEvent must be true") 175 | } 176 | 177 | testConfigValid.DrainOnFreezeEvent = &trueValue 178 | if b := testConfigValid.IsExcludedEvent(types.EventTypeFreeze); b == true { 179 | t.Fatal("when DrainOnFreezeEvent is true, IsExcludedEvent must be false") 180 | } 181 | } 182 | -------------------------------------------------------------------------------- /pkg/config/testdata/config_test.yaml: -------------------------------------------------------------------------------- 1 | kubeconfigfile: /some/test/path -------------------------------------------------------------------------------- /pkg/config/testdata/config_yaml_fake.yaml: -------------------------------------------------------------------------------- 1 | key: value 2 | - test -------------------------------------------------------------------------------- /pkg/events/events.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package events 14 | 15 | import ( 16 | "context" 17 | "encoding/json" 18 | "io" 19 | "net/http" 20 | "time" 21 | 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache" 23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils" 26 | "github.com/pkg/errors" 27 | log "github.com/sirupsen/logrus" 28 | ) 29 | 30 | const ( 31 | requestTimeout = 10 * time.Second 32 | readInterval = 5 * time.Second 33 | eventCacheTTL = 10 * time.Minute 34 | ) 35 | 36 | var httpClient = &http.Client{ 37 | Transport: metrics.NewInstrumenter("events").InstrumentedRoundTripper(), 38 | } 39 | 40 | type Reader struct { 41 | // method of making request 42 | Method string 43 | // endpoint to read events 44 | Endpoint string 45 | // timeout of making request 46 | RequestTimeout time.Duration 47 | // intervals of reading events 48 | Period time.Duration 49 | // name of the node 50 | NodeName string 51 | // name of the resource to watch 52 | AzureResource string 53 | // BeforeReading is a function that will be called before reading events 54 | BeforeReading func(ctx context.Context) error `json:"-"` 55 | // EventReceived is a function that will be called when event received 56 | // return true if you want to stop reading events 57 | EventReceived func(ctx context.Context, event types.ScheduledEventsEvent) (bool, error) `json:"-"` 58 | } 59 | 60 | func NewReader() *Reader { 61 | return &Reader{ 62 | Method: 
http.MethodGet, 63 | Endpoint: "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01", 64 | RequestTimeout: requestTimeout, 65 | Period: readInterval, 66 | } 67 | } 68 | 69 | func (r *Reader) ReadEvents(ctx context.Context) { 70 | log.Infof("Start reading events %s", r.String()) 71 | 72 | if r.BeforeReading != nil { 73 | if err := r.BeforeReading(ctx); err != nil { 74 | log.WithError(err).Error("Error in BeforeReading") 75 | } 76 | } 77 | 78 | for ctx.Err() == nil { 79 | stopReadingEvents, err := r.ReadEndpoint(ctx) 80 | if err != nil { 81 | metrics.ErrorReadingEndpoint.WithLabelValues(r.getMetricsLabels()...).Inc() 82 | 83 | log.WithError(err).Error() 84 | } 85 | 86 | if stopReadingEvents { 87 | log.Info("Stop reading events") 88 | 89 | return 90 | } 91 | 92 | utils.SleepWithContext(ctx, r.Period) 93 | } 94 | } 95 | 96 | func (r *Reader) getScheduledEvents(ctx context.Context) (*types.ScheduledEventsType, error) { 97 | ctx, cancel := context.WithTimeout(ctx, r.RequestTimeout) 98 | defer cancel() 99 | 100 | req, err := http.NewRequestWithContext(ctx, r.Method, r.Endpoint, nil) 101 | if err != nil { 102 | return nil, errors.Wrap(err, "error in http.NewRequestWithContext") 103 | } 104 | 105 | req.Header.Add("Metadata", "true") 106 | 107 | log.WithFields(log.Fields{ 108 | "method": req.Method, 109 | "url": req.URL, 110 | "headers": req.Header, 111 | }).Debug("Doing request") 112 | 113 | resp, err := httpClient.Do(req) 114 | if err != nil { 115 | return nil, errors.Wrap(err, "error in client.Do(req)") 116 | } 117 | 118 | defer resp.Body.Close() 119 | 120 | log.Debugf("response status: %s", resp.Status) 121 | 122 | body, err := io.ReadAll(resp.Body) 123 | if err != nil { 124 | return nil, errors.Wrap(err, "error in io.ReadAll") 125 | } 126 | 127 | log.Debugf("response body: %s", string(body)) 128 | 129 | if len(body) == 0 { 130 | log.Warn("Events response is empty") 131 | 132 | return &types.ScheduledEventsType{}, nil 133 | } 134 | 135 | message := 
types.ScheduledEventsType{} 136 | 137 | if err := json.Unmarshal(body, &message); err != nil { 138 | return nil, errors.Wrap(err, "error in json.Unmarshal") 139 | } 140 | 141 | return &message, nil 142 | } 143 | 144 | func (r *Reader) ReadEndpoint(ctx context.Context) (bool, error) { 145 | message, err := r.getScheduledEvents(ctx) 146 | if err != nil { 147 | return false, errors.Wrap(err, "error in getScheduledEvents") 148 | } 149 | 150 | for _, event := range message.Events { 151 | for _, resource := range event.Resources { 152 | if resource == r.AzureResource { 153 | log.Infof("%+v", message) 154 | 155 | if cache.HasKey(event.EventId) { 156 | log.Infof("Event %s already processed", event.EventId) 157 | 158 | continue 159 | } 160 | 161 | // add to cache, ignore similar events for 10 minutes 162 | cache.Add(event.EventId, eventCacheTTL) 163 | 164 | metrics.ScheduledEventsTotal.WithLabelValues(append(r.getMetricsLabels(), string(event.EventType))...).Inc() 165 | 166 | if r.EventReceived != nil { 167 | return r.EventReceived(ctx, event) 168 | } 169 | } 170 | } 171 | } 172 | 173 | return false, nil 174 | } 175 | 176 | func (r *Reader) getMetricsLabels() []string { 177 | return []string{ 178 | r.NodeName, 179 | r.AzureResource, 180 | } 181 | } 182 | 183 | func (r *Reader) String() string { 184 | b, _ := json.Marshal(r) //nolint:errchkjson 185 | 186 | return string(b) 187 | } 188 | -------------------------------------------------------------------------------- /pkg/events/events_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package events_test 14 | 15 | import ( 16 | "context" 17 | "encoding/json" 18 | "errors" 19 | "net/http" 20 | "net/http/httptest" 21 | "testing" 22 | "time" 23 | 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/events" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils" 27 | log "github.com/sirupsen/logrus" 28 | ) 29 | 30 | func TestReadingEvents(t *testing.T) { //nolint:funlen 31 | t.Parallel() 32 | 33 | log.SetLevel(log.DebugLevel) 34 | 35 | ctx := context.TODO() 36 | 37 | handler := http.NewServeMux() 38 | handler.HandleFunc("/badjson", func(w http.ResponseWriter, _ *http.Request) { 39 | w.WriteHeader(http.StatusOK) 40 | _, _ = w.Write([]byte(`!!!{"DocumentIncarnation":1,"Events":[]}`)) 41 | }) 42 | handler.HandleFunc("/emptyjson", func(w http.ResponseWriter, _ *http.Request) { 43 | w.WriteHeader(http.StatusOK) 44 | _, _ = w.Write([]byte(``)) 45 | }) 46 | handler.HandleFunc("/incorrectcontentlen", func(w http.ResponseWriter, _ *http.Request) { 47 | w.Header().Add("Content-Length", "50") 48 | 49 | _, _ = w.Write([]byte("a")) 50 | }) 51 | handler.HandleFunc("/timeout", func(w http.ResponseWriter, r *http.Request) { 52 | utils.SleepWithContext(r.Context(), 5*time.Second) 53 | w.WriteHeader(http.StatusOK) 54 | _, _ = w.Write([]byte(``)) 55 | }) 56 | handler.HandleFunc("/document", func(w http.ResponseWriter, _ *http.Request) { 57 | message, _ := json.Marshal(types.ScheduledEventsType{ 58 | DocumentIncarnation: 1, 59 | Events: 
[]types.ScheduledEventsEvent{ 60 | { 61 | EventId: time.Now().String(), 62 | EventType: types.EventTypeFreeze, 63 | ResourceType: "resourceType", 64 | Resources: []string{"resource1", "resource2"}, 65 | }, 66 | }, 67 | }) 68 | 69 | w.WriteHeader(http.StatusOK) 70 | _, _ = w.Write(message) 71 | }) 72 | 73 | testServer := httptest.NewServer(handler) 74 | 75 | t.Run("badjson", func(t *testing.T) { 76 | t.Parallel() 77 | 78 | eventReader := events.NewReader() 79 | eventReader.Endpoint = testServer.URL + "/badjson" 80 | 81 | if _, err := eventReader.ReadEndpoint(ctx); err == nil { 82 | t.Error("expected error") 83 | } 84 | }) 85 | 86 | t.Run("badhttp", func(t *testing.T) { 87 | t.Parallel() 88 | 89 | eventReader := events.NewReader() 90 | eventReader.Method = "bad method" 91 | eventReader.Endpoint = "fake://fake" 92 | 93 | if _, err := eventReader.ReadEndpoint(ctx); err == nil { 94 | t.Error("expected error") 95 | } 96 | 97 | ctx, cancel := context.WithTimeout(ctx, 1*time.Second) 98 | defer cancel() 99 | 100 | eventReader.ReadEvents(ctx) 101 | }) 102 | 103 | t.Run("badhttpcontent", func(t *testing.T) { 104 | t.Parallel() 105 | 106 | eventReader := events.NewReader() 107 | eventReader.Endpoint = testServer.URL + "/incorrectcontentlen" 108 | 109 | if _, err := eventReader.ReadEndpoint(ctx); err == nil { 110 | t.Error("expected error") 111 | } 112 | }) 113 | 114 | t.Run("emptyjson", func(t *testing.T) { 115 | t.Parallel() 116 | 117 | eventReader := events.NewReader() 118 | eventReader.Endpoint = testServer.URL + "/emptyjson" 119 | 120 | if _, err := eventReader.ReadEndpoint(ctx); err != nil { 121 | t.Error(err) 122 | } 123 | }) 124 | 125 | t.Run("timeout", func(t *testing.T) { 126 | t.Parallel() 127 | 128 | eventReader := events.NewReader() 129 | eventReader.Endpoint = testServer.URL + "/timeout" 130 | eventReader.RequestTimeout = 1 * time.Second 131 | 132 | if _, err := eventReader.ReadEndpoint(ctx); !errors.Is(err, context.DeadlineExceeded) { 133 | t.Error(err) 134 | } 
135 | }) 136 | 137 | t.Run("document", func(t *testing.T) { 138 | t.Parallel() 139 | 140 | receivedDocument := types.ScheduledEventsEvent{} 141 | 142 | eventReader := events.NewReader() 143 | eventReader.Endpoint = testServer.URL + "/document" 144 | eventReader.AzureResource = "resource1" 145 | eventReader.BeforeReading = func(_ context.Context) error { 146 | return errors.New("error in BeforeReading") //nolint:goerr113 147 | } 148 | eventReader.EventReceived = func(_ context.Context, event types.ScheduledEventsEvent) (bool, error) { 149 | receivedDocument = event 150 | 151 | return true, nil 152 | } 153 | 154 | ctx, cancel := context.WithTimeout(ctx, 5*time.Second) 155 | defer cancel() 156 | 157 | eventReader.ReadEvents(ctx) 158 | 159 | t.Logf("%+v", receivedDocument) 160 | 161 | if receivedDocument.EventId == "" { 162 | t.Error("unexpected event id") 163 | } 164 | }) 165 | } 166 | -------------------------------------------------------------------------------- /pkg/logger/logger.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package logger 14 |

// KubectlLogger adapts a plain logging callback to a Write method so it can be
// passed wherever a byte sink is expected.
type KubectlLogger struct {
	// Log receives every written chunk as a string; nil discards the data.
	Log func(string)
}

// Write forwards p to b.Log (when set) and reports all of p as written.
//
// The io.Writer contract requires a non-nil error whenever fewer than len(p)
// bytes are reported, so the full length is returned even when Log is nil and
// the data is discarded.
func (b *KubectlLogger) Write(p []byte) (int, error) {
	if b.Log != nil {
		b.Log(string(p))
	}

	return len(p), nil
}
26 | -------------------------------------------------------------------------------- /pkg/logger/logger_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package logger_test 14 | 15 | import ( 16 | "testing" 17 | 18 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/logger" 19 | ) 20 |

// TestKubectlLogger checks that Write hands the text to the callback and
// reports the whole input as written.
func TestKubectlLogger(t *testing.T) {
	t.Parallel()

	kubectlLogger := logger.KubectlLogger{}

	logText := ""

	kubectlLogger.Log = func(message string) {
		logText = message
	}

	input := []byte("test")

	i, err := kubectlLogger.Write(input)
	if err != nil {
		t.Fatal(err)
	}

	if i != len(input) {
		t.Fatalf("expected: %d, got: %d", len(input), i)
	}

	if logText != "test" {
		t.Fatalf("expected: %s, got: %s", "test", logText)
	}
}
45 | -------------------------------------------------------------------------------- /pkg/metrics/metrics.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License.
12 | */ 13 | package metrics 14 | 15 | import ( 16 | "context" 17 | "crypto/tls" 18 | "fmt" 19 | "net/http" 20 | "net/url" 21 | "strconv" 22 | "strings" 23 | "time" 24 | 25 | "github.com/pkg/errors" 26 | "github.com/prometheus/client_golang/prometheus" 27 | "github.com/prometheus/client_golang/prometheus/promauto" 28 | "github.com/prometheus/client_golang/prometheus/promhttp" 29 | log "github.com/sirupsen/logrus" 30 | ) 31 |

// namespace prefixes every metric exported by this package.
const namespace = "aks_node_termination_handler"

// Instrumenter builds HTTP round trippers that export Prometheus metrics.
type Instrumenter struct {
	subsystemIdentifier string
	insecureSkipVerify  bool
	proxyURL            *url.URL
}

// NewInstrumenter creates a new Instrumenter. The subsystemIdentifier is used
// as part of the metric names (e.g. http_<subsystemIdentifier>_requests_total).
func NewInstrumenter(subsystemIdentifier string) *Instrumenter {
	return &Instrumenter{
		subsystemIdentifier: subsystemIdentifier,
	}
}

// WithProxy configures the proxy used by the round tripper. An empty string
// leaves the instrumenter unchanged; a URL that fails to parse is logged and
// ignored.
func (i *Instrumenter) WithProxy(proxyURL string) *Instrumenter {
	if proxyURL == "" {
		return i
	}

	proxy, err := url.Parse(proxyURL)
	if err != nil {
		log.WithError(err).Errorf("error parsing proxy url %s for %s", proxyURL, i.subsystemIdentifier)
	} else {
		i.proxyURL = proxy
	}

	return i
}

// WithInsecureSkipVerify sets whether the round tripper skips TLS certificate
// verification.
func (i *Instrumenter) WithInsecureSkipVerify(insecure bool) *Instrumenter {
	i.insecureSkipVerify = insecure

	return i
}

// InstrumentedRoundTripper returns an instrumented round tripper.
//
// Each call registers an in-flight gauge, a per-endpoint request counter and a
// latency histogram through promauto.
// NOTE(review): a second call with the same subsystemIdentifier would register
// the same metric names again — confirm callers invoke it once per subsystem.
func (i *Instrumenter) InstrumentedRoundTripper() http.RoundTripper {
	inFlightRequestsGauge := promauto.NewGauge(prometheus.GaugeOpts{
		Namespace: namespace,
		Name:      fmt.Sprintf("http_%s_in_flight_requests", i.subsystemIdentifier),
		Help:      fmt.Sprintf("A gauge of in-flight requests to the http %s.", i.subsystemIdentifier),
	})

	requestsPerEndpointCounter := promauto.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: namespace,
			Name:      fmt.Sprintf("http_%s_requests_total", i.subsystemIdentifier),
			Help:      fmt.Sprintf("A counter for requests to the http %s per endpoint.", i.subsystemIdentifier),
		},
		[]string{"code", "method", "endpoint"},
	)

	requestLatencyHistogram := promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: namespace,
			Name:      fmt.Sprintf("http_%s_request_duration_seconds", i.subsystemIdentifier),
			Help:      fmt.Sprintf("A histogram of request latencies to the http %s.", i.subsystemIdentifier),
			Buckets:   prometheus.DefBuckets,
		},
		[]string{"method"},
	)

	defaultTransport := &http.Transport{
		TLSClientConfig: &tls.Config{
			InsecureSkipVerify: i.insecureSkipVerify, //nolint:gosec
		},
	}

	if i.proxyURL != nil {
		log.Infof("using http_proxy %s for %s", i.proxyURL.String(), i.subsystemIdentifier)

		defaultTransport.Proxy = http.ProxyURL(i.proxyURL)
	}

	return promhttp.InstrumentRoundTripperInFlight(inFlightRequestsGauge,
		promhttp.InstrumentRoundTripperDuration(requestLatencyHistogram,
			i.instrumentRoundTripperEndpoint(requestsPerEndpointCounter, defaultTransport),
		),
	)
}

// instrumentRoundTripperEndpoint wraps next and counts every successful round
// trip by status code, lower-cased method and URL path. Failed round trips are
// not counted; their error is returned wrapped.
func (i *Instrumenter) instrumentRoundTripperEndpoint(counter *prometheus.CounterVec, next http.RoundTripper) promhttp.RoundTripperFunc {
	return func(r *http.Request) (*http.Response, error) {
		resp, err := next.RoundTrip(r)
		if err == nil {
			statusCode := strconv.Itoa(resp.StatusCode)
			// Label from the request in hand: resp.Request is filled in by the
			// transport and is not guaranteed for arbitrary RoundTrippers.
			counter.WithLabelValues(statusCode, strings.ToLower(r.Method), r.URL.Path).Inc()
		}

		return resp, errors.Wrap(err, "error making roundtrip")
	}
}

// ErrorReadingEndpoint counts failed reads of the scheduled-events endpoint.
var ErrorReadingEndpoint = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: namespace,
		Name:      "error_reading_endpoint_total",
		Help:      "A counter for errored reading endpoint",
	},
	[]string{"node", "resource"},
)

// ScheduledEventsTotal counts scheduled events received from Azure.
var ScheduledEventsTotal = promauto.NewCounterVec(
	prometheus.CounterOpts{
		Namespace: namespace,
		Name:      "scheduled_events_total",
		Help:      "Scheduled Events from Azure",
	},
	[]string{"node", "resource", "type"},
)

// KubernetesAPIRequest counts Kubernetes API requests by cluster and code.
var KubernetesAPIRequest = promauto.NewCounterVec(prometheus.CounterOpts{
	Namespace: namespace,
	Name:      "apiserver_request_total",
	Help:      "The total number of Kubernetes API requests",
}, []string{"cluster", "code"})

// KubernetesAPIRequestDuration observes Kubernetes API request latency in seconds.
var KubernetesAPIRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
	Namespace: namespace,
	Name:      "apiserver_request_duration",
	Help:      "The duration in seconds of Kubernetes API requests",
}, []string{"cluster"})

// GetHandler returns the HTTP handler that exposes the registered metrics.
func GetHandler() http.Handler {
	return promhttp.Handler()
}

// KubernetesMetricsResult records Kubernetes API request results.
type KubernetesMetricsResult struct {
	Cluster string
}

// Increment counts one API request; the host is used as the "cluster" label
// value (the Cluster field is not read here).
func (r *KubernetesMetricsResult) Increment(_ context.Context, code string, _ string, host string) {
	KubernetesAPIRequest.WithLabelValues(host, code).Inc()
}

// KubernetesMetricsLatency records Kubernetes API request latency.
type KubernetesMetricsLatency struct {
	Cluster string
}

// Observe records latency in seconds, labelled with the host of u (the Cluster
// field is not read here).
func (r *KubernetesMetricsLatency) Observe(_ context.Context, _ string, u url.URL, latency time.Duration) {
	KubernetesAPIRequestDuration.WithLabelValues(u.Host).Observe(latency.Seconds())
}
176 |
-------------------------------------------------------------------------------- /pkg/metrics/metrics_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package metrics_test 14 | 15 | import ( 16 | "context" 17 | "io" 18 | "net/http" 19 | "net/http/httptest" 20 | "strings" 21 | "testing" 22 | "time" 23 | 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" 25 | ) 26 |

// Fixtures shared by all tests in this file.
var (
	client = &http.Client{}
	ts     = httptest.NewServer(metrics.GetHandler())
	ctx    = context.TODO()
)

// TestMetricsInc checks that the Kubernetes API metrics accept observations.
func TestMetricsInc(t *testing.T) {
	t.Parallel()

	metrics.KubernetesAPIRequest.WithLabelValues("test", "200").Inc()
	metrics.KubernetesAPIRequestDuration.WithLabelValues("test").Observe(1)
}

// TestMetricsHandler scrapes the metrics endpoint and checks that the
// API-request duration histogram is exported.
func TestMetricsHandler(t *testing.T) {
	t.Parallel()

	// A histogram vector is only exported once it has at least one child, so
	// record a sample here instead of depending on another parallel test
	// having done so first.
	metrics.KubernetesAPIRequestDuration.WithLabelValues("test").Observe(1)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, ts.URL, nil)
	if err != nil {
		t.Fatal(err)
	}

	resp, err := client.Do(req)
	if err != nil {
		t.Fatal(err)
	}
	// Registered before reading so the body is closed on the failure path too.
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		t.Fatal(err)
	}

	if m := "aks_node_termination_handler_apiserver_request_duration"; !strings.Contains(string(body), m) {
		t.Fatalf("no metric %s found", m)
	}
}

// TestKubernetesMetrics exercises the result and latency adapters.
func TestKubernetesMetrics(t *testing.T) {
	t.Parallel()

	kubernetesMetricsResult := metrics.KubernetesMetricsResult{}
	kubernetesMetricsLatency := metrics.KubernetesMetricsLatency{}

	kubernetesMetricsResult.Increment(ctx, "200", "test", "test")
	kubernetesMetricsLatency.Observe(ctx, "test", *httptest.NewRequest(http.MethodGet, ts.URL, nil).URL, time.Second)
}

// TestInstrumenter performs a round trip through a default instrumenter.
func TestInstrumenter(t *testing.T) {
	t.Parallel()

	instrumenter := metrics.NewInstrumenter("test")

	r, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Body.Close()
}

// TestInstrumenterWithEmptyProxy checks that an empty proxy URL is a no-op.
func TestInstrumenterWithEmptyProxy(t *testing.T) {
	t.Parallel()

	instrumenter := metrics.NewInstrumenter("TestInstrumenterWithEmptyProxy").WithProxy("")

	r, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Body.Close()
}

// TestInstrumenterProxy performs a round trip with a proxy configured.
func TestInstrumenterProxy(t *testing.T) {
	t.Parallel()

	instrumenter := metrics.NewInstrumenter("testproxy").
		WithInsecureSkipVerify(true).
		WithProxy(ts.URL)

	r, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Body.Close()
}

// TestInstrumenterBadProxy checks that the round trip still succeeds when
// WithProxy is given a malformed proxy URL.
func TestInstrumenterBadProxy(t *testing.T) {
	t.Parallel()

	instrumenter := metrics.NewInstrumenter("testbadproxy").
		WithInsecureSkipVerify(true).
		WithProxy("badproxy://badproxy:badproxy")

	r, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
	if err != nil {
		t.Fatal(err)
	}
	defer r.Body.Close()
}
128 | -------------------------------------------------------------------------------- /pkg/template/README.md: -------------------------------------------------------------------------------- 1 | # Templating Options 2 | 3 | | Template | Description | Example | 4 | | --------- | ----------- | ------- | 5 | | `{{ .Event.EventId }}` | Globally unique identifier for this event. | 602d9444-d2cd-49c7-8624-8643e7171297 | 6 | | `{{ .Event.EventType }}` | Impact this event causes. | Reboot | 7 | | `{{ .Event.ResourceType }}` | Type of resource this event affects. | VirtualMachine | 8 | | `{{ .Event.Resources }}` | List of resources this event affects. | [ FrontEnd_IN_0 ...] | 9 | | `{{ .Event.EventStatus }}` | Status of this event. | Scheduled | 10 | | `{{ .Event.NotBefore }}` | Time after which this event can start. The event is guaranteed to not start before this time. Will be blank if the event has already started | Mon, 19 Sep 2016 18:29:47 GMT | 11 | | `{{ .Event.Description }}` | Description of this event. | Host server is undergoing maintenance | 12 | | `{{ .Event.EventSource }}` | Initiator of the event. | Platform | 13 | | `{{ .Event.DurationInSeconds }}` | The expected duration of the interruption caused by the event. | -1 | 14 | | `{{ .NodeLabels }}` | Node labels | kubernetes.azure.com/agentpool:spotcpu4m16n ...
| 15 | | `{{ .NodeName }}` | Node name | aks-spotcpu4m16n-41289323-vmss0000ny | 16 | | `{{ .ClusterName }}` | Node label kubernetes.azure.com/cluster | MC_EAST-US-RC-STAGE_stage-cluster_eastus | 17 | | `{{ .InstanceType }}` | Node label node.kubernetes.io/instance-type | Standard_D4as_v5 | 18 | | `{{ .NodeArch }}` | Node label kubernetes.io/arch | amd64 | 19 | | `{{ .NodeOS }}` | Node label kubernetes.io/os | linux | 20 | | `{{ .NodeRole }}` | Node label kubernetes.io/role | agent | 21 | | `{{ .NodeRegion }}` | Node label topology.kubernetes.io/region | eastus | 22 | | `{{ .NodeZone }}` | Node label topology.kubernetes.io/zone | 0 | 23 | | `{{ .NodePods }}` | List of pods on node | [ pod1 ...] | 24 | -------------------------------------------------------------------------------- /pkg/template/template.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package template 14 | 15 | import ( 16 | "bytes" 17 | "context" 18 | "html/template" 19 | 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api" 21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 22 | "github.com/pkg/errors" 23 | ) 24 |

// MessageType is the data made available to a message template: the scheduled
// event plus details of the node it affects.
type MessageType struct {
	Event        types.ScheduledEventsEvent
	Template     string
	NodeLabels   map[string]string `description:"Node labels"`
	NodeName     string            `description:"Node name"`
	ClusterName  string            `description:"Node label kubernetes.azure.com/cluster"`
	InstanceType string            `description:"Node label node.kubernetes.io/instance-type"`
	NodeArch     string            `description:"Node label kubernetes.io/arch"`
	NodeOS       string            `description:"Node label kubernetes.io/os"`
	NodeRole     string            `description:"Node label kubernetes.io/role"`
	NodeRegion   string            `description:"Node label topology.kubernetes.io/region"`
	NodeZone     string            `description:"Node label topology.kubernetes.io/zone"`
	NodePods     []string          `description:"List of pods on node"`
}

// NewMessageType looks up the labels and pods of nodeName and returns a
// MessageType for event with the label-derived fields filled in. The Template
// field is left empty for the caller to set.
func NewMessageType(ctx context.Context, nodeName string, event types.ScheduledEventsEvent) (*MessageType, error) {
	labels, err := api.GetNodeLabels(ctx, nodeName)
	if err != nil {
		return nil, errors.Wrap(err, "error in nodes.get")
	}

	pods, err := api.GetNodePods(ctx, nodeName)
	if err != nil {
		return nil, errors.Wrap(err, "error in getNodePods")
	}

	result := MessageType{
		Event:      event,
		NodeName:   nodeName,
		NodeLabels: labels,
		NodePods:   pods,
	}

	// Convenience fields copied from well-known node labels; a missing label
	// leaves the field empty.
	result.ClusterName = labels["kubernetes.azure.com/cluster"]
	result.InstanceType = labels["node.kubernetes.io/instance-type"]
	result.NodeArch = labels["kubernetes.io/arch"]
	result.NodeOS = labels["kubernetes.io/os"]
	result.NodeRole = labels["kubernetes.io/role"]
	result.NodeRegion = labels["topology.kubernetes.io/region"]
	result.NodeZone = labels["topology.kubernetes.io/zone"]

	return &result, nil
}

// Message renders obj.Template with obj as the template data and returns the
// resulting text. Parse and execution failures are returned wrapped.
//
// NOTE(review): rendering goes through html/template, so values are
// HTML-escaped — confirm that is intended for every message destination.
func Message(obj *MessageType) (string, error) {
	parsed, err := template.New("message").Parse(obj.Template)
	if err != nil {
		return "", errors.Wrap(err, "error in template.Parse")
	}

	var rendered bytes.Buffer

	if err := parsed.Execute(&rendered, obj); err != nil {
		return "", errors.Wrap(err, "error in template.Execute")
	}

	return rendered.String(), nil
}
81 | -------------------------------------------------------------------------------- /pkg/template/template_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License.
12 | */ 13 | package template_test 14 | 15 | import ( 16 | "context" 17 | "encoding/json" 18 | "fmt" 19 | "os" 20 | "reflect" 21 | "strings" 22 | "testing" 23 | 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" 25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 26 | ) 27 | 28 | const fakeTemplate = "{{" 29 | 30 | func TestTemplateMessage(t *testing.T) { 31 | t.Parallel() 32 | 33 | obj := &template.MessageType{ 34 | Event: types.ScheduledEventsEvent{ 35 | EventId: "someID", 36 | EventType: "someType", 37 | }, 38 | NodePods: []string{"pod1", "pod2"}, 39 | Template: "test {{ .Event.EventId }} {{ .Event.EventType }} {{ .NodePods}}", 40 | } 41 | 42 | tpl, err := template.Message(obj) 43 | if err != nil { 44 | t.Fatal(err) 45 | } 46 | 47 | if want := "test someID someType [pod1 pod2]"; tpl != want { 48 | t.Fatalf("want=%s,got=%s", want, tpl) 49 | } 50 | } 51 | 52 | func TestFakeTemplate(t *testing.T) { 53 | t.Parallel() 54 | 55 | _, err := template.Message(&template.MessageType{ 56 | Template: fakeTemplate, 57 | }) 58 | if err == nil { 59 | t.Fatal("must be error") 60 | } 61 | } 62 | 63 | func TestFakeTemplateFunc(t *testing.T) { 64 | t.Parallel() 65 | 66 | _, err := template.Message(&template.MessageType{ 67 | Template: "{{ .DDD }}", 68 | }) 69 | if err == nil { 70 | t.Fatal("must be error") 71 | } 72 | 73 | t.Log(err) 74 | } 75 | 76 | func TestTemplateMarkdown(t *testing.T) { 77 | t.Parallel() 78 | 79 | message := template.MessageType{} 80 | 81 | messageBytes, err := os.ReadFile("testdata/message.json") 82 | if err != nil { 83 | t.Fatal(err) 84 | } 85 | 86 | if err := json.Unmarshal(messageBytes, &message); err != nil { 87 | t.Fatal(err) 88 | } 89 | 90 | printType("", message) 91 | 92 | if err = os.WriteFile("README.md.tmp", []byte(buf.String()), 0o644); err != nil { //nolint:gosec 93 | t.Fatal(err) 94 | } 95 | } 96 | 97 | var buf strings.Builder 98 | 99 | func printType(prefix string, message interface{}) { 100 | v := 
reflect.ValueOf(message) 101 | typeOfS := v.Type() 102 | 103 | for i := range v.NumField() { 104 | switch typeOfS.Field(i).Name { 105 | case "Template": 106 | case "Event": 107 | printType(typeOfS.Field(i).Name+".", v.Field(i).Interface()) 108 | default: 109 | value := v.Field(i).Interface() 110 | 111 | switch v.Field(i).Type().Kind() { //nolint:exhaustive 112 | case reflect.Slice: 113 | a := reflect.ValueOf(value).Interface().([]string) //nolint:forcetypeassert 114 | if len(a) > 0 { 115 | value = fmt.Sprintf("[ %s ...]", a[0]) 116 | } 117 | case reflect.Int: 118 | value = fmt.Sprintf("%d", value) 119 | case reflect.Map: 120 | a := reflect.ValueOf(value).Interface().(map[string]string) //nolint:forcetypeassert 121 | for k, v := range a { 122 | value = fmt.Sprintf("%s:%s ...", k, v) 123 | 124 | break 125 | } 126 | } 127 | 128 | buf.WriteString(fmt.Sprintf( 129 | "| `{{ .%s%s }}` | %v | %v |\n", 130 | prefix, 131 | typeOfS.Field(i).Name, 132 | typeOfS.Field(i).Tag.Get("description"), 133 | value, 134 | )) 135 | } 136 | } 137 | } 138 | 139 | func TestNewMessageType(t *testing.T) { 140 | t.Parallel() 141 | 142 | if _, err := template.NewMessageType(context.TODO(), "!!invalid!!GetNodeLabels", types.ScheduledEventsEvent{}); err == nil { 143 | t.Fatal("error expected") 144 | } 145 | 146 | if _, err := template.NewMessageType(context.TODO(), "!!invalid!!GetNodePods", types.ScheduledEventsEvent{}); err == nil { 147 | t.Fatal("error expected") 148 | } 149 | 150 | messageType, err := template.NewMessageType(context.TODO(), "somenode", types.ScheduledEventsEvent{}) 151 | if err != nil { 152 | t.Fatal(err) 153 | } 154 | 155 | if messageType.NodeName != "somenode" { 156 | t.Fatal("NodePods is nil") 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /pkg/template/testdata/message.json: -------------------------------------------------------------------------------- 1 | { 2 | "Event": { 3 | "EventId": 
"602d9444-d2cd-49c7-8624-8643e7171297", 4 | "EventType": "Reboot", 5 | "ResourceType": "VirtualMachine", 6 | "Resources": [ 7 | "FrontEnd_IN_0", 8 | "aks-spotcpu2d2as-24469130-vmss_1", 9 | "aks-spotcpu4m16n-41289323-vmss_862" 10 | ], 11 | "EventStatus": "Scheduled", 12 | "NotBefore": "Mon, 19 Sep 2016 18:29:47 GMT", 13 | "Description": "Host server is undergoing maintenance", 14 | "EventSource": "Platform", 15 | "DurationInSeconds": -1 16 | }, 17 | "Template": "", 18 | "NodeLabels": { 19 | "agentpool": "spotcpu4m16n", 20 | "beta.kubernetes.io/arch": "amd64", 21 | "beta.kubernetes.io/instance-type": "Standard_D4as_v5", 22 | "beta.kubernetes.io/os": "linux", 23 | "failure-domain.beta.kubernetes.io/region": "eastus", 24 | "failure-domain.beta.kubernetes.io/zone": "0", 25 | "kubernetes.azure.com/agentpool": "spotcpu4m16n", 26 | "kubernetes.azure.com/cluster": "MC_EAST-US-RC-STAGE_stage-cluster_eastus", 27 | "kubernetes.azure.com/consolidated-additional-properties": "d9a49827-aede-11ee-832c-fe2d222ef432", 28 | "kubernetes.azure.com/kubelet-identity-client-id": "6781a919-9379-417c-8aff-257ecacd1139", 29 | "kubernetes.azure.com/mode": "user", 30 | "kubernetes.azure.com/network-policy": "none", 31 | "kubernetes.azure.com/node-image-version": "AKSUbuntu-2204gen2containerd-202312.06.0", 32 | "kubernetes.azure.com/nodepool-type": "VirtualMachineScaleSets", 33 | "kubernetes.azure.com/os-sku": "Ubuntu", 34 | "kubernetes.azure.com/role": "agent", 35 | "kubernetes.azure.com/scalesetpriority": "spot", 36 | "kubernetes.azure.com/storageprofile": "managed", 37 | "kubernetes.azure.com/storagetier": "Premium_LRS", 38 | "kubernetes.io/arch": "amd64", 39 | "kubernetes.io/hostname": "aks-spotcpu4m16n-41289323-vmss0000ny", 40 | "kubernetes.io/os": "linux", 41 | "kubernetes.io/role": "agent", 42 | "node-role.kubernetes.io/agent": "", 43 | "node.kubernetes.io/instance-type": "Standard_D4as_v5", 44 | "storageprofile": "managed", 45 | "storagetier": "Premium_LRS", 46 | 
"topology.disk.csi.azure.com/zone": "", 47 | "topology.kubernetes.io/region": "eastus", 48 | "topology.kubernetes.io/zone": "0" 49 | }, 50 | "NodeName": "aks-spotcpu4m16n-41289323-vmss0000ny", 51 | "ClusterName": "MC_EAST-US-RC-STAGE_stage-cluster_eastus", 52 | "InstanceType": "Standard_D4as_v5", 53 | "NodeArch": "amd64", 54 | "NodeOS": "linux", 55 | "NodeRole": "agent", 56 | "NodeRegion": "eastus", 57 | "NodeZone": "0", 58 | "NodePods": [ 59 | "pod1", 60 | "pod2" 61 | ] 62 | } -------------------------------------------------------------------------------- /pkg/types/testdata/ScheduledEventsType.json: -------------------------------------------------------------------------------- 1 | { 2 | "DocumentIncarnation": 1, 3 | "Events": [ 4 | { 5 | "EventId": "602d9444-d2cd-49c7-8624-8643e7171297", 6 | "EventType": "Reboot", 7 | "ResourceType": "VirtualMachine", 8 | "Resources": [ 9 | "FrontEnd_IN_0", 10 | "aks-spotcpu2d2as-24469130-vmss_1" 11 | ], 12 | "EventStatus": "Scheduled", 13 | "NotBefore": "Mon, 19 Sep 2016 18:29:47 GMT", 14 | "Description": "Host server is undergoing maintenance", 15 | "EventSource": "Platform", 16 | "DurationInSeconds": -1 17 | } 18 | ] 19 | } -------------------------------------------------------------------------------- /pkg/types/types.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package types 14 | 15 | import ( 16 | "fmt" 17 | "regexp" 18 | 19 | "github.com/pkg/errors" 20 | ) 21 | 22 | type ScheduledEventsEventType string 23 | 24 | const ( 25 | // The Virtual Machine is scheduled to pause for a few seconds. CPU and network connectivity 26 | // may be suspended, but there's no impact on memory or open files. 27 | EventTypeFreeze = "Freeze" 28 | // The Virtual Machine is scheduled for reboot (non-persistent memory is lost). 29 | // This event is made available on a best effort basis. 30 | EventTypeReboot = "Reboot" 31 | // The Virtual Machine is scheduled to move to another node (ephemeral disks are lost). 32 | // This event is delivered on a best effort basis. 33 | EventTypeRedeploy = "Redeploy" 34 | // The Spot Virtual Machine is being deleted (ephemeral disks are lost). 35 | EventTypePreempt = "Preempt" 36 | // The virtual machine is scheduled to be deleted. 37 | EventTypeTerminate = "Terminate" 38 | ) 39 | 40 | // https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events 41 | type ScheduledEventsEvent struct { 42 | EventId string `description:"Globally unique identifier for this event."` //nolint:golint,revive,stylecheck 43 | EventType ScheduledEventsEventType `description:"Impact this event causes."` 44 | ResourceType string `description:"Type of resource this event affects."` 45 | Resources []string `description:"List of resources this event affects."` 46 | EventStatus string `description:"Status of this event."` 47 | NotBefore string `description:"Time after which this event can start. The event is guaranteed to not start before this time. 
Will be blank if the event has already started"` //nolint:lll 48 | Description string `description:"Description of this event."` 49 | EventSource string `description:"Initiator of the event."` 50 | DurationInSeconds int `description:"The expected duration of the interruption caused by the event."` 51 | } 52 | 53 | var ( 54 | virtualMachineScaleSetsRe = regexp.MustCompile("^azure:///subscriptions/(.+)/resourceGroups/(.+)/providers/Microsoft.Compute/virtualMachineScaleSets/(.+)/virtualMachines/(.+)$") 55 | virtualMachineRe = regexp.MustCompile("^azure:///subscriptions/(.+)/resourceGroups/(.+)/providers/Microsoft.Compute/virtualMachines/(.+)$") 56 | ) 57 | 58 | type AzureResource struct { 59 | ProviderID string 60 | EventResourceName string 61 | SubscriptionID string 62 | ResourceGroup string 63 | } 64 | 65 | func NewAzureResource(providerID string) (*AzureResource, error) { 66 | resource := &AzureResource{ 67 | ProviderID: providerID, 68 | } 69 | 70 | switch { 71 | case virtualMachineScaleSetsRe.MatchString(providerID): 72 | v := virtualMachineScaleSetsRe.FindAllStringSubmatch(providerID, 1) 73 | 74 | resource.SubscriptionID = v[0][1] 75 | resource.ResourceGroup = v[0][2] 76 | resource.EventResourceName = fmt.Sprintf("%s_%s", v[0][3], v[0][4]) 77 | 78 | case virtualMachineRe.MatchString(providerID): 79 | v := virtualMachineRe.FindAllStringSubmatch(providerID, 1) 80 | 81 | resource.SubscriptionID = v[0][1] 82 | resource.ResourceGroup = v[0][2] 83 | resource.EventResourceName = v[0][3] 84 | 85 | default: 86 | return nil, errors.Errorf("providerID not recognized: %s", providerID) 87 | } 88 | 89 | return resource, nil 90 | } 91 | 92 | // api-version=2020-07-01. 
// ScheduledEventsType is the top-level scheduled-events document: an
// incarnation counter plus the list of events it carries.
type ScheduledEventsType struct {
	DocumentIncarnation int
	Events              []ScheduledEventsEvent
}

// EventMessage groups a type, a reason and a message text.
// NOTE(review): presumably this feeds Kubernetes event recording — confirm
// against the callers.
type EventMessage struct {
	Type    string
	Reason  string
	Message string
}
12 | */ 13 | package types_test 14 | 15 | import ( 16 | "encoding/json" 17 | "os" 18 | "reflect" 19 | "strconv" 20 | "testing" 21 | 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types" 23 | ) 24 | 25 | func TestScheduledEventsType(t *testing.T) { 26 | t.Parallel() 27 | 28 | messageBytes, err := os.ReadFile("testdata/ScheduledEventsType.json") 29 | if err != nil { 30 | t.Fatal(err) 31 | } 32 | 33 | message := types.ScheduledEventsType{} 34 | 35 | err = json.Unmarshal(messageBytes, &message) 36 | if err != nil { 37 | t.Fatal(err) 38 | } 39 | 40 | if len(message.Events) == 0 { 41 | t.Fatal("events is empty") 42 | } 43 | 44 | if want := "VirtualMachine"; message.Events[0].ResourceType != want { 45 | t.Fatalf("want=%s, got=%s", want, message.Events[0].ResourceType) 46 | } 47 | } 48 | 49 | func TestAzureResource(t *testing.T) { 50 | t.Parallel() 51 | 52 | type azureResourceTest struct { 53 | providerID string 54 | want *types.AzureResource 55 | } 56 | 57 | tests := make([]azureResourceTest, 0) 58 | 59 | tests = append(tests, azureResourceTest{ 60 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912341/resourceGroups/mc_prod_prod_eastus/providers/Microsoft.Compute/virtualMachineScaleSets/aks-spotcpu2v2-19654750-vmss/virtualMachines/2768", //nolint:lll 61 | want: &types.AzureResource{ 62 | EventResourceName: "aks-spotcpu2v2-19654750-vmss_2768", 63 | SubscriptionID: "12345a05-1234-1234-12345-922b47912341", 64 | ResourceGroup: "mc_prod_prod_eastus", 65 | }, 66 | }) 67 | 68 | tests = append(tests, azureResourceTest{ 69 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912342/resourceGroups/aro-infra-lth8qmzr-test-openshift-cluster1/providers/Microsoft.Compute/virtualMachines/test-openshift-cluste-t98dd-master-0", //nolint:lll 70 | want: &types.AzureResource{ 71 | EventResourceName: "test-openshift-cluste-t98dd-master-0", 72 | SubscriptionID: "12345a05-1234-1234-12345-922b47912342", 73 | ResourceGroup: 
"aro-infra-lth8qmzr-test-openshift-cluster1", 74 | }, 75 | }) 76 | 77 | tests = append(tests, azureResourceTest{ 78 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912343/resourceGroups/aro-infra-lth8qmzr-test-openshift-cluster2/providers/Microsoft.Compute/virtualMachines/test-openshift-cluste-t98dd-worker-eastus1-rz2t8", //nolint:lll 79 | want: &types.AzureResource{ 80 | EventResourceName: "test-openshift-cluste-t98dd-worker-eastus1-rz2t8", 81 | SubscriptionID: "12345a05-1234-1234-12345-922b47912343", 82 | ResourceGroup: "aro-infra-lth8qmzr-test-openshift-cluster2", 83 | }, 84 | }) 85 | 86 | for testID, test := range tests { 87 | t.Run("Test"+strconv.Itoa(testID), func(t *testing.T) { 88 | t.Parallel() 89 | 90 | azureResource, err := types.NewAzureResource(test.providerID) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | 95 | // need to set providerID for comparison 96 | test.want.ProviderID = test.providerID 97 | 98 | if !reflect.DeepEqual(azureResource, test.want) { 99 | t.Fatalf("want=%+v, got=%+v", test.want, azureResource) 100 | } 101 | }) 102 | } 103 | 104 | // test invalid providerID 105 | if _, err := types.NewAzureResource("azure://fake"); err == nil { 106 | t.Fatal("error expected") 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
10 | See the License for the specific language governing permissions and 11 | limitations under the License. 12 | */ 13 | package utils 14 | 15 | import ( 16 | "context" 17 | "time" 18 | 19 | log "github.com/sirupsen/logrus" 20 | ) 21 | 22 | func SleepWithContext(ctx context.Context, d time.Duration) { 23 | log.Debugf("Sleep %s", d) 24 | 25 | select { 26 | case <-ctx.Done(): 27 | return 28 | case <-time.After(d): 29 | return 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /pkg/utils/utils_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package utils_test 14 | 15 | import ( 16 | "context" 17 | "testing" 18 | "time" 19 | 20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils" 21 | ) 22 | 23 | func TestSleepWithContext(t *testing.T) { 24 | t.Parallel() 25 | 26 | ctx, cancel := context.WithCancel(context.TODO()) 27 | defer cancel() 28 | 29 | startTime := time.Now() 30 | 31 | utils.SleepWithContext(ctx, 1*time.Second) 32 | 33 | if time.Since(startTime) < 1*time.Second || time.Since(startTime) > 2*time.Second { 34 | t.Error("SleepWithContext() not working as expected") 35 | } 36 | 37 | cancel() 38 | 39 | startTime = time.Now() 40 | utils.SleepWithContext(ctx, 1*time.Second) 41 | 42 | if time.Since(startTime) >= 1*time.Second { 43 | t.Error("SleepWithContext() not working as expected") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pkg/web/web.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package web 14 | 15 | import ( 16 | "context" 17 | "net/http" 18 | "net/http/pprof" 19 | "time" 20 | 21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/alert" 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api" 23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics" 25 | log "github.com/sirupsen/logrus" 26 | ) 27 | 28 | func Start(ctx context.Context) { 29 | const ( 30 | readTimeout = 5 * time.Second 31 | requestTimeout = 10 * time.Second 32 | writeTimeout = 20 * time.Second 33 | ) 34 | 35 | server := &http.Server{ 36 | Addr: *config.Get().WebHTTPAddress, 37 | Handler: http.TimeoutHandler(GetHandler(), requestTimeout, "timeout"), 38 | ReadTimeout: readTimeout, 39 | WriteTimeout: writeTimeout, 40 | } 41 | 42 | log.Info("web.address=", server.Addr) 43 | 44 | go func() { 45 | <-ctx.Done() 46 | 47 | shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), config.Get().GracePeriod()) 48 | defer shutdownCancel() 49 | 50 | _ = server.Shutdown(shutdownCtx) //nolint:contextcheck 51 | }() 52 | 53 | if err := server.ListenAndServe(); err != nil && ctx.Err() == nil { 54 | log.WithError(err).Fatal() 55 | } 56 | } 57 | 58 | func GetHandler() *http.ServeMux { 59 | mux := http.NewServeMux() 60 | 61 | mux.HandleFunc("/healthz", handlerHealthz) 62 | mux.HandleFunc("/drainNode", handlerDrainNode) 63 | 64 | mux.Handle("/metrics", metrics.GetHandler()) 65 | 66 | mux.HandleFunc("/debug/pprof/", pprof.Index) 67 | mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) 68 | mux.HandleFunc("/debug/pprof/profile", pprof.Profile) 69 | mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) 70 | mux.HandleFunc("/debug/pprof/trace", pprof.Trace) 71 | 72 | return mux 73 | } 74 | 75 | func handlerHealthz(w http.ResponseWriter, r *http.Request) { 76 | // check alerts transports 77 | if err := alert.Ping(); err != nil { 78 | log.WithError(err).Error("alerts 
transport is not working") 79 | http.Error(w, err.Error(), http.StatusInternalServerError) 80 | 81 | return 82 | } 83 | 84 | // check kubernetes API 85 | if _, err := api.GetNode(r.Context(), *config.Get().NodeName); err != nil { 86 | log.WithError(err).Error("kubernetes API is not available") 87 | http.Error(w, err.Error(), http.StatusInternalServerError) 88 | 89 | return 90 | } 91 | 92 | _, _ = w.Write([]byte("LIVE")) 93 | } 94 | 95 | func handlerDrainNode(w http.ResponseWriter, r *http.Request) { 96 | err := api.DrainNode(r.Context(), *config.Get().NodeName, "Preempt", "manual") 97 | if err != nil { 98 | http.Error(w, err.Error(), http.StatusInternalServerError) 99 | 100 | return 101 | } 102 | 103 | _, _ = w.Write([]byte("done")) 104 | } 105 | -------------------------------------------------------------------------------- /pkg/webhook/testdata/WebhookTemplateFile.txt: -------------------------------------------------------------------------------- 1 | node_termination_event{node="{{ .NodeName }}"} 1 -------------------------------------------------------------------------------- /pkg/webhook/webhook.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
12 | */ 13 | package webhook 14 | 15 | import ( 16 | "bytes" 17 | "context" 18 | "fmt" 19 | "os" 20 | 21 | "github.com/hashicorp/go-retryablehttp" 22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config" 23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template" 24 | "github.com/pkg/errors" 25 | log "github.com/sirupsen/logrus" 26 | ) 27 | 28 | var client = &retryablehttp.Client{} 29 | 30 | var errHTTPNotOK = errors.New("http result not OK") 31 | 32 | func SetHTTPClient(c *retryablehttp.Client) { 33 | client = c 34 | } 35 | 36 | func isResponseStatusOK(statusCode int) bool { 37 | return statusCode >= 200 && statusCode < 300 38 | } 39 | 40 | func SendWebHook(ctx context.Context, obj *template.MessageType) error { 41 | ctx, cancel := context.WithTimeout(ctx, *config.Get().WebHookTimeout) 42 | defer cancel() 43 | 44 | if len(*config.Get().WebHookURL) == 0 { 45 | return nil 46 | } 47 | 48 | message, err := template.NewMessageType(ctx, obj.NodeName, obj.Event) 49 | if err != nil { 50 | return errors.Wrap(err, "error in template.NewMessageType") 51 | } 52 | 53 | message.Template = *config.Get().WebHookTemplate 54 | 55 | if len(*config.Get().WebHookTemplateFile) > 0 { 56 | templateFile, err := os.ReadFile(*config.Get().WebHookTemplateFile) 57 | if err != nil { 58 | return errors.Wrap(err, "error in os.ReadFile") 59 | } 60 | 61 | message.Template = string(templateFile) 62 | } 63 | 64 | webhookBody, err := template.Message(message) 65 | if err != nil { 66 | return errors.Wrap(err, "error in template.Message") 67 | } 68 | 69 | requestBody := bytes.NewBufferString(webhookBody + "\n") 70 | 71 | req, err := retryablehttp.NewRequest(*config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody) 72 | if err != nil { 73 | return errors.Wrap(err, "error in retryablehttp.NewRequest") 74 | } 75 | 76 | req.Header.Set("Content-Type", *config.Get().WebHookContentType) 77 | 78 | log.WithFields(log.Fields{ 79 | "method": req.Method, 80 | "url": req.URL, 
81 | "headers": req.Header, 82 | }).Infof("Doing request with body: %s", requestBody.String()) 83 | 84 | resp, err := client.Do(req) 85 | if err != nil { 86 | return errors.Wrap(err, "error in client.Do") 87 | } 88 | defer resp.Body.Close() 89 | 90 | log.Infof("response status: %s", resp.Status) 91 | 92 | if !isResponseStatusOK(resp.StatusCode) { 93 | return errors.Wrap(errHTTPNotOK, fmt.Sprintf("StatusCode=%d", resp.StatusCode)) 94 | } 95 | 96 | return nil 97 | } 98 | -------------------------------------------------------------------------------- /pkg/webhook/webhook_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright paskal.maksim@gmail.com 3 | Licensed under the Apache License, Version 2.0 (the "License") 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | Unless required by applicable law or agreed to in writing, software 8 | distributed under the License is distributed on an "AS IS" BASIS, 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | See the License for the specific language governing permissions and 11 | limitations under the License. 
// retryableRequestCount counts the requests the test server received on
// /test-retryable; TestWebHook asserts its final value.
var retryableRequestCount = 0

// ts is the test HTTP server shared by all cases. It answers 400 on /-/400,
// answers 500 for the first two /test-retryable requests and "OK" afterwards,
// and validates every other request with testWebhookRequest.
var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
	if r.RequestURI == "/-/400" {
		w.WriteHeader(http.StatusBadRequest)

		return
	}

	if r.RequestURI == "/test-retryable" {
		retryableRequestCount++

		// return 500 for first 2 requests
		if retryableRequestCount < 3 {
			w.WriteHeader(http.StatusInternalServerError)
		} else {
			_, _ = w.Write([]byte("OK"))
		}

		return
	}

	if err := testWebhookRequest(r); err != nil {
		log.WithError(err).Error()
		w.WriteHeader(http.StatusInternalServerError)
	} else {
		_, _ = w.Write([]byte("OK"))
	}
}))

// getWebhookRetryableURL returns the test-server URL that fails twice before succeeding.
func getWebhookRetryableURL() string {
	return ts.URL + "/test-retryable"
}

// getWebhookURL returns the test-server URL that testWebhookRequest accepts.
func getWebhookURL() string {
	return ts.URL + "/metrics/job/aks-node-termination-handler"
}

// testWebhookRequest validates a request received by the test server: the URI
// must be the expected metrics path and the body must be the rendered
// template for node "test" followed by a newline.
func testWebhookRequest(r *http.Request) error {
	if r.RequestURI != "/metrics/job/aks-node-termination-handler" {
		return errors.New("Request URI is not correct")
	}

	defer r.Body.Close()

	body, _ := io.ReadAll(r.Body)

	if bodyString := string(body); bodyString != "node_termination_event{node=\"test\"} 1\n" {
		return fmt.Errorf("Response body [%s] is not correct", bodyString)
	}

	return nil
}

// TestWebHook drives webhook.SendWebHook through a table of cases. Each case
// sets command-line flags, optionally overrides the node name and the HTTP
// client, and states whether an error (and which message fragment) is
// expected. Subtests run sequentially because they share the flags, the
// package-level webhook client and retryableRequestCount.
func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
	t.Parallel()

	// Default client for cases that do not set HTTPClient: no proxy, no retries.
	retryClient := retryablehttp.NewClient()
	retryClient.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHook").
		WithProxy("").
		WithInsecureSkipVerify(true).
		InstrumentedRoundTripper()
	retryClient.RetryMax = 0

	// Client routed through a proxy address; the case using it expects a proxy dial error.
	retryClientProxy := retryablehttp.NewClient()
	retryClientProxy.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithProxy").
		WithProxy("http://someproxy").
		WithInsecureSkipVerify(true).
		InstrumentedRoundTripper()
	retryClientProxy.RetryMax = 0

	// retryable client with default retry settings
	retryClientDefault := retryablehttp.NewClient()
	retryClientDefault.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithDefaultSettings").
		WithProxy("").
		WithInsecureSkipVerify(true).
		InstrumentedRoundTripper()
	retryClientDefault.RetryMax = 3

	// Test describes one case. Args maps flag names to the values set before
	// the call; ErrorMessage, when non-empty, must be contained in the error
	// text; NodeName and HTTPClient override the defaults when set.
	type Test struct {
		Name         string
		Args         map[string]string
		Error        bool
		ErrorMessage string
		NodeName     string
		HTTPClient   *retryablehttp.Client
	}

	tests := []Test{
		{
			// Two 500 responses followed by success; accounts for the 3 requests asserted at the end.
			Name: "TestRetryable",
			Args: map[string]string{
				"webhook.url": getWebhookRetryableURL(),
			},
			HTTPClient: retryClientDefault,
		},
		{
			// The server answers 400 and the call is expected to fail with the non-2xx error.
			Name: "TestRetryableCustomStatusCodes",
			Args: map[string]string{
				"webhook.url": ts.URL + "/-/400",
			},
			HTTPClient:   retryClientDefault,
			Error:        true,
			ErrorMessage: "http result not OK",
		},
		{
			Name: "ValidHookAndTemplate",
			Args: map[string]string{
				"webhook.url":      getWebhookURL(),
				"webhook.template": `node_termination_event{node="{{ .NodeName }}"} 1`,
			},
		},
		{
			// An empty URL is expected to succeed without error.
			Name: "EmptyURL",
			Args: map[string]string{
				"webhook.url":      "",
				"webhook.template": `node_termination_event{node="{{ .NodeName }}"} 1`,
			},
		},
		{
			Name: "InvalidTemplate",
			Args: map[string]string{
				"webhook.url":      getWebhookURL(),
				"webhook.template": `{{`,
			},
			Error: true,
		},
		{
			Name: "InvalidContext",
			Args: map[string]string{
				"webhook.url":      "example.com",
				"webhook.template": `{{ .NodeName }}`,
			},
			Error: true,
		},
		{
			// The server root is not the expected path, so the handler answers 500.
			Name: "InvalidStatus",
			Args: map[string]string{
				"webhook.url":      ts.URL,
				"webhook.template": `{{ .NodeName }}`,
			},
			Error:        true,
			ErrorMessage: "giving up after 1 attempt",
		},
		{
			Name: "InvalidMethod",
			Args: map[string]string{
				"webhook.url":      getWebhookURL(),
				"webhook.template": `{{ .NodeName }}`,
				"webhook.method":   `???`,
			},
			Error: true,
		},
		{
			Name: "WebhookTemplateFile",
			Args: map[string]string{
				"webhook.url":           getWebhookURL(),
				"webhook.template-file": "testdata/WebhookTemplateFile.txt",
			},
		},
		{
			Error: true,
			Name:  "WebhookTemplateFileInvalid",
			Args: map[string]string{
				"webhook.url":           getWebhookURL(),
				"webhook.template-file": "faketestdata/WebhookTemplateFile.txt",
			},
		},
		{
			Error: true,
			Name:  "InvalidNodeName",
			Args: map[string]string{
				"webhook.url": getWebhookURL(),
			},
			NodeName: "!!invalid!!GetNodeLabels",
		},
		{
			Error:        true,
			ErrorMessage: "error making roundtrip: proxyconnect tcp: dial tcp",
			Name:         "HTTPClientProxy",
			Args: map[string]string{
				"webhook.url": getWebhookURL(),
			},
			HTTPClient: retryClientProxy,
		},
	}

	// clear flags
	// Resets every flag named by any case to "", so one case's settings do not leak into the next.
	cleanAllFlags := func() {
		for _, test := range tests {
			for key := range test.Args {
				_ = flag.Set(key, "")
			}
		}
	}

	for _, tc := range tests { //nolint:paralleltest
		t.Run(tc.Name, func(t *testing.T) {
			cleanAllFlags()

			for key, value := range tc.Args {
				_ = flag.Set(key, value)
			}

			// "test" is the node name testWebhookRequest expects in the body.
			messageType := &template.MessageType{
				NodeName: "test",
			}

			if len(tc.NodeName) > 0 {
				messageType.NodeName = tc.NodeName
			}

			if httpClient := tc.HTTPClient; httpClient != nil {
				webhook.SetHTTPClient(httpClient)
			} else {
				webhook.SetHTTPClient(retryClient)
			}

			err := webhook.SendWebHook(context.TODO(), messageType)
			if tc.Error {
				require.Error(t, err)
				// With an empty ErrorMessage this only asserts that an error occurred.
				require.Contains(t, err.Error(), tc.ErrorMessage)
			} else {
				require.NoError(t, err)
			}
		})
	}

	// Check retryable request counter, 3 requests should be made
	require.Equal(t, 3, retryableRequestCount)
}
-not \( \ 21 | \( \ 22 | -wholename './vendor' \ 23 | -o -wholename '*testdata*' \ 24 | -o -wholename '*third_party*' \ 25 | -o -wholename '*node_modules*' \ 26 | \) -prune \ 27 | \) \ 28 | \( -name '*.go' -o -name '*.sh' -o -name 'LICENSE' \) 29 | } 30 | 31 | # Use "|| :" to ignore the error code when grep returns empty 32 | failed_license_header=($(find_files | xargs grep -L 'Licensed under the Apache License, Version 2.0 (the "License")' || :)) 33 | if (( ${#failed_license_header[@]} > 0 )); then 34 | echo "Some source files are missing license headers." 35 | printf '%s\n' "${failed_license_header[@]}" 36 | exit 1 37 | fi 38 | 39 | # Use "|| :" to ignore the error code when grep returns empty 40 | failed_copyright_header=($(find_files | xargs grep -L 'Copyright paskal.maksim@gmail.com' || :)) 41 | if (( ${#failed_copyright_header[@]} > 0 )); then 42 | echo "Some source files are missing the copyright header." 43 | printf '%s\n' "${failed_copyright_header[@]}" 44 | exit 1 45 | fi 46 | 47 | if grep --exclude-dir=.git --exclude=validate-license.sh --exclude=test.sh -rn . -e 'alldigital'; then 48 | echo "Some files have bad links" 49 | exit 1 50 | fi --------------------------------------------------------------------------------