├── .github
└── workflows
│ ├── chart-test.yml
│ ├── e2e.yml
│ ├── govulncheck.yml
│ ├── release-chart.yml
│ ├── release.yaml
│ ├── test.yml
│ └── validate-license.yml
├── .gitignore
├── .golangci.yml
├── .goreleaser.yml
├── Dockerfile
├── Dockerfile.windows
├── LICENSE
├── Makefile
├── README.md
├── charts
└── aks-node-termination-handler
│ ├── Chart.yaml
│ ├── templates
│ ├── configmap.yaml
│ ├── daemonset.yaml
│ ├── networkpolicy.yaml
│ └── rbac.yaml
│ └── values.yaml
├── cmd
└── main.go
├── codecov.yml
├── e2e
├── main_test.go
└── testdata
│ └── config_test.yaml
├── go.mod
├── go.sum
├── internal
└── internal.go
├── mock
└── mock.go
├── pkg
├── alert
│ └── alert.go
├── api
│ └── api.go
├── cache
│ ├── cache.go
│ └── cache_test.go
├── client
│ └── client.go
├── config
│ ├── config.go
│ ├── config_test.go
│ └── testdata
│ │ ├── config_test.yaml
│ │ └── config_yaml_fake.yaml
├── events
│ ├── events.go
│ └── events_test.go
├── logger
│ ├── logger.go
│ └── logger_test.go
├── metrics
│ ├── metrics.go
│ └── metrics_test.go
├── template
│ ├── README.md
│ ├── template.go
│ ├── template_test.go
│ └── testdata
│ │ └── message.json
├── types
│ ├── testdata
│ │ └── ScheduledEventsType.json
│ ├── types.go
│ └── types_test.go
├── utils
│ ├── utils.go
│ └── utils_test.go
├── web
│ └── web.go
└── webhook
│ ├── testdata
│ └── WebhookTemplateFile.txt
│ ├── webhook.go
│ └── webhook_test.go
└── scripts
└── validate-license.sh
/.github/workflows/chart-test.yml:
--------------------------------------------------------------------------------
1 | # Lint the Helm chart on every pull request with chart-testing (ct).
2 | on: pull_request
3 |
4 | jobs:
5 |   lint-test:
6 |     runs-on: ubuntu-latest
7 |     steps:
8 |       - name: Checkout
9 |         uses: actions/checkout@v4
10 |         with:
11 |           # Fetch full history; `ct lint` below compares against the main branch.
12 |           fetch-depth: 0
13 |
14 |       - name: Set up Helm
15 |         uses: azure/setup-helm@v1
16 |         with:
17 |           version: v3.8.1
18 |
19 |       - uses: actions/setup-python@v2
20 |         with:
21 |           # Quoted so the version is always read as a string, never as a float.
22 |           python-version: '3.7'
23 |
24 |       - name: Set up chart-testing
25 |         uses: helm/chart-testing-action@v2.2.1
26 |
27 |       - name: Run chart-testing (lint)
28 |         run: ct lint --target-branch main
--------------------------------------------------------------------------------
/.github/workflows/e2e.yml:
--------------------------------------------------------------------------------
1 | # Run the end-to-end suite against several k3s releases on every pull request.
2 | on: pull_request
3 |
4 | jobs:
5 |   e2e:
6 |     runs-on: ubuntu-latest
7 |     strategy:
8 |       matrix:
9 |         k3s_version: ["v1.26.11+k3s2", "v1.27.8+k3s2", "v1.28.4+k3s2", "v1.29.2+k3s1"]
10 |     steps:
11 |       - name: Setup Kubernetes
12 |         # Mode 644 keeps the generated kubeconfig readable by the runner user
13 |         # without making it world-writable.
14 |         run: curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="${{ matrix.k3s_version }}" K3S_KUBECONFIG_MODE=644 sh -
15 |       - name: Checkout
16 |         uses: actions/checkout@v4
17 |       - name: Setup Go
18 |         uses: actions/setup-go@v4
19 |         with:
20 |           go-version: '1.23'
21 |       - name: Run tests
22 |         # The last node listed by kubectl is passed to the Makefile as `node`.
23 |         run: make e2e KUBECONFIG=/etc/rancher/k3s/k3s.yaml node=$(kubectl --kubeconfig=/etc/rancher/k3s/k3s.yaml get no --no-headers | awk '{print $1}' | tail -1)
--------------------------------------------------------------------------------
/.github/workflows/govulncheck.yml:
--------------------------------------------------------------------------------
1 | # Run govulncheck against all packages on every pull request.
2 | on: pull_request
3 |
4 | jobs:
5 |   govulncheck:
6 |     name: test
7 |     runs-on: ubuntu-latest
8 |     steps:
9 |       - uses: actions/checkout@v4
10 |       # Same action versions as e2e.yml; the `stable` input used with
11 |       # setup-go@v2 is not needed when an explicit go-version is given.
12 |       - uses: actions/setup-go@v4
13 |         with:
14 |           go-version: '1.23'
15 |       - run: go run golang.org/x/vuln/cmd/govulncheck@latest ./...
--------------------------------------------------------------------------------
/.github/workflows/release-chart.yml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | branches:
4 | - main
5 |
6 | jobs:
7 | release:
8 | runs-on: ubuntu-latest
9 | steps:
10 | - name: Checkout
11 | uses: actions/checkout@v2
12 | with:
13 | fetch-depth: 0
14 | - name: Configure Git
15 | run: |
16 | git config user.name "${GITHUB_ACTOR}"
17 | git config user.email "${GITHUB_ACTOR}@users.noreply.github.com"
18 | - name: Install Helm
19 | uses: azure/setup-helm@v1
20 | with:
21 | version: v3.4.2
22 | - name: Run chart-releaser
23 | uses: helm/chart-releaser-action@v1.1.0
24 | env:
25 | CR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
26 | CR_RELEASE_NAME_TEMPLATE: "helm-chart-{{ .Version }}"
--------------------------------------------------------------------------------
/.github/workflows/release.yaml:
--------------------------------------------------------------------------------
1 | on:
2 | push:
3 | tags:
4 | - v*
5 |
6 | permissions:
7 | contents: write
8 |
9 | env:
10 | IMAGE: paskalmaksim/aks-node-termination-handler:${{github.ref_name}}
11 | IMAGE_LATEST: paskalmaksim/aks-node-termination-handler:latest
12 | # IMAGE: paskalmaksim/aks-node-termination-handler:test-${{ github.run_id }}
13 | # IMAGE_LATEST: paskalmaksim/aks-node-termination-handler:test-latest
14 |
15 | jobs:
16 | build:
17 | runs-on: ubuntu-latest
18 | steps:
19 | - name: Checkout
20 | uses: actions/checkout@v4
21 | with:
22 | fetch-depth: 0
23 | - name: Set up Go
24 | uses: actions/setup-go@v4
25 | with:
26 | go-version: '1.23'
27 | - name: Remove Git Tags with Charts
28 | run: git tag -d $(git tag -l "helm-chart-*")
29 | - name: Run GoReleaser
30 | uses: goreleaser/goreleaser-action@v5
31 | with:
32 | distribution: goreleaser
33 | version: latest
34 | # args: build --clean --skip=validate --snapshot
35 | args: release --clean
36 | env:
37 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
38 | - run: cp Dockerfile ./dist/aks-node-termination-handler_linux_amd64_v1/Dockerfile
39 | - run: cp Dockerfile ./dist/aks-node-termination-handler_linux_arm64/Dockerfile
40 | - run: cp Dockerfile.windows ./dist/aks-node-termination-handler_windows_amd64_v1/Dockerfile
41 | - run: tar -cvf release.tar ./dist
42 | - uses: actions/upload-artifact@v4
43 | with:
44 | name: release
45 | retention-days: 1
46 | path: release.tar
47 |
48 | publish-linux-amd64:
49 | runs-on: ubuntu-latest
50 | needs: build
51 | steps:
52 | - uses: docker/login-action@v3
53 | with:
54 | username: ${{ secrets.DOCKER_USERNAME }}
55 | password: ${{ secrets.DOCKER_PASSWORD }}
56 | - uses: actions/download-artifact@v4
57 | - run: tar xvf ./release/release.tar
58 | - run: "docker build --pull --push --platform linux/amd64 -t ${{ env.IMAGE }}-linux-amd64 ."
59 | working-directory: ./dist/aks-node-termination-handler_linux_amd64_v1
60 |
61 | publish-linux-arm64:
62 | runs-on: ubuntu-latest
63 | needs: build
64 | steps:
65 | - uses: docker/login-action@v3
66 | with:
67 | username: ${{ secrets.DOCKER_USERNAME }}
68 | password: ${{ secrets.DOCKER_PASSWORD }}
69 | - uses: docker/setup-qemu-action@v2
70 | - uses: docker/setup-buildx-action@v2
71 | - uses: actions/download-artifact@v4
72 | - run: tar xvf ./release/release.tar
73 | - run: "docker build --pull --push --platform linux/arm64 -t ${{ env.IMAGE }}-linux-arm64 ."
74 | working-directory: ./dist/aks-node-termination-handler_linux_arm64
75 |
76 | publish-windows-amd64:
77 | runs-on: windows-latest
78 | strategy:
79 | matrix:
80 | windows-version: [ 'ltsc2019', 'ltsc2022' ]
81 | needs: build
82 | steps:
83 | - uses: docker/login-action@v3
84 | with:
85 | username: ${{ secrets.DOCKER_USERNAME }}
86 | password: ${{ secrets.DOCKER_PASSWORD }}
87 | - uses: actions/download-artifact@v4
88 | - run: tar xvf ./release/release.tar
89 | - run: "docker build --build-arg WINDOWS_VERSION=${{ matrix.windows-version }} --pull --platform windows/amd64 -t ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64 ."
90 | working-directory: ./dist/aks-node-termination-handler_windows_amd64_v1
91 | - run: docker push ${{ env.IMAGE }}-windows-${{ matrix.windows-version }}-amd64
92 |
93 | publish-manifest:
94 | runs-on: ubuntu-latest
95 | needs: [publish-linux-amd64, publish-linux-arm64, publish-windows-amd64]
96 | steps:
97 | - uses: docker/login-action@v3
98 | with:
99 | username: ${{ secrets.DOCKER_USERNAME }}
100 | password: ${{ secrets.DOCKER_PASSWORD }}
101 | - run: docker manifest create ${{ env.IMAGE }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64
102 | - run: docker manifest push ${{ env.IMAGE }}
103 | - run: docker manifest create ${{ env.IMAGE_LATEST }} ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2022-amd64
104 | - run: docker manifest push ${{ env.IMAGE_LATEST }}
105 | - run: docker manifest create ${{ env.IMAGE_LATEST }}-ltsc2019 ${{ env.IMAGE }}-linux-amd64 ${{ env.IMAGE }}-linux-arm64 ${{ env.IMAGE }}-windows-ltsc2019-amd64
106 | - run: docker manifest push ${{ env.IMAGE_LATEST }}-ltsc2019
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | # Run `make test` on pushes to main and on pull requests,
2 | # then upload the coverage report to Codecov.
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |
9 | jobs:
10 |   test:
11 |     name: test
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       - uses: actions/checkout@v4
15 |       # Same action versions as e2e.yml; the `stable` input used with
16 |       # setup-go@v2 is not needed when an explicit go-version is given.
17 |       - uses: actions/setup-go@v4
18 |         with:
19 |           go-version: '1.23'
20 |       - run: make test
21 |       - name: Upload coverage to Codecov
22 |         uses: codecov/codecov-action@v3
--------------------------------------------------------------------------------
/.github/workflows/validate-license.yml:
--------------------------------------------------------------------------------
1 | # Run scripts/validate-license.sh on pushes to main, on v* tags and on pull requests.
2 | on:
3 |   push:
4 |     tags:
5 |       - v*
6 |     branches:
7 |       - main
8 |   pull_request:
9 | jobs:
10 |   validate-license:
11 |     name: validate-license
12 |     runs-on: ubuntu-latest
13 |     steps:
14 |       # Same checkout version as the e2e and release workflows.
15 |       - uses: actions/checkout@v4
16 |       - run: ./scripts/validate-license.sh
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | kubeconfig
2 | dist
3 | /aks-node-termination-handler
4 | simulateEviction
5 | coverage.out
6 | *.tmp
--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
1 | # golangci-lint configuration: enable every linter, then opt out of the ones listed below.
2 | run:
3 |   timeout: 5m
4 | issues:
5 |   fix: true
6 | linters-settings:
7 |   lll:
8 |     line-length: 200
9 | linters:
10 |   enable-all: true
11 |   disable:
12 |     - deadcode  # deprecated
13 |     - exhaustivestruct  # deprecated
14 |     - golint  # deprecated
15 |     - ifshort  # deprecated
16 |     - interfacer  # deprecated
17 |     - maligned  # deprecated
18 |     - nosnakecase  # deprecated
19 |     - scopelint  # deprecated
20 |     - structcheck  # deprecated
21 |     - varcheck  # deprecated
22 |     - gochecknoglobals
23 |     - exhaustruct
24 |     - varnamelen
25 |     - musttag
26 |     - depguard
--------------------------------------------------------------------------------
/.goreleaser.yml:
--------------------------------------------------------------------------------
1 | project_name: aks-node-termination-handler
2 | release:
3 | footer: |
4 | ## Docker Images
5 | - `paskalmaksim/{{.ProjectName}}:latest`
6 | - `paskalmaksim/{{.ProjectName}}:{{ .Tag }}`
7 | builds:
8 | - dir: ./cmd/
9 | env:
10 | - CGO_ENABLED=0
11 | flags:
12 | - -trimpath
13 | ldflags:
14 | - -s -w -X github.com/maksim-paskal/aks-node-termination-handler/pkg/config.gitVersion={{.Version}}-{{.ShortCommit}}-{{.Timestamp}}
15 | goos:
16 | - linux
17 | - windows
18 | goarch:
19 | - amd64
20 | - arm64
21 | checksum:
22 | name_template: 'checksums.txt'
23 | snapshot:
24 | name_template: "{{ .Tag }}-next"
25 | changelog:
26 | sort: asc
27 | filters:
28 | exclude:
29 | - '^docs:'
30 | - '^test:'
31 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Runtime image; expects the prebuilt `aks-node-termination-handler` binary
2 | # in the build context.
3 | FROM alpine:latest
4 |
5 | WORKDIR /app/
6 |
7 | # Patch base packages and create the unprivileged user before copying the
8 | # binary, so this layer can be reused when only the binary changes.
9 | # --no-cache avoids leaving the apk index cache in the image.
10 | RUN apk upgrade --no-cache \
11 |   && addgroup -g 30523 -S app \
12 |   && adduser -u 30523 -D -S -G app app
13 |
14 | COPY ./aks-node-termination-handler /app/aks-node-termination-handler
15 |
16 | # Run as the numeric UID created above.
17 | USER 30523
18 |
19 | ENTRYPOINT [ "/app/aks-node-termination-handler" ]
--------------------------------------------------------------------------------
/Dockerfile.windows:
--------------------------------------------------------------------------------
1 | ARG WINDOWS_VERSION=ltsc2022
2 |
3 | FROM mcr.microsoft.com/windows/nanoserver:$WINDOWS_VERSION
4 |
5 | WORKDIR /app/
6 |
7 | COPY ./aks-node-termination-handler.exe /app/aks-node-termination-handler.exe
8 | USER ContainerUser
9 |
10 | ENTRYPOINT [ "/app/aks-node-termination-handler.exe" ]
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright paskal.maksim@gmail.com
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Developer tasks for aks-node-termination-handler.
2 | # Variables can be overridden on the command line, e.g. `make e2e node=<name>`.
3 | KUBECONFIG=$(HOME)/.kube/azure-stage
4 | tag=dev
5 | image=paskalmaksim/aks-node-termination-handler:$(tag)
6 | # NOTE(review): the Telegram bot token is committed in plaintext. It looks like a
7 | # test bot (see the join link in the `run` target) - consider supplying it from
8 | # the environment instead.
9 | telegramToken=1072104160:AAH2sFpHELeH5oxMmd-tsVjgTuzoYO6hSLM
10 | telegramChatID=-439460552
11 | # Last node carrying the Spot label; the backticks are expanded by the recipe shell.
12 | node=`kubectl get no -lkubernetes.azure.com/scalesetpriority=spot | awk '{print $$1}' | tail -1`
13 |
14 | # None of the targets produce a file of the same name; `e2e` is also a directory.
15 | .PHONY: chart-lint build push deploy clean run run-mock test e2e coverage test-release heap upgrade scan
16 |
17 | # Lint the chart and dry-run the rendered manifests.
18 | chart-lint:
19 | 	ct lint --all
20 | 	helm template ./charts/aks-node-termination-handler | kubectl apply --dry-run=client -f -
21 |
22 | # Build a snapshot binary with GoReleaser, then build and push $(image).
23 | build:
24 | 	git tag -d `git tag -l "helm-chart-*"`
25 | 	go run github.com/goreleaser/goreleaser@latest build --clean --skip=validate --snapshot
26 | 	mv ./dist/aks-node-termination-handler_linux_amd64_v1/aks-node-termination-handler aks-node-termination-handler
27 | 	docker build --pull --push . -t $(image)
28 |
29 | push:
30 | 	docker push $(image)
31 |
32 | # Reinstall the chart into kube-system using $(image) (the dev tag by default).
33 | deploy:
34 | 	helm uninstall aks-node-termination-handler --namespace kube-system || true
35 | 	helm upgrade aks-node-termination-handler \
36 | 	--install \
37 | 	--namespace kube-system \
38 | 	./charts/aks-node-termination-handler \
39 | 	--set image=$(image) \
40 | 	--set imagePullPolicy=Always \
41 | 	--set priorityClassName=system-node-critical \
42 | 	--set args[0]=-telegram.token=${telegramToken} \
43 | 	--set args[1]=-telegram.chatID=${telegramChatID} \
44 | 	--set args[2]=-taint.node \
45 | 	--set args[3]=-taint.effect=NoExecute \
46 | 	--set args[4]=-podGracePeriodSeconds=30
47 |
48 | clean:
49 | 	kubectl delete ns aks-node-termination-handler
50 |
51 | # Run the handler locally; -endpoint points at localhost:28080, the address
52 | # `run-mock` listens on.
53 | run:
54 | 	# https://t.me/joinchat/iaWV0bPT_Io5NGYy
55 | 	go run --race ./cmd \
56 | 	-kubeconfig=${KUBECONFIG} \
57 | 	-node=$(node) \
58 | 	-log.level=DEBUG \
59 | 	-log.pretty \
60 | 	-taint.node \
61 | 	-taint.effect=NoExecute \
62 | 	-podGracePeriodSeconds=30 \
63 | 	-gracePeriodSeconds=0 \
64 | 	-endpoint=http://localhost:28080/pkg/types/testdata/ScheduledEventsType.json \
65 | 	-webhook.url=http://localhost:9091/metrics/job/aks-node-termination-handler \
66 | 	-webhook.template='node_termination_event{node="{{ .NodeName }}"} 1' \
67 | 	-telegram.token=${telegramToken} \
68 | 	-telegram.chatID=${telegramChatID} \
69 | 	-web.address=127.0.0.1:17923
70 |
71 | run-mock:
72 | 	go run --race ./mock -address=127.0.0.1:28080
73 |
74 | # License check, tidy, fmt, vet, race-enabled tests with coverage, then golangci-lint.
75 | test:
76 | 	./scripts/validate-license.sh
77 | 	go mod tidy
78 | 	go fmt ./cmd/... ./pkg/... ./internal/...
79 | 	go vet ./cmd/... ./pkg/... ./internal/...
80 | 	go test --race -coverprofile coverage.out ./cmd/... ./pkg/...
81 | 	go run github.com/golangci/golangci-lint/cmd/golangci-lint@latest run -v
82 |
83 | e2e:
84 | 	go test -v -race ./e2e \
85 | 	-kubeconfig=$(KUBECONFIG) \
86 | 	-node=${node} \
87 | 	-telegram.token=${telegramToken} \
88 | 	-telegram.chatID=${telegramChatID}
89 |
90 | coverage:
91 | 	go tool cover -html=coverage.out
92 |
93 | # Dry-run a release. Uses the same --skip=... / --clean spelling as `build`;
94 | # --skip-publish and --rm-dist are the deprecated forms of these flags.
95 | test-release:
96 | 	go run github.com/goreleaser/goreleaser@latest release --snapshot --skip=publish --clean
97 |
98 | # Open the heap profile served on the -web.address used by `run`.
99 | heap:
100 | 	go tool pprof -http=127.0.0.1:8080 http://localhost:17923/debug/pprof/heap
101 |
102 | upgrade:
103 | 	go get -v -u k8s.io/client-go@v0.21.11
104 | 	go get -v -u k8s.io/kubectl@v0.21.11
105 | 	go get -v -u k8s.io/api@v0.21.11 || true
106 | 	go get -v -u k8s.io/apimachinery@v0.21.11
107 | 	go mod tidy
108 |
109 | # Scan the image and the rendered chart with Trivy (long options take two dashes).
110 | scan:
111 | 	@trivy image \
112 | 	--ignore-unfixed --no-progress --severity HIGH,CRITICAL \
113 | 	$(image)
114 | 	@helm template ./charts/aks-node-termination-handler > /tmp/aks-node-termination-handler.yaml
115 | 	@trivy config /tmp/aks-node-termination-handler.yaml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 |
5 | # AKS Node Termination Handler
6 |
7 | Gracefully handle Azure Virtual Machines shutdown within Kubernetes
8 |
9 | ## Motivation
10 |
11 | This tool ensures that the Kubernetes cluster responds appropriately to events that can cause your Azure Virtual Machines to become unavailable, such as evictions of Azure Spot Virtual Machines or reboots. If not handled, your application code may not stop gracefully, recovery to full availability may take longer, or work might accidentally be scheduled to nodes that are shutting down. This tool can also send Telegram, Slack or Webhook messages before Azure Virtual Machines evictions occur.
12 |
13 | Based on [Azure Scheduled Events](https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events) and [Safely Drain a Node](https://kubernetes.io/docs/tasks/administer-cluster/safely-drain-node/)
14 |
15 | Supports Linux (amd64, arm64) and Windows 2022, 2019* (amd64) nodes.
16 |
17 | ## Create Azure Kubernetes Cluster
18 |
19 |
20 | Create basic AKS cluster with Azure CLI
21 |
22 | ```bash
23 | # https://learn.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-cli
24 |
25 | # Azure CLI version is 2.50.0
26 | az --version
27 |
28 | # Create resource group
29 | az group create \
30 | --name test-aks-group-eastus \
31 | --location eastus
32 |
33 | # Create AKS cluster with regular (non-Spot) instances
34 | az aks create \
35 | --resource-group test-aks-group-eastus \
36 | --name MyManagedCluster \
37 | --node-count 1 \
38 | --node-vm-size Standard_DS2_v2 \
39 | --enable-cluster-autoscaler \
40 | --min-count 1 \
41 | --max-count 3
42 |
43 | # Create Linux nodepool with Spot Virtual Machines and autoscaling
44 | az aks nodepool add \
45 | --resource-group test-aks-group-eastus \
46 | --cluster-name MyManagedCluster \
47 | --name spotpool \
48 | --priority Spot \
49 | --eviction-policy Delete \
50 | --spot-max-price -1 \
51 | --enable-cluster-autoscaler \
52 | --node-vm-size Standard_DS2_v2 \
53 | --min-count 0 \
54 | --max-count 10
55 |
56 | # Create Windows (Windows Server 2022) nodepool with Spot Virtual Machines and autoscaling
57 | az aks nodepool add \
58 | --resource-group test-aks-group-eastus \
59 | --cluster-name MyManagedCluster \
60 | --os-type Windows \
61 | --os-sku Windows2022 \
62 | --priority Spot \
63 | --eviction-policy Delete \
64 | --spot-max-price -1 \
65 | --enable-cluster-autoscaler \
66 | --name spot01 \
67 | --min-count 1 \
68 | --max-count 3
69 |
70 | # Create Windows (Windows Server 2019) nodepool with Spot Virtual Machines and autoscaling
71 | az aks nodepool add \
72 | --resource-group test-aks-group-eastus \
73 | --cluster-name MyManagedCluster \
74 | --os-type Windows \
75 | --os-sku Windows2019 \
76 | --priority Spot \
77 | --eviction-policy Delete \
78 | --spot-max-price -1 \
79 | --enable-cluster-autoscaler \
80 | --name spot2 \
81 | --min-count 1 \
82 | --max-count 3
83 |
84 | # Get config to connect to cluster
85 | az aks get-credentials \
86 | --resource-group test-aks-group-eastus \
87 | --name MyManagedCluster
88 | ```
89 |
90 |
91 |
92 | ## Installation
93 |
94 | ```bash
95 | helm repo add aks-node-termination-handler https://maksim-paskal.github.io/aks-node-termination-handler/
96 | helm repo update
97 |
98 | helm upgrade aks-node-termination-handler \
99 | --install \
100 | --namespace kube-system \
101 | aks-node-termination-handler/aks-node-termination-handler \
102 | --set priorityClassName=system-node-critical
103 | ```
104 |
105 | ## Send notification events
106 |
107 | You can compose your payload with markers that are described [here](pkg/template/README.md)
108 |
109 |
110 | Send Telegram notification
111 |
112 | ```bash
113 | helm upgrade aks-node-termination-handler \
114 | --install \
115 | --namespace kube-system \
116 | aks-node-termination-handler/aks-node-termination-handler \
117 | --set priorityClassName=system-node-critical \
118 | --set 'args[0]=-telegram.token=' \
119 | --set 'args[1]=-telegram.chatID='
120 | ```
121 |
122 |
123 |
124 | Send Slack notification
125 |
126 | ```bash
127 | # create payload file
128 | cat <
158 |
159 |
160 | Send Prometheus Pushgateway event
161 |
162 | ```bash
163 | cat <
188 |
189 |
190 | Use an HTTP proxy for making webhook requests
191 |
192 | Use the flag `-webhook.http-proxy=http://someproxy:3128` for making requests with a proxy. This flag can use HTTP or HTTPS addresses. You can also use basic auth.
193 |
194 | ```bash
195 | cat <
220 |
221 | ## Simulate eviction
222 |
223 | ### Using Azure CLI
224 |
225 | You need to install the [Azure Command-Line Interface](https://learn.microsoft.com/en-us/cli/azure/) and configure [kubectl](https://learn.microsoft.com/en-us/azure/aks/learn/quick-kubernetes-deploy-cli#connect-to-the-cluster) to connect to your AKS cluster.
226 |
227 | ```bash
228 | # Azure CLI version is 2.61.0
229 | az --version
230 |
231 | # Choose your AKS node to simulate eviction
232 | kubectl get no
233 |
234 | # Identify your node Azure ID
235 | # subscriptions/{}/resourceGroups/{}/providers/Microsoft.Compute/virtualMachineScaleSets/{}/virtualMachines/{}
236 | kubectl get no aks-nodename-to-simulate-eviction -o json | jq -r '.spec.providerID[9:]'
237 |
238 | # Append the path /simulateEviction?api-version=2024-03-01 to your node's Azure ID
239 | # and send the request to management.azure.com
240 | az rest --verbose -m post --header "Accept=application/json" -u "https://management.azure.com/{Azure ID}/simulateEviction?api-version=2024-03-01"
241 | ```
242 |
243 | ### Using browser
244 |
245 | You can test with the [Simulate Eviction API](https://docs.microsoft.com/en-us/rest/api/compute/virtual-machines/simulate-eviction) by changing the API endpoint to target the `virtualMachineScaleSets` resources that AKS uses.
246 |
247 | ```bash
248 | POST https://management.azure.com/subscriptions/{subscriptionId}/resourceGroups/{resourceGroupName}/providers/Microsoft.Compute/virtualMachineScaleSets/{vmScaleSetName}/virtualMachines/{instanceId}/simulateEviction?api-version=2021-11-01
249 | ```
250 |
251 | ## Metrics
252 |
253 | The application exposes Prometheus metrics at the `/metrics` endpoint. Installing the latest chart will add annotations to the pods:
254 |
255 | ```yaml
256 | annotations:
257 | prometheus.io/port: "17923"
258 | prometheus.io/scrape: "true"
259 | ```
260 |
261 | ## Windows 2019 support
262 |
263 | If your cluster has Linux and Windows 2019 nodes, you need to use a different image:
264 |
265 | ```bash
266 | helm upgrade aks-node-termination-handler \
267 | --install \
268 | --namespace kube-system \
269 | aks-node-termination-handler/aks-node-termination-handler \
270 | --set priorityClassName=system-node-critical \
271 | --set image=paskalmaksim/aks-node-termination-handler:latest-ltsc2019
272 | ```
273 |
274 | If your cluster includes Linux, Windows 2022, and Windows 2019 nodes, you will need two separate helm installations of `aks-node-termination-handler`, each with different values.
275 |
276 |
277 | linux-windows2022.values.yaml
278 |
279 | ```yaml
280 | priorityClassName: system-node-critical
281 |
282 | image: paskalmaksim/aks-node-termination-handler:latest
283 |
284 | affinity:
285 | nodeAffinity:
286 | requiredDuringSchedulingIgnoredDuringExecution:
287 | nodeSelectorTerms:
288 | - matchExpressions:
289 | - key: kubernetes.azure.com/os-sku
290 | operator: NotIn
291 | values:
292 | - Windows2019
293 | ```
294 |
295 |
296 |
297 | linux-windows2019.values.yaml
298 |
299 | ```yaml
300 | priorityClassName: system-node-critical
301 |
302 | image: paskalmaksim/aks-node-termination-handler:latest-ltsc2019
303 |
304 | nodeSelector:
305 | kubernetes.azure.com/os-sku: Windows2019
306 | ```
307 |
308 |
309 | ```bash
310 | # install aks-node-termination-handler for Linux and Windows 2022 nodes
311 | helm upgrade aks-node-termination-handler \
312 | --install \
313 | --namespace kube-system \
314 | aks-node-termination-handler/aks-node-termination-handler \
315 | --values=linux-windows2022.values.yaml
316 |
317 | # install aks-node-termination-handler for Windows 2019 nodes
318 | helm upgrade aks-node-termination-handler-windows-2019 \
319 | --install \
320 | --namespace kube-system \
321 | aks-node-termination-handler/aks-node-termination-handler \
322 | --values=linux-windows2019.values.yaml
323 | ```
324 |
325 | ## Red Hat OpenShift support
326 |
327 | For OpenShift clusters that run their nodes on Azure compute, you must enable pod `hostNetwork` support because OpenShift networking has a [restriction](https://docs.openshift.com/container-platform/4.15/networking/understanding-networking.html) on using the Azure Metadata Service.
328 |
329 | This support can be enabled with `--set hostNetwork=true`
330 |
331 | ```bash
332 | helm upgrade aks-node-termination-handler \
333 | --install \
334 | --namespace kube-system \
335 | aks-node-termination-handler/aks-node-termination-handler \
336 | --set priorityClassName=system-node-critical \
337 | --set hostNetwork=true
338 | ```
339 |
340 | ## NetworkPolicy support
341 |
342 | To limit what the workload can communicate with, a NetworkPolicy can be added via `--set networkPolicy.enabled=true`. To only allow egress communication towards required endpoints, supply the control plane IP address via `--set networkPolicy.controlPlaneIP=10.11.12.13`. Additional egress rules can be added via `--set networkPolicy.additionalEgressRules=[]`; see the chart-provided `values.yaml` file for examples.
343 |
344 | ```bash
345 | helm upgrade aks-node-termination-handler \
346 | --install \
347 | --namespace kube-system \
348 | aks-node-termination-handler/aks-node-termination-handler \
349 | --set networkPolicy.enabled=true \
350 | --set networkPolicy.controlPlaneIP=10.11.12.2
351 | ```
352 |
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/Chart.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v2
2 | icon: https://helm.sh/img/helm.svg
3 | name: aks-node-termination-handler
4 | version: 1.1.7
5 | description: Gracefully handle Azure Virtual Machines shutdown within Kubernetes
6 | maintainers:
7 | - name: maksim-paskal # Maksim Paskal
8 | email: paskal.maksim@gmail.com
9 |
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/templates/configmap.yaml:
--------------------------------------------------------------------------------
1 | {{ if .Values.configMap.create }}
2 | apiVersion: v1
3 | kind: ConfigMap  # rendered only when .Values.configMap.create is true
4 | metadata:
5 | name: {{ tpl .Values.configMap.name . }}
6 | data:  # entries come from .Values.configMap.data, serialized by the toYaml line below
7 | {{ toYaml .Values.configMap.data | indent 2 }}
8 | {{ end }}
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/templates/daemonset.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: apps/v1
2 | kind: DaemonSet  # runs the aks-node-termination-handler container; name and labels come from the release name
3 | metadata:
4 | name: {{ .Release.Name }}
5 | labels:
6 | app: {{ .Release.Name }}
7 | spec:
8 | selector:
9 | matchLabels:
10 | app: {{ .Release.Name }}
11 | template:
12 | metadata:
13 | annotations:
14 | {{ if .Values.metrics.addAnnotations }}
15 | prometheus.io/port: "17923"
16 | prometheus.io/scrape: "true"
17 | {{ end }}
18 | {{ if .Values.annotations }}
19 | {{ toYaml .Values.annotations | indent 8 }}
20 | {{ end }}
21 | labels:
22 | app: {{ .Release.Name }}
23 | {{ if .Values.labels }}
24 | {{ toYaml .Values.labels | indent 8 }}
25 | {{ end }}
26 | spec:
27 | hostNetwork: {{ .Values.hostNetwork }}
28 | serviceAccountName: {{ .Release.Name }}  # replaces the deprecated serviceAccount field
29 | {{ if .Values.priorityClassName }}
30 | priorityClassName: {{ .Values.priorityClassName | quote }}
31 | {{ end }}
32 | {{- if .Values.tolerations }}
33 | tolerations:
34 | {{- toYaml .Values.tolerations | nindent 6 }}
35 | {{- end }}
36 | {{- if .Values.nodeSelector}}
37 | nodeSelector:
38 | {{- toYaml .Values.nodeSelector | nindent 8 }}
39 | {{- end }}
40 | {{- if .Values.affinity }}
41 | affinity:
42 | {{- toYaml .Values.affinity | nindent 8 }}
43 | {{- end }}
44 | volumes:
45 | - name: files  # ConfigMap volume, mounted at .Values.configMap.mountPath in the container
46 | configMap:
47 | name: {{ tpl .Values.configMap.name . }}
48 | {{ if .Values.extraVolumes }}
49 | {{ toYaml .Values.extraVolumes | indent 6 }}
50 | {{ end }}
51 | {{- if .Values.imagePullSecrets }}
52 | imagePullSecrets:
53 | {{- range .Values.imagePullSecrets }}
54 | - name: {{ . }}
55 | {{- end }}
56 | {{- end }}
57 | containers:
58 | - name: aks-node-termination-handler
59 | resources:
60 | {{ toYaml .Values.resources | indent 10 }}
61 | image: {{ .Values.image }}
62 | imagePullPolicy: {{ .Values.imagePullPolicy }}
63 | securityContext:
64 | {{ toYaml .Values.securityContext | indent 10 }}
65 | args:  # every item is quoted so values containing ': ', ' #' or a leading brace stay plain strings
66 | {{- range .Values.args }}
67 | - {{ . | quote }}
68 | {{- end}}
69 | env:
70 | - name: MY_NODE_NAME  # node name taken from the pod's spec.nodeName field
71 | valueFrom:
72 | fieldRef:
73 | fieldPath: spec.nodeName
74 | {{ if .Values.env }}
75 | {{ toYaml .Values.env | indent 8 }}
76 | {{ end }}
77 | livenessProbe:
78 | httpGet:
79 | path: /healthz
80 | port: http  # named container port declared under ports below (17923)
81 | scheme: HTTP
82 | initialDelaySeconds: 30
83 | periodSeconds: 30
84 | timeoutSeconds: 5
85 | ports:
86 | - name: http
87 | containerPort: 17923
88 | protocol: TCP
89 | volumeMounts:
90 | - name: files
91 | mountPath: {{ .Values.configMap.mountPath }}
92 | readOnly: true
93 | {{ if .Values.extraVolumeMounts}}
94 | {{ toYaml .Values.extraVolumeMounts | indent 8 }}
95 | {{ end }}
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/templates/networkpolicy.yaml:
--------------------------------------------------------------------------------
1 | {{ if .Values.networkPolicy.enabled }}
2 | apiVersion: networking.k8s.io/v1
3 | kind: NetworkPolicy  # rendered only when .Values.networkPolicy.enabled is true
4 | metadata:
5 | name: {{ .Release.Name }}
6 | spec:
7 | egress:
8 | - ports:
9 | - port: 80
10 | protocol: TCP
11 | to:
12 | - ipBlock:
13 | cidr: 169.254.169.254/32  # link-local metadata address; presumably the Azure Metadata Service polled by the handler
14 | - ports:
15 | - port: 443  # limited to controlPlaneIP when it is set; otherwise this rule has no destination restriction
16 | protocol: TCP
17 | {{- if .Values.networkPolicy.controlPlaneIP }}
18 | to:
19 | - ipBlock:
20 | cidr: {{ .Values.networkPolicy.controlPlaneIP }}/32
21 | {{- end }}
22 | {{- if .Values.networkPolicy.additionalEgressRules }}
23 | {{ toYaml .Values.networkPolicy.additionalEgressRules | indent 2 }}
24 | {{- end }}
25 | - ports:
26 | - port: 53  # DNS over UDP and TCP
27 | protocol: UDP
28 | - port: 53
29 | protocol: TCP
30 | to:
31 | - namespaceSelector: {}
32 | podSelector:
33 | matchLabels:
34 | k8s-app: kube-dns
35 | ingress:
36 | - from:
37 | - namespaceSelector: {}
38 | ports:
39 | - port: 17923  # same port the DaemonSet container exposes as "http"
40 | protocol: TCP
41 | podSelector:
42 | matchLabels:
43 | app: {{ .Release.Name }}
44 | policyTypes:
45 | - Ingress
46 | - Egress
47 | {{ end }}
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/templates/rbac.yaml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: ServiceAccount  # identity referenced by the DaemonSet pods; named after the Helm release
3 | metadata:
4 | name: {{ .Release.Name }}
5 | namespace: {{ .Release.Namespace }}
6 | ---
7 | kind: ClusterRole  # cluster-wide permissions granted to the ServiceAccount above
8 | apiVersion: rbac.authorization.k8s.io/v1
9 | metadata:
10 | name: {{ .Release.Name }}
11 | rules:
12 | - apiGroups:
13 | - ""
14 | resources:
15 | - nodes  # read and modify nodes (the e2e test expects the node to end up unschedulable and tainted)
16 | verbs:
17 | - get
18 | - list
19 | - patch
20 | - update
21 | - apiGroups:
22 | - ""
23 | resources:
24 | - pods  # read-only access to pods
25 | verbs:
26 | - list
27 | - get
28 | - apiGroups:
29 | - ""
30 | resources:
31 | - pods/eviction  # allows creating pod evictions
32 | verbs:
33 | - create
34 | - apiGroups:
35 | - extensions
36 | resources:
37 | - daemonsets  # presumably read to recognise DaemonSet-managed pods while draining - TODO confirm
38 | verbs:
39 | - get
40 | - apiGroups:
41 | - apps
42 | resources:
43 | - daemonsets
44 | verbs:
45 | - get
46 | - apiGroups:
47 | - ""
48 | resources:
49 | - events  # allows publishing Kubernetes events (the e2e test checks for them)
50 | verbs:
51 | - create
52 | ---
53 | kind: ClusterRoleBinding  # binds the ClusterRole to the ServiceAccount in the release namespace
54 | apiVersion: rbac.authorization.k8s.io/v1
55 | metadata:
56 | name: {{ .Release.Name }}
57 | subjects:
58 | - kind: ServiceAccount
59 | name: {{ .Release.Name }}
60 | namespace: {{ .Release.Namespace }}
61 | roleRef:
62 | kind: ClusterRole
63 | name: {{ .Release.Name }}
64 | apiGroup: rbac.authorization.k8s.io
--------------------------------------------------------------------------------
/charts/aks-node-termination-handler/values.yaml:
--------------------------------------------------------------------------------
1 | image: paskalmaksim/aks-node-termination-handler:latest
2 | imagePullPolicy: Always
3 | imagePullSecrets: []
4 |
5 | args: []
6 | env: []
7 |
8 | priorityClassName: ""
9 | annotations: {}
10 | labels: {}
11 |
12 | configMap:
13 | create: true
14 | name: "{{ .Release.Name }}-files"
15 | mountPath: /files
16 | data: {}
17 | # slack-payload.json: |
18 | # {
19 | # "channel": "#mychannel",
20 | # "username": "webhookbot",
21 | # "text": "This is message for {{ .NodeName }}, {{ .InstanceType }} from {{ .NodeRegion }}",
22 | # "icon_emoji": ":ghost:"
23 | # }
24 | # prometheus-pushgateway-payload.txt: |
25 | # node_termination_event{node="{{ .NodeName }}"} 1
26 |
27 | extraVolumes: []
28 | extraVolumeMounts: []
29 |
30 | networkPolicy:
31 | enabled: false
32 | # controlPlaneIP: "123.X.X.X" # If not provided, network policy will allow all access to port 443/tcp
33 | # additionalEgressRules:
34 | # - ports:
35 | # - port: 443
36 | # protocol: TCP
37 | # to:
38 | # - ipBlock:
39 | # cidr: 124.X.X.X/24
40 |
41 | metrics:
42 | addAnnotations: true
43 |
44 | hostNetwork: false
45 |
46 | securityContext:
47 | runAsNonRoot: true
48 | privileged: false
49 | readOnlyRootFilesystem: true
50 | allowPrivilegeEscalation: false
51 | capabilities:
52 | drop:
53 | - ALL
54 | windowsOptions:
55 | runAsUserName: "ContainerUser"
56 | seccompProfile:
57 | type: RuntimeDefault
58 |
59 | affinity: {}
60 |
61 | tolerations:
62 | - key: "kubernetes.azure.com/scalesetpriority"
63 | operator: "Equal"
64 | value: "spot"
65 | effect: "NoSchedule"
66 |
67 | nodeSelector: {}
68 | # if you want to handle events only from spot instances
69 | # nodeSelector:
70 | # kubernetes.azure.com/scalesetpriority: spot
71 |
72 | resources:
73 | limits:
74 | memory: 100Mi
75 | requests:
76 | cpu: 20m
77 | memory: 100Mi
78 |
--------------------------------------------------------------------------------
/cmd/main.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package main
14 |
15 | import (
16 | "context"
17 | "flag"
18 | "fmt"
19 | "os"
20 | "os/signal"
21 | "syscall"
22 | "time"
23 |
24 | "github.com/maksim-paskal/aks-node-termination-handler/internal"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
26 | logrushooksentry "github.com/maksim-paskal/logrus-hook-sentry"
27 | log "github.com/sirupsen/logrus"
28 | )
29 |
30 | var version = flag.Bool("version", false, "version")
31 |
32 | // main parses the command-line flags, configures logrus (level, caller reporting,
33 | // formatter and Sentry hook), wires SIGINT/SIGTERM to context cancellation, calls
34 | // internal.Run and, once the context is done, sleeps for the configured grace period.
35 | func main() {
36 | flag.Parse()
37 |
38 | // -version prints the build version and exits before anything else is set up.
39 | if *version {
40 | fmt.Println(config.GetVersion()) //nolint:forbidigo
41 | os.Exit(0)
42 | }
43 |
44 | logLevel, err := log.ParseLevel(*config.Get().LogLevel)
45 | if err != nil {
46 | log.WithError(err).Fatal()
47 | }
48 |
49 | log.SetLevel(logLevel)
50 | log.SetReportCaller(true)
51 |
52 | if !*config.Get().LogPretty {
53 | log.SetFormatter(&log.JSONFormatter{})
54 | }
55 |
56 | ctx, cancel := context.WithCancel(context.Background())
57 | defer cancel()
58 |
59 | log.Infof("Starting %s...", config.GetVersion())
60 |
61 | hook, err := logrushooksentry.NewHook(ctx, logrushooksentry.Options{
62 | SentryDSN: *config.Get().SentryDSN,
63 | Release: config.GetVersion(),
64 | })
65 | if err != nil {
66 | // Keep running without Sentry reporting. The hook is registered only when it
67 | // was created successfully, so a failed constructor result is never installed.
68 | log.WithError(err).Error()
69 | } else {
70 | log.AddHook(hook)
71 | }
72 |
73 | signalChanInterrupt := make(chan os.Signal, 1)
74 | signal.Notify(signalChanInterrupt, syscall.SIGINT, syscall.SIGTERM)
75 |
76 | // Cancel the context when logrus terminates the process (for example via Fatal).
77 | log.RegisterExitHandler(func() {
78 | cancel()
79 | })
80 |
81 | go func() {
82 | // The first signal, or a cancellation from elsewhere, starts the shutdown.
83 | select {
84 | case <-signalChanInterrupt:
85 | log.Error("Got interruption signal...")
86 | cancel()
87 | case <-ctx.Done():
88 | }
89 | // A further signal received while shutting down exits immediately.
90 | <-signalChanInterrupt
91 | os.Exit(1)
92 | }()
93 |
94 | if err := internal.Run(ctx); err != nil {
95 | log.WithError(err).Fatal()
96 | }
97 |
98 | <-ctx.Done()
99 |
100 | log.Infof("Waiting %s before shutdown...", config.Get().GracePeriod())
101 | time.Sleep(config.Get().GracePeriod())
102 | }
103 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | ignore:
2 | # ignore cmd/main.go because it's a main file
3 | - "cmd/main.go"
4 | # ignore because testing requires an active connection to Telegram
5 | - "pkg/alert/alert.go"
6 | # ignore because testing requires an active connection to a Kubernetes cluster
7 | - "pkg/web/web.go"
8 | - "pkg/api/api.go"
9 | - "pkg/client/client.go"
--------------------------------------------------------------------------------
/e2e/main_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package main_test
14 |
15 | import (
16 | "context"
17 | "encoding/json"
18 | "flag"
19 | "net/http"
20 | "net/http/httptest"
21 | "testing"
22 |
23 | "github.com/maksim-paskal/aks-node-termination-handler/internal"
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
27 | "github.com/pkg/errors"
28 | log "github.com/sirupsen/logrus"
29 | corev1 "k8s.io/api/core/v1"
30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31 | )
32 |
33 | const (
34 | azureResourceName = "test-e2e-resource"
35 | eventID = "test-event-id"
36 | eventType = types.EventTypePreempt
37 | taintKey = "aks-node-termination-handler/preempt"
38 | taintEffect = corev1.TaintEffectNoSchedule
39 | )
40 | // TestDrain serves a fake scheduled-events document, runs internal.Run against it and checks that the node is unschedulable, carries the preempt taint and has the expected events.
41 | func TestDrain(t *testing.T) { //nolint:funlen,cyclop
42 | t.Parallel()
43 |
44 | log.SetLevel(log.DebugLevel)
45 | log.SetReportCaller(true)
46 |
47 | handler := http.NewServeMux()
48 | handler.HandleFunc("/document", func(w http.ResponseWriter, _ *http.Request) {
49 | message, _ := json.Marshal(types.ScheduledEventsType{
50 | DocumentIncarnation: 1,
51 | Events: []types.ScheduledEventsEvent{
52 | {
53 | EventId: eventID,
54 | EventType: eventType,
55 | ResourceType: "resourceType",
56 | Resources: []string{azureResourceName},
57 | },
58 | },
59 | })
60 |
61 | w.WriteHeader(http.StatusOK)
62 | _, _ = w.Write(message)
63 | })
64 |
65 | testServer := httptest.NewServer(handler)
66 | defer testServer.Close() // shut the fake endpoint down when the test returns instead of leaking it
67 | _ = flag.Set("config", "./testdata/config_test.yaml")
68 | _ = flag.Set("endpoint", testServer.URL+"/document") // point the handler at the fake endpoint
69 | _ = flag.Set("resource.name", azureResourceName)
70 |
71 | flag.Parse()
72 |
73 | ctx := context.TODO()
74 |
75 | if err := internal.Run(ctx); err != nil {
76 | t.Fatal(err)
77 | }
78 |
79 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, *config.Get().NodeName, metav1.GetOptions{})
80 | if err != nil {
81 | t.Fatal(err)
82 | }
83 |
84 | if !node.Spec.Unschedulable {
85 | t.Fatal("node must be unschedulable")
86 | }
87 |
88 | if len(node.Spec.Taints) == 0 {
89 | t.Fatal("node must have taints")
90 | }
91 |
92 | taintFound := false
93 |
94 | for _, taint := range node.Spec.Taints {
95 | if taint.Key == taintKey && taint.Value == eventID && taint.Effect == taintEffect {
96 | taintFound = true
97 |
98 | break
99 | }
100 | }
101 |
102 | if !taintFound {
103 | t.Fatal("taint not found")
104 | }
105 |
106 | if err := checkNodeEvent(ctx); err != nil {
107 | t.Fatal(err)
108 | }
109 | }
110 |
111 | func checkNodeEvent(ctx context.Context) error { //nolint:cyclop
112 | events, err := client.GetKubernetesClient().CoreV1().Events("").List(ctx, metav1.ListOptions{})
113 | if err != nil {
114 | return errors.Wrap(err, "error in list events")
115 | }
116 |
117 | nodeName := *config.Get().NodeName
118 | eventMessageReceived := 0
119 | eventMessageBeforeListen := 0
120 |
121 | for _, event := range events.Items {
122 | if event.Source.Component != "aks-node-termination-handler" {
123 | continue
124 | }
125 |
126 | if event.InvolvedObject.Name != nodeName {
127 | continue
128 | }
129 |
130 | if event.Reason == eventType && event.Message == config.EventMessageReceived {
131 | eventMessageReceived++
132 | }
133 |
134 | if event.Reason == "ReadEvents" && event.Message == config.EventMessageBeforeListen {
135 | eventMessageBeforeListen++
136 | }
137 | }
138 |
139 | if eventMessageReceived == 0 {
140 | return errors.New("eventMessageReceived not found in events")
141 | }
142 |
143 | if eventMessageBeforeListen == 0 {
144 | return errors.New("eventMessageBeforeListen not found in events")
145 | }
146 |
147 | return nil
148 | }
149 |
--------------------------------------------------------------------------------
/e2e/testdata/config_test.yaml:
--------------------------------------------------------------------------------
1 | taintnode: true
2 | tainteffect: NoSchedule
3 | podgraceperiodseconds: 30
4 | exitafternodedrain: true
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/maksim-paskal/aks-node-termination-handler
2 |
3 | go 1.23.0
4 |
5 | toolchain go1.23.4
6 |
7 | require (
8 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible
9 | github.com/google/uuid v1.6.0
10 | github.com/hashicorp/go-retryablehttp v0.7.7
11 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1
12 | github.com/pkg/errors v0.9.1
13 | github.com/prometheus/client_golang v1.20.5
14 | github.com/sirupsen/logrus v1.9.3
15 | github.com/stretchr/testify v1.10.0
16 | gopkg.in/yaml.v3 v3.0.1
17 | k8s.io/api v0.32.0
18 | k8s.io/apimachinery v0.32.0
19 | k8s.io/client-go v0.32.0
20 | k8s.io/kubectl v0.32.0
21 | )
22 |
23 | require (
24 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect
25 | github.com/MakeNowJust/heredoc v1.0.0 // indirect
26 | github.com/beorn7/perks v1.0.1 // indirect
27 | github.com/blang/semver/v4 v4.0.0 // indirect
28 | github.com/cespare/xxhash/v2 v2.3.0 // indirect
29 | github.com/chai2010/gettext-go v1.0.3 // indirect
30 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
31 | github.com/emicklei/go-restful/v3 v3.12.1 // indirect
32 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f // indirect
33 | github.com/fxamacker/cbor/v2 v2.7.0 // indirect
34 | github.com/getsentry/sentry-go v0.30.0 // indirect
35 | github.com/go-errors/errors v1.5.1 // indirect
36 | github.com/go-logr/logr v1.4.2 // indirect
37 | github.com/go-openapi/jsonpointer v0.21.0 // indirect
38 | github.com/go-openapi/jsonreference v0.21.0 // indirect
39 | github.com/go-openapi/swag v0.23.0 // indirect
40 | github.com/gogo/protobuf v1.3.2 // indirect
41 | github.com/golang/protobuf v1.5.4 // indirect
42 | github.com/google/btree v1.1.3 // indirect
43 | github.com/google/gnostic-models v0.6.9 // indirect
44 | github.com/google/go-cmp v0.6.0 // indirect
45 | github.com/google/gofuzz v1.2.0 // indirect
46 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
47 | github.com/gorilla/websocket v1.5.3 // indirect
48 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
49 | github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
50 | github.com/inconshreveable/mousetrap v1.1.0 // indirect
51 | github.com/josharian/intern v1.0.0 // indirect
52 | github.com/json-iterator/go v1.1.12 // indirect
53 | github.com/klauspost/compress v1.17.11 // indirect
54 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
55 | github.com/mailru/easyjson v0.9.0 // indirect
56 | github.com/mitchellh/go-wordwrap v1.0.1 // indirect
57 | github.com/moby/spdystream v0.5.0 // indirect
58 | github.com/moby/term v0.5.0 // indirect
59 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
60 | github.com/modern-go/reflect2 v1.0.2 // indirect
61 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 // indirect
62 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
63 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
64 | github.com/peterbourgon/diskv v2.0.1+incompatible // indirect
65 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
66 | github.com/prometheus/client_model v0.6.1 // indirect
67 | github.com/prometheus/common v0.61.0 // indirect
68 | github.com/prometheus/procfs v0.15.1 // indirect
69 | github.com/russross/blackfriday/v2 v2.1.0 // indirect
70 | github.com/spf13/cobra v1.8.1 // indirect
71 | github.com/spf13/pflag v1.0.5 // indirect
72 | github.com/technoweenie/multipartstreamer v1.0.1 // indirect
73 | github.com/x448/float16 v0.8.4 // indirect
74 | github.com/xlab/treeprint v1.2.0 // indirect
75 | golang.org/x/net v0.33.0 // indirect
76 | golang.org/x/oauth2 v0.24.0 // indirect
77 | golang.org/x/sync v0.10.0 // indirect
78 | golang.org/x/sys v0.28.0 // indirect
79 | golang.org/x/term v0.27.0 // indirect
80 | golang.org/x/text v0.21.0 // indirect
81 | golang.org/x/time v0.8.0 // indirect
82 | google.golang.org/protobuf v1.36.0 // indirect
83 | gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
84 | gopkg.in/inf.v0 v0.9.1 // indirect
85 | k8s.io/cli-runtime v0.32.0 // indirect
86 | k8s.io/component-base v0.32.0 // indirect
87 | k8s.io/klog/v2 v2.130.1 // indirect
88 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 // indirect
89 | k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect
90 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
91 | sigs.k8s.io/kustomize/api v0.18.0 // indirect
92 | sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect
93 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect
94 | sigs.k8s.io/yaml v1.4.0 // indirect
95 | )
96 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0=
2 | github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
3 | github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ=
4 | github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE=
5 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
6 | github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
7 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
8 | github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
9 | github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
10 | github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
11 | github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
12 | github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
13 | github.com/chai2010/gettext-go v1.0.3 h1:9liNh8t+u26xl5ddmWLmsOsdNLwkdRTg5AG+JnTiM80=
14 | github.com/chai2010/gettext-go v1.0.3/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA=
15 | github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
16 | github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY=
17 | github.com/creack/pty v1.1.18/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4=
18 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
19 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
20 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
21 | github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
22 | github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU=
23 | github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
24 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f h1:Wl78ApPPB2Wvf/TIe2xdyJxTlb6obmF18d8QdkxNDu4=
25 | github.com/exponent-io/jsonpath v0.0.0-20210407135951-1de76d718b3f/go.mod h1:OSYXu++VVOHnXeitef/D8n/6y4QV8uLHSFXX4NeXMGc=
26 | github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
27 | github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
28 | github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
29 | github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
30 | github.com/getsentry/sentry-go v0.30.0 h1:lWUwDnY7sKHaVIoZ9wYqRHJ5iEmoc0pqcRqFkosKzBo=
31 | github.com/getsentry/sentry-go v0.30.0/go.mod h1:WU9B9/1/sHDqeV8T+3VwwbjeR5MSXs/6aqG3mqZrezA=
32 | github.com/go-errors/errors v1.5.1 h1:ZwEMSLRCapFLflTpT7NKaAc7ukJ8ZPEjzlxt8rPN8bk=
33 | github.com/go-errors/errors v1.5.1/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og=
34 | github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
35 | github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
36 | github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
37 | github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
38 | github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ=
39 | github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4=
40 | github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE=
41 | github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
42 | github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
43 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
44 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible h1:2cauKuaELYAEARXRkq2LrJ0yDDv1rW7+wrTEdVL3uaU=
45 | github.com/go-telegram-bot-api/telegram-bot-api v4.6.4+incompatible/go.mod h1:qf9acutJ8cwBUhm1bqgz6Bei9/C/c93FPDljKWwsOgM=
46 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
47 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
48 | github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
49 | github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
50 | github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
51 | github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
52 | github.com/google/gnostic-models v0.6.9 h1:MU/8wDLif2qCXZmzncUQ/BOfxWfthHi63KqpoNbWqVw=
53 | github.com/google/gnostic-models v0.6.9/go.mod h1:CiWsm0s6BSQd1hRn8/QmxqB6BesYcbSZxsz9b0KuDBw=
54 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
55 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
56 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
57 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
58 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
59 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
60 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgYQBbFN4U4JNXUNYpxael3UzMyo=
61 | github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
62 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4=
63 | github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ=
64 | github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
65 | github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
66 | github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
67 | github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
68 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA=
69 | github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
70 | github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
71 | github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
72 | github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
73 | github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
74 | github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
75 | github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
76 | github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
77 | github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
78 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
79 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
80 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
81 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
82 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
83 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
84 | github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
85 | github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
86 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
87 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
88 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
89 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
90 | github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
91 | github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
92 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de h1:9TO3cAIGXtEhnIaL+V+BEER86oLrvS+kWobKpbJuye0=
93 | github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de/go.mod h1:zAbeS9B/r2mtpb6U+EI2rYA5OAXxsYw6wTamcNW+zcE=
94 | github.com/mailru/easyjson v0.9.0 h1:PrnmzHw7262yW8sTBwxi1PdJA3Iw/EKBa8psRf7d9a4=
95 | github.com/mailru/easyjson v0.9.0/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
96 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1 h1:9IQ8kn6XwZJ/yDjkIyTLAce7k78J3WfeZtjIh3jA/MY=
97 | github.com/maksim-paskal/logrus-hook-sentry v0.1.1/go.mod h1:FpJn8dMDsuG8/lt65HQauZuXIiG2LqAYM+vbKV//Ga0=
98 | github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
99 | github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
100 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
101 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
102 | github.com/mitchellh/go-wordwrap v1.0.1 h1:TLuKupo69TCn6TQSyGxwI1EblZZEsQ0vMlAFQflz0v0=
103 | github.com/mitchellh/go-wordwrap v1.0.1/go.mod h1:R62XHJLzvMFRBbcrT7m7WgmE1eOyTSsCt+hzestvNj0=
104 | github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
105 | github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
106 | github.com/moby/term v0.5.0 h1:xt8Q1nalod/v7BqbG21f8mQPqH+xAaC9C3N3wfWbVP0=
107 | github.com/moby/term v0.5.0/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
108 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
109 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
110 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
111 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
112 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
113 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00 h1:n6/2gBQ3RWajuToeY6ZtZTIKv2v7ThUy5KKusIT0yc0=
114 | github.com/monochromegane/go-gitignore v0.0.0-20200626010858-205db1a8cc00/go.mod h1:Pm3mSP3c5uWn86xMLZ5Sa7JB9GsEZySvHYXCTK4E9q4=
115 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
116 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
117 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
118 | github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
119 | github.com/onsi/ginkgo/v2 v2.21.0 h1:7rg/4f3rB88pb5obDgNZrNHrQ4e6WpjonchcpuBRnZM=
120 | github.com/onsi/ginkgo/v2 v2.21.0/go.mod h1:7Du3c42kxCUegi0IImZ1wUQzMBVecgIHjR1C+NkhLQo=
121 | github.com/onsi/gomega v1.35.1 h1:Cwbd75ZBPxFSuZ6T+rN/WCb/gOc6YgFBXLlZLhC7Ds4=
122 | github.com/onsi/gomega v1.35.1/go.mod h1:PvZbdDc8J6XJEpDK4HCuRBm8a6Fzp9/DmhC9C7yFlog=
123 | github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI=
124 | github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU=
125 | github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4=
126 | github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
127 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
128 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
129 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
130 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
131 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
132 | github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
133 | github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
134 | github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
135 | github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
136 | github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ=
137 | github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s=
138 | github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
139 | github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
140 | github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
141 | github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
142 | github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
143 | github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
144 | github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
145 | github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
146 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
147 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
148 | github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM=
149 | github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y=
150 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
151 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
152 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
153 | github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
154 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
155 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
156 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
157 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
158 | github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
159 | github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
160 | github.com/technoweenie/multipartstreamer v1.0.1 h1:XRztA5MXiR1TIRHxH2uNxXxaIkKQDeX7m2XsSOlQEnM=
161 | github.com/technoweenie/multipartstreamer v1.0.1/go.mod h1:jNVxdtShOxzAsukZwTSw6MDx5eUJoiEBsSvzDU9uzog=
162 | github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
163 | github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
164 | github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
165 | github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0=
166 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
167 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
168 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
169 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
170 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
171 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
172 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
173 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
174 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
175 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
176 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
177 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
178 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
179 | golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
180 | golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
181 | golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE=
182 | golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
183 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
184 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
185 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
186 | golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
187 | golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
188 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
189 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
190 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
191 | golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
192 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
193 | golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
194 | golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
195 | golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
196 | golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
197 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
198 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
199 | golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
200 | golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
201 | golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg=
202 | golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
203 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
204 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
205 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
206 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
207 | golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
208 | golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
209 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
210 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
211 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
212 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
213 | google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ=
214 | google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
215 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
216 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
217 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
218 | gopkg.in/evanphx/json-patch.v4 v4.12.0 h1:n6jtcsulIzXPJaxegRbvFNNrZDjbij7ny3gmSPG+6V4=
219 | gopkg.in/evanphx/json-patch.v4 v4.12.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M=
220 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc=
221 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw=
222 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
223 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
224 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
225 | k8s.io/api v0.32.0 h1:OL9JpbvAU5ny9ga2fb24X8H6xQlVp+aJMFlgtQjR9CE=
226 | k8s.io/api v0.32.0/go.mod h1:4LEwHZEf6Q/cG96F3dqR965sYOfmPM7rq81BLgsE0p0=
227 | k8s.io/apimachinery v0.32.0 h1:cFSE7N3rmEEtv4ei5X6DaJPHHX0C+upp+v5lVPiEwpg=
228 | k8s.io/apimachinery v0.32.0/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE=
229 | k8s.io/cli-runtime v0.32.0 h1:dP+OZqs7zHPpGQMCGAhectbHU2SNCuZtIimRKTv2T1c=
230 | k8s.io/cli-runtime v0.32.0/go.mod h1:Mai8ht2+esoDRK5hr861KRy6z0zHsSTYttNVJXgP3YQ=
231 | k8s.io/client-go v0.32.0 h1:DimtMcnN/JIKZcrSrstiwvvZvLjG0aSxy8PxN8IChp8=
232 | k8s.io/client-go v0.32.0/go.mod h1:boDWvdM1Drk4NJj/VddSLnx59X3OPgwrOo0vGbtq9+8=
233 | k8s.io/component-base v0.32.0 h1:d6cWHZkCiiep41ObYQS6IcgzOUQUNpywm39KVYaUqzU=
234 | k8s.io/component-base v0.32.0/go.mod h1:JLG2W5TUxUu5uDyKiH2R/7NnxJo1HlPoRIIbVLkK5eM=
235 | k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
236 | k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
237 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7 h1:hcha5B1kVACrLujCKLbr8XWMxCxzQx42DY8QKYJrDLg=
238 | k8s.io/kube-openapi v0.0.0-20241212222426-2c72e554b1e7/go.mod h1:GewRfANuJ70iYzvn+i4lezLDAFzvjxZYK1gn1lWcfas=
239 | k8s.io/kubectl v0.32.0 h1:rpxl+ng9qeG79YA4Em9tLSfX0G8W0vfaiPVrc/WR7Xw=
240 | k8s.io/kubectl v0.32.0/go.mod h1:qIjSX+QgPQUgdy8ps6eKsYNF+YmFOAO3WygfucIqFiE=
241 | k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0=
242 | k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
243 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE=
244 | sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg=
245 | sigs.k8s.io/kustomize/api v0.18.0 h1:hTzp67k+3NEVInwz5BHyzc9rGxIauoXferXyjv5lWPo=
246 | sigs.k8s.io/kustomize/api v0.18.0/go.mod h1:f8isXnX+8b+SGLHQ6yO4JG1rdkZlvhaCf/uZbLVMb0U=
247 | sigs.k8s.io/kustomize/kyaml v0.18.1 h1:WvBo56Wzw3fjS+7vBjN6TeivvpbW9GmRaWZ9CIVmt4E=
248 | sigs.k8s.io/kustomize/kyaml v0.18.1/go.mod h1:C3L2BFVU1jgcddNBE1TxuVLgS46TjObMwW5FT9FcjYo=
249 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk=
250 | sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4=
251 | sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
252 | sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
253 |
--------------------------------------------------------------------------------
/internal/internal.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package internal
14 |
15 | import (
16 | "context"
17 |
18 | "github.com/hashicorp/go-retryablehttp"
19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/alert"
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api"
21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache"
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client"
23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/events"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
27 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
28 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/web"
29 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/webhook"
30 | "github.com/pkg/errors"
31 | log "github.com/sirupsen/logrus"
32 | )
33 |
34 | func Run(ctx context.Context) error {
35 | err := config.Load()
36 | if err != nil {
37 | return errors.Wrap(err, "error in config load")
38 | }
39 |
40 | err = config.Check()
41 | if err != nil {
42 | return errors.Wrap(err, "error in config check")
43 | }
44 |
45 | log.Debugf("using config: %s", config.Get().String())
46 |
47 | retryClient := retryablehttp.NewClient()
48 | retryClient.HTTPClient.Transport = metrics.NewInstrumenter("webhook").
49 | WithProxy(*config.Get().WebhookProxy).
50 | WithInsecureSkipVerify(*config.Get().WebhookInsecure).
51 | InstrumentedRoundTripper()
52 | retryClient.RetryMax = *config.Get().WebhookRetries
53 | webhook.SetHTTPClient(retryClient)
54 |
55 | err = alert.Init()
56 | if err != nil {
57 | return errors.Wrap(err, "error in init alerts")
58 | }
59 |
60 | err = client.Init()
61 | if err != nil {
62 | return errors.Wrap(err, "error in init api")
63 | }
64 |
65 | go cache.SheduleCleaning(ctx)
66 | go web.Start(ctx)
67 |
68 | if err := startReadingEvents(ctx); err != nil {
69 | return errors.Wrap(err, "error in startReadingEvents")
70 | }
71 |
72 | return nil
73 | }
74 |
75 | func startReadingEvents(ctx context.Context) error {
76 | azureResource, err := api.GetAzureResourceName(ctx, *config.Get().NodeName)
77 | if err != nil {
78 | return errors.Wrap(err, "error in getting azure resource name")
79 | }
80 |
81 | eventReader := events.NewReader()
82 | eventReader.AzureResource = azureResource
83 | eventReader.Period = *config.Get().Period
84 | eventReader.Endpoint = *config.Get().Endpoint
85 | eventReader.RequestTimeout = *config.Get().RequestTimeout
86 | eventReader.NodeName = *config.Get().NodeName
87 | eventReader.BeforeReading = func(ctx context.Context) error {
88 | // add event to node
89 | if err := api.AddNodeEvent(ctx, "Info", "ReadEvents", config.EventMessageBeforeListen); err != nil {
90 | return errors.Wrap(err, "error in add node event")
91 | }
92 |
93 | return nil
94 | }
95 |
96 | eventReader.EventReceived = func(ctx context.Context, event types.ScheduledEventsEvent) (bool, error) {
97 | // add event to node
98 | if err := api.AddNodeEvent(ctx, "Warning", string(event.EventType), config.EventMessageReceived); err != nil {
99 | return false, errors.Wrap(err, "error in add node event")
100 | }
101 |
102 | // check if event is excludedm by default Freeze event is excluded
103 | if config.Get().IsExcludedEvent(event.EventType) {
104 | log.Infof("Excluded event %s by user config", event.EventType)
105 |
106 | return false, nil
107 | }
108 |
109 | // send event in separate goroutine
110 | go func() {
111 | if err := sendEvent(ctx, event); err != nil {
112 | log.WithError(err).Error("error in sendEvent")
113 | }
114 | }()
115 |
116 | // drain node
117 | if err := api.DrainNode(ctx, *config.Get().NodeName, string(event.EventType), event.EventId); err != nil {
118 | return false, errors.Wrap(err, "error in DrainNode")
119 | }
120 |
121 | return true, nil
122 | }
123 |
124 | // check for run in synchronous mode or not
125 | // synchronous mode is used for e2e tests
126 | if *config.Get().ExitAfterNodeDrain {
127 | eventReader.ReadEvents(ctx)
128 | } else {
129 | go eventReader.ReadEvents(ctx)
130 | }
131 |
132 | return nil
133 | }
134 |
135 | func sendEvent(ctx context.Context, event types.ScheduledEventsEvent) error {
136 | message, err := template.NewMessageType(ctx, *config.Get().NodeName, event)
137 | if err != nil {
138 | return errors.Wrap(err, "error in template.NewMessageType")
139 | }
140 |
141 | log.Infof("Message: %+v", message)
142 |
143 | message.Template = *config.Get().AlertMessage
144 |
145 | if err := alert.SendTelegram(message); err != nil {
146 | log.WithError(err).Error("error in alert.SendTelegram")
147 | }
148 |
149 | if err := webhook.SendWebHook(ctx, message); err != nil {
150 | log.WithError(err).Error("error in webhook.SendWebHook")
151 | }
152 |
153 | return nil
154 | }
155 |
--------------------------------------------------------------------------------
/mock/mock.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package main
14 |
15 | import (
16 | "flag"
17 | "fmt"
18 | "io"
19 | "net/http"
20 | "path/filepath"
21 | "strings"
22 | "time"
23 |
24 | log "github.com/sirupsen/logrus"
25 | )
26 |
27 | func debugHandler(w http.ResponseWriter, r *http.Request) {
28 | // Create return string
29 | request := []string{}
30 | // Add the request string
31 | url := fmt.Sprintf("%v %v %v", r.Method, r.URL, r.Proto)
32 | request = append(request, url)
33 | // Add the host
34 | request = append(request, fmt.Sprintf("Host: %v", r.Host))
35 |
36 | request = append(request, "--HEADERS--")
37 | // Loop through headers
38 | for name, headers := range r.Header {
39 | name = strings.ToLower(name)
40 |
41 | for _, h := range headers {
42 | request = append(request, fmt.Sprintf("%v: %v", name, h))
43 | }
44 | }
45 |
46 | bodyBytes, err := io.ReadAll(r.Body)
47 | if err != nil {
48 | log.WithError(err).Fatal()
49 | }
50 |
51 | defer r.Body.Close()
52 |
53 | request = append(request, "--BODY--")
54 | request = append(request, string(bodyBytes))
55 |
56 | _, _ = w.Write([]byte(strings.Join(request, "\n")))
57 | }
58 |
59 | // simple server for test env.
60 | func main() {
61 | address := flag.String("address", ":28080", "address")
62 | flag.Parse()
63 |
64 | http.HandleFunc("/debug", debugHandler)
65 | http.Handle("/", http.FileServer(http.Dir(".")))
66 |
67 | scheduledEventsType, err := filepath.Abs("pkg/types/testdata/ScheduledEventsType.json")
68 | if err != nil {
69 | log.WithError(err).Fatal()
70 | }
71 |
72 | log.Infof("edit %s file to test events", scheduledEventsType)
73 |
74 | const (
75 | readTimeout = 5 * time.Second
76 | writeTimeout = 10 * time.Second
77 | )
78 |
79 | server := &http.Server{
80 | Addr: *address,
81 | ReadTimeout: readTimeout,
82 | WriteTimeout: writeTimeout,
83 | }
84 |
85 | log.Infof("Listen %s", server.Addr)
86 |
87 | err = server.ListenAndServe()
88 | if err != nil {
89 | log.WithError(err).Fatal()
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/pkg/alert/alert.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package alert
14 |
15 | import (
16 | "strconv"
17 |
18 | tgbotapi "github.com/go-telegram-bot-api/telegram-bot-api"
19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
21 | "github.com/pkg/errors"
22 | log "github.com/sirupsen/logrus"
23 | )
24 |
// bot is the package-level Telegram Bot API client shared by Init, Ping and
// SendTelegram. Init assigns it only when a Telegram token is configured;
// until then it keeps its zero value (nil).
var bot *tgbotapi.BotAPI
26 |
27 | func Init() error {
28 | if len(*config.Get().TelegramToken) == 0 {
29 | log.Warning("not sending Telegram message, no token")
30 |
31 | return nil
32 | }
33 |
34 | var err error
35 |
36 | bot, err = tgbotapi.NewBotAPI(*config.Get().TelegramToken)
37 | if err != nil {
38 | return errors.Wrap(err, "error in NewBotAPI")
39 | }
40 |
41 | log.Printf("Authorized on account %s", bot.Self.UserName)
42 |
43 | return nil
44 | }
45 |
46 | // healthcheck.
47 | func Ping() error {
48 | if len(*config.Get().TelegramToken) != 0 {
49 | if _, err := bot.GetMe(); err != nil {
50 | return errors.Wrap(err, "error in bot.GetMe")
51 | }
52 | }
53 |
54 | return nil
55 | }
56 |
57 | func SendTelegram(obj *template.MessageType) error {
58 | if len(*config.Get().TelegramToken) == 0 {
59 | return nil
60 | }
61 |
62 | messageText, err := template.Message(obj)
63 | if err != nil {
64 | return errors.Wrap(err, "error in template.Message")
65 | }
66 |
67 | chatID, err := strconv.Atoi(*config.Get().TelegramChatID)
68 | if err != nil {
69 | return errors.Wrap(err, "error converting chatID")
70 | }
71 |
72 | msg := tgbotapi.NewMessage(int64(chatID), messageText)
73 |
74 | result, err := bot.Send(msg)
75 | if err != nil {
76 | return errors.Wrap(err, "error in bot.Send")
77 | }
78 |
79 | log.Infof("Telegram MessageID=%d", result.MessageID)
80 |
81 | return nil
82 | }
83 |
--------------------------------------------------------------------------------
/pkg/api/api.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package api
14 |
15 | import (
16 | "context"
17 | "fmt"
18 | "strings"
19 |
20 | "github.com/google/uuid"
21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/client"
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/logger"
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
25 | "github.com/pkg/errors"
26 | log "github.com/sirupsen/logrus"
27 | corev1 "k8s.io/api/core/v1"
28 | apierrorrs "k8s.io/apimachinery/pkg/api/errors"
29 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
30 | "k8s.io/apimachinery/pkg/util/wait"
31 | "k8s.io/client-go/util/retry"
32 | "k8s.io/kubectl/pkg/drain"
33 | )
34 |
// taintKeyPrefix is the prefix used for node taint keys in this package.
// NOTE(review): presumably consumed by getTaintKey, whose definition is not
// visible here, to build the per-event taint key; confirm.
const taintKeyPrefix = "aks-node-termination-handler"
36 |
37 | func GetAzureResourceName(ctx context.Context, nodeName string) (string, error) {
38 | // return user defined resource name
39 | if len(*config.Get().ResourceName) > 0 {
40 | return *config.Get().ResourceName, nil
41 | }
42 |
43 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
44 | if err != nil {
45 | return "", errors.Wrap(err, "error in Clientset.CoreV1().Nodes().Get")
46 | }
47 |
48 | azureResourceName, err := types.NewAzureResource(node.Spec.ProviderID)
49 | if err != nil {
50 | return "", errors.Wrap(err, "error in types.NewAzureResource")
51 | }
52 |
53 | return azureResourceName.EventResourceName, nil
54 | }
55 |
56 | func DrainNode(ctx context.Context, nodeName string, eventType string, eventID string) error { //nolint:cyclop
57 | log.Infof("Draining node %s", nodeName)
58 |
59 | node, err := GetNode(ctx, nodeName)
60 | if err != nil {
61 | return errors.Wrap(err, "error in nodes.get")
62 | }
63 |
64 | if node.Spec.Unschedulable {
65 | log.Infof("Node %s is already Unschedulable", node.Name)
66 |
67 | return nil
68 | }
69 |
70 | // taint node before draining if effect is NoSchedule or TaintEffectPreferNoSchedule
71 | if *config.Get().TaintNode && *config.Get().TaintEffect != string(corev1.TaintEffectNoExecute) {
72 | err = addTaint(ctx, node, getTaintKey(eventType), eventID)
73 | if err != nil {
74 | return errors.Wrap(err, "failed to taint node")
75 | }
76 | }
77 |
78 | logger := &logger.KubectlLogger{}
79 | logger.Log = func(message string) {
80 | log.Info(message)
81 | }
82 |
83 | helper := &drain.Helper{
84 | Ctx: ctx,
85 | Client: client.GetKubernetesClient(),
86 | Force: true,
87 | GracePeriodSeconds: *config.Get().PodGracePeriodSeconds,
88 | IgnoreAllDaemonSets: true,
89 | Out: logger,
90 | ErrOut: logger,
91 | DeleteEmptyDirData: true,
92 | Timeout: config.Get().NodeGracePeriod(),
93 | }
94 |
95 | if err := drain.RunCordonOrUncordon(helper, node, true); err != nil {
96 | return errors.Wrap(err, "error in drain.RunCordonOrUncordon")
97 | }
98 |
99 | if err := drain.RunNodeDrain(helper, node.Name); err != nil {
100 | return errors.Wrap(err, "error in drain.RunNodeDrain")
101 | }
102 |
103 | // taint node after draining if effect is TaintEffectNoExecute
104 | // this NoExecute taint effect will stop all daemonsents on the node that can not handle this effect
105 | if *config.Get().TaintNode && *config.Get().TaintEffect == string(corev1.TaintEffectNoExecute) {
106 | err = addTaint(ctx, node, getTaintKey(eventType), eventID)
107 | if err != nil {
108 | return errors.Wrap(err, "failed to taint node")
109 | }
110 | }
111 |
112 | return nil
113 | }
114 |
115 | func getTaintKey(eventType string) string {
116 | return fmt.Sprintf("%s/%s", taintKeyPrefix, strings.ToLower(eventType))
117 | }
118 |
119 | func addTaint(ctx context.Context, node *corev1.Node, taintKey string, taintValue string) error {
120 | log.Infof("Adding taint %s=%s on node %s", taintKey, taintValue, node.Name)
121 |
122 | freshNode := node.DeepCopy()
123 |
124 | var err error
125 |
126 | updateErr := wait.ExponentialBackoff(retry.DefaultBackoff, func() (bool, error) {
127 | if freshNode, err = client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, freshNode.Name, metav1.GetOptions{}); err != nil {
128 | nodeErr := errors.Wrapf(err, "failed to get node %s", freshNode.Name)
129 | log.Error(nodeErr)
130 |
131 | return false, nodeErr
132 | }
133 |
134 | err = updateNodeWith(ctx, taintKey, taintValue, freshNode)
135 |
136 | switch {
137 | case err == nil:
138 | return true, nil
139 | case apierrorrs.IsConflict(err):
140 | return false, nil
141 | case err != nil:
142 | return false, errors.Wrapf(err, "failed to taint node %s with key %s", freshNode.Name, taintKey)
143 | }
144 |
145 | return false, nil
146 | })
147 |
148 | if updateErr != nil {
149 | return err
150 | }
151 |
152 | log.Warnf("Successfully added taint %s on node %s", taintKey, freshNode.Name)
153 |
154 | return nil
155 | }
156 |
157 | func updateNodeWith(ctx context.Context, taintKey string, taintValue string, node *corev1.Node) error {
158 | node.Spec.Taints = append(node.Spec.Taints, corev1.Taint{
159 | Key: taintKey,
160 | Value: taintValue,
161 | Effect: corev1.TaintEffect(*config.Get().TaintEffect),
162 | })
163 | _, err := client.GetKubernetesClient().CoreV1().Nodes().Update(ctx, node, metav1.UpdateOptions{})
164 |
165 | return errors.Wrap(err, "failed to update node with taint")
166 | }
167 |
168 | func GetNode(ctx context.Context, nodeName string) (*corev1.Node, error) {
169 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
170 | if err != nil {
171 | return nil, errors.Wrap(err, "error in nodes.get")
172 | }
173 |
174 | return node, nil
175 | }
176 |
177 | func AddNodeEvent(ctx context.Context, eventType, eventReason, eventMessage string) error {
178 | message := &types.EventMessage{
179 | Type: eventType,
180 | Reason: eventReason,
181 | Message: eventMessage,
182 | }
183 |
184 | return AddNodeEventMessage(ctx, message)
185 | }
186 |
187 | func AddNodeEventMessage(ctx context.Context, message *types.EventMessage) error {
188 | node, err := GetNode(ctx, *config.Get().NodeName)
189 | if err != nil {
190 | return errors.Wrap(err, "error in GetNode")
191 | }
192 |
193 | event := corev1.Event{
194 | InvolvedObject: corev1.ObjectReference{
195 | APIVersion: "v1",
196 | Kind: "Node",
197 | Name: node.Name,
198 | UID: node.UID,
199 | ResourceVersion: node.ResourceVersion,
200 | },
201 | Count: 1,
202 | FirstTimestamp: metav1.Now(),
203 | LastTimestamp: metav1.Now(),
204 | ObjectMeta: metav1.ObjectMeta{
205 | Name: fmt.Sprintf("%s.%s", *config.Get().NodeName, uuid.New().String()),
206 | },
207 | Type: message.Type,
208 | Reason: message.Reason,
209 | Message: message.Message,
210 | Source: corev1.EventSource{
211 | Component: "aks-node-termination-handler",
212 | },
213 | }
214 |
215 | err = wait.ExponentialBackoff(retry.DefaultBackoff, func() (bool, error) {
216 | _, err = client.GetKubernetesClient().CoreV1().Events("default").Create(ctx, &event, metav1.CreateOptions{})
217 |
218 | switch {
219 | case err == nil:
220 | return true, nil
221 | case apierrorrs.IsConflict(err):
222 | return false, nil
223 | case err != nil:
224 | return false, errors.Wrap(err, "failed to create event")
225 | }
226 |
227 | return false, nil
228 | })
229 | if err != nil {
230 | return errors.Wrap(err, "failed to add event")
231 | }
232 |
233 | return nil
234 | }
235 |
236 | func GetNodeLabels(ctx context.Context, nodeName string) (map[string]string, error) {
237 | // this need for unit tests
238 | if nodeName == "!!invalid!!GetNodeLabels" {
239 | return nil, errors.New("invalid node name")
240 | }
241 |
242 | // this need for unit tests
243 | if client.GetKubernetesClient() == nil {
244 | return make(map[string]string), nil
245 | }
246 |
247 | node, err := client.GetKubernetesClient().CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
248 | if err != nil {
249 | return nil, errors.Wrap(err, "error in nodes.get")
250 | }
251 |
252 | return node.Labels, nil
253 | }
254 |
255 | func GetNodePods(ctx context.Context, nodeName string) ([]string, error) {
256 | // this need for unit tests
257 | if nodeName == "!!invalid!!GetNodePods" {
258 | return nil, errors.New("invalid node name")
259 | }
260 |
261 | // this need for unit tests
262 | if client.GetKubernetesClient() == nil {
263 | return []string{}, nil
264 | }
265 |
266 | pods, err := client.GetKubernetesClient().CoreV1().Pods("").List(ctx, metav1.ListOptions{})
267 | if err != nil {
268 | return nil, errors.Wrap(err, "error in pods.list")
269 | }
270 |
271 | result := make([]string, 0)
272 |
273 | for _, pod := range pods.Items {
274 | // ignore DaemonSet pods from pods list, because they are not affected by node termination
275 | if getPodReferenceKind(pod) == "DaemonSet" {
276 | continue
277 | }
278 |
279 | if pod.Spec.NodeName == nodeName {
280 | result = append(result, pod.Name)
281 | }
282 | }
283 |
284 | return result, nil
285 | }
286 |
287 | func getPodReferenceKind(pod corev1.Pod) string {
288 | for _, ownerReference := range pod.OwnerReferences {
289 | if len(ownerReference.Kind) > 0 {
290 | return ownerReference.Kind
291 | }
292 | }
293 |
294 | return ""
295 | }
296 |
--------------------------------------------------------------------------------
/pkg/cache/cache.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package cache
14 |
15 | import (
16 | "context"
17 | "sync"
18 | "time"
19 |
20 | log "github.com/sirupsen/logrus"
21 | )
22 |
23 | var data = sync.Map{}
24 |
25 | func Add(key string, ttl time.Duration) {
26 | data.Store(key, time.Now().Add(ttl))
27 | }
28 |
29 | func HasKey(key string) bool {
30 | _, exists := data.Load(key)
31 |
32 | return exists
33 | }
34 |
35 | func SheduleCleaning(ctx context.Context) {
36 | for ctx.Err() == nil {
37 | data.Range(func(key, value interface{}) bool {
38 | expireTime, ok := value.(time.Time)
39 |
40 | if ok && expireTime.Before(time.Now()) {
41 | log.Infof("delete %s", key)
42 |
43 | data.Delete(key)
44 | }
45 |
46 | return true
47 | })
48 |
49 | select {
50 | case <-time.After(time.Second):
51 | case <-ctx.Done():
52 | }
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/pkg/cache/cache_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package cache_test
14 |
15 | import (
16 | "context"
17 | "testing"
18 | "time"
19 |
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache"
21 | )
22 |
23 | func TestCache(t *testing.T) {
24 | t.Parallel()
25 |
26 | ctx, cancel := context.WithCancel(context.TODO())
27 | defer cancel()
28 |
29 | go cache.SheduleCleaning(ctx)
30 |
31 | const (
32 | test1sec = "test1sec"
33 | test3sec = "test3sec"
34 | )
35 |
36 | cache.Add(test1sec, time.Second)
37 | cache.Add(test3sec, 0)
38 | cache.Add(test3sec, 3*time.Second)
39 |
40 | time.Sleep(2 * time.Second)
41 |
42 | if cache.HasKey(test1sec) {
43 | t.Fatalf("%s not expired", test1sec)
44 | }
45 |
46 | if !cache.HasKey(test3sec) {
47 | t.Fatalf("%s expired", test3sec)
48 | }
49 |
50 | time.Sleep(2 * time.Second)
51 |
52 | if cache.HasKey(test3sec) {
53 | t.Fatalf("%s expired", test3sec)
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/pkg/client/client.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package client
14 |
15 | import (
16 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
17 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
18 | "github.com/pkg/errors"
19 | log "github.com/sirupsen/logrus"
20 | "k8s.io/client-go/kubernetes"
21 | "k8s.io/client-go/rest"
22 | "k8s.io/client-go/tools/clientcmd"
23 | k8sMetrics "k8s.io/client-go/tools/metrics"
24 | )
25 |
26 | var (
27 | clientset *kubernetes.Clientset
28 | restconfig *rest.Config
29 | )
30 |
31 | func Init() error {
32 | var err error
33 |
34 | k8sMetrics.Register(k8sMetrics.RegisterOpts{
35 | RequestResult: &metrics.KubernetesMetricsResult{},
36 | RequestLatency: &metrics.KubernetesMetricsLatency{},
37 | })
38 |
39 | if len(*config.Get().KubeConfigFile) > 0 {
40 | restconfig, err = clientcmd.BuildConfigFromFlags("", *config.Get().KubeConfigFile)
41 | if err != nil {
42 | return errors.Wrap(err, "error in clientcmd.BuildConfigFromFlags")
43 | }
44 | } else {
45 | log.Info("No kubeconfig file use incluster")
46 |
47 | restconfig, err = rest.InClusterConfig()
48 | if err != nil {
49 | return errors.Wrap(err, "error in rest.InClusterConfig")
50 | }
51 | }
52 |
53 | clientset, err = kubernetes.NewForConfig(restconfig)
54 | if err != nil {
55 | log.WithError(err).Fatal()
56 | }
57 |
58 | return nil
59 | }
60 |
61 | func GetKubernetesClient() *kubernetes.Clientset {
62 | return clientset
63 | }
64 |
--------------------------------------------------------------------------------
/pkg/config/config.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package config
14 |
15 | import (
16 | "encoding/json"
17 | "flag"
18 | "os"
19 | "strconv"
20 | "time"
21 |
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
23 | "github.com/pkg/errors"
24 | "gopkg.in/yaml.v3"
25 | corev1 "k8s.io/api/core/v1"
26 | )
27 |
// Default values of the command line flags declared in this file.
const (
	// azureEndpoint is the default scheduled-events URL.
	azureEndpoint = "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01"
	// defaultAlertMessage is the default alert template.
	defaultAlertMessage = "Draining node={{ .NodeName }}, type={{ .Event.EventType }}"

	// durations
	defaultPeriod         = 5 * time.Second
	defaultRequestTimeout = 5 * time.Second
	defaultWebHookTimeout = 30 * time.Second

	// values expressed in seconds
	defaultPodGracePeriodSeconds  = -1
	defaultNodeGracePeriodSeconds = 120
	defaultGracePeriodSecond      = 10
)
38 |
// Exported message texts.
const (
	// EventMessageBeforeListen is the text for the start of listening.
	EventMessageBeforeListen = "Start to listen events from Azure API"
	// EventMessageReceived is the text for a scheduled event of this node.
	EventMessageReceived = "Azure API sended schedule event for this node"
)
43 |
44 | var (
45 | errNoNode = errors.New("no node name is defined, run with -node=test")
46 | errChatIDMustBeInt = errors.New("TelegramChatID must be integer")
47 | errInvalidTaintEffect = errors.New("TaintEffect must be either NoSchedule, NoExecute or PreferNoSchedule")
48 | )
49 |
// Type is the application configuration.
//
// Every field is a pointer because the package level value is filled straight
// from the results of the flag package. Field order is kept stable: it is the
// order in which String serialises the fields.
type Type struct {
	// general
	ConfigFile      *string
	LogPretty       *bool
	LogLevel        *string
	DevelopmentMode *bool
	KubeConfigFile  *string

	// scheduled-events polling
	Endpoint       *string
	NodeName       *string
	Period         *time.Duration
	RequestTimeout *time.Duration

	// alerting
	TelegramToken  *string
	TelegramChatID *string
	AlertMessage   *string

	// webhook
	WebHookContentType  *string
	WebHookURL          *string
	WebHookTemplate     *string
	WebHookTemplateFile *string
	WebHookMethod       *string
	WebHookTimeout      *time.Duration
	WebhookInsecure     *bool
	WebhookProxy        *string
	WebhookRetries      *int

	SentryDSN      *string
	WebHTTPAddress *string

	// tainting and draining
	TaintNode              *bool
	TaintEffect            *string
	PodGracePeriodSeconds  *int
	NodeGracePeriodSeconds *int
	GracePeriodSeconds     *int
	DrainOnFreezeEvent     *bool
	ResourceName           *string
	ExitAfterNodeDrain     *bool
}
83 |
84 | var config = Type{
85 | ConfigFile: flag.String("config", os.Getenv("CONFIG"), "config file"),
86 | LogLevel: flag.String("log.level", "INFO", "log level"),
87 | LogPretty: flag.Bool("log.pretty", false, "log in text"),
88 | KubeConfigFile: flag.String("kubeconfig", "", "kubeconfig file"),
89 | Endpoint: flag.String("endpoint", azureEndpoint, "scheduled-events endpoint"),
90 | NodeName: flag.String("node", os.Getenv("MY_NODE_NAME"), "node to drain"),
91 | Period: flag.Duration("period", defaultPeriod, "period to scrape endpoint"),
92 | RequestTimeout: flag.Duration("request.timeout", defaultRequestTimeout, "request timeout"),
93 | TelegramToken: flag.String("telegram.token", os.Getenv("TELEGRAM_TOKEN"), "telegram token"),
94 | TelegramChatID: flag.String("telegram.chatID", os.Getenv("TELEGRAM_CHATID"), "telegram chatID"),
95 | AlertMessage: flag.String("alert.message", defaultAlertMessage, "default message"),
96 | WebHookMethod: flag.String("webhook.method", "POST", "request method"),
97 | WebHookContentType: flag.String("webhook.contentType", "application/json", "request content-type header"),
98 | WebHookURL: flag.String("webhook.url", os.Getenv("WEBHOOK_URL"), "send alerts to webhook"),
99 | WebHookTimeout: flag.Duration("webhook.timeout", defaultWebHookTimeout, "request timeout"),
100 | WebHookTemplate: flag.String("webhook.template", os.Getenv("WEBHOOK_TEMPLATE"), "request body"),
101 | WebHookTemplateFile: flag.String("webhook.template-file", os.Getenv("WEBHOOK_TEMPLATE_FILE"), "path to request body template file"),
102 | WebhookInsecure: flag.Bool("webhook.insecureSkip", true, "skip tls verification for webhook"),
103 | WebhookProxy: flag.String("webhook.http-proxy", os.Getenv("WEBHOOK_HTTP_PROXY"), "use http proxy for webhook"),
104 | WebhookRetries: flag.Int("webhook.retries", 3, "number of retries for webhook"), //nolint:mnd
105 | SentryDSN: flag.String("sentry.dsn", "", "sentry DSN"),
106 | WebHTTPAddress: flag.String("web.address", ":17923", ""),
107 | TaintNode: flag.Bool("taint.node", false, "Taint the node before cordon and draining"),
108 | TaintEffect: flag.String("taint.effect", "NoSchedule", "Taint effect to set on the node"),
109 | PodGracePeriodSeconds: flag.Int("podGracePeriodSeconds", defaultPodGracePeriodSeconds, "grace period is seconds for pods termination"),
110 | NodeGracePeriodSeconds: flag.Int("nodeGracePeriodSeconds", defaultNodeGracePeriodSeconds, "maximum time in seconds to drain the node"),
111 | GracePeriodSeconds: flag.Int("gracePeriodSeconds", defaultGracePeriodSecond, "grace period is seconds for application termination"),
112 | DrainOnFreezeEvent: flag.Bool("drainOnFreezeEvent", false, "drain node on freeze event"),
113 | ResourceName: flag.String("resource.name", "", "Azure resource name to drain"),
114 | ExitAfterNodeDrain: flag.Bool("exitAfterNodeDrain", false, "process will exit after node drain"),
115 | }
116 |
117 | func (t *Type) GracePeriod() time.Duration {
118 | return time.Duration(*t.GracePeriodSeconds) * time.Second
119 | }
120 |
121 | func (t *Type) NodeGracePeriod() time.Duration {
122 | return time.Duration(*t.NodeGracePeriodSeconds) * time.Second
123 | }
124 |
125 | // check is event is excluded from draining node.
126 | func (t *Type) IsExcludedEvent(e types.ScheduledEventsEventType) bool {
127 | if e == types.EventTypeFreeze && !*t.DrainOnFreezeEvent {
128 | return true
129 | }
130 |
131 | return false
132 | }
133 |
134 | func (t *Type) String() string {
135 | b, err := json.Marshal(t)
136 | if err != nil {
137 | return err.Error()
138 | }
139 |
140 | return string(b)
141 | }
142 |
143 | func Check() error {
144 | if len(*config.NodeName) == 0 {
145 | return errNoNode
146 | }
147 |
148 | if len(*config.TelegramChatID) > 0 {
149 | if _, err := strconv.Atoi(*config.TelegramChatID); err != nil {
150 | return errChatIDMustBeInt
151 | }
152 | }
153 |
154 | taintEffect := *config.TaintEffect
155 | if taintEffect != string(corev1.TaintEffectNoSchedule) &&
156 | taintEffect != string(corev1.TaintEffectNoExecute) &&
157 | taintEffect != string(corev1.TaintEffectPreferNoSchedule) {
158 | return errInvalidTaintEffect
159 | }
160 |
161 | return nil
162 | }
163 |
164 | func Get() *Type {
165 | return &config
166 | }
167 |
168 | func Set(specifiedConfig Type) {
169 | config = specifiedConfig
170 | }
171 |
172 | func Load() error {
173 | if len(*config.ConfigFile) == 0 {
174 | return nil
175 | }
176 |
177 | configByte, err := os.ReadFile(*config.ConfigFile)
178 | if err != nil {
179 | return errors.Wrap(err, "error in os.ReadFile")
180 | }
181 |
182 | err = yaml.Unmarshal(configByte, &config)
183 | if err != nil {
184 | return errors.Wrap(err, "error in yaml.Unmarshal")
185 | }
186 |
187 | return nil
188 | }
189 |
// gitVersion is the version string reported by GetVersion; it is "dev" unless
// something assigns another value.
var gitVersion = "dev"

// GetVersion returns the current value of gitVersion.
func GetVersion() string {
	version := gitVersion

	return version
}
195 |
--------------------------------------------------------------------------------
/pkg/config/config_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package config_test
14 |
15 | import (
16 | "testing"
17 | "time"
18 |
19 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
21 | "github.com/stretchr/testify/assert"
22 | "github.com/stretchr/testify/require"
23 | )
24 |
25 | //nolint:paralleltest
26 | func TestConfigDefaults(t *testing.T) {
27 | if err := config.Load(); err != nil {
28 | t.Fatal(err)
29 | }
30 |
31 | assert.Equal(t, "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01", *config.Get().Endpoint)
32 | }
33 |
34 | //nolint:paralleltest
35 | func TestValidConfigFile(t *testing.T) {
36 | configFile := "./testdata/config_test.yaml"
37 | testPeriod := 123
38 |
39 | newConfig := config.Type{
40 | ConfigFile: &configFile,
41 | GracePeriodSeconds: &testPeriod,
42 | NodeGracePeriodSeconds: &testPeriod,
43 | }
44 | config.Set(newConfig)
45 |
46 | err := config.Load()
47 | require.NoError(t, err)
48 |
49 | assert.Equal(t, "/some/test/path", *config.Get().KubeConfigFile)
50 | assert.Equal(t, time.Duration(testPeriod)*time.Second, config.Get().GracePeriod())
51 | assert.Equal(t, time.Duration(testPeriod)*time.Second, config.Get().NodeGracePeriod())
52 | assert.Contains(t, config.Get().String(), "123")
53 | }
54 |
55 | //nolint:paralleltest
56 | func TestInvalidConfigFile(t *testing.T) {
57 | configFile := "testdata/config_yaml_fake.yaml"
58 | newConfig := config.Type{ConfigFile: &configFile}
59 | config.Set(newConfig)
60 |
61 | err := config.Load()
62 | require.Error(t, err)
63 | }
64 |
65 | //nolint:paralleltest
66 | func TestNotFoundConfigFile(t *testing.T) {
67 | configFile := "testdata/fake.yaml"
68 | newConfig := config.Type{ConfigFile: &configFile}
69 | config.Set(newConfig)
70 |
71 | err := config.Load()
72 | require.Error(t, err)
73 | }
74 |
75 | //nolint:paralleltest
76 | func TestVersion(t *testing.T) {
77 | if config.GetVersion() != "dev" {
78 | t.Fatal("version is not dev")
79 | }
80 | }
81 |
82 | //nolint:paralleltest,funlen
83 | func TestConfig(t *testing.T) {
84 | testCases := []struct {
85 | taintEffect string
86 | nodeName string
87 | telegramID string
88 | err bool
89 | testName string
90 | }{
91 | {
92 | testName: "noSchedule",
93 | taintEffect: "NoSchedule",
94 | telegramID: "1",
95 | nodeName: "validNode",
96 | err: false,
97 | },
98 | {
99 | testName: "noExecute",
100 | taintEffect: "NoExecute",
101 | nodeName: "validNode",
102 | telegramID: "1",
103 | err: false,
104 | },
105 | {
106 | testName: "preferNoSchedule",
107 | taintEffect: "PreferNoSchedule",
108 | nodeName: "validNode",
109 | telegramID: "1",
110 | err: false,
111 | },
112 | {
113 | testName: "invalidNodeName",
114 | taintEffect: "NoSchedule",
115 | nodeName: "",
116 | telegramID: "1",
117 | err: true,
118 | },
119 | {
120 | testName: "InvalidTelegramId",
121 | taintEffect: "NoSchedule",
122 | nodeName: "validNode",
123 | telegramID: "invalidTelegramId",
124 | err: true,
125 | },
126 | {
127 | testName: "InvalidNodeName",
128 | taintEffect: "NoSchedule",
129 | nodeName: "",
130 | telegramID: "1",
131 | err: true,
132 | },
133 | {
134 | testName: "InvalidTaintEffect",
135 | taintEffect: "InvalidTaintEffect",
136 | nodeName: "validNode",
137 | telegramID: "1",
138 | err: true,
139 | },
140 | }
141 |
142 | for i := range testCases {
143 | t.Run(testCases[i].testName, func(t *testing.T) {
144 | newConfig := config.Type{
145 | TaintEffect: &testCases[i].taintEffect,
146 | NodeName: &testCases[i].nodeName,
147 | TelegramChatID: &testCases[i].telegramID,
148 | }
149 | config.Set(newConfig)
150 | err := config.Check()
151 |
152 | if testCases[i].err {
153 | require.Error(t, err)
154 | } else {
155 | require.NoError(t, err)
156 | }
157 | })
158 | }
159 | }
160 |
161 | func TestIsExcludedEvent(t *testing.T) {
162 | t.Parallel()
163 |
164 | trueValue := true
165 | falseValue := false
166 |
167 | testConfigValid := config.Type{
168 | DrainOnFreezeEvent: &falseValue,
169 | }
170 |
171 | // test DrainOnFreezeEvent logic
172 | testConfigValid.DrainOnFreezeEvent = &falseValue
173 | if b := testConfigValid.IsExcludedEvent(types.EventTypeFreeze); b != true {
174 | t.Fatal("when DrainOnFreezeEvent is false, IsExcludedEvent must be true")
175 | }
176 |
177 | testConfigValid.DrainOnFreezeEvent = &trueValue
178 | if b := testConfigValid.IsExcludedEvent(types.EventTypeFreeze); b == true {
179 | t.Fatal("when DrainOnFreezeEvent is true, IsExcludedEvent must be false")
180 | }
181 | }
182 |
--------------------------------------------------------------------------------
/pkg/config/testdata/config_test.yaml:
--------------------------------------------------------------------------------
1 | kubeconfigfile: /some/test/path
--------------------------------------------------------------------------------
/pkg/config/testdata/config_yaml_fake.yaml:
--------------------------------------------------------------------------------
1 | key: value
2 | - test
--------------------------------------------------------------------------------
/pkg/events/events.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package events
14 |
15 | import (
16 | "context"
17 | "encoding/json"
18 | "io"
19 | "net/http"
20 | "time"
21 |
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/cache"
23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils"
26 | "github.com/pkg/errors"
27 | log "github.com/sirupsen/logrus"
28 | )
29 |
// Defaults used by NewReader and ReadEndpoint.
const (
	// requestTimeout is the default Reader.RequestTimeout.
	requestTimeout = 10 * time.Second
	// readInterval is the default Reader.Period.
	readInterval = 5 * time.Second
	// eventCacheTTL is how long a processed event ID stays in the cache.
	eventCacheTTL = 10 * time.Minute
)
35 |
36 | var httpClient = &http.Client{
37 | Transport: metrics.NewInstrumenter("events").InstrumentedRoundTripper(),
38 | }
39 |
40 | type Reader struct {
41 | // method of making request
42 | Method string
43 | // endpoint to read events
44 | Endpoint string
45 | // timeout of making request
46 | RequestTimeout time.Duration
47 | // intervals of reading events
48 | Period time.Duration
49 | // name of the node
50 | NodeName string
51 | // name of the resource to watch
52 | AzureResource string
53 | // BeforeReading is a function that will be called before reading events
54 | BeforeReading func(ctx context.Context) error `json:"-"`
55 | // EventReceived is a function that will be called when event received
56 | // return true if you want to stop reading events
57 | EventReceived func(ctx context.Context, event types.ScheduledEventsEvent) (bool, error) `json:"-"`
58 | }
59 |
60 | func NewReader() *Reader {
61 | return &Reader{
62 | Method: http.MethodGet,
63 | Endpoint: "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01",
64 | RequestTimeout: requestTimeout,
65 | Period: readInterval,
66 | }
67 | }
68 |
69 | func (r *Reader) ReadEvents(ctx context.Context) {
70 | log.Infof("Start reading events %s", r.String())
71 |
72 | if r.BeforeReading != nil {
73 | if err := r.BeforeReading(ctx); err != nil {
74 | log.WithError(err).Error("Error in BeforeReading")
75 | }
76 | }
77 |
78 | for ctx.Err() == nil {
79 | stopReadingEvents, err := r.ReadEndpoint(ctx)
80 | if err != nil {
81 | metrics.ErrorReadingEndpoint.WithLabelValues(r.getMetricsLabels()...).Inc()
82 |
83 | log.WithError(err).Error()
84 | }
85 |
86 | if stopReadingEvents {
87 | log.Info("Stop reading events")
88 |
89 | return
90 | }
91 |
92 | utils.SleepWithContext(ctx, r.Period)
93 | }
94 | }
95 |
96 | func (r *Reader) getScheduledEvents(ctx context.Context) (*types.ScheduledEventsType, error) {
97 | ctx, cancel := context.WithTimeout(ctx, r.RequestTimeout)
98 | defer cancel()
99 |
100 | req, err := http.NewRequestWithContext(ctx, r.Method, r.Endpoint, nil)
101 | if err != nil {
102 | return nil, errors.Wrap(err, "error in http.NewRequestWithContext")
103 | }
104 |
105 | req.Header.Add("Metadata", "true")
106 |
107 | log.WithFields(log.Fields{
108 | "method": req.Method,
109 | "url": req.URL,
110 | "headers": req.Header,
111 | }).Debug("Doing request")
112 |
113 | resp, err := httpClient.Do(req)
114 | if err != nil {
115 | return nil, errors.Wrap(err, "error in client.Do(req)")
116 | }
117 |
118 | defer resp.Body.Close()
119 |
120 | log.Debugf("response status: %s", resp.Status)
121 |
122 | body, err := io.ReadAll(resp.Body)
123 | if err != nil {
124 | return nil, errors.Wrap(err, "error in io.ReadAll")
125 | }
126 |
127 | log.Debugf("response body: %s", string(body))
128 |
129 | if len(body) == 0 {
130 | log.Warn("Events response is empty")
131 |
132 | return &types.ScheduledEventsType{}, nil
133 | }
134 |
135 | message := types.ScheduledEventsType{}
136 |
137 | if err := json.Unmarshal(body, &message); err != nil {
138 | return nil, errors.Wrap(err, "error in json.Unmarshal")
139 | }
140 |
141 | return &message, nil
142 | }
143 |
144 | // ReadEndpoint fetches the scheduled events document once and hands the first event that
145 | // lists r.AzureResource and is not yet in the cache to r.EventReceived, returning that
146 | // callback's result. It returns (false, nil) when no event was passed to the callback.
147 | func (r *Reader) ReadEndpoint(ctx context.Context) (bool, error) {
148 | 	document, err := r.getScheduledEvents(ctx)
149 | 	if err != nil {
150 | 		return false, errors.Wrap(err, "error in getScheduledEvents")
151 | 	}
152 |
153 | 	for _, event := range document.Events {
154 | 		for _, affected := range event.Resources {
155 | 			if affected != r.AzureResource {
156 | 				continue
157 | 			}
158 |
159 | 			log.Infof("%+v", document)
160 | 			if cache.HasKey(event.EventId) {
161 | 				log.Infof("Event %s already processed", event.EventId)
162 | 				continue
163 | 			}
164 |
165 | 			// remember the id for eventCacheTTL so that later polls skip this event
166 | 			cache.Add(event.EventId, eventCacheTTL)
167 | 			metrics.ScheduledEventsTotal.WithLabelValues(append(r.getMetricsLabels(), string(event.EventType))...).Inc()
168 | 			if r.EventReceived != nil {
169 | 				return r.EventReceived(ctx, event)
170 | 			}
171 | 		}
172 | 	}
173 | 	return false, nil
174 | }
175 |
176 | // getMetricsLabels returns the label values shared by this reader's metrics, in the
177 | // order node name, Azure resource. A new slice is built on every call, so callers may
178 | // append to the result.
179 | func (r *Reader) getMetricsLabels() []string {
180 | 	return []string{r.NodeName, r.AzureResource}
181 | }
182 |
183 | // String renders the reader as JSON; the marshalling error is deliberately ignored.
184 | func (r *Reader) String() string {
185 | 	encoded, _ := json.Marshal(r) //nolint:errchkjson
186 | 	return string(encoded)
187 | }
188 |
--------------------------------------------------------------------------------
/pkg/events/events_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package events_test
14 |
15 | import (
16 | "context"
17 | "encoding/json"
18 | "errors"
19 | "net/http"
20 | "net/http/httptest"
21 | "testing"
22 | "time"
23 |
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/events"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
26 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils"
27 | log "github.com/sirupsen/logrus"
28 | )
29 |
30 | // TestReadingEvents drives Reader.ReadEndpoint and Reader.ReadEvents against a local
31 | // httptest server that serves malformed, empty, truncated, slow and valid responses.
32 | func TestReadingEvents(t *testing.T) { //nolint:funlen
33 | 	t.Parallel()
34 | 	log.SetLevel(log.DebugLevel)
35 | 	ctx := context.TODO()
36 |
37 | 	handler := http.NewServeMux()
38 | 	handler.HandleFunc("/badjson", func(w http.ResponseWriter, _ *http.Request) {
39 | 		w.WriteHeader(http.StatusOK)
40 | 		_, _ = w.Write([]byte(`!!!{"DocumentIncarnation":1,"Events":[]}`))
41 | 	})
42 | 	handler.HandleFunc("/emptyjson", func(w http.ResponseWriter, _ *http.Request) {
43 | 		w.WriteHeader(http.StatusOK)
44 | 		_, _ = w.Write([]byte(``))
45 | 	})
46 | 	handler.HandleFunc("/incorrectcontentlen", func(w http.ResponseWriter, _ *http.Request) {
47 | 		// announce more bytes than are written so that reading the body fails on the client
48 | 		w.Header().Add("Content-Length", "50")
49 | 		_, _ = w.Write([]byte("a"))
50 | 	})
51 | 	handler.HandleFunc("/timeout", func(w http.ResponseWriter, r *http.Request) {
52 | 		utils.SleepWithContext(r.Context(), 5*time.Second)
53 | 		w.WriteHeader(http.StatusOK)
54 | 		_, _ = w.Write([]byte(``))
55 | 	})
56 | 	handler.HandleFunc("/document", func(w http.ResponseWriter, _ *http.Request) {
57 | 		message, _ := json.Marshal(types.ScheduledEventsType{
58 | 			DocumentIncarnation: 1,
59 | 			Events: []types.ScheduledEventsEvent{
60 | 				{
61 | 					EventId:      time.Now().String(),
62 | 					EventType:    types.EventTypeFreeze,
63 | 					ResourceType: "resourceType",
64 | 					Resources:    []string{"resource1", "resource2"},
65 | 				},
66 | 			},
67 | 		})
68 | 		w.WriteHeader(http.StatusOK)
69 | 		_, _ = w.Write(message)
70 | 	})
71 |
72 | 	testServer := httptest.NewServer(handler)
73 | 	t.Cleanup(testServer.Close) // not defer: the parallel subtests run after this function returns
74 |
75 | 	t.Run("badjson", func(t *testing.T) {
76 | 		t.Parallel()
77 |
78 | 		eventReader := events.NewReader()
79 | 		eventReader.Endpoint = testServer.URL + "/badjson"
80 |
81 | 		if _, err := eventReader.ReadEndpoint(ctx); err == nil {
82 | 			t.Error("expected error")
83 | 		}
84 | 	})
85 |
86 | 	t.Run("badhttp", func(t *testing.T) {
87 | 		t.Parallel()
88 |
89 | 		eventReader := events.NewReader()
90 | 		eventReader.Method = "bad method"
91 | 		eventReader.Endpoint = "fake://fake"
92 |
93 | 		if _, err := eventReader.ReadEndpoint(ctx); err == nil {
94 | 			t.Error("expected error")
95 | 		}
96 |
97 | 		ctx, cancel := context.WithTimeout(ctx, 1*time.Second)
98 | 		defer cancel()
99 |
100 | 		eventReader.ReadEvents(ctx)
101 | 	})
102 |
103 | 	t.Run("badhttpcontent", func(t *testing.T) {
104 | 		t.Parallel()
105 |
106 | 		eventReader := events.NewReader()
107 | 		eventReader.Endpoint = testServer.URL + "/incorrectcontentlen"
108 |
109 | 		if _, err := eventReader.ReadEndpoint(ctx); err == nil {
110 | 			t.Error("expected error")
111 | 		}
112 | 	})
113 |
114 | 	t.Run("emptyjson", func(t *testing.T) {
115 | 		t.Parallel()
116 |
117 | 		eventReader := events.NewReader()
118 | 		eventReader.Endpoint = testServer.URL + "/emptyjson"
119 |
120 | 		if _, err := eventReader.ReadEndpoint(ctx); err != nil {
121 | 			t.Error(err)
122 | 		}
123 | 	})
124 |
125 | 	t.Run("timeout", func(t *testing.T) {
126 | 		t.Parallel()
127 |
128 | 		eventReader := events.NewReader()
129 | 		eventReader.Endpoint = testServer.URL + "/timeout"
130 | 		eventReader.RequestTimeout = 1 * time.Second
131 |
132 | 		if _, err := eventReader.ReadEndpoint(ctx); !errors.Is(err, context.DeadlineExceeded) {
133 | 			t.Error(err)
134 | 		}
135 | 	})
136 |
137 | 	t.Run("document", func(t *testing.T) {
138 | 		t.Parallel()
139 |
140 | 		receivedDocument := types.ScheduledEventsEvent{}
141 |
142 | 		eventReader := events.NewReader()
143 | 		eventReader.Endpoint = testServer.URL + "/document"
144 | 		eventReader.AzureResource = "resource1"
145 | 		eventReader.BeforeReading = func(_ context.Context) error {
146 | 			return errors.New("error in BeforeReading") //nolint:goerr113
147 | 		}
148 | 		eventReader.EventReceived = func(_ context.Context, event types.ScheduledEventsEvent) (bool, error) {
149 | 			receivedDocument = event
150 |
151 | 			return true, nil
152 | 		}
153 |
154 | 		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
155 | 		defer cancel()
156 |
157 | 		eventReader.ReadEvents(ctx)
158 |
159 | 		t.Logf("%+v", receivedDocument)
160 |
161 | 		if receivedDocument.EventId == "" {
162 | 			t.Error("unexpected event id")
163 | 		}
164 | 	})
165 | }
166 |
--------------------------------------------------------------------------------
/pkg/logger/logger.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package logger
14 |
15 | // KubectlLogger is a writer that forwards everything written to it to the Log callback.
16 | type KubectlLogger struct{ Log func(string) }
17 |
18 | // Write hands string(p) to Log if it is set; it always returns (0, nil), never len(p).
19 | func (b *KubectlLogger) Write(p []byte) (int, error) {
20 | 	if b.Log == nil {
21 | 		return 0, nil
22 | 	}
23 | 	b.Log(string(p))
24 | 	return 0, nil
25 | }
26 |
--------------------------------------------------------------------------------
/pkg/logger/logger_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package logger_test
14 |
15 | import (
16 | "testing"
17 |
18 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/logger"
19 | )
20 |
21 | func TestKubectlLogger(t *testing.T) {
22 | t.Parallel()
23 |
24 | logger := logger.KubectlLogger{}
25 |
26 | logText := ""
27 |
28 | logger.Log = func(message string) {
29 | logText = message
30 | }
31 |
32 | i, err := logger.Write([]byte("test"))
33 | if err != nil {
34 | t.Fatal(err)
35 | }
36 |
37 | if i != 0 {
38 | t.Fatalf("expected: %d, got: %d", 0, i)
39 | }
40 |
41 | if logText != "test" {
42 | t.Fatalf("expected: %s, got: %s", "test", logText)
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/pkg/metrics/metrics.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package metrics
14 |
15 | import (
16 | "context"
17 | "crypto/tls"
18 | "fmt"
19 | "net/http"
20 | "net/url"
21 | "strconv"
22 | "strings"
23 | "time"
24 |
25 | "github.com/pkg/errors"
26 | "github.com/prometheus/client_golang/prometheus"
27 | "github.com/prometheus/client_golang/prometheus/promauto"
28 | "github.com/prometheus/client_golang/prometheus/promhttp"
29 | log "github.com/sirupsen/logrus"
30 | )
31 |
32 | const namespace = "aks_node_termination_handler" // Namespace value of every metric defined in this file
33 |
34 | type Instrumenter struct { // settings from which InstrumentedRoundTripper builds its transport and metrics
35 | subsystemIdentifier string // inserted into metric names and help texts
36 | insecureSkipVerify bool // copied into the transport's tls.Config.InsecureSkipVerify
37 | proxyURL *url.URL // when non-nil, used as the transport's proxy
38 | }
39 |
40 | // NewInstrumenter creates an Instrumenter for the given subsystem. The identifier is
41 | // embedded in the metric names, e.g. http_<subsystemIdentifier>_requests_total.
42 | func NewInstrumenter(subsystemIdentifier string) *Instrumenter {
43 | 	instrumenter := new(Instrumenter)
44 | 	instrumenter.subsystemIdentifier = subsystemIdentifier
45 | 	return instrumenter
46 | }
47 |
48 | // WithProxy sets the proxy used by the round tripper. An empty proxyURL is a no-op; a
49 | // URL that fails to parse is logged and leaves the current setting untouched.
50 | func (i *Instrumenter) WithProxy(proxyURL string) *Instrumenter {
51 | 	if proxyURL == "" {
52 | 		return i
53 | 	}
54 | 	parsed, parseErr := url.Parse(proxyURL)
55 | 	if parseErr != nil {
56 | 		log.WithError(parseErr).Errorf("error parsing proxy url %s for %s", proxyURL, i.subsystemIdentifier)
57 | 		return i
58 | 	}
59 | 	i.proxyURL = parsed
60 | 	return i
61 | }
62 |
63 | // WithInsecureSkipVerify stores the InsecureSkipVerify value for the transport's TLS config.
64 | func (i *Instrumenter) WithInsecureSkipVerify(insecure bool) *Instrumenter {
65 | 	i.insecureSkipVerify = insecure
66 | 	return i
67 | }
68 |
69 | // InstrumentedRoundTripper returns an http.RoundTripper that sends requests through a
70 | // transport built from the configured TLS and proxy settings and records three
71 | // Prometheus instruments named after the subsystem: an in-flight gauge, a request
72 | // counter (code, method, endpoint) and a latency histogram (method). The collectors
73 | // are created through promauto on every call.
74 | func (i *Instrumenter) InstrumentedRoundTripper() http.RoundTripper {
75 | 	subsystem := i.subsystemIdentifier
76 |
77 | 	inFlight := promauto.NewGauge(prometheus.GaugeOpts{
78 | 		Namespace: namespace,
79 | 		Name:      fmt.Sprintf("http_%s_in_flight_requests", subsystem),
80 | 		Help:      fmt.Sprintf("A gauge of in-flight requests to the http %s.", subsystem),
81 | 	})
82 |
83 | 	requests := promauto.NewCounterVec(prometheus.CounterOpts{
84 | 		Namespace: namespace,
85 | 		Name:      fmt.Sprintf("http_%s_requests_total", subsystem),
86 | 		Help:      fmt.Sprintf("A counter for requests to the http %s per endpoint.", subsystem),
87 | 	}, []string{"code", "method", "endpoint"})
88 |
89 | 	latency := promauto.NewHistogramVec(prometheus.HistogramOpts{
90 | 		Namespace: namespace,
91 | 		Name:      fmt.Sprintf("http_%s_request_duration_seconds", subsystem),
92 | 		Help:      fmt.Sprintf("A histogram of request latencies to the http %s .", subsystem),
93 | 		Buckets:   prometheus.DefBuckets,
94 | 	}, []string{"method"})
95 |
96 | 	transport := &http.Transport{
97 | 		TLSClientConfig: &tls.Config{
98 | 			InsecureSkipVerify: i.insecureSkipVerify, //nolint:gosec
99 | 		},
100 | 	}
101 |
102 | 	if i.proxyURL != nil {
103 | 		log.Infof("using http_proxy %s for %s", i.proxyURL.String(), subsystem)
104 |
105 | 		transport.Proxy = http.ProxyURL(i.proxyURL)
106 | 	}
107 |
108 | 	// wrap from the inside out: endpoint counter, then latency histogram, then in-flight gauge
109 | 	counted := i.instrumentRoundTripperEndpoint(requests, transport)
110 | 	timed := promhttp.InstrumentRoundTripperDuration(latency, counted)
111 |
112 | 	return promhttp.InstrumentRoundTripperInFlight(inFlight, timed)
113 | }
114 |
115 | // instrumentRoundTripperEndpoint counts each successful round trip of next by status code, lower-cased method and URL path.
116 | func (i *Instrumenter) instrumentRoundTripperEndpoint(counter *prometheus.CounterVec, next http.RoundTripper) promhttp.RoundTripperFunc {
117 | 	return func(r *http.Request) (*http.Response, error) {
118 | 		resp, err := next.RoundTrip(r)
119 | 		if err != nil {
120 | 			return resp, errors.Wrap(err, "error making roundtrip")
121 | 		}
122 | 		counter.WithLabelValues(strconv.Itoa(resp.StatusCode), strings.ToLower(resp.Request.Method), resp.Request.URL.Path).Inc()
123 | 		return resp, nil
124 | 	}
125 | }
126 |
127 | // ErrorReadingEndpoint counts failed reads of the scheduled events endpoint.
128 | // Label values are the node name and the Azure resource, in that order
129 | // (see getMetricsLabels in pkg/events).
130 | var ErrorReadingEndpoint = promauto.NewCounterVec(prometheus.CounterOpts{
131 | 	Namespace: namespace,
132 | 	Name:      "error_reading_endpoint_total",
133 | 	Help:      "A counter for errored reading endpoint",
134 | }, []string{"node", "resource"})
135 |
136 | // ScheduledEventsTotal counts the events that ReadEndpoint in pkg/events matched to
137 | // its Azure resource and had not cached yet. Label values are the node name, the
138 | // Azure resource and the event type, in that order.
139 | var ScheduledEventsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
140 | 	Namespace: namespace,
141 | 	Name:      "scheduled_events_total",
142 | 	Help:      "Scheduled Events from Azure",
143 | }, []string{"node", "resource", "type"})
144 |
145 | // KubernetesAPIRequest counts Kubernetes API requests by host ("cluster" label) and response code.
146 | var KubernetesAPIRequest = promauto.NewCounterVec(prometheus.CounterOpts{
147 | 	Namespace: namespace, Name: "apiserver_request_total",
148 | 	Help: "The total number of Kubernetes API requests",
149 | }, []string{"cluster", "code"})
150 |
151 | // KubernetesAPIRequestDuration observes Kubernetes API request latency in seconds by host ("cluster" label).
152 | var KubernetesAPIRequestDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
153 | 	Namespace: namespace, Name: "apiserver_request_duration",
154 | 	Help: "The duration in seconds of Kubernetes API requests",
155 | }, []string{"cluster"})
156 |
157 | // GetHandler returns promhttp.Handler(), the HTTP handler through which this
158 | // package's metrics are scraped.
159 | func GetHandler() http.Handler { return promhttp.Handler() }
160 |
161 | // KubernetesMetricsResult feeds request results into KubernetesAPIRequest; Increment does not read Cluster.
162 | type KubernetesMetricsResult struct{ Cluster string }
163 |
164 | // Increment adds one to KubernetesAPIRequest for the given host and response code.
165 | func (r *KubernetesMetricsResult) Increment(_ context.Context, code string, _ string, host string) {
166 | 	KubernetesAPIRequest.WithLabelValues(host, code).Inc()
167 | }
168 |
169 | // KubernetesMetricsLatency feeds request latencies into KubernetesAPIRequestDuration; Observe does not read Cluster.
170 | type KubernetesMetricsLatency struct{ Cluster string }
171 |
172 | // Observe records latency, in seconds, under the host of u.
173 | func (r *KubernetesMetricsLatency) Observe(_ context.Context, _ string, u url.URL, latency time.Duration) {
174 | 	KubernetesAPIRequestDuration.WithLabelValues(u.Host).Observe(latency.Seconds())
175 | }
176 |
--------------------------------------------------------------------------------
/pkg/metrics/metrics_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package metrics_test
14 |
15 | import (
16 | "context"
17 | "io"
18 | "net/http"
19 | "net/http/httptest"
20 | "strings"
21 | "testing"
22 | "time"
23 |
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
25 | )
26 |
27 | var ( // fixtures shared by every test in this file
28 | client = &http.Client{} // used by TestMetricsHandler to scrape ts
29 | ts = httptest.NewServer(metrics.GetHandler()) // serves the metrics handler; not closed by any test in this file
30 | ctx = context.TODO()
31 | )
32 |
33 | // TestMetricsInc records one sample on each Kubernetes API collector; it passes if neither call panics.
34 | func TestMetricsInc(t *testing.T) {
35 | 	t.Parallel()
36 | 	metrics.KubernetesAPIRequest.WithLabelValues("test", "200").Inc()
37 | 	metrics.KubernetesAPIRequestDuration.WithLabelValues("test").Observe(1)
38 | }
39 |
40 | // TestMetricsHandler scrapes the handler behind ts and expects the request-duration metric.
41 | func TestMetricsHandler(t *testing.T) {
42 | 	t.Parallel()
43 |
44 | 	// record a sample here so the metric exists at scrape time without relying on other tests
45 | 	metrics.KubernetesAPIRequestDuration.WithLabelValues("test").Observe(1)
46 |
47 | 	req, err := http.NewRequestWithContext(ctx, http.MethodGet, ts.URL, nil)
48 | 	if err != nil {
49 | 		t.Fatal(err)
50 | 	}
51 |
52 | 	resp, err := client.Do(req)
53 | 	if err != nil {
54 | 		t.Fatal(err)
55 | 	}
56 | 	defer resp.Body.Close() // registered before ReadAll so the body is closed on a read error too
57 |
58 | 	body, err := io.ReadAll(resp.Body)
59 | 	if err != nil {
60 | 		t.Fatal(err)
61 | 	}
62 |
63 | 	if m := "aks_node_termination_handler_apiserver_request_duration"; !strings.Contains(string(body), m) {
64 | 		t.Fatalf("no metric %s found", m)
65 | 	}
66 | }
66 |
67 | // TestKubernetesMetrics calls Increment and Observe once each; it passes if neither panics.
68 | func TestKubernetesMetrics(t *testing.T) {
69 | 	t.Parallel()
70 |
71 | 	target := httptest.NewRequest(http.MethodGet, ts.URL, nil).URL
72 |
73 | 	(&metrics.KubernetesMetricsResult{}).Increment(ctx, "200", "test", "test")
74 | 	(&metrics.KubernetesMetricsLatency{}).Observe(ctx, "test", *target, time.Second)
75 | }
76 |
77 | // TestInstrumenter sends one request to ts through a default instrumented round tripper.
78 | func TestInstrumenter(t *testing.T) {
79 | 	t.Parallel()
80 |
81 | 	roundTripper := metrics.NewInstrumenter("test").InstrumentedRoundTripper()
82 | 	resp, err := roundTripper.RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
83 | 	if err != nil {
84 | 		t.Fatal(err)
85 | 	}
86 | 	defer resp.Body.Close()
87 | }
88 |
89 | // TestInstrumenterWithEmptyProxy checks that an empty proxy URL leaves the round tripper usable.
90 | func TestInstrumenterWithEmptyProxy(t *testing.T) {
91 | 	t.Parallel()
92 |
93 | 	roundTripper := metrics.NewInstrumenter("TestInstrumenterWithEmptyProxy").WithProxy("").InstrumentedRoundTripper()
94 | 	resp, err := roundTripper.RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
95 | 	if err != nil {
96 | 		t.Fatal(err)
97 | 	}
98 | 	defer resp.Body.Close()
99 | }
100 |
101 | // TestInstrumenterProxy sends one request with ts itself configured as the proxy.
102 | func TestInstrumenterProxy(t *testing.T) {
103 | 	t.Parallel()
104 |
105 | 	instrumenter := metrics.NewInstrumenter("testproxy")
106 | 	instrumenter.WithInsecureSkipVerify(true).WithProxy(ts.URL)
107 |
108 | 	resp, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
109 | 	if err != nil {
110 | 		t.Fatal(err)
111 | 	}
112 | 	defer resp.Body.Close()
113 | }
114 |
115 | // TestInstrumenterBabProxy expects the round trip to still succeed when WithProxy is given a malformed URL.
116 | func TestInstrumenterBabProxy(t *testing.T) {
117 | 	t.Parallel()
118 |
119 | 	instrumenter := metrics.NewInstrumenter("testbadproxy")
120 | 	instrumenter.WithInsecureSkipVerify(true).WithProxy("badproxy://badproxy:badproxy")
121 |
122 | 	resp, err := instrumenter.InstrumentedRoundTripper().RoundTrip(httptest.NewRequest(http.MethodGet, ts.URL, nil))
123 | 	if err != nil {
124 | 		t.Fatal(err)
125 | 	}
126 | 	defer resp.Body.Close()
127 | }
128 |
--------------------------------------------------------------------------------
/pkg/template/README.md:
--------------------------------------------------------------------------------
1 | # Templating Options
2 |
3 | | Template | Description | Example |
4 | | --------- | ----------- | ------- |
5 | | `{{ .Event.EventId }}` | Globally unique identifier for this event. | 602d9444-d2cd-49c7-8624-8643e7171297 |
6 | | `{{ .Event.EventType }}` | Impact this event causes. | Reboot |
7 | | `{{ .Event.ResourceType }}` | Type of resource this event affects. | VirtualMachine |
8 | | `{{ .Event.Resources }}` | List of resources this event affects. | [ FrontEnd_IN_0 ...] |
9 | | `{{ .Event.EventStatus }}` | Status of this event. | Scheduled |
10 | | `{{ .Event.NotBefore }}` | Time after which this event can start. The event is guaranteed to not start before this time. Will be blank if the event has already started | Mon, 19 Sep 2016 18:29:47 GMT |
11 | | `{{ .Event.Description }}` | Description of this event. | Host server is undergoing maintenance |
12 | | `{{ .Event.EventSource }}` | Initiator of the event. | Platform |
13 | | `{{ .Event.DurationInSeconds }}` | The expected duration of the interruption caused by the event. | -1 |
14 | | `{{ .NodeLabels }}` | Node labels | kubernetes.azure.com/agentpool:spotcpu4m16n ... |
15 | | `{{ .NodeName }}` | Node name | aks-spotcpu4m16n-41289323-vmss0000ny |
16 | | `{{ .ClusterName }}` | Node label kubernetes.azure.com/cluster | MC_EAST-US-RC-STAGE_stage-cluster_eastus |
17 | | `{{ .InstanceType }}` | Node label node.kubernetes.io/instance-type | Standard_D4as_v5 |
18 | | `{{ .NodeArch }}` | Node label kubernetes.io/arch | amd64 |
19 | | `{{ .NodeOS }}` | Node label kubernetes.io/os | linux |
20 | | `{{ .NodeRole }}` | Node label kubernetes.io/role | agent |
21 | | `{{ .NodeRegion }}` | Node label topology.kubernetes.io/region | eastus |
22 | | `{{ .NodeZone }}` | Node label topology.kubernetes.io/zone | 0 |
23 | | `{{ .NodePods }}` | List of pods on node | [ pod1 ...] |
24 |
--------------------------------------------------------------------------------
/pkg/template/template.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package template
14 |
15 | import (
16 | "bytes"
17 | "context"
18 | "html/template"
19 |
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api"
21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
22 | "github.com/pkg/errors"
23 | )
24 |
25 | type MessageType struct { // data passed to the message template by Message
26 | Event types.ScheduledEventsEvent // the scheduled event the message is about
27 | Template string // template source parsed by Message; NewMessageType leaves it empty
28 | NodeLabels map[string]string `description:"Node labels"`
29 | NodeName string `description:"Node name"`
30 | ClusterName string `description:"Node label kubernetes.azure.com/cluster"`
31 | InstanceType string `description:"Node label node.kubernetes.io/instance-type"`
32 | NodeArch string `description:"Node label kubernetes.io/arch"`
33 | NodeOS string `description:"Node label kubernetes.io/os"`
34 | NodeRole string `description:"Node label kubernetes.io/role"`
35 | NodeRegion string `description:"Node label topology.kubernetes.io/region"`
36 | NodeZone string `description:"Node label topology.kubernetes.io/zone"`
37 | NodePods []string `description:"List of pods on node"`
38 | }
39 |
40 | // NewMessageType looks up the labels and pods of nodeName through the api package and
41 | // returns a MessageType for event, with the well-known label values copied into their
42 | // own fields. Template is left empty; either lookup error is returned wrapped.
43 | func NewMessageType(ctx context.Context, nodeName string, event types.ScheduledEventsEvent) (*MessageType, error) {
44 | 	labels, err := api.GetNodeLabels(ctx, nodeName)
45 | 	if err != nil {
46 | 		return nil, errors.Wrap(err, "error in nodes.get")
47 | 	}
48 |
49 | 	pods, err := api.GetNodePods(ctx, nodeName)
50 | 	if err != nil {
51 | 		return nil, errors.Wrap(err, "error in getNodePods")
52 | 	}
53 |
54 | 	message := &MessageType{Event: event, NodeName: nodeName, NodeLabels: labels, NodePods: pods}
55 | 	message.ClusterName = labels["kubernetes.azure.com/cluster"]
56 | 	message.InstanceType = labels["node.kubernetes.io/instance-type"]
57 | 	message.NodeArch = labels["kubernetes.io/arch"]
58 | 	message.NodeOS = labels["kubernetes.io/os"]
59 | 	message.NodeRole = labels["kubernetes.io/role"]
60 | 	message.NodeRegion = labels["topology.kubernetes.io/region"]
61 | 	message.NodeZone = labels["topology.kubernetes.io/zone"]
62 |
63 | 	return message, nil
64 | }
65 |
66 | func Message(obj *MessageType) (string, error) {
67 | tmpl, err := template.New("message").Parse(obj.Template)
68 | if err != nil {
69 | return "", errors.Wrap(err, "error in template.Parse")
70 | }
71 |
72 | var tpl bytes.Buffer
73 |
74 | err = tmpl.Execute(&tpl, obj)
75 | if err != nil {
76 | return "", errors.Wrap(err, "error in template.Execute")
77 | }
78 |
79 | return tpl.String(), nil
80 | }
81 |
--------------------------------------------------------------------------------
/pkg/template/template_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package template_test
14 |
15 | import (
16 | "context"
17 | "encoding/json"
18 | "fmt"
19 | "os"
20 | "reflect"
21 | "strings"
22 | "testing"
23 |
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
25 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
26 | )
27 |
28 | const fakeTemplate = "{{" // unterminated action; TestFakeTemplate expects Message to reject it
29 |
30 | func TestTemplateMessage(t *testing.T) {
31 | t.Parallel()
32 |
33 | obj := &template.MessageType{
34 | Event: types.ScheduledEventsEvent{
35 | EventId: "someID",
36 | EventType: "someType",
37 | },
38 | NodePods: []string{"pod1", "pod2"},
39 | Template: "test {{ .Event.EventId }} {{ .Event.EventType }} {{ .NodePods}}",
40 | }
41 |
42 | tpl, err := template.Message(obj)
43 | if err != nil {
44 | t.Fatal(err)
45 | }
46 |
47 | if want := "test someID someType [pod1 pod2]"; tpl != want {
48 | t.Fatalf("want=%s,got=%s", want, tpl)
49 | }
50 | }
51 |
52 | // TestFakeTemplate expects an error for a template that cannot be parsed.
53 | func TestFakeTemplate(t *testing.T) {
54 | 	t.Parallel()
55 |
56 | 	broken := &template.MessageType{Template: fakeTemplate}
57 |
58 | 	if _, err := template.Message(broken); err == nil {
59 | 		t.Fatal("must be error")
60 | 	}
61 | }
62 |
63 | // TestFakeTemplateFunc expects an error when the template references a field MessageType lacks.
64 | func TestFakeTemplateFunc(t *testing.T) {
65 | 	t.Parallel()
66 |
67 | 	unknownField := &template.MessageType{Template: "{{ .DDD }}"}
68 |
69 | 	_, err := template.Message(unknownField)
70 | 	if err == nil {
71 | 		t.Fatal("must be error")
72 | 	}
73 | 	t.Log(err)
74 | }
75 |
76 | func TestTemplateMarkdown(t *testing.T) {
77 | t.Parallel()
78 |
79 | message := template.MessageType{}
80 |
81 | messageBytes, err := os.ReadFile("testdata/message.json")
82 | if err != nil {
83 | t.Fatal(err)
84 | }
85 |
86 | if err := json.Unmarshal(messageBytes, &message); err != nil {
87 | t.Fatal(err)
88 | }
89 |
90 | printType("", message)
91 |
92 | if err = os.WriteFile("README.md.tmp", []byte(buf.String()), 0o644); err != nil { //nolint:gosec
93 | t.Fatal(err)
94 | }
95 | }
96 |
97 | var buf strings.Builder // collects the table rows written by printType; read by TestTemplateMarkdown
98 |
99 | // printType appends one markdown table row per field of the struct value message to buf.
100 | // "Template" is skipped, "Event" is expanded recursively with an "Event." prefix, and
101 | // slice, int and map fields are shortened to a sample value.
102 | func printType(prefix string, message interface{}) {
103 | 	structValue := reflect.ValueOf(message)
104 | 	structType := structValue.Type()
105 |
106 | 	for i := range structValue.NumField() {
107 | 		field := structType.Field(i)
108 |
109 | 		if field.Name == "Template" {
110 | 			continue
111 | 		}
112 |
113 | 		if field.Name == "Event" {
114 | 			printType(field.Name+".", structValue.Field(i).Interface())
115 |
116 | 			continue
117 | 		}
118 |
119 | 		value := structValue.Field(i).Interface()
120 |
121 | 		switch structValue.Field(i).Type().Kind() { //nolint:exhaustive
122 | 		case reflect.Slice:
123 | 			if items := value.([]string); len(items) > 0 { //nolint:forcetypeassert
124 | 				value = fmt.Sprintf("[ %s ...]", items[0])
125 | 			}
126 | 		case reflect.Int:
127 | 			value = fmt.Sprintf("%d", value)
128 | 		case reflect.Map:
129 | 			for k, v := range value.(map[string]string) { //nolint:forcetypeassert
130 | 				value = fmt.Sprintf("%s:%s ...", k, v)
131 | 				break
132 | 			}
133 | 		}
134 |
135 | 		fmt.Fprintf(&buf, "| `{{ .%s%s }}` | %v | %v |\n", prefix, field.Name, field.Tag.Get("description"), value)
136 | 	}
137 | }
138 |
139 | // TestNewMessageType expects failures for the two "!!invalid!!" node names and success for "somenode".
140 | func TestNewMessageType(t *testing.T) {
141 | 	t.Parallel()
142 | 	if _, err := template.NewMessageType(context.TODO(), "!!invalid!!GetNodeLabels", types.ScheduledEventsEvent{}); err == nil {
143 | 		t.Fatal("error expected")
144 | 	}
145 |
146 | 	if _, err := template.NewMessageType(context.TODO(), "!!invalid!!GetNodePods", types.ScheduledEventsEvent{}); err == nil {
147 | 		t.Fatal("error expected")
148 | 	}
149 |
150 | 	messageType, err := template.NewMessageType(context.TODO(), "somenode", types.ScheduledEventsEvent{})
151 | 	if err != nil {
152 | 		t.Fatal(err)
153 | 	}
154 |
155 | 	if messageType.NodeName != "somenode" {
156 | 		t.Fatalf("NodeName: want %q, got %q", "somenode", messageType.NodeName)
157 | 	}
158 | }
159 |
--------------------------------------------------------------------------------
/pkg/template/testdata/message.json:
--------------------------------------------------------------------------------
1 | {
2 | "Event": {
3 | "EventId": "602d9444-d2cd-49c7-8624-8643e7171297",
4 | "EventType": "Reboot",
5 | "ResourceType": "VirtualMachine",
6 | "Resources": [
7 | "FrontEnd_IN_0",
8 | "aks-spotcpu2d2as-24469130-vmss_1",
9 | "aks-spotcpu4m16n-41289323-vmss_862"
10 | ],
11 | "EventStatus": "Scheduled",
12 | "NotBefore": "Mon, 19 Sep 2016 18:29:47 GMT",
13 | "Description": "Host server is undergoing maintenance",
14 | "EventSource": "Platform",
15 | "DurationInSeconds": -1
16 | },
17 | "Template": "",
18 | "NodeLabels": {
19 | "agentpool": "spotcpu4m16n",
20 | "beta.kubernetes.io/arch": "amd64",
21 | "beta.kubernetes.io/instance-type": "Standard_D4as_v5",
22 | "beta.kubernetes.io/os": "linux",
23 | "failure-domain.beta.kubernetes.io/region": "eastus",
24 | "failure-domain.beta.kubernetes.io/zone": "0",
25 | "kubernetes.azure.com/agentpool": "spotcpu4m16n",
26 | "kubernetes.azure.com/cluster": "MC_EAST-US-RC-STAGE_stage-cluster_eastus",
27 | "kubernetes.azure.com/consolidated-additional-properties": "d9a49827-aede-11ee-832c-fe2d222ef432",
28 | "kubernetes.azure.com/kubelet-identity-client-id": "6781a919-9379-417c-8aff-257ecacd1139",
29 | "kubernetes.azure.com/mode": "user",
30 | "kubernetes.azure.com/network-policy": "none",
31 | "kubernetes.azure.com/node-image-version": "AKSUbuntu-2204gen2containerd-202312.06.0",
32 | "kubernetes.azure.com/nodepool-type": "VirtualMachineScaleSets",
33 | "kubernetes.azure.com/os-sku": "Ubuntu",
34 | "kubernetes.azure.com/role": "agent",
35 | "kubernetes.azure.com/scalesetpriority": "spot",
36 | "kubernetes.azure.com/storageprofile": "managed",
37 | "kubernetes.azure.com/storagetier": "Premium_LRS",
38 | "kubernetes.io/arch": "amd64",
39 | "kubernetes.io/hostname": "aks-spotcpu4m16n-41289323-vmss0000ny",
40 | "kubernetes.io/os": "linux",
41 | "kubernetes.io/role": "agent",
42 | "node-role.kubernetes.io/agent": "",
43 | "node.kubernetes.io/instance-type": "Standard_D4as_v5",
44 | "storageprofile": "managed",
45 | "storagetier": "Premium_LRS",
46 | "topology.disk.csi.azure.com/zone": "",
47 | "topology.kubernetes.io/region": "eastus",
48 | "topology.kubernetes.io/zone": "0"
49 | },
50 | "NodeName": "aks-spotcpu4m16n-41289323-vmss0000ny",
51 | "ClusterName": "MC_EAST-US-RC-STAGE_stage-cluster_eastus",
52 | "InstanceType": "Standard_D4as_v5",
53 | "NodeArch": "amd64",
54 | "NodeOS": "linux",
55 | "NodeRole": "agent",
56 | "NodeRegion": "eastus",
57 | "NodeZone": "0",
58 | "NodePods": [
59 | "pod1",
60 | "pod2"
61 | ]
62 | }
--------------------------------------------------------------------------------
/pkg/types/testdata/ScheduledEventsType.json:
--------------------------------------------------------------------------------
1 | {
2 | "DocumentIncarnation": 1,
3 | "Events": [
4 | {
5 | "EventId": "602d9444-d2cd-49c7-8624-8643e7171297",
6 | "EventType": "Reboot",
7 | "ResourceType": "VirtualMachine",
8 | "Resources": [
9 | "FrontEnd_IN_0",
10 | "aks-spotcpu2d2as-24469130-vmss_1"
11 | ],
12 | "EventStatus": "Scheduled",
13 | "NotBefore": "Mon, 19 Sep 2016 18:29:47 GMT",
14 | "Description": "Host server is undergoing maintenance",
15 | "EventSource": "Platform",
16 | "DurationInSeconds": -1
17 | }
18 | ]
19 | }
--------------------------------------------------------------------------------
/pkg/types/types.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package types
14 |
15 | import (
16 | "fmt"
17 | "regexp"
18 |
19 | "github.com/pkg/errors"
20 | )
21 |
// ScheduledEventsEventType is the string type used for ScheduledEventsEvent.EventType.
type ScheduledEventsEventType string

// Event type values. They are untyped string constants, so they can be used
// both as plain strings and as ScheduledEventsEventType values.
const (
	// EventTypeFreeze means the Virtual Machine is scheduled to pause for a few
	// seconds. CPU and network connectivity may be suspended, but memory and
	// open files are not impacted.
	EventTypeFreeze = "Freeze"
	// EventTypeReboot means the Virtual Machine is scheduled for reboot
	// (non-persistent memory is lost). Made available on a best effort basis.
	EventTypeReboot = "Reboot"
	// EventTypeRedeploy means the Virtual Machine is scheduled to move to
	// another node (ephemeral disks are lost). Delivered on a best effort basis.
	EventTypeRedeploy = "Redeploy"
	// EventTypePreempt means the Spot Virtual Machine is being deleted
	// (ephemeral disks are lost).
	EventTypePreempt = "Preempt"
	// EventTypeTerminate means the virtual machine is scheduled to be deleted.
	EventTypeTerminate = "Terminate"
)
39 |
40 | // https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events
41 | type ScheduledEventsEvent struct {
42 | EventId string `description:"Globally unique identifier for this event."` //nolint:golint,revive,stylecheck
43 | EventType ScheduledEventsEventType `description:"Impact this event causes."`
44 | ResourceType string `description:"Type of resource this event affects."`
45 | Resources []string `description:"List of resources this event affects."`
46 | EventStatus string `description:"Status of this event."`
47 | NotBefore string `description:"Time after which this event can start. The event is guaranteed to not start before this time. Will be blank if the event has already started"` //nolint:lll
48 | Description string `description:"Description of this event."`
49 | EventSource string `description:"Initiator of the event."`
50 | DurationInSeconds int `description:"The expected duration of the interruption caused by the event."`
51 | }
52 |
// Patterns for the node provider IDs understood by NewAzureResource.
//
// Every capture group is limited to a single path segment ([^/]+) so that a
// group can never swallow a "/" and spill into the neighbouring segment, and
// the dot in "Microsoft.Compute" is escaped so it only matches a literal dot.
var (
	// virtualMachineScaleSetsRe captures, in order: subscription ID, resource
	// group, scale set name and the virtual machine identifier inside the set.
	virtualMachineScaleSetsRe = regexp.MustCompile(`^azure:///subscriptions/([^/]+)/resourceGroups/([^/]+)/providers/Microsoft\.Compute/virtualMachineScaleSets/([^/]+)/virtualMachines/([^/]+)$`) //nolint:lll
	// virtualMachineRe captures, in order: subscription ID, resource group and
	// virtual machine name.
	virtualMachineRe = regexp.MustCompile(`^azure:///subscriptions/([^/]+)/resourceGroups/([^/]+)/providers/Microsoft\.Compute/virtualMachines/([^/]+)$`) //nolint:lll
)
57 |
// AzureResource holds the pieces that NewAzureResource extracts from a provider ID.
type AzureResource struct {
	// ProviderID is the provider ID string the resource was built from.
	ProviderID string
	// EventResourceName is "<scale set>_<virtual machine>" for scale set
	// members and the virtual machine name for standalone virtual machines.
	EventResourceName string
	// SubscriptionID is the subscription segment of the provider ID.
	SubscriptionID string
	// ResourceGroup is the resource group segment of the provider ID.
	ResourceGroup string
}
64 |
65 | func NewAzureResource(providerID string) (*AzureResource, error) {
66 | resource := &AzureResource{
67 | ProviderID: providerID,
68 | }
69 |
70 | switch {
71 | case virtualMachineScaleSetsRe.MatchString(providerID):
72 | v := virtualMachineScaleSetsRe.FindAllStringSubmatch(providerID, 1)
73 |
74 | resource.SubscriptionID = v[0][1]
75 | resource.ResourceGroup = v[0][2]
76 | resource.EventResourceName = fmt.Sprintf("%s_%s", v[0][3], v[0][4])
77 |
78 | case virtualMachineRe.MatchString(providerID):
79 | v := virtualMachineRe.FindAllStringSubmatch(providerID, 1)
80 |
81 | resource.SubscriptionID = v[0][1]
82 | resource.ResourceGroup = v[0][2]
83 | resource.EventResourceName = v[0][3]
84 |
85 | default:
86 | return nil, errors.Errorf("providerID not recognized: %s", providerID)
87 | }
88 |
89 | return resource, nil
90 | }
91 |
92 | // api-version=2020-07-01.
93 | type ScheduledEventsType struct {
94 | DocumentIncarnation int
95 | Events []ScheduledEventsEvent
96 | }
97 |
// EventMessage groups the three text parts of an event: its type, reason and message.
type EventMessage struct {
	Type    string
	Reason  string
	Message string
}
103 |
--------------------------------------------------------------------------------
/pkg/types/types_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package types_test
14 |
15 | import (
16 | "encoding/json"
17 | "os"
18 | "reflect"
19 | "strconv"
20 | "testing"
21 |
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/types"
23 | )
24 |
25 | func TestScheduledEventsType(t *testing.T) {
26 | t.Parallel()
27 |
28 | messageBytes, err := os.ReadFile("testdata/ScheduledEventsType.json")
29 | if err != nil {
30 | t.Fatal(err)
31 | }
32 |
33 | message := types.ScheduledEventsType{}
34 |
35 | err = json.Unmarshal(messageBytes, &message)
36 | if err != nil {
37 | t.Fatal(err)
38 | }
39 |
40 | if len(message.Events) == 0 {
41 | t.Fatal("events is empty")
42 | }
43 |
44 | if want := "VirtualMachine"; message.Events[0].ResourceType != want {
45 | t.Fatalf("want=%s, got=%s", want, message.Events[0].ResourceType)
46 | }
47 | }
48 |
49 | func TestAzureResource(t *testing.T) {
50 | t.Parallel()
51 |
52 | type azureResourceTest struct {
53 | providerID string
54 | want *types.AzureResource
55 | }
56 |
57 | tests := make([]azureResourceTest, 0)
58 |
59 | tests = append(tests, azureResourceTest{
60 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912341/resourceGroups/mc_prod_prod_eastus/providers/Microsoft.Compute/virtualMachineScaleSets/aks-spotcpu2v2-19654750-vmss/virtualMachines/2768", //nolint:lll
61 | want: &types.AzureResource{
62 | EventResourceName: "aks-spotcpu2v2-19654750-vmss_2768",
63 | SubscriptionID: "12345a05-1234-1234-12345-922b47912341",
64 | ResourceGroup: "mc_prod_prod_eastus",
65 | },
66 | })
67 |
68 | tests = append(tests, azureResourceTest{
69 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912342/resourceGroups/aro-infra-lth8qmzr-test-openshift-cluster1/providers/Microsoft.Compute/virtualMachines/test-openshift-cluste-t98dd-master-0", //nolint:lll
70 | want: &types.AzureResource{
71 | EventResourceName: "test-openshift-cluste-t98dd-master-0",
72 | SubscriptionID: "12345a05-1234-1234-12345-922b47912342",
73 | ResourceGroup: "aro-infra-lth8qmzr-test-openshift-cluster1",
74 | },
75 | })
76 |
77 | tests = append(tests, azureResourceTest{
78 | providerID: "azure:///subscriptions/12345a05-1234-1234-12345-922b47912343/resourceGroups/aro-infra-lth8qmzr-test-openshift-cluster2/providers/Microsoft.Compute/virtualMachines/test-openshift-cluste-t98dd-worker-eastus1-rz2t8", //nolint:lll
79 | want: &types.AzureResource{
80 | EventResourceName: "test-openshift-cluste-t98dd-worker-eastus1-rz2t8",
81 | SubscriptionID: "12345a05-1234-1234-12345-922b47912343",
82 | ResourceGroup: "aro-infra-lth8qmzr-test-openshift-cluster2",
83 | },
84 | })
85 |
86 | for testID, test := range tests {
87 | t.Run("Test"+strconv.Itoa(testID), func(t *testing.T) {
88 | t.Parallel()
89 |
90 | azureResource, err := types.NewAzureResource(test.providerID)
91 | if err != nil {
92 | t.Fatal(err)
93 | }
94 |
95 | // need to set providerID for comparison
96 | test.want.ProviderID = test.providerID
97 |
98 | if !reflect.DeepEqual(azureResource, test.want) {
99 | t.Fatalf("want=%+v, got=%+v", test.want, azureResource)
100 | }
101 | })
102 | }
103 |
104 | // test invalid providerID
105 | if _, err := types.NewAzureResource("azure://fake"); err == nil {
106 | t.Fatal("error expected")
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/pkg/utils/utils.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package utils
14 |
15 | import (
16 | "context"
17 | "time"
18 |
19 | log "github.com/sirupsen/logrus"
20 | )
21 |
22 | func SleepWithContext(ctx context.Context, d time.Duration) {
23 | log.Debugf("Sleep %s", d)
24 |
25 | select {
26 | case <-ctx.Done():
27 | return
28 | case <-time.After(d):
29 | return
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/pkg/utils/utils_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package utils_test
14 |
15 | import (
16 | "context"
17 | "testing"
18 | "time"
19 |
20 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/utils"
21 | )
22 |
23 | func TestSleepWithContext(t *testing.T) {
24 | t.Parallel()
25 |
26 | ctx, cancel := context.WithCancel(context.TODO())
27 | defer cancel()
28 |
29 | startTime := time.Now()
30 |
31 | utils.SleepWithContext(ctx, 1*time.Second)
32 |
33 | if time.Since(startTime) < 1*time.Second || time.Since(startTime) > 2*time.Second {
34 | t.Error("SleepWithContext() not working as expected")
35 | }
36 |
37 | cancel()
38 |
39 | startTime = time.Now()
40 | utils.SleepWithContext(ctx, 1*time.Second)
41 |
42 | if time.Since(startTime) >= 1*time.Second {
43 | t.Error("SleepWithContext() not working as expected")
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/pkg/web/web.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package web
14 |
15 | import (
16 | "context"
17 | "net/http"
18 | "net/http/pprof"
19 | "time"
20 |
21 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/alert"
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/api"
23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
24 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
25 | log "github.com/sirupsen/logrus"
26 | )
27 |
28 | func Start(ctx context.Context) {
29 | const (
30 | readTimeout = 5 * time.Second
31 | requestTimeout = 10 * time.Second
32 | writeTimeout = 20 * time.Second
33 | )
34 |
35 | server := &http.Server{
36 | Addr: *config.Get().WebHTTPAddress,
37 | Handler: http.TimeoutHandler(GetHandler(), requestTimeout, "timeout"),
38 | ReadTimeout: readTimeout,
39 | WriteTimeout: writeTimeout,
40 | }
41 |
42 | log.Info("web.address=", server.Addr)
43 |
44 | go func() {
45 | <-ctx.Done()
46 |
47 | shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), config.Get().GracePeriod())
48 | defer shutdownCancel()
49 |
50 | _ = server.Shutdown(shutdownCtx) //nolint:contextcheck
51 | }()
52 |
53 | if err := server.ListenAndServe(); err != nil && ctx.Err() == nil {
54 | log.WithError(err).Fatal()
55 | }
56 | }
57 |
58 | func GetHandler() *http.ServeMux {
59 | mux := http.NewServeMux()
60 |
61 | mux.HandleFunc("/healthz", handlerHealthz)
62 | mux.HandleFunc("/drainNode", handlerDrainNode)
63 |
64 | mux.Handle("/metrics", metrics.GetHandler())
65 |
66 | mux.HandleFunc("/debug/pprof/", pprof.Index)
67 | mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
68 | mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
69 | mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
70 | mux.HandleFunc("/debug/pprof/trace", pprof.Trace)
71 |
72 | return mux
73 | }
74 |
75 | func handlerHealthz(w http.ResponseWriter, r *http.Request) {
76 | // check alerts transports
77 | if err := alert.Ping(); err != nil {
78 | log.WithError(err).Error("alerts transport is not working")
79 | http.Error(w, err.Error(), http.StatusInternalServerError)
80 |
81 | return
82 | }
83 |
84 | // check kubernetes API
85 | if _, err := api.GetNode(r.Context(), *config.Get().NodeName); err != nil {
86 | log.WithError(err).Error("kubernetes API is not available")
87 | http.Error(w, err.Error(), http.StatusInternalServerError)
88 |
89 | return
90 | }
91 |
92 | _, _ = w.Write([]byte("LIVE"))
93 | }
94 |
95 | func handlerDrainNode(w http.ResponseWriter, r *http.Request) {
96 | err := api.DrainNode(r.Context(), *config.Get().NodeName, "Preempt", "manual")
97 | if err != nil {
98 | http.Error(w, err.Error(), http.StatusInternalServerError)
99 |
100 | return
101 | }
102 |
103 | _, _ = w.Write([]byte("done"))
104 | }
105 |
--------------------------------------------------------------------------------
/pkg/webhook/testdata/WebhookTemplateFile.txt:
--------------------------------------------------------------------------------
1 | node_termination_event{node="{{ .NodeName }}"} 1
--------------------------------------------------------------------------------
/pkg/webhook/webhook.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | package webhook
14 |
15 | import (
16 | "bytes"
17 | "context"
18 | "fmt"
19 | "os"
20 |
21 | "github.com/hashicorp/go-retryablehttp"
22 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/config"
23 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
24 | "github.com/pkg/errors"
25 | log "github.com/sirupsen/logrus"
26 | )
27 |
28 | var client = &retryablehttp.Client{}
29 |
30 | var errHTTPNotOK = errors.New("http result not OK")
31 |
32 | func SetHTTPClient(c *retryablehttp.Client) {
33 | client = c
34 | }
35 |
// isResponseStatusOK reports whether statusCode lies in the range 200..299.
func isResponseStatusOK(statusCode int) bool {
	const (
		firstOK    = 200
		firstNotOK = 300
	)

	if statusCode < firstOK {
		return false
	}

	return statusCode < firstNotOK
}
39 |
40 | func SendWebHook(ctx context.Context, obj *template.MessageType) error {
41 | ctx, cancel := context.WithTimeout(ctx, *config.Get().WebHookTimeout)
42 | defer cancel()
43 |
44 | if len(*config.Get().WebHookURL) == 0 {
45 | return nil
46 | }
47 |
48 | message, err := template.NewMessageType(ctx, obj.NodeName, obj.Event)
49 | if err != nil {
50 | return errors.Wrap(err, "error in template.NewMessageType")
51 | }
52 |
53 | message.Template = *config.Get().WebHookTemplate
54 |
55 | if len(*config.Get().WebHookTemplateFile) > 0 {
56 | templateFile, err := os.ReadFile(*config.Get().WebHookTemplateFile)
57 | if err != nil {
58 | return errors.Wrap(err, "error in os.ReadFile")
59 | }
60 |
61 | message.Template = string(templateFile)
62 | }
63 |
64 | webhookBody, err := template.Message(message)
65 | if err != nil {
66 | return errors.Wrap(err, "error in template.Message")
67 | }
68 |
69 | requestBody := bytes.NewBufferString(webhookBody + "\n")
70 |
71 | req, err := retryablehttp.NewRequest(*config.Get().WebHookMethod, *config.Get().WebHookURL, requestBody)
72 | if err != nil {
73 | return errors.Wrap(err, "error in retryablehttp.NewRequest")
74 | }
75 |
76 | req.Header.Set("Content-Type", *config.Get().WebHookContentType)
77 |
78 | log.WithFields(log.Fields{
79 | "method": req.Method,
80 | "url": req.URL,
81 | "headers": req.Header,
82 | }).Infof("Doing request with body: %s", requestBody.String())
83 |
84 | resp, err := client.Do(req)
85 | if err != nil {
86 | return errors.Wrap(err, "error in client.Do")
87 | }
88 | defer resp.Body.Close()
89 |
90 | log.Infof("response status: %s", resp.Status)
91 |
92 | if !isResponseStatusOK(resp.StatusCode) {
93 | return errors.Wrap(errHTTPNotOK, fmt.Sprintf("StatusCode=%d", resp.StatusCode))
94 | }
95 |
96 | return nil
97 | }
98 |
--------------------------------------------------------------------------------
/pkg/webhook/webhook_test.go:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright paskal.maksim@gmail.com
3 | Licensed under the Apache License, Version 2.0 (the "License")
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 | http://www.apache.org/licenses/LICENSE-2.0
7 | Unless required by applicable law or agreed to in writing, software
8 | distributed under the License is distributed on an "AS IS" BASIS,
9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | */
13 | //nolint:goerr113
14 | package webhook_test
15 |
16 | import (
17 | "context"
18 | "errors"
19 | "flag"
20 | "fmt"
21 | "io"
22 | "net/http"
23 | "net/http/httptest"
24 | "testing"
25 |
26 | "github.com/hashicorp/go-retryablehttp"
27 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/metrics"
28 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/template"
29 | "github.com/maksim-paskal/aks-node-termination-handler/pkg/webhook"
30 | log "github.com/sirupsen/logrus"
31 | "github.com/stretchr/testify/require"
32 | )
33 |
34 | var retryableRequestCount = 0
35 |
36 | var ts = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
37 | if r.RequestURI == "/-/400" {
38 | w.WriteHeader(http.StatusBadRequest)
39 |
40 | return
41 | }
42 |
43 | if r.RequestURI == "/test-retryable" {
44 | retryableRequestCount++
45 |
46 | // return 500 for first 2 requests
47 | if retryableRequestCount < 3 {
48 | w.WriteHeader(http.StatusInternalServerError)
49 | } else {
50 | _, _ = w.Write([]byte("OK"))
51 | }
52 |
53 | return
54 | }
55 |
56 | if err := testWebhookRequest(r); err != nil {
57 | log.WithError(err).Error()
58 | w.WriteHeader(http.StatusInternalServerError)
59 | } else {
60 | _, _ = w.Write([]byte("OK"))
61 | }
62 | }))
63 |
64 | func getWebhookRetryableURL() string {
65 | return ts.URL + "/test-retryable"
66 | }
67 |
68 | func getWebhookURL() string {
69 | return ts.URL + "/metrics/job/aks-node-termination-handler"
70 | }
71 |
72 | func testWebhookRequest(r *http.Request) error {
73 | if r.RequestURI != "/metrics/job/aks-node-termination-handler" {
74 | return errors.New("Request URI is not correct")
75 | }
76 |
77 | defer r.Body.Close()
78 |
79 | body, _ := io.ReadAll(r.Body)
80 |
81 | if bodyString := string(body); bodyString != "node_termination_event{node=\"test\"} 1\n" {
82 | return fmt.Errorf("Response body [%s] is not correct", bodyString)
83 | }
84 |
85 | return nil
86 | }
87 |
88 | func TestWebHook(t *testing.T) { //nolint:funlen,tparallel
89 | t.Parallel()
90 |
91 | retryClient := retryablehttp.NewClient()
92 | retryClient.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHook").
93 | WithProxy("").
94 | WithInsecureSkipVerify(true).
95 | InstrumentedRoundTripper()
96 | retryClient.RetryMax = 0
97 |
98 | retryClientProxy := retryablehttp.NewClient()
99 | retryClientProxy.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithProxy").
100 | WithProxy("http://someproxy").
101 | WithInsecureSkipVerify(true).
102 | InstrumentedRoundTripper()
103 | retryClientProxy.RetryMax = 0
104 |
105 | // retryable client with default retry settings
106 | retryClientDefault := retryablehttp.NewClient()
107 | retryClientDefault.HTTPClient.Transport = metrics.NewInstrumenter("TestWebHookWithDefaultSettings").
108 | WithProxy("").
109 | WithInsecureSkipVerify(true).
110 | InstrumentedRoundTripper()
111 | retryClientDefault.RetryMax = 3
112 |
113 | type Test struct {
114 | Name string
115 | Args map[string]string
116 | Error bool
117 | ErrorMessage string
118 | NodeName string
119 | HTTPClient *retryablehttp.Client
120 | }
121 |
122 | tests := []Test{
123 | {
124 | Name: "TestRetryable",
125 | Args: map[string]string{
126 | "webhook.url": getWebhookRetryableURL(),
127 | },
128 | HTTPClient: retryClientDefault,
129 | },
130 | {
131 | Name: "TestRetryableCustomStatusCodes",
132 | Args: map[string]string{
133 | "webhook.url": ts.URL + "/-/400",
134 | },
135 | HTTPClient: retryClientDefault,
136 | Error: true,
137 | ErrorMessage: "http result not OK",
138 | },
139 | {
140 | Name: "ValidHookAndTemplate",
141 | Args: map[string]string{
142 | "webhook.url": getWebhookURL(),
143 | "webhook.template": `node_termination_event{node="{{ .NodeName }}"} 1`,
144 | },
145 | },
146 | {
147 | Name: "EmptyURL",
148 | Args: map[string]string{
149 | "webhook.url": "",
150 | "webhook.template": `node_termination_event{node="{{ .NodeName }}"} 1`,
151 | },
152 | },
153 | {
154 | Name: "InvalidTemplate",
155 | Args: map[string]string{
156 | "webhook.url": getWebhookURL(),
157 | "webhook.template": `{{`,
158 | },
159 | Error: true,
160 | },
161 | {
162 | Name: "InvalidContext",
163 | Args: map[string]string{
164 | "webhook.url": "example.com",
165 | "webhook.template": `{{ .NodeName }}`,
166 | },
167 | Error: true,
168 | },
169 | {
170 | Name: "InvalidStatus",
171 | Args: map[string]string{
172 | "webhook.url": ts.URL,
173 | "webhook.template": `{{ .NodeName }}`,
174 | },
175 | Error: true,
176 | ErrorMessage: "giving up after 1 attempt",
177 | },
178 | {
179 | Name: "InvalidMethod",
180 | Args: map[string]string{
181 | "webhook.url": getWebhookURL(),
182 | "webhook.template": `{{ .NodeName }}`,
183 | "webhook.method": `???`,
184 | },
185 | Error: true,
186 | },
187 | {
188 | Name: "WebhookTemplateFile",
189 | Args: map[string]string{
190 | "webhook.url": getWebhookURL(),
191 | "webhook.template-file": "testdata/WebhookTemplateFile.txt",
192 | },
193 | },
194 | {
195 | Error: true,
196 | Name: "WebhookTemplateFileInvalid",
197 | Args: map[string]string{
198 | "webhook.url": getWebhookURL(),
199 | "webhook.template-file": "faketestdata/WebhookTemplateFile.txt",
200 | },
201 | },
202 | {
203 | Error: true,
204 | Name: "InvalidNodeName",
205 | Args: map[string]string{
206 | "webhook.url": getWebhookURL(),
207 | },
208 | NodeName: "!!invalid!!GetNodeLabels",
209 | },
210 | {
211 | Error: true,
212 | ErrorMessage: "error making roundtrip: proxyconnect tcp: dial tcp",
213 | Name: "HTTPClientProxy",
214 | Args: map[string]string{
215 | "webhook.url": getWebhookURL(),
216 | },
217 | HTTPClient: retryClientProxy,
218 | },
219 | }
220 |
221 | // clear flags
222 | cleanAllFlags := func() {
223 | for _, test := range tests {
224 | for key := range test.Args {
225 | _ = flag.Set(key, "")
226 | }
227 | }
228 | }
229 |
230 | for _, tc := range tests { //nolint:paralleltest
231 | t.Run(tc.Name, func(t *testing.T) {
232 | cleanAllFlags()
233 |
234 | for key, value := range tc.Args {
235 | _ = flag.Set(key, value)
236 | }
237 |
238 | messageType := &template.MessageType{
239 | NodeName: "test",
240 | }
241 |
242 | if len(tc.NodeName) > 0 {
243 | messageType.NodeName = tc.NodeName
244 | }
245 |
246 | if httpClient := tc.HTTPClient; httpClient != nil {
247 | webhook.SetHTTPClient(httpClient)
248 | } else {
249 | webhook.SetHTTPClient(retryClient)
250 | }
251 |
252 | err := webhook.SendWebHook(context.TODO(), messageType)
253 | if tc.Error {
254 | require.Error(t, err)
255 | require.Contains(t, err.Error(), tc.ErrorMessage)
256 | } else {
257 | require.NoError(t, err)
258 | }
259 | })
260 | }
261 |
262 | // Check retryable request counter, 3 requests should be made
263 | require.Equal(t, 3, retryableRequestCount)
264 | }
265 |
--------------------------------------------------------------------------------
/scripts/validate-license.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Copyright paskal.maksim@gmail.com
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
# Stop on a failing command, on use of an unset variable, and when any
# command of a pipeline fails.
set -euo pipefail
# Split words on newlines and tabs only, not on spaces.
IFS=$'\n\t'
18 |
# Print the files whose headers are checked: every *.go, *.sh and LICENSE file
# below the current directory. ./vendor and every path containing "testdata",
# "third_party" or "node_modules" are pruned from the search.
find_files() {
  find . \
    ! \( \
      \( \
        -path './vendor' \
        -o -path '*testdata*' \
        -o -path '*third_party*' \
        -o -path '*node_modules*' \
      \) -prune \
    \) \
    \( -name '*.go' -o -name '*.sh' -o -name 'LICENSE' \)
}
30 |
# Print the files listed by find_files that do not contain the grep pattern
# given as the first argument, one path per line.
files_without() {
  # Use "|| :" to ignore the error code when grep returns empty
  find_files | xargs grep -L "$1" || :
}

# Every checked file must carry the license header.
missing_license=($(files_without 'Licensed under the Apache License, Version 2.0 (the "License")'))
if (( ${#missing_license[@]} > 0 )); then
  echo "Some source files are missing license headers."
  printf '%s\n' "${missing_license[@]}"
  exit 1
fi

# Every checked file must carry the copyright header.
missing_copyright=($(files_without 'Copyright paskal.maksim@gmail.com'))
if (( ${#missing_copyright[@]} > 0 )); then
  echo "Some source files are missing the copyright header."
  printf '%s\n' "${missing_copyright[@]}"
  exit 1
fi

# No file outside .git may mention 'alldigital'; this script and test.sh are
# excluded from the search. grep prints the offending lines itself.
if grep -rn --exclude-dir=.git --exclude=validate-license.sh --exclude=test.sh -e 'alldigital' .; then
  echo "Some files have bad links"
  exit 1
fi
--------------------------------------------------------------------------------