├── .dockerignore ├── .github └── workflows │ └── publish.yaml ├── .gitignore ├── .golangci.yml ├── LICENSE ├── README.md ├── cluster └── charts │ └── node-state-server │ ├── Chart.yaml │ ├── templates │ ├── manager-deployment.yaml │ └── manager-permissions.yaml │ └── values.yaml ├── cmd ├── crik │ ├── Dockerfile │ └── main.go └── node-state-server │ ├── Dockerfile │ └── main.go ├── examples └── simple-loop.yaml ├── go.mod ├── go.sum ├── hack └── boilerplate.go.txt └── internal ├── controller └── node │ ├── controller.go │ └── server.go └── exec ├── checkpoint.go ├── opts.go └── restore.go /.dockerignore: -------------------------------------------------------------------------------- 1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file 2 | # Ignore build and test binaries. 3 | bin/ 4 | .github/ 5 | cluster/ 6 | examples/ 7 | hack/ 8 | README.md 9 | LICENSE -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish 2 | on: 3 | push: 4 | branches: 5 | - main 6 | tags: 7 | - "v*" 8 | 9 | env: 10 | GO_VERSION: 1.22.2 11 | 12 | jobs: 13 | version: 14 | runs-on: ubuntu-latest 15 | outputs: 16 | version: ${{ steps.version.outputs.VERSION }} 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v4 20 | - name: Fetch history for all tags 21 | run: git fetch --prune --unshallow 22 | - name: Calculate version 23 | id: version 24 | run: | 25 | VERSION="v0.0.0" 26 | if [ -z "$(git tag)" ]; then 27 | echo "No tags found" 28 | VERSION="$(echo "v0.0.0-$(git rev-list HEAD --count)-$(git describe --dirty --always)" | sed 's/-/./2' | sed 's/-/./2' | sed 's/-/./2')" 29 | else 30 | echo "Tags found:\n$(git tag)" 31 | VERSION="$(git describe --dirty --always --tags --match 'v*' | sed 's|.*/||' | sed 's/-/./2' | sed 's/-/./2' | sed 's/-/./2')" 32 | fi 33 | echo "Version is ${VERSION}" 34 | echo 
"VERSION=${VERSION}" >> $GITHUB_OUTPUT 35 | image: 36 | runs-on: ubuntu-latest 37 | needs: 38 | - version 39 | permissions: 40 | packages: write 41 | contents: read 42 | id-token: write 43 | attestations: write 44 | strategy: 45 | matrix: 46 | app: [crik, node-state-server] 47 | steps: 48 | - name: Checkout 49 | uses: actions/checkout@v4 50 | - name: Setup Go 51 | uses: actions/setup-go@v5 52 | with: 53 | go-version: ${{ env.GO_VERSION }} 54 | cache: false 55 | - name: Find the Go Environment 56 | id: go 57 | run: | 58 | echo "cache=$(go env GOCACHE)" >> $GITHUB_OUTPUT 59 | echo "mod=$(go env GOMODCACHE)" >> $GITHUB_OUTPUT 60 | 61 | - name: Cache Go Dependencies 62 | uses: actions/cache@v4 63 | with: 64 | path: ${{ steps.go.outputs.mod }} 65 | key: mod-cache-${{ hashFiles('**/go.sum') }} 66 | restore-keys: mod-cache- 67 | 68 | - name: Cache Go Build Cache 69 | uses: actions/cache@v4 70 | with: 71 | path: ${{ steps.go.outputs.cache }} 72 | key: build-cache-${{ matrix.app }}-${{ hashFiles('**/go.sum') }} 73 | restore-keys: build-cache-${{ matrix.app }}- 74 | 75 | - name: Check if code-gen changes anything 76 | run: | 77 | go generate ./... 
78 | git diff --exit-code && echo "generated code is up to date" || (echo "go generate resulted in changes" && git diff && exit 1) 79 | 80 | - name: Build 81 | env: 82 | PLATFORMS: linux/amd64,linux/arm64 83 | run: | 84 | for platform in $(echo $PLATFORMS | tr "," "\n"); do 85 | export os=$(echo $platform | cut -d'/' -f1) 86 | export arch=$(echo $platform | cut -d'/' -f2) 87 | echo "Building for $os/$arch" 88 | CGO_ENABLED=0 GOOS=${os} GOARCH=${arch} go build -o .work/bin/${{ matrix.app }}-${os}-${arch} cmd/${{ matrix.app }}/main.go & 89 | done 90 | wait 91 | 92 | - name: Set up Docker Buildx 93 | uses: docker/setup-buildx-action@v3 94 | - name: Login to Github Container Registry 95 | run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin 96 | - name: Generate metadata for images 97 | uses: docker/metadata-action@v5 98 | id: metadata 99 | with: 100 | images: ghcr.io/qawolf/crik/${{ matrix.app }} 101 | tags: | 102 | type=ref,event=branch 103 | type=sha,format=short,prefix= 104 | ${{ needs.version.outputs.VERSION }} 105 | - name: Build and push 106 | id: push 107 | uses: docker/build-push-action@v5 108 | with: 109 | context: . 
110 | file: cmd/${{ matrix.app }}/Dockerfile 111 | platforms: linux/amd64,linux/arm64 112 | push: true 113 | tags: ${{ steps.metadata.outputs.tags }} 114 | labels: ${{ steps.metadata.outputs.labels }} 115 | 116 | - name: Attest 117 | uses: actions/attest-build-provenance@v1 118 | with: 119 | subject-name: ghcr.io/qawolf/crik/${{ matrix.app }} 120 | subject-digest: ${{ steps.push.outputs.digest }} 121 | push-to-registry: true 122 | chart: 123 | permissions: 124 | packages: write 125 | contents: read 126 | id-token: write 127 | runs-on: ubuntu-latest 128 | needs: 129 | - image 130 | - version 131 | strategy: 132 | matrix: 133 | chart: [node-state-server] 134 | steps: 135 | - name: Checkout 136 | uses: actions/checkout@v4 137 | - name: Set up Helm 138 | uses: azure/setup-helm@v4 139 | - name: Log in to GitHub Container Registry 140 | run: echo "${{ secrets.GITHUB_TOKEN }}" | helm registry login ghcr.io -u ${{ github.actor }} --password-stdin 141 | - name: Install yq 142 | uses: dcarbone/install-yq-action@v1.1.1 143 | - name: Push the chart 144 | id: push 145 | env: 146 | VERSION: ${{ needs.version.outputs.VERSION }} 147 | run: | 148 | set -e 149 | # Helm doesn't accept v prefix in version. 150 | CHART_TAG=$(echo ${VERSION} | cut -d'v' -f2) 151 | if [ "${{ matrix.chart }}" == "node-state-server" ]; then 152 | yq -i ".nodeStateServer.image.tag = \"${VERSION}\"" cluster/charts/${{ matrix.chart }}/values.yaml 153 | echo "Final values.yaml" 154 | cat cluster/charts/${{ matrix.chart }}/values.yaml 155 | fi 156 | helm package cluster/charts/${{ matrix.chart }} --dependency-update --version=${CHART_TAG} --app-version=${CHART_TAG} 157 | helm push ${{ matrix.chart }}-${CHART_TAG}.tgz oci://ghcr.io/qawolf/crik/charts 2>&1 | tee push.log 158 | DIGEST=$(cat push.log | sed -n 's/.*sha256:\([^ ]*\).*/sha256:\1/p') 159 | echo "digest=${DIGEST}" >> "$GITHUB_OUTPUT" 160 | 161 | # Attestation is not yet available for OCI images. 
162 | # 163 | # - name: Attest 164 | # uses: actions/attest-build-provenance@v1 165 | # with: 166 | # subject-name: ghcr.io/qawolf/crik/charts/${{ matrix.chart }} 167 | # subject-digest: ${{ steps.push.outputs.digest }} 168 | # push-to-registry: true 169 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin/* 9 | Dockerfile.cross 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Go workspace file 18 | go.work 19 | 20 | # Kubernetes Generated files - skip generated files, except for vendored files 21 | !vendor/**/zz_generated.* 22 | 23 | # editor and IDE paraphernalia 24 | .idea 25 | .vscode 26 | *.swp 27 | *.swo 28 | *~ 29 | 30 | .work 31 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | deadline: 5m 3 | allow-parallel-runners: true 4 | 5 | issues: 6 | # don't skip warning about doc comments 7 | # don't exclude the default set of lint 8 | exclude-use-default: false 9 | # restore some of the defaults 10 | # (fill in the rest as needed) 11 | exclude-rules: 12 | - path: "api/*" 13 | linters: 14 | - lll 15 | - path: "internal/*" 16 | linters: 17 | - dupl 18 | - lll 19 | linters: 20 | disable-all: true 21 | enable: 22 | - dupl 23 | - errcheck 24 | - exportloopref 25 | - goconst 26 | - gocyclo 27 | - gofmt 28 | - goimports 29 | - gosimple 30 | - govet 31 | - ineffassign 32 | - lll 33 | - misspell 34 | - nakedret 35 | - prealloc 36 | - staticcheck 37 | - typecheck 38 | - unconvert 39 | - unparam 40 | - unused 41 | -------------------------------------------------------------------------------- 
/LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Checkpoint and Restore in Kubernetes - crik 2 | 3 | `crik` is a project that aims to provide checkpoint and restore functionality for Kubernetes pods mainly targeted for 4 | node shutdown and restart scenarios. It is a command wrapper that, under the hood, utilizes 5 | [`criu`](https://github.com/checkpoint-restore/criu) to checkpoint and restore process trees in a `Pod`. 6 | 7 | > `crik` was first revealed at KubeCon EU 2024: 8 | > [The Party Must Go on - Resume Pods After Spot Instance Shut Down - Muvaffak Onuş, QA Wolf](https://kccnceu2024.sched.com/event/1YeP3) 9 | 10 | It is a work in progress and is not ready for production use. 11 | 12 | `crik` has two components: 13 | 14 | - `crik` - a command wrapper that executes the given command and checkpoints it when SIGTERM is received and restores from 15 | checkpoint when the image directory contains a checkpoint.
16 | - `manager` - a Kubernetes controller that watches `Node` objects and updates its internal map of states so that `crik` 17 | can check whether it should checkpoint or restore depending on its node's state. 18 | 19 | ## Quick Start 20 | 21 | The only prerequisite is to have a Kubernetes cluster running. You can use `kind` to create a local cluster. 22 | 23 | ```bash 24 | kind create cluster 25 | ``` 26 | 27 | Then, you can deploy the simple-loop example where a counter increases every second and you can delete the pod and see 28 | that it continues from where it left off in the new pod. 29 | 30 | ```bash 31 | kubectl apply -f examples/simple-loop.yaml 32 | ``` 33 | 34 | Watch logs: 35 | 36 | ```bash 37 | kubectl logs -f simple-loop-0 38 | ``` 39 | 40 | In another terminal, delete the pod: 41 | 42 | ```bash 43 | kubectl delete pod simple-loop-0 44 | ``` 45 | 46 | Now, a new pod is created. See that it continues from where it left off: 47 | 48 | ```bash 49 | kubectl logs -f simple-loop-0 50 | ``` 51 | 52 | ## Usage 53 | 54 | The application you want to checkpoint and restore should be run with the `crik` command, like the following: 55 | 56 | ```bash 57 | crik run -- app-binary 58 | ``` 59 | 60 | The following is an example `Dockerfile` for your application that installs `crik` and runs your application. It assumes 61 | your application is `entrypoint.sh`. 62 | 63 | ```Dockerfile 64 | FROM ubuntu:22.04 65 | 66 | RUN apt-get update && apt-get install --no-install-recommends --yes gnupg curl ca-certificates 67 | 68 | # crik requires criu to be available.
69 | RUN curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x4E2A48715C45AEEC077B48169B29EEC9246B6CE2" | gpg --dearmor > /usr/share/keyrings/criu-ppa.gpg \ 70 | && echo "deb [signed-by=/usr/share/keyrings/criu-ppa.gpg] https://ppa.launchpadcontent.net/criu/ppa/ubuntu jammy main" > /etc/apt/sources.list.d/criu.list \ 71 | && apt-get update \ 72 | && apt-get install --no-install-recommends --yes criu iptables 73 | 74 | # Install crik 75 | COPY --from=ghcr.io/qawolf/crik/crik:v0.1.2 /usr/local/bin/crik /usr/local/bin/crik 76 | 77 | # Copy your application 78 | COPY entrypoint.sh /entrypoint.sh 79 | 80 | # Run your application with crik 81 | ENTRYPOINT ["crik", "run", "--", "/entrypoint.sh"] 82 | ``` 83 | 84 | ### Configuration 85 | 86 | Not all apps can be checkpointed and restored and for many of them, `criu` may need additional configurations. `crik` 87 | provides a high level configuration interface that you can use to configure `crik` for your application. The following 88 | is the minimum configuration you need to provide for your application and by default `crik` looks for `config.yaml` in 89 | `/etc/crik` directory. 90 | 91 | ```yaml 92 | kind: ConfigMap 93 | metadata: 94 | name: crik-simple-loop 95 | data: 96 | config.yaml: |- 97 | imageDir: /etc/checkpoint 98 | ``` 99 | 100 | Configuration options: 101 | 102 | - `imageDir` - the directory where `crik` will store the checkpoint images. It needs to be available in the same path 103 | in the new `Pod` as well. 104 | - `additionalPaths` - additional paths that `crik` will include in the checkpoint and copy back in the new `Pod`. Populate 105 | this list if you get `file not found` errors in the restore logs. The paths are relative to root `/` and can be 106 | directories or files. 107 | - `inotifyIncompatiblePaths` - paths that `crik` will delete before taking the checkpoint. Populate this list if you get 108 | `fsnotify: Handle 0x278:0x2ffb5b cannot be opened` errors in the restore logs. 
You need to find the inode of the 109 | file by converting `0x2ffb5b` to an integer, and then find the path of the file by running `find / -inum <inode>` and 110 | add the path to this list. See [this comment](https://github.com/checkpoint-restore/criu/issues/1187#issuecomment-1975557296) for more details. 111 | 112 | ### Node State Server 113 | 114 | > Alpha feature. Not ready for production use. 115 | 116 | You can optionally configure `crik` to take a checkpoint only if the node it's running on is going to be shut down. This is 117 | achieved by deploying a Kubernetes controller that watches `Node` events and updates its internal map of states so that 118 | `crik` can check whether it should checkpoint or restore depending on its node's state. This may include direct calls 119 | to the cloud provider's API to check the node's state in the future. 120 | 121 | Deploy the controller: 122 | 123 | ```bash 124 | helm upgrade --install node-state-server oci://ghcr.io/qawolf/crik/charts/node-state-server --version 0.1.2 125 | ``` 126 | 127 | Make sure to include the URL of the server in `crik`'s configuration mounted to your `Pod`. 128 | 129 | ```yaml 130 | # Assuming the chart is deployed to default namespace. 131 | kind: ConfigMap 132 | metadata: 133 | name: crik-simple-loop 134 | data: 135 | config.yaml: |- 136 | imageDir: /etc/checkpoint 137 | nodeStateServerURL: http://crik-node-state-server.default.svc.cluster.local:9376 138 | ``` 139 | 140 | `crik` will hit the `/nodes/<node-name>` endpoint of the server to get the state of the node it's running on when it receives 141 | SIGTERM and take a checkpoint only if it returns `shutting-down` as the node's state.
However, it needs to provide the 142 | node name to the server so make sure to add the following environment variable to your container spec in your `Pod`: 143 | 144 | ```yaml 145 | env: 146 | - name: KUBERNETES_NODE_NAME 147 | valueFrom: 148 | fieldRef: 149 | fieldPath: spec.nodeName 150 | ``` 151 | 152 | ## Developing 153 | 154 | Build `crik`: 155 | 156 | ```bash 157 | go build -o crik cmd/crik/main.go 158 | ``` 159 | 160 | ## Why not upstream? 161 | 162 | Taking checkpoints of processes and restoring them from within the container requires quite a few privileges to be given 163 | to the container. The best approach is to execute these operations at the container runtime level and today, container 164 | engines such as CRI-O and Podman do have native support for using `criu` to checkpoint and restore the whole containers 165 | and there is an ongoing effort to bring this functionality to Kubernetes as well. The first use case being the forensic 166 | analysis via checkpoints as described [here](https://kubernetes.io/blog/2023/03/10/forensic-container-analysis/). 167 | 168 | While it is the better approach, since it's such a low-level change, it's expected to take a while to be available in 169 | mainstream Kubernetes in an easily consumable way. For example, while taking a checkpoint is possible through `kubelet` 170 | API if you're using CRI-O, restoring it as another `Pod` in a different `Node` is not natively supported yet. 171 | 172 | `crik` allows you to use `criu` to checkpoint and restore a `Pod` to another `Node` today without waiting for the native 173 | support in Kubernetes. Once the native support is available, `crik` will utilize it under the hood. 174 | 175 | ## License 176 | 177 | This project is licensed under the Apache License, Version 2.0 - see the [LICENSE](LICENSE) file for details. 
178 | -------------------------------------------------------------------------------- /cluster/charts/node-state-server/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: node-state-server 3 | version: 0.1.0 4 | description: A Helm chart for the Node State Server used by crik. 5 | -------------------------------------------------------------------------------- /cluster/charts/node-state-server/templates/manager-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: crik-node-state-server 5 | spec: 6 | selector: 7 | app.kubernetes.io/name: crik-node-state-server 8 | app.kubernetes.io/part-of: crik 9 | ports: 10 | - protocol: TCP 11 | port: 9376 12 | targetPort: 9376 13 | --- 14 | apiVersion: apps/v1 15 | kind: Deployment 16 | metadata: 17 | name: crik-node-state-server 18 | spec: 19 | selector: 20 | matchLabels: 21 | app.kubernetes.io/name: crik-node-state-server 22 | app.kubernetes.io/part-of: crik 23 | replicas: 3 24 | template: 25 | metadata: 26 | annotations: 27 | kubectl.kubernetes.io/default-container: main 28 | labels: 29 | app.kubernetes.io/name: crik-node-state-server 30 | app.kubernetes.io/part-of: crik 31 | spec: 32 | serviceAccountName: crik-node-state-server 33 | securityContext: 34 | runAsNonRoot: true 35 | containers: 36 | - name: main 37 | image: "{{ .Values.nodeStateServer.image.repository }}:{{ .Values.nodeStateServer.image.tag }}" 38 | args: 39 | - --metrics-port=8080 40 | - --health-probes-port=8081 41 | - --port=9376 42 | {{- if .Values.nodeStateServer.debug }} 43 | - --debug 44 | {{- end }} 45 | ports: 46 | # Exposes /nodes 47 | - name: main 48 | containerPort: 9376 49 | protocol: TCP 50 | # Exposes only /metrics 51 | - name: metrics 52 | containerPort: 8080 53 | protocol: TCP 54 | # Exposes /healthz and /readyz 55 | - name: probes 56 | containerPort: 8081 57 | protocol: 
TCP 58 | securityContext: 59 | allowPrivilegeEscalation: false 60 | capabilities: 61 | drop: 62 | - "ALL" 63 | livenessProbe: 64 | httpGet: 65 | path: /healthz 66 | port: 8081 67 | initialDelaySeconds: 15 68 | periodSeconds: 20 69 | readinessProbe: 70 | httpGet: 71 | path: /readyz 72 | port: 8081 73 | initialDelaySeconds: 5 74 | periodSeconds: 10 75 | resources: 76 | limits: 77 | cpu: 500m 78 | memory: 128Mi 79 | requests: 80 | cpu: 10m 81 | memory: 64Mi 82 | terminationGracePeriodSeconds: 10 83 | -------------------------------------------------------------------------------- /cluster/charts/node-state-server/templates/manager-permissions.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: crik-node-state-server 5 | labels: 6 | app.kubernetes.io/name: crik-node-state-server 7 | app.kubernetes.io/part-of: crik 8 | --- 9 | apiVersion: rbac.authorization.k8s.io/v1 10 | kind: ClusterRole 11 | metadata: 12 | name: crik-node-state-server 13 | labels: 14 | app.kubernetes.io/name: crik-node-state-server 15 | app.kubernetes.io/part-of: crik 16 | rules: 17 | # Nodes 18 | - apiGroups: 19 | - "" 20 | resources: 21 | - nodes 22 | verbs: 23 | - get 24 | - list 25 | - watch 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1 28 | kind: ClusterRoleBinding 29 | metadata: 30 | name: crik-node-state-server 31 | labels: 32 | app.kubernetes.io/name: crik-node-state-server 33 | app.kubernetes.io/part-of: crik 34 | roleRef: 35 | apiGroup: rbac.authorization.k8s.io 36 | kind: ClusterRole 37 | name: crik-node-state-server 38 | subjects: 39 | - kind: ServiceAccount 40 | name: crik-node-state-server 41 | namespace: {{ .Release.Namespace }} 42 | -------------------------------------------------------------------------------- /cluster/charts/node-state-server/values.yaml: -------------------------------------------------------------------------------- 1 | nodeStateServer: 2 | debug: false 
3 | image: 4 | repository: ghcr.io/qawolf/crik/node-state-server 5 | tag: v0.1.0 6 | -------------------------------------------------------------------------------- /cmd/crik/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static-debian12:nonroot 2 | ARG TARGETOS 3 | ARG TARGETARCH 4 | 5 | COPY .work/bin/crik-${TARGETOS}-${TARGETARCH} /usr/local/bin/crik 6 | USER 65532 7 | 8 | ENTRYPOINT ["crik"] 9 | -------------------------------------------------------------------------------- /cmd/crik/main.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | // +build linux 3 | 4 | /* 5 | Copyright 2024 QA Wolf Inc. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | */ 19 | 20 | package main 21 | 22 | import ( 23 | "encoding/json" 24 | "errors" 25 | "fmt" 26 | "net/http" 27 | "os" 28 | "os/exec" 29 | "os/signal" 30 | "strings" 31 | "syscall" 32 | 33 | "github.com/alecthomas/kong" 34 | "github.com/checkpoint-restore/go-criu/v7" 35 | 36 | "github.com/qawolf/crik/internal/controller/node" 37 | cexec "github.com/qawolf/crik/internal/exec" 38 | ) 39 | 40 | var signalChan = make(chan os.Signal, 1) 41 | 42 | var cli struct { 43 | Debug bool `help:"Enable debug mode."` 44 | 45 | Run Run `cmd:"" help:"Run given command wrapped by crik."` 46 | } 47 | 48 | func main() { 49 | ctx := kong.Parse(&cli) 50 | if err := ctx.Run(); err != nil { 51 | fmt.Printf("failed to run the command: %s", err.Error()) 52 | os.Exit(1) 53 | } 54 | } 55 | 56 | type Run struct { 57 | Args []string `arg:"" optional:"" passthrough:"" name:"command" help:"Command and its arguments to run. Required if --image-dir is not given or empty."` 58 | 59 | ConfigPath string `type:"path" default:"/etc/crik/config.yaml" help:"Path to the configuration file."` 60 | } 61 | 62 | func (r *Run) Run() error { 63 | cfg, err := cexec.ReadConfiguration(r.ConfigPath) 64 | if err != nil && !errors.Is(err, os.ErrNotExist) { 65 | return fmt.Errorf("failed to read configuration: %w", err) 66 | } 67 | willRestore, err := shouldRestore(cfg) 68 | if err != nil { 69 | return fmt.Errorf("failed to check if restore is needed: %w", err) 70 | } 71 | if willRestore { 72 | fmt.Printf("A checkpoint has been found in %s. Restoring.\n", cfg.ImageDir) 73 | if err := cexec.RestoreWithCmd(cfg.ImageDir); err != nil { 74 | return fmt.Errorf("failed to restore: %w", err) 75 | } 76 | return nil 77 | } 78 | if len(r.Args) == 0 { 79 | return fmt.Errorf("command is required when there is no checkpoint to restore, i.e. --image-dir is not given or empty") 80 | } 81 | // Make sure the PID is a high number so that it's not taken up during restore. 
82 | lastPidPath := "/proc/sys/kernel/ns_last_pid" 83 | if err := os.WriteFile(lastPidPath, []byte("9000"), 0644); err != nil { 84 | return fmt.Errorf("failed to write to %s: %w", lastPidPath, err) 85 | } 86 | 87 | cmd := exec.Command(r.Args[0], r.Args[1:]...) 88 | cmd.SysProcAttr = &syscall.SysProcAttr{ 89 | Setsid: true, 90 | Unshareflags: syscall.CLONE_NEWIPC, 91 | } 92 | cmd.Stdin = nil 93 | cmd.Stdout = os.Stdout 94 | cmd.Stderr = os.Stderr 95 | if err := cmd.Start(); err != nil { 96 | return fmt.Errorf("failed to start command: %w", err) 97 | } 98 | fmt.Printf("Command started with PID %d\n", cmd.Process.Pid) 99 | if cfg.ImageDir != "" { 100 | fmt.Printf("Setting up SIGTERM handler to take checkpoint in %s\n", cfg.ImageDir) 101 | signal.Notify(signalChan, syscall.SIGTERM) 102 | sig := <-signalChan 103 | switch sig { 104 | case syscall.SIGTERM: 105 | fmt.Println("Received SIGTERM.") 106 | // Take checkpoint only if the node is in shutting down state or the node state server is not given. 107 | if cfg.NodeStateServerURL != "" { 108 | nodeName := os.Getenv("KUBERNETES_NODE_NAME") 109 | resp, err := http.Get(fmt.Sprintf("%s/nodes/%s", cfg.NodeStateServerURL, nodeName)) 110 | if err != nil { 111 | return fmt.Errorf("failed to get node state: %w", err) 112 | } 113 | defer resp.Body.Close() 114 | var response node.Node 115 | if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { 116 | return fmt.Errorf("failed to decode node state: %w", err) 117 | } 118 | if response.State != node.NodeStateShuttingDown { 119 | fmt.Println("Node is not in shutting down state. 
Not taking checkpoint.") 120 | if err := cmd.Process.Signal(syscall.SIGTERM); err != nil { 121 | return fmt.Errorf("failed to send SIGTERM to the process: %w", err) 122 | } 123 | return cmd.Wait() 124 | } 125 | } 126 | duration, err := cexec.TakeCheckpoint(criu.MakeCriu(), cmd.Process.Pid, cfg) 127 | if err != nil { 128 | return fmt.Errorf("failed to take checkpoint: %w", err) 129 | } 130 | fmt.Printf("Checkpoint taken in %s\n", duration) 131 | } 132 | } 133 | return cmd.Wait() 134 | } 135 | 136 | func shouldRestore(cfg cexec.Configuration) (bool, error) { 137 | if cfg.ImageDir == "" { 138 | return false, nil 139 | } 140 | entries, err := os.ReadDir(cfg.ImageDir) 141 | if os.IsNotExist(err) { 142 | return false, nil 143 | } 144 | if err != nil { 145 | return false, err 146 | } 147 | for _, entry := range entries { 148 | if strings.HasSuffix(entry.Name(), ".img") { 149 | return true, nil 150 | } 151 | } 152 | return false, nil 153 | } 154 | -------------------------------------------------------------------------------- /cmd/node-state-server/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM gcr.io/distroless/static-debian12:nonroot 2 | ARG TARGETOS 3 | ARG TARGETARCH 4 | 5 | COPY .work/bin/node-state-server-${TARGETOS}-${TARGETARCH} /usr/local/bin/node-state-server 6 | USER 65532 7 | 8 | ENTRYPOINT ["node-state-server"] 9 | -------------------------------------------------------------------------------- /cmd/node-state-server/main.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 QA Wolf Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "flag" 21 | "net/http" 22 | "os" 23 | "time" 24 | 25 | // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) 26 | // to ensure that exec-entrypoint and run can make use of them. 27 | _ "k8s.io/client-go/plugin/pkg/client/auth" 28 | 29 | "github.com/crossplane/crossplane-runtime/pkg/logging" 30 | "github.com/go-logr/logr" 31 | "go.uber.org/zap/zapcore" 32 | "k8s.io/apimachinery/pkg/runtime" 33 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 34 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 35 | ctrl "sigs.k8s.io/controller-runtime" 36 | "sigs.k8s.io/controller-runtime/pkg/healthz" 37 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 38 | metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" 39 | 40 | "github.com/qawolf/crik/internal/controller/node" 41 | ) 42 | 43 | var ( 44 | scheme = runtime.NewScheme() 45 | setupLog = ctrl.Log.WithName("setup") 46 | ) 47 | 48 | func init() { 49 | utilruntime.Must(clientgoscheme.AddToScheme(scheme)) 50 | } 51 | 52 | func main() { 53 | var metricsPort string 54 | var healthProbesPort string 55 | var serverPort string 56 | var debug bool 57 | flag.StringVar(&metricsPort, "metrics-port", "8080", "The port used by the metrics server.") 58 | flag.StringVar(&healthProbesPort, "health-probes-port", "8081", "The port used to serve health probe endpoints.") 59 | flag.StringVar(&serverPort, "port", "9376", "The port used to serve node state endpoint.") 60 | flag.BoolVar(&debug, "debug", false, "Turn on debug logs.") 61 | 
flag.Parse() 62 | var zlog logr.Logger 63 | if debug { 64 | zlog = zap.New( 65 | zap.UseDevMode(true), 66 | zap.Level(zapcore.DebugLevel), 67 | ) 68 | } else { 69 | zlog = zap.New( 70 | zap.UseDevMode(false), 71 | ) 72 | } 73 | log := logging.NewLogrLogger(zlog) 74 | ctrl.SetLogger(zlog) 75 | mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ 76 | Logger: zlog, 77 | Scheme: scheme, 78 | Metrics: metricsserver.Options{BindAddress: ":" + metricsPort}, 79 | HealthProbeBindAddress: ":" + healthProbesPort, 80 | // We don't need Node controller to be a singleton since it doesn't manipulate any state. 81 | LeaderElection: false, 82 | }) 83 | if err != nil { 84 | setupLog.Error(err, "unable to start manager") 85 | os.Exit(1) 86 | } 87 | 88 | s := node.NewServer() 89 | go func() { 90 | if err := (&http.Server{ 91 | Addr: ":" + serverPort, 92 | Handler: s, 93 | ReadHeaderTimeout: 1 * time.Second, 94 | }).ListenAndServe(); err != nil { 95 | setupLog.Error(err, "unable to start server") 96 | os.Exit(1) 97 | } 98 | }() 99 | if err := node.Setup(mgr, s, log); err != nil { 100 | setupLog.Error(err, "unable to create controller", "controller", "Node") 101 | os.Exit(1) 102 | } 103 | 104 | if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { 105 | setupLog.Error(err, "unable to set up health check") 106 | os.Exit(1) 107 | } 108 | if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil { 109 | setupLog.Error(err, "unable to set up ready check") 110 | os.Exit(1) 111 | } 112 | 113 | setupLog.Info("starting manager") 114 | if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil { 115 | setupLog.Error(err, "problem running manager") 116 | os.Exit(1) 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /examples/simple-loop.yaml: -------------------------------------------------------------------------------- 1 | #apiVersion: v1 2 | #kind: ConfigMap 3 | #metadata: 4 | # name: crik-browser 5 | 
#data: 6 | # config.yaml: |- 7 | # imageDir: /etc/checkpoint 8 | # additionalPaths: 9 | # - /root/.cache # webkit 10 | # inotifyIncompatiblePaths: 11 | # - /root/.config/glib-2.0/settings # webkit 12 | # - /usr/share/zoneinfo # chrome 13 | --- 14 | apiVersion: v1 15 | kind: ConfigMap 16 | metadata: 17 | name: crik-simple-loop 18 | data: 19 | config.yaml: |- 20 | imageDir: /etc/checkpoint 21 | --- 22 | apiVersion: apps/v1 23 | kind: StatefulSet 24 | metadata: 25 | name: simple-loop 26 | spec: 27 | replicas: 1 28 | selector: 29 | matchLabels: 30 | app: simple-loop 31 | template: 32 | metadata: 33 | labels: 34 | app: simple-loop 35 | spec: 36 | containers: 37 | - name: main 38 | image: ghcr.io/qawolf/crik:v0.1.0 39 | imagePullPolicy: IfNotPresent 40 | securityContext: 41 | privileged: true 42 | # capabilities: 43 | # add: [ "CHECKPOINT_RESTORE", "NET_ADMIN", "SYS_ADMIN" ] 44 | args: 45 | - bash 46 | - -c 47 | - 'for ((i=0; ; i++)); do echo "Count is at $i"; sleep 1; done' 48 | volumeMounts: 49 | - mountPath: /etc/crik 50 | name: crik-config 51 | - mountPath: /etc/checkpoint 52 | name: checkpoint-storage 53 | - mountPath: /proc/sys/kernel/ns_last_pid 54 | name: checkpoint-cap 55 | env: 56 | - name: KUBERNETES_NODE_NAME 57 | valueFrom: 58 | fieldRef: 59 | fieldPath: spec.nodeName 60 | volumes: 61 | - name: crik-config 62 | configMap: 63 | name: crik-simple-loop 64 | - name: checkpoint-cap 65 | hostPath: 66 | path: /proc/sys/kernel/ns_last_pid 67 | type: File 68 | volumeClaimTemplates: 69 | - metadata: 70 | name: checkpoint-storage 71 | spec: 72 | accessModes: [ "ReadWriteOnce" ] 73 | resources: 74 | requests: 75 | storage: 1Gi 76 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/qawolf/crik 2 | 3 | go 1.22 4 | 5 | require ( 6 | github.com/alecthomas/kong v0.9.0 7 | github.com/checkpoint-restore/go-criu/v7 v7.1.0 8 | 
github.com/crossplane/crossplane-runtime v1.15.1 9 | github.com/go-logr/logr v1.4.1 10 | github.com/pkg/errors v0.9.1 11 | go.uber.org/zap v1.27.0 12 | google.golang.org/protobuf v1.33.0 13 | k8s.io/api v0.29.1 14 | k8s.io/apimachinery v0.29.1 15 | k8s.io/client-go v0.29.1 16 | sigs.k8s.io/controller-runtime v0.17.0 17 | sigs.k8s.io/yaml v1.4.0 18 | ) 19 | 20 | require ( 21 | github.com/beorn7/perks v1.0.1 // indirect 22 | github.com/cespare/xxhash/v2 v2.2.0 // indirect 23 | github.com/davecgh/go-spew v1.1.1 // indirect 24 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 25 | github.com/evanphx/json-patch/v5 v5.8.0 // indirect 26 | github.com/fsnotify/fsnotify v1.7.0 // indirect 27 | github.com/go-logr/zapr v1.3.0 // indirect 28 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 29 | github.com/go-openapi/jsonreference v0.20.2 // indirect 30 | github.com/go-openapi/swag v0.22.3 // indirect 31 | github.com/gogo/protobuf v1.3.2 // indirect 32 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 33 | github.com/golang/protobuf v1.5.3 // indirect 34 | github.com/google/gnostic-models v0.6.8 // indirect 35 | github.com/google/go-cmp v0.6.0 // indirect 36 | github.com/google/gofuzz v1.2.0 // indirect 37 | github.com/google/uuid v1.4.0 // indirect 38 | github.com/imdario/mergo v0.3.16 // indirect 39 | github.com/josharian/intern v1.0.0 // indirect 40 | github.com/json-iterator/go v1.1.12 // indirect 41 | github.com/mailru/easyjson v0.7.7 // indirect 42 | github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect 43 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 44 | github.com/modern-go/reflect2 v1.0.2 // indirect 45 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 46 | github.com/prometheus/client_golang v1.18.0 // indirect 47 | github.com/prometheus/client_model v0.5.0 // indirect 48 | github.com/prometheus/common v0.45.0 // indirect 49 | 
github.com/prometheus/procfs v0.12.0 // indirect 50 | github.com/spf13/pflag v1.0.5 // indirect 51 | go.uber.org/multierr v1.11.0 // indirect 52 | golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 // indirect 53 | golang.org/x/net v0.20.0 // indirect 54 | golang.org/x/oauth2 v0.15.0 // indirect 55 | golang.org/x/sys v0.18.0 // indirect 56 | golang.org/x/term v0.16.0 // indirect 57 | golang.org/x/text v0.14.0 // indirect 58 | golang.org/x/time v0.5.0 // indirect 59 | gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 60 | google.golang.org/appengine v1.6.8 // indirect 61 | gopkg.in/inf.v0 v0.9.1 // indirect 62 | gopkg.in/yaml.v2 v2.4.0 // indirect 63 | gopkg.in/yaml.v3 v3.0.1 // indirect 64 | k8s.io/apiextensions-apiserver v0.29.1 // indirect 65 | k8s.io/component-base v0.29.1 // indirect 66 | k8s.io/klog/v2 v2.110.1 // indirect 67 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 // indirect 68 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 69 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 70 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 71 | ) 72 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/assert/v2 v2.6.0 h1:o3WJwILtexrEUk3cUVal3oiQY2tfgr/FHWiz/v2n4FU= 2 | github.com/alecthomas/assert/v2 v2.6.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= 3 | github.com/alecthomas/kong v0.9.0 h1:G5diXxc85KvoV2f0ZRVuMsi45IrBgx9zDNGNj165aPA= 4 | github.com/alecthomas/kong v0.9.0/go.mod h1:Y47y5gKfHp1hDc7CH7OeXgLIpp+Q2m1Ni0L5s3bI8Os= 5 | github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= 6 | github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= 7 | github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= 8 | github.com/beorn7/perks v1.0.1/go.mod 
h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= 9 | github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= 10 | github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= 11 | github.com/checkpoint-restore/go-criu/v7 v7.1.0 h1:JbQyO4o+P8ycNTMLPiiDqXg49bAcy4WljWCzYQho35A= 12 | github.com/checkpoint-restore/go-criu/v7 v7.1.0/go.mod h1:1svAtmbtvX4BKI45OFzgoTTLG7oYFKdColv/Vcsb2A8= 13 | github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= 14 | github.com/crossplane/crossplane-runtime v1.15.1 h1:g1h75tNYOQT152IUNxs8ZgSsRFQKrZN9z69KefMujXs= 15 | github.com/crossplane/crossplane-runtime v1.15.1/go.mod h1:kRcJjJQmBFrR2n/KhwL8wYS7xNfq3D8eK4JliEScOHI= 16 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 18 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 19 | github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= 20 | github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= 21 | github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U= 22 | github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= 23 | github.com/evanphx/json-patch/v5 v5.8.0 h1:lRj6N9Nci7MvzrXuX6HFzU8XjmhPiXPlsKEy1u0KQro= 24 | github.com/evanphx/json-patch/v5 v5.8.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= 25 | github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= 26 | github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= 27 | github.com/go-logr/logr v1.3.0/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 28 | github.com/go-logr/logr v1.4.1 h1:pKouT5E8xu9zeFC39JXRDukb6JFQPXM5p5I91188VAQ= 29 | 
github.com/go-logr/logr v1.4.1/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 30 | github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= 31 | github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= 32 | github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= 33 | github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= 34 | github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= 35 | github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= 36 | github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= 37 | github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= 38 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= 39 | github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= 40 | github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= 41 | github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 42 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= 43 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 44 | github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= 45 | github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 46 | github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= 47 | github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 48 | github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= 49 | 
github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= 50 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 51 | github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 52 | github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= 53 | github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= 54 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 55 | github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= 56 | github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 57 | github.com/google/pprof v0.0.0-20240117000934-35fc243c5815 h1:WzfWbQz/Ze8v6l++GGbGNFZnUShVpP/0xffCPLL+ax8= 58 | github.com/google/pprof v0.0.0-20240117000934-35fc243c5815/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik= 59 | github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4= 60 | github.com/google/uuid v1.4.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 61 | github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 62 | github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= 63 | github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= 64 | github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= 65 | github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= 66 | github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= 67 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 68 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 69 | github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= 70 | github.com/kisielk/gotool 
v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 71 | github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= 72 | github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= 73 | github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= 74 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 75 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 76 | github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= 77 | github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= 78 | github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= 79 | github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 80 | github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 h1:jWpvCLoY8Z/e3VKvlsiIGKtc+UG6U5vzxaoagmhXfyg= 81 | github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0/go.mod h1:QUyp042oQthUoa9bqDv0ER0wrtXnBruoNd7aNjkbP+k= 82 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 83 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 84 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 85 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 86 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 87 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= 88 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= 89 | github.com/onsi/ginkgo/v2 v2.14.0 h1:vSmGj2Z5YPb9JwCWT6z6ihcUvDhuXLc3sJiqd3jMKAY= 90 | github.com/onsi/ginkgo/v2 
v2.14.0/go.mod h1:JkUdW7JkN0V6rFvsHcJ478egV3XH9NxpD27Hal/PhZw= 91 | github.com/onsi/gomega v1.30.0 h1:hvMK7xYz4D3HapigLTeGdId/NcfQx1VHMJc60ew99+8= 92 | github.com/onsi/gomega v1.30.0/go.mod h1:9sxs+SwGrKI0+PWe4Fxa9tFQQBG5xSsSbMXOI8PPpoQ= 93 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 94 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 95 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 96 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 97 | github.com/prometheus/client_golang v1.18.0 h1:HzFfmkOzH5Q8L8G+kSJKUx5dtG87sewO+FoDDqP5Tbk= 98 | github.com/prometheus/client_golang v1.18.0/go.mod h1:T+GXkCk5wSJyOqMIzVgvvjFDlkOQntgjkJWKrN5txjA= 99 | github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw= 100 | github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI= 101 | github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lneoxM= 102 | github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= 103 | github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= 104 | github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= 105 | github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= 106 | github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= 107 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 108 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 109 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 110 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 111 | github.com/stretchr/objx v0.5.0/go.mod 
h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 112 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 113 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 114 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 115 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 116 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 117 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 118 | github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 119 | github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= 120 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 121 | go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= 122 | go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= 123 | go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= 124 | go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= 125 | go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= 126 | go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= 127 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 128 | golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 129 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 130 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 131 | golang.org/x/exp v0.0.0-20240112132812-db7319d0e0e3 h1:hNQpMuAJe5CtcUqCXaWga3FHu+kQvCqcsoVaQgSV60o= 132 | golang.org/x/exp 
v0.0.0-20240112132812-db7319d0e0e3/go.mod h1:idGWGoKP1toJGkd5/ig9ZLuPcZBC3ewk7SzmH0uou08= 133 | golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 134 | golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= 135 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 136 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 137 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 138 | golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 139 | golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= 140 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 141 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 142 | golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= 143 | golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= 144 | golang.org/x/oauth2 v0.15.0 h1:s8pnnxNVzjWyrvYdFUQq5llS1PX2zhPXmccZv99h7uQ= 145 | golang.org/x/oauth2 v0.15.0/go.mod h1:q48ptWNTY5XWf+JNten23lcvHpLJ0ZSxF5ttTHKVCAM= 146 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 147 | golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 148 | golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 149 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 150 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 151 | golang.org/x/sys 
v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 152 | golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 153 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 154 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 155 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 156 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 157 | golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= 158 | golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 159 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 160 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 161 | golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE= 162 | golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY= 163 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 164 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 165 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 166 | golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= 167 | golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= 168 | golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 169 | golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= 170 | golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= 171 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 172 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 173 | golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= 174 | golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= 175 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 176 | golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc= 177 | golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps= 178 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 179 | golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 180 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 181 | golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 182 | gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= 183 | gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= 184 | google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= 185 | google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= 186 | google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= 187 | google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= 188 | google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= 189 | google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 190 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 191 | 
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= 192 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= 193 | gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= 194 | gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= 195 | gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 196 | gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= 197 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 198 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 199 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 200 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 201 | k8s.io/api v0.29.1 h1:DAjwWX/9YT7NQD4INu49ROJuZAAAP/Ijki48GUPzxqw= 202 | k8s.io/api v0.29.1/go.mod h1:7Kl10vBRUXhnQQI8YR/R327zXC8eJ7887/+Ybta+RoQ= 203 | k8s.io/apiextensions-apiserver v0.29.1 h1:S9xOtyk9M3Sk1tIpQMu9wXHm5O2MX6Y1kIpPMimZBZw= 204 | k8s.io/apiextensions-apiserver v0.29.1/go.mod h1:zZECpujY5yTW58co8V2EQR4BD6A9pktVgHhvc0uLfeU= 205 | k8s.io/apimachinery v0.29.1 h1:KY4/E6km/wLBguvCZv8cKTeOwwOBqFNjwJIdMkMbbRc= 206 | k8s.io/apimachinery v0.29.1/go.mod h1:6HVkd1FwxIagpYrHSwJlQqZI3G9LfYWRPAkUvLnXTKU= 207 | k8s.io/client-go v0.29.1 h1:19B/+2NGEwnFLzt0uB5kNJnfTsbV8w6TgQRz9l7ti7A= 208 | k8s.io/client-go v0.29.1/go.mod h1:TDG/psL9hdet0TI9mGyHJSgRkW3H9JZk2dNEUS7bRks= 209 | k8s.io/component-base v0.29.1 h1:MUimqJPCRnnHsskTTjKD+IC1EHBbRCVyi37IoFBrkYw= 210 | k8s.io/component-base v0.29.1/go.mod h1:fP9GFjxYrLERq1GcWWZAE3bqbNcDKDytn2srWuHTtKc= 211 | k8s.io/klog/v2 v2.110.1 h1:U/Af64HJf7FcwMcXyKm2RPM22WZzyR7OSpYj5tg3cL0= 212 | k8s.io/klog/v2 v2.110.1/go.mod h1:YGtd1984u+GgbuZ7e08/yBuAfKLSO0+uR1Fhi6ExXjo= 213 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00 
h1:aVUu9fTY98ivBPKR9Y5w/AuzbMm96cd3YHRTU83I780= 214 | k8s.io/kube-openapi v0.0.0-20231010175941-2dd684a91f00/go.mod h1:AsvuZPBlUDVuCdzJ87iajxtXuR9oktsTctW/R9wwouA= 215 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b h1:sgn3ZU783SCgtaSJjpcVVlRqd6GSnlTLKgpAAttJvpI= 216 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= 217 | sigs.k8s.io/controller-runtime v0.17.0 h1:fjJQf8Ukya+VjogLO6/bNX9HE6Y2xpsO5+fyS26ur/s= 218 | sigs.k8s.io/controller-runtime v0.17.0/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s= 219 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= 220 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= 221 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= 222 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= 223 | sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= 224 | sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= 225 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 QA Wolf Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /internal/controller/node/controller.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 QA Wolf Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | // Package node contains the controller logic for the Nodes. 18 | package node 19 | 20 | import ( 21 | "context" 22 | "strings" 23 | 24 | "github.com/crossplane/crossplane-runtime/pkg/event" 25 | "github.com/crossplane/crossplane-runtime/pkg/logging" 26 | "github.com/pkg/errors" 27 | corev1 "k8s.io/api/core/v1" 28 | kerrors "k8s.io/apimachinery/pkg/api/errors" 29 | "k8s.io/apimachinery/pkg/runtime" 30 | ctrl "sigs.k8s.io/controller-runtime" 31 | "sigs.k8s.io/controller-runtime/pkg/client" 32 | ) 33 | 34 | const ( 35 | controllerName = "node-controller" 36 | 37 | errGetNode = "failed to get node" 38 | ) 39 | 40 | // Setup sets up the controller with the Manager. 41 | func Setup(mgr ctrl.Manager, server *Server, log logging.Logger) error { 42 | r := NewReconciler( 43 | mgr.GetClient(), 44 | mgr.GetScheme(), 45 | WithEventRecorder(event.NewAPIRecorder(mgr.GetEventRecorderFor(controllerName))), 46 | WithLogger(log.WithValues("controller", controllerName)), 47 | ) 48 | return ctrl.NewControllerManagedBy(mgr). 
49 | For(&corev1.Node{}).Complete(r) 50 | } 51 | 52 | type NodeStateWriter interface { 53 | SetNodeState(name string, state NodeState) 54 | DeleteNodeState(name string) 55 | } 56 | 57 | type NopNodeStateWriter struct{} 58 | 59 | func (NopNodeStateWriter) SetNodeState(name string, state NodeState) {} 60 | func (NopNodeStateWriter) DeleteNodeState(name string) {} 61 | 62 | // WithEventRecorder sets the EventRecorder for the Reconciler. 63 | func WithEventRecorder(e event.Recorder) ReconcilerOption { 64 | return func(r *Reconciler) { 65 | r.record = e 66 | } 67 | } 68 | 69 | // WithLogger sets the Logger for the Reconciler. 70 | func WithLogger(l logging.Logger) ReconcilerOption { 71 | return func(r *Reconciler) { 72 | r.rootLog = l 73 | } 74 | } 75 | 76 | // WithNodeStateWriter sets the NodeStateWriter for the Reconciler. 77 | func WithNodeStateWriter(s NodeStateWriter) ReconcilerOption { 78 | return func(r *Reconciler) { 79 | r.nodes = s 80 | } 81 | } 82 | 83 | // ReconcilerOption is a function that sets some option on the Reconciler. 84 | type ReconcilerOption func(*Reconciler) 85 | 86 | // NewReconciler returns a new Reconciler. 87 | func NewReconciler(c client.Client, s *runtime.Scheme, opts ...ReconcilerOption) *Reconciler { 88 | r := &Reconciler{ 89 | client: c, 90 | Scheme: s, 91 | record: event.NewNopRecorder(), 92 | rootLog: logging.NewNopLogger(), 93 | nodes: NopNodeStateWriter{}, 94 | } 95 | for _, f := range opts { 96 | f(r) 97 | } 98 | return r 99 | } 100 | 101 | // Reconciler reconciles a Node object to detect shutdown events and notify Playground pods running on that Node. 102 | type Reconciler struct { 103 | client client.Client 104 | Scheme *runtime.Scheme 105 | 106 | record event.Recorder 107 | rootLog logging.Logger 108 | 109 | nodes NodeStateWriter 110 | } 111 | 112 | // Reconcile gets triggered by every event on Node resources. 
113 | func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 114 | log := r.rootLog.WithValues("node", req.NamespacedName) 115 | 116 | n := &corev1.Node{} 117 | if err := r.client.Get(ctx, req.NamespacedName, n); err != nil { 118 | if kerrors.IsNotFound(err) { 119 | r.nodes.DeleteNodeState(req.Name) 120 | return ctrl.Result{}, nil 121 | } 122 | return ctrl.Result{}, errors.Wrap(err, errGetNode) 123 | } 124 | var readyCondition corev1.NodeCondition 125 | for _, c := range n.Status.Conditions { 126 | if c.Type == corev1.NodeReady { 127 | readyCondition = c 128 | break 129 | } 130 | } 131 | // NOTE(muvaf): This covers GKE node shutdown event. It may or may not work for Kubernetes deployments. 132 | if !(readyCondition.Status == corev1.ConditionFalse && 133 | readyCondition.Reason == "KubeletNotReady" && 134 | strings.Contains(readyCondition.Message, "node is shutting down")) { 135 | return ctrl.Result{}, nil 136 | } 137 | log.Debug("node is shutting down", "node", n.Name, "phase", n.Status.Phase) 138 | r.nodes.SetNodeState(n.Name, NodeStateShuttingDown) 139 | return ctrl.Result{}, nil 140 | } 141 | -------------------------------------------------------------------------------- /internal/controller/node/server.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 QA Wolf Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
// Node is the JSON document served for a single node: its name and its last recorded state.
type Node struct {
	Name  string    `json:"name"`
	State NodeState `json:"state"`
}

// NodeState is the state recorded for a node.
type NodeState string

// Node states.
var (
	// NodeStateUnknown is reported for nodes that have no recorded state.
	NodeStateUnknown NodeState = "unknown"
	// NodeStateShuttingDown is recorded for nodes that are shutting down.
	NodeStateShuttingDown NodeState = "shutting-down"
)

// Server keeps node states in memory and serves them over HTTP. Access to the map is guarded by the embedded
// RWMutex. Use NewServer to construct it; the zero value has a nil map and a nil mutex.
type Server struct {
	nodes map[string]NodeState

	*sync.RWMutex
}

// Server is used as an HTTP handler.
var _ http.Handler = (*Server)(nil)

// NewServer returns a Server with no recorded node states.
func NewServer() *Server {
	return &Server{
		nodes:   map[string]NodeState{},
		RWMutex: &sync.RWMutex{},
	}
}

// SetNodeState sets the state of a node.
func (s *Server) SetNodeState(name string, state NodeState) {
	s.Lock()
	defer s.Unlock()
	s.nodes[name] = state
}

// GetNodeState gets the state of a node. It returns NodeStateUnknown when nothing is recorded for the node.
func (s *Server) GetNodeState(name string) NodeState {
	s.RLock()
	defer s.RUnlock()
	if state, ok := s.nodes[name]; ok {
		return state
	}
	return NodeStateUnknown
}

// DeleteNodeState deletes the state of a node from the map.
func (s *Server) DeleteNodeState(name string) {
	s.Lock()
	defer s.Unlock()
	delete(s.nodes, name)
}

// ServeHTTP serves the state of the node named by the request path, i.e. the path with the "/v1/nodes/" prefix and
// any surrounding slashes removed.
//
// Responses:
//   - 400 when the path carries no node name,
//   - 405 (with an Allow header) for any method other than GET,
//   - 500 when the response cannot be encoded,
//   - 200 with a JSON encoded Node followed by a newline otherwise.
func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	nodeName := strings.Trim(strings.TrimPrefix(r.URL.Path, "/v1/nodes/"), "/")
	if nodeName == "" {
		http.Error(w, "node name is missing from the request path", http.StatusBadRequest)
		return
	}
	if r.Method != http.MethodGet {
		w.Header().Set("Allow", http.MethodGet)
		http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
		return
	}
	// Encode before touching the ResponseWriter so that an encoding failure can still be reported with a 500;
	// once the body has been started the status code can no longer be changed.
	body, err := json.Marshal(Node{Name: nodeName, State: s.GetNodeState(nodeName)})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	// The first Write sends the 200 status implicitly. A failed write means the client is gone and there is nobody
	// left to report the error to, so it is deliberately ignored.
	_, _ = w.Write(append(body, '\n'))
}
15 | */ 16 | 17 | package exec 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | "path/filepath" 23 | "sigs.k8s.io/yaml" 24 | "strconv" 25 | "syscall" 26 | "time" 27 | 28 | "github.com/checkpoint-restore/go-criu/v7" 29 | "github.com/checkpoint-restore/go-criu/v7/rpc" 30 | "google.golang.org/protobuf/proto" 31 | ) 32 | 33 | type Actions struct { 34 | pid int 35 | configuration Configuration 36 | } 37 | 38 | // PreDump is called when criu is about to dump the process. 39 | func (a Actions) PreDump() error { 40 | // Temp hack to resolve crash during dump. 41 | for _, p := range a.configuration.InotifyIncompatiblePaths { 42 | if err := os.RemoveAll(p); err != nil { 43 | return fmt.Errorf("failed to remove %s: %w", p, err) 44 | } 45 | } 46 | conf := &configurationOnDisk{ 47 | Configuration: a.configuration, 48 | } 49 | conf.UnixFileDescriptorTrio = make([]string, 3) 50 | fdDir := filepath.Join("/proc", strconv.Itoa(a.pid), "fd") 51 | for i := 0; i < 3; i++ { 52 | fdPath := filepath.Join(fdDir, strconv.Itoa(i)) 53 | link, err := os.Readlink(fdPath) 54 | if err != nil { 55 | return fmt.Errorf("failed to read link of %s: %w", fdPath, err) 56 | } 57 | conf.UnixFileDescriptorTrio[i] = link 58 | } 59 | confYAML, err := yaml.Marshal(conf) 60 | if err != nil { 61 | return fmt.Errorf("failed to marshal fds: %w", err) 62 | } 63 | if err := os.WriteFile(filepath.Join(a.configuration.ImageDir, ConfigurationFileName), confYAML, 0o600); err != nil { 64 | return fmt.Errorf("failed to write stdio-fds.json: %w", err) 65 | } 66 | if err := os.MkdirAll(filepath.Join(a.configuration.ImageDir, "extraFiles"), 0755); err != nil { 67 | return fmt.Errorf("failed to create extra path: %w", err) 68 | } 69 | for _, p := range a.configuration.AdditionalPaths { 70 | if _, err := os.Stat(p); os.IsNotExist(err) { 71 | continue 72 | } 73 | if err := CopyDir(p, filepath.Join(a.configuration.ImageDir, "extraFiles", p)); err != nil { 74 | return fmt.Errorf("failed to copy %s: %w", p, err) 75 | } 76 | } 77 | 
return nil 78 | } 79 | 80 | // PostDump does nothing. 81 | func (a Actions) PostDump() error { 82 | return nil 83 | } 84 | 85 | // PreRestore does nothing. 86 | func (a Actions) PreRestore() error { 87 | return nil 88 | } 89 | 90 | // PostRestore does nothing. 91 | func (a Actions) PostRestore(pid int32) error { 92 | return nil 93 | } 94 | 95 | // NetworkLock does nothing. 96 | func (a Actions) NetworkLock() error { 97 | return nil 98 | } 99 | 100 | // NetworkUnlock does nothing. 101 | func (a Actions) NetworkUnlock() error { 102 | return nil 103 | } 104 | 105 | // SetupNamespaces does nothing. 106 | func (a Actions) SetupNamespaces(_ int32) error { 107 | return nil 108 | } 109 | 110 | // PostSetupNamespaces does nothing. 111 | func (a Actions) PostSetupNamespaces() error { 112 | return nil 113 | } 114 | 115 | // PostResume does nothing. 116 | func (a Actions) PostResume() error { 117 | return nil 118 | } 119 | 120 | func TakeCheckpoint(c *criu.Criu, pid int, configuration Configuration) (time.Duration, error) { 121 | start := time.Now() 122 | fd, err := syscall.Open(configuration.ImageDir, syscall.O_DIRECTORY, 755) 123 | if err != nil { 124 | return time.Since(start), fmt.Errorf("failed to open directory %s: %w", configuration.ImageDir, err) 125 | } 126 | cgMode := rpc.CriuCgMode_IGNORE 127 | opts := &rpc.CriuOpts{ 128 | TcpEstablished: proto.Bool(true), 129 | ShellJob: proto.Bool(false), 130 | FileLocks: proto.Bool(false), 131 | LogFile: proto.String("dump.log"), 132 | AutoDedup: proto.Bool(false), 133 | Pid: proto.Int32(int32(pid)), 134 | ImagesDirFd: proto.Int32(int32(fd)), // To make it use ImagesDir. 
135 | OrphanPtsMaster: proto.Bool(true), 136 | NotifyScripts: proto.Bool(true), 137 | LeaveRunning: proto.Bool(false), 138 | LeaveStopped: proto.Bool(false), 139 | LogLevel: proto.Int32(4), 140 | LazyPages: proto.Bool(false), 141 | GhostLimit: proto.Uint32(500 * 1048576), // 500MB 142 | Root: proto.String("/"), 143 | TcpClose: proto.Bool(true), 144 | ManageCgroupsMode: &cgMode, 145 | External: GetExternalDirectoriesForCheckpoint(), 146 | } 147 | actions := Actions{ 148 | pid: pid, 149 | configuration: configuration, 150 | } 151 | if err := c.Dump(opts, actions); err != nil { 152 | return time.Since(start), fmt.Errorf("failed to dump: %w", err) 153 | } 154 | return time.Since(start), nil 155 | } 156 | -------------------------------------------------------------------------------- /internal/exec/opts.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 QA Wolf Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package exec 18 | 19 | import ( 20 | "fmt" 21 | "io" 22 | "os" 23 | "path/filepath" 24 | "strings" 25 | 26 | "github.com/checkpoint-restore/go-criu/v7/crit" 27 | "k8s.io/apimachinery/pkg/util/yaml" 28 | ) 29 | 30 | const ( 31 | ConfigurationFileName = "configuration.yaml" 32 | ) 33 | 34 | func ReadConfiguration(path string) (Configuration, error) { 35 | b, err := os.ReadFile(path) 36 | if err != nil { 37 | return Configuration{}, fmt.Errorf("failed to read configuration file: %w", err) 38 | } 39 | var c Configuration 40 | if err := yaml.Unmarshal(b, &c); err != nil { 41 | return Configuration{}, fmt.Errorf("failed to unmarshal configuration: %w", err) 42 | } 43 | return c, nil 44 | } 45 | 46 | // Configuration lets crik know about quirks of the processes whose checkpoint is being taken. For example, the files 47 | // that need to be part of the checkpoint but are not part of the container's image need to be specified here. 48 | type Configuration struct { 49 | // ImageDir is the directory where the checkpoint is stored. It is expected to be available in the new container as 50 | // well. 51 | ImageDir string `json:"imageDir"` 52 | 53 | // NodeStateServerURL is the URL of the node state server. If given, crik will first check if the node is in shutting 54 | // down state and only then take checkpoint. 55 | // If not given, crik will always take checkpoint when it receives SIGTERM. 56 | NodeStateServerURL string `json:"nodeStateServerURL"` 57 | 58 | // AdditionalPaths is the list of paths that are not part of the container's image but were opened by one of the 59 | // processes in the tree. We need to make sure that these paths are available in the new container as well. 60 | // The paths are relative to the root of the container's filesystem. 61 | // Entries can be path to a file or a directory. 
62 | AdditionalPaths []string `json:"additionalPaths,omitempty"` 63 | 64 | // InotifyIncompatiblePaths is the list of paths that are known to cause issues with inotify. We delete those paths 65 | // before taking the checkpoint. 66 | InotifyIncompatiblePaths []string `json:"inotifyIncompatiblePaths,omitempty"` 67 | } 68 | 69 | // configurationOnDisk contains additional metadata information about the checkpoint that is used during restore. 70 | type configurationOnDisk struct { 71 | Configuration 72 | 73 | // UnixFileDescriptors is the list of file descriptors that are opened by all UNIX processes by default. 74 | // They map to 0 -> stdin, 1 -> stdout, 2 -> stderr. 75 | // In containers, these are connected to either /dev/null or pipes. We need to make sure that when we restore, the 76 | // pipes are connected to criu's stdin, stdout, and stderr which is what's connected to the new container's stdin, 77 | // stdout, and stderr. 78 | // This list has only 3 elements in all cases. 79 | UnixFileDescriptorTrio []string `json:"unixFileDescriptorTrio,omitempty"` 80 | } 81 | 82 | var ( 83 | // DirectoryMounts is the list of directories that are mounted by the container runtime and need to be marked as 84 | // such during checkpoint and restore so that the underlying files can change without breaking the restore process. 
// CopyDir copies the file tree rooted at src to dst.
//
// Directories are created with the permission bits of their source directory (subject to the process umask, and left
// untouched when they already exist). Files are copied byte for byte and then given the mode of their source file.
// Entries that do not exist, including src itself, are skipped without an error.
//
// It returns the first error hit while walking, creating directories or copying files.
func CopyDir(src, dst string) error {
	return filepath.WalkDir(src, func(srcPath string, d os.DirEntry, err error) error {
		// If the file/folder doesn't exist, we don't need to copy it.
		if os.IsNotExist(err) {
			return nil
		}
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(src, srcPath)
		if err != nil {
			return err
		}
		dstPath := filepath.Join(dst, rel)
		if d.IsDir() {
			// DirEntry.Type only carries the file type bits, so the permission bits have to come from Info.
			info, err := d.Info()
			if os.IsNotExist(err) {
				return nil
			}
			if err != nil {
				return err
			}
			return os.MkdirAll(dstPath, info.Mode().Perm())
		}
		// TODO(muvaf): This changes the perms of folder if the dir wasn't walked before.
		if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil {
			return err
		}
		return copyFile(srcPath, dstPath)
	})
}

// copyFile copies the content of srcPath to dstPath, creating or truncating dstPath, and then applies the mode of the
// source file to it.
func copyFile(srcPath, dstPath string) (err error) {
	in, err := os.Open(srcPath)
	if err != nil {
		return err
	}
	// The source is only read, so an error from closing it carries no information.
	defer in.Close()

	// Get the source file mode to apply to the destination file.
	info, err := in.Stat()
	if err != nil {
		return err
	}

	out, err := os.Create(dstPath)
	if err != nil {
		return err
	}
	// A failed Close can mean the data never reached the disk, so report it unless an earlier error takes
	// precedence.
	defer func() {
		if cerr := out.Close(); err == nil {
			err = cerr
		}
	}()

	if _, err = io.Copy(out, in); err != nil {
		return err
	}
	return os.Chmod(dstPath, info.Mode())
}
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package exec 18 | 19 | import ( 20 | "fmt" 21 | "os" 22 | "os/exec" 23 | "path/filepath" 24 | "sigs.k8s.io/yaml" 25 | "strings" 26 | "syscall" 27 | ) 28 | 29 | func RestoreWithCmd(imageDir string) error { 30 | if err := os.MkdirAll("/tmp/.X11-unix", 0755); err != nil { 31 | return fmt.Errorf("failed to mkdir /tmp/.X11-unix: %w", err) 32 | } 33 | if err := CopyDir(filepath.Join(imageDir, "extraFiles"), "/"); err != nil { 34 | return fmt.Errorf("failed to copy extra files: %w", err) 35 | } 36 | args := []string{"restore", 37 | "--images-dir", imageDir, 38 | "--tcp-established", 39 | "--file-locks", 40 | "--evasive-devices", 41 | "--tcp-close", 42 | "--manage-cgroups=ignore", 43 | "-v4", 44 | "--log-file", "restore.log", 45 | } 46 | configYAML, err := os.ReadFile(filepath.Join(imageDir, ConfigurationFileName)) 47 | if err != nil { 48 | return fmt.Errorf("failed to read stdio file descriptors: %w", err) 49 | } 50 | conf := &configurationOnDisk{} 51 | if err := yaml.Unmarshal(configYAML, conf); err != nil { 52 | return fmt.Errorf("failed to unmarshal stdio file descriptors: %w", err) 53 | } 54 | for _, d := range GetExternalDirectoriesForRestore() { 55 | args = append(args, "--external", d) 56 | } 57 | inheritedFds := conf.UnixFileDescriptorTrio 58 | 59 | // When cgroup v2 is used, the path to resource usage files contain pod and container IDs which are changed 60 | // in the new pod. We find and replace them with the new files. 
61 | kubePodFiles, err := GetKubePodFilePaths(imageDir) 62 | if err != nil { 63 | return fmt.Errorf("failed to get kubepods.slice files: %w", err) 64 | } 65 | var extraFiles []*os.File 66 | if len(kubePodFiles) > 0 { 67 | // All processes within container are in the same cgroup, so getting the folder of self is enough. 68 | str, err := os.ReadFile("/proc/self/cgroup") 69 | if err != nil { 70 | return fmt.Errorf("failed to read /proc/self/cgroup: %w", err) 71 | } 72 | basePath := filepath.Join("/sys/fs/cgroup", strings.Split(strings.Split(string(str), "\n")[0], ":")[2]) 73 | for k, v := range kubePodFiles { 74 | path := filepath.Join(basePath, k) 75 | f, err := os.OpenFile(path, syscall.O_RDONLY, 0) 76 | if err != nil { 77 | return fmt.Errorf("failed to open %s: %w", k, err) 78 | } 79 | // The index of file descriptor in extraFiles must match the index+3 in inheritedFds because 80 | // the first 3 file descriptors are reserved for stdin, stdout, and stderr. 81 | inheritedFds = append(inheritedFds, strings.TrimPrefix(v, "/")) 82 | extraFiles = append(extraFiles, f) 83 | } 84 | } 85 | for i, fdStr := range inheritedFds { 86 | args = append(args, "--inherit-fd", fmt.Sprintf("fd[%d]:%s", i, fdStr)) 87 | } 88 | cmd := exec.Command("criu", args...) 89 | cmd.ExtraFiles = extraFiles 90 | cmd.Stdin = nil 91 | cmd.Stdout = os.Stdout 92 | cmd.Stderr = os.Stderr 93 | return cmd.Run() 94 | } 95 | --------------------------------------------------------------------------------