├── .devcontainer └── devcontainer.json ├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── pr.yml │ ├── release-prometheus-process.yml │ ├── release-prometheus-tokio.yml │ └── release.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── deny.toml ├── examples ├── Cargo.toml ├── Dockerfile ├── README.md ├── lease.rs ├── tests │ └── lease.rs └── watch_pods.rs ├── justfile ├── kubert-prometheus-process ├── Cargo.toml └── src │ ├── lib.rs │ ├── linux.rs │ └── linux │ └── netstat.rs ├── kubert-prometheus-tokio ├── Cargo.toml └── src │ └── lib.rs └── kubert ├── Cargo.toml └── src ├── admin.rs ├── admin ├── diagnostics.rs ├── diagnostics │ ├── lease.rs │ └── watch.rs └── metrics.rs ├── client.rs ├── client ├── metrics.rs └── timeouts.rs ├── errors.rs ├── index.rs ├── initialized.rs ├── lease.rs ├── lib.rs ├── log.rs ├── requeue.rs ├── runtime.rs ├── runtime └── metrics.rs ├── server.rs ├── server ├── tests.rs ├── tls_openssl.rs └── tls_rustls.rs └── shutdown.rs /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "kubert", 3 | "image": "ghcr.io/linkerd/dev:v45", 4 | "runArgs": [ 5 | "--init", 6 | // Use the host network so we can access k3d, etc. 
7 | "--net=host", 8 | // For lldb 9 | "--cap-add=SYS_PTRACE", 10 | "--security-opt=seccomp=unconfined" 11 | ], 12 | "overrideCommand": false, 13 | "remoteUser": "code", 14 | "mounts": [ 15 | { 16 | "source": "/var/run/docker.sock", 17 | "target": "/var/run/docker-host.sock", 18 | "type": "bind" 19 | }, 20 | { 21 | "source": "${localEnv:HOME}/.docker", 22 | "target": "/home/code/.docker", 23 | "type": "bind" 24 | } 25 | ], 26 | "containerEnv": { 27 | "CXX": "clang++-19", 28 | }, 29 | "features": { 30 | "ghcr.io/devcontainers/features/github-cli:1": {} 31 | }, 32 | "customizations": { 33 | "vscode": { 34 | "extensions": [ 35 | "github.vscode-github-actions", 36 | "kokakiwi.vscode-just", 37 | "NathanRidley.autotrim", 38 | "rust-lang.rust-analyzer", 39 | "ms-kubernetes-tools.vscode-kubernetes-tools", 40 | "samverschueren.final-newline", 41 | "tamasfe.even-better-toml" 42 | ] 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.md 2 | .dockerignore 3 | .github 4 | .git 5 | Dockerfile 6 | justfile 7 | target 8 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: cargo 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | day: friday 8 | time: "03:30" 9 | timezone: Etc/UTC 10 | allow: 11 | - dependency-type: all 12 | groups: 13 | kube: 14 | patterns: 15 | - k8s-openapi 16 | - kube 17 | - kube-* 18 | update-types: [major, minor] 19 | 20 | - package-ecosystem: "github-actions" 21 | directory: "/" 22 | schedule: 23 | interval: "weekly" 24 | day: friday 25 | time: "03:00" 26 | timezone: Etc/UTC 27 | -------------------------------------------------------------------------------- /.github/workflows/pr.yml: 
-------------------------------------------------------------------------------- 1 | name: Pull Request 2 | on: pull_request 3 | 4 | permissions: 5 | contents: read 6 | 7 | env: 8 | CARGO_INCREMENTAL: 0 9 | CARGO_NET_RETRY: 10 10 | RUSTUP_MAX_RETRIES: 10 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | changed: 18 | timeout-minutes: 5 19 | runs-on: ubuntu-24.04 20 | steps: 21 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 22 | 23 | - id: build 24 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 25 | with: 26 | files: | 27 | .github/workflows/pr.yml 28 | justfile 29 | examples/Dockerfile 30 | 31 | - id: markdown 32 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 33 | with: 34 | files: | 35 | **/*.md 36 | 37 | - id: actions 38 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 39 | with: 40 | files: | 41 | .github/workflows/** 42 | .devcontainer/* 43 | 44 | - id: kubert 45 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 46 | with: 47 | files: | 48 | Cargo.toml 49 | kubert/** 50 | examples/** 51 | 52 | - id: kubert-prometheus-process 53 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 54 | with: 55 | files: | 56 | Cargo.toml 57 | kubert-prometheus-process/** 58 | 59 | - id: kubert-prometheus-tokio 60 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 61 | with: 62 | files: | 63 | Cargo.toml 64 | kubert-prometheus-tokio/** 65 | 66 | - id: cargo 67 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 68 | with: 69 | files: | 70 | **/Cargo.toml 71 | 72 | - id: deny 73 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 74 | with: 75 | files: | 76 | deny.toml 77 | 78 | - id: rust 79 | uses: tj-actions/changed-files@0fee5fb278312d962ff465bb38dc4cae9f446de2 80 | with: 81 | 
files: | 82 | **/*.rs 83 | 84 | outputs: 85 | by-dependabot: ${{ github.actor == 'dependabot[bot]' }} 86 | any: >- 87 | ${{ 88 | steps.build.outputs.any_changed == 'true' || 89 | steps.actions.outputs.any_changed == 'true' || 90 | steps.markdown.outputs.any_changed == 'true' || 91 | steps.kubert.outputs.any_changed == 'true' || 92 | steps.kubert-prometheus-process.outputs.any_changed == 'true' || 93 | steps.kubert-prometheus-tokio.outputs.any_changed == 'true' || 94 | steps.cargo.outputs.any_changed == 'true' || 95 | steps.deny.outputs.any_changed == 'true' || 96 | steps.rust.outputs.any_changed == 'true' 97 | }} 98 | build: ${{ steps.build.outputs.any_changed == 'true' }} 99 | actions: ${{ steps.actions.outputs.any_changed == 'true' }} 100 | cargo: ${{ steps.cargo.outputs.any_changed == 'true' }} 101 | deny: ${{ steps.deny.outputs.any_changed == 'true' }} 102 | kubert: ${{ steps.kubert.outputs.any_changed == 'true' }} 103 | kubert-prometheus-process: ${{ steps.kubert-prometheus-process.outputs.any_changed == 'true' }} 104 | kubert-prometheus-tokio: ${{ steps.kubert-prometheus-tokio.outputs.any_changed == 'true' }} 105 | rust: ${{ steps.rust.outputs.any_changed == 'true' }} 106 | markdown: ${{ steps.markdown.outputs.any_changed == 'true' }} 107 | 108 | info: 109 | timeout-minutes: 3 110 | needs: changed 111 | runs-on: ubuntu-24.04 112 | steps: 113 | - name: changed.outputs 114 | run: | 115 | jq . 
<&2 45 | exit 1 46 | fi 47 | ( echo version="$version" 48 | echo mode=release 49 | ) >> "$GITHUB_OUTPUT" 50 | else 51 | sha="${{ github.sha }}" 52 | ( echo version="$(just-cargo crate-version kubert-prometheus-process)-git-${sha:0:7}" 53 | echo mode=test 54 | ) >> "$GITHUB_OUTPUT" 55 | fi 56 | outputs: 57 | mode: ${{ steps.meta.outputs.mode }} 58 | version: ${{ steps.meta.outputs.version }} 59 | 60 | release: 61 | needs: [meta] 62 | permissions: 63 | contents: write 64 | timeout-minutes: 5 65 | runs-on: ubuntu-24.04 66 | steps: 67 | - if: needs.meta.outputs.mode == 'release' 68 | uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda 69 | with: 70 | name: kubert-prometheus-process ${{ needs.meta.outputs.version }} 71 | generate_release_notes: false 72 | make_latest: false 73 | 74 | crate: 75 | # Only publish the crate after the rest of the release succeeds. 76 | needs: [meta, release] 77 | timeout-minutes: 10 78 | runs-on: ubuntu-24.04 79 | container: ghcr.io/linkerd/dev:v45-rust 80 | steps: 81 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 82 | - run: cargo publish --package=kubert-prometheus-process --dry-run 83 | - if: needs.meta.outputs.mode == 'release' 84 | run: cargo publish --package=kubert-prometheus-process --token=${{ secrets.CRATESIO_TOKEN }} 85 | -------------------------------------------------------------------------------- /.github/workflows/release-prometheus-tokio.yml: -------------------------------------------------------------------------------- 1 | name: Release kubernetes-prometheus-tokio 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - .github/workflows/release-prometheus-tokio.yml 7 | push: 8 | tags: 9 | - 'kubert-prometheus-tokio/*' 10 | 11 | env: 12 | CARGO_INCREMENTAL: 0 13 | CARGO_NET_RETRY: 10 14 | RUSTUP_MAX_RETRIES: 10 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | cleanup: 21 | runs-on: ubuntu-24.04 22 | permissions: 23 | actions: write 24 | steps: 25 | - uses: 
styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa 26 | with: 27 | all_but_latest: true 28 | access_token: ${{ github.token }} 29 | 30 | meta: 31 | timeout-minutes: 5 32 | runs-on: ubuntu-24.04 33 | container: ghcr.io/linkerd/dev:v45-rust 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 36 | - id: meta 37 | shell: bash 38 | run: | 39 | ref="${{ github.ref }}" 40 | if [[ "$ref" == refs/tags/kubert-prometheus-tokio/* ]]; then 41 | version="${ref##refs/tags/kubert-prometheus-tokio/}" 42 | crate=$(just-cargo crate-version kubert-prometheus-tokio) 43 | if [[ "$crate" != "$version" ]]; then 44 | echo "::error ::Crate version $crate does not match tag $version" >&2 45 | exit 1 46 | fi 47 | ( echo version="$version" 48 | echo mode=release 49 | ) >> "$GITHUB_OUTPUT" 50 | else 51 | sha="${{ github.sha }}" 52 | ( echo version="$(just-cargo crate-version kubert-prometheus-tokio)-git-${sha:0:7}" 53 | echo mode=test 54 | ) >> "$GITHUB_OUTPUT" 55 | fi 56 | outputs: 57 | mode: ${{ steps.meta.outputs.mode }} 58 | version: ${{ steps.meta.outputs.version }} 59 | 60 | release: 61 | needs: [meta] 62 | permissions: 63 | contents: write 64 | timeout-minutes: 5 65 | runs-on: ubuntu-24.04 66 | steps: 67 | - if: needs.meta.outputs.mode == 'release' 68 | uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda 69 | with: 70 | name: kubert-prometheus-tokio ${{ needs.meta.outputs.version }} 71 | generate_release_notes: false 72 | make_latest: false 73 | 74 | crate: 75 | # Only publish the crate after the rest of the release succeeds. 
76 | needs: [meta, release] 77 | timeout-minutes: 10 78 | runs-on: ubuntu-24.04 79 | container: ghcr.io/linkerd/dev:v45-rust 80 | env: 81 | RUSTFLAGS: '--cfg tokio_unstable' 82 | RUSTDOCFLAGS: '--cfg tokio_unstable' 83 | steps: 84 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 85 | - run: cargo publish --package=kubert-prometheus-tokio --dry-run 86 | - if: needs.meta.outputs.mode == 'release' 87 | run: cargo publish --package=kubert-prometheus-tokio --token=${{ secrets.CRATESIO_TOKEN }} 88 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release kubert 2 | 3 | on: 4 | pull_request: 5 | paths: 6 | - .github/workflows/release.yml 7 | push: 8 | tags: 9 | - 'kubert/*' 10 | 11 | env: 12 | CARGO_INCREMENTAL: 0 13 | CARGO_NET_RETRY: 10 14 | RUSTUP_MAX_RETRIES: 10 15 | 16 | permissions: 17 | contents: read 18 | 19 | jobs: 20 | cleanup: 21 | runs-on: ubuntu-24.04 22 | permissions: 23 | actions: write 24 | steps: 25 | - uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa 26 | with: 27 | all_but_latest: true 28 | access_token: ${{ github.token }} 29 | 30 | meta: 31 | timeout-minutes: 5 32 | runs-on: ubuntu-24.04 33 | container: ghcr.io/linkerd/dev:v45-rust 34 | steps: 35 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 36 | - id: meta 37 | shell: bash 38 | run: | 39 | ref="${{ github.ref }}" 40 | if [[ "$ref" == refs/tags/kubert/* ]]; then 41 | version="${ref##refs/tags/kubert/}" 42 | crate=$(just-cargo crate-version kubert) 43 | if [[ "$crate" != "$version" ]]; then 44 | echo "::error ::Crate version $crate does not match tag $version" >&2 45 | exit 1 46 | fi 47 | ( echo version="$version" 48 | echo mode=release 49 | ) >> "$GITHUB_OUTPUT" 50 | else 51 | sha="${{ github.sha }}" 52 | ( echo version="$(just-cargo crate-version kubert)-git-${sha:0:7}" 53 
| echo mode=test 54 | ) >> "$GITHUB_OUTPUT" 55 | fi 56 | outputs: 57 | mode: ${{ steps.meta.outputs.mode }} 58 | version: ${{ steps.meta.outputs.version }} 59 | 60 | release: 61 | needs: [meta] 62 | permissions: 63 | contents: write 64 | timeout-minutes: 5 65 | runs-on: ubuntu-24.04 66 | steps: 67 | - if: needs.meta.outputs.mode == 'release' 68 | uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda 69 | with: 70 | name: kubert ${{ needs.meta.outputs.version }} 71 | generate_release_notes: true 72 | 73 | crate: 74 | # Only publish the crate after the rest of the release succeeds. 75 | needs: [meta, release] 76 | timeout-minutes: 10 77 | runs-on: ubuntu-24.04 78 | container: ghcr.io/linkerd/dev:v45-rust 79 | env: 80 | RUSTFLAGS: '--cfg tokio_unstable' 81 | RUSTDOCFLAGS: '--cfg tokio_unstable' 82 | steps: 83 | - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 84 | - run: cargo publish --package=kubert --dry-run 85 | - if: needs.meta.outputs.mode == 'release' 86 | run: cargo publish --package=kubert --token=${{ secrets.CRATESIO_TOKEN }} 87 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | default-members = [ 4 | "kubert", 5 | "kubert-prometheus-process", 6 | "kubert-prometheus-tokio", 7 | ] 8 | members = [ 9 | "kubert", 10 | "kubert-prometheus-process", 11 | "kubert-prometheus-tokio", 12 | "examples", 13 | ] 14 | 15 | [workspace.dependencies] 16 | 17 | clap = { version = "4", default-features = false } 18 | 19 | hyper = { version = "1", default-features = false } 20 | hyper-openssl = { version = "0.10" } 21 | hyper-util = { version = "0.1", default-features = false } 
22 | 23 | k8s-openapi = { version = "0.24", default-features = false } 24 | 25 | kube-client = { version = "0.99", default-features = false } 26 | kube-core = { version = "0.99", default-features = false } 27 | kube-runtime = { version = "0.99", default-features = false } 28 | kube = { version = "0.99", default-features = false } 29 | 30 | prometheus-client = { version = "0.23.0", default-features = false } 31 | 32 | tokio = { version = "1.17.0", default-features = false } 33 | tokio-rustls = { version = "0.26.1", default-features = false } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Oliver Gould 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # kubert 2 | 3 | Rust Kubernetes runtime helpers. Based on [`kube-rs`][krs]. 
4 | 5 | ![kubert](https://user-images.githubusercontent.com/240738/154825590-5a94ca46-a453-4037-a738-26663b2c8630.png) 6 | 7 | /ˈkuː.bərt/ 8 | 9 | [![Crates.io][crate-badge]][crate-url] 10 | [![Documentation][docs-badge]][docs-url] 11 | [![License](https://img.shields.io/crates/l/kubert)](LICENSE) 12 | 13 | [crate-badge]: https://img.shields.io/crates/v/kubert.svg 14 | [crate-url]: https://crates.io/crates/kubert 15 | [docs-badge]: https://docs.rs/kubert/badge.svg 16 | [docs-url]: https://docs.rs/kubert 17 | 18 | ## Features 19 | 20 | * [`clap`](https://docs.rs/clap) command-line interface support; 21 | * A basic admin server with `/ready` and `/live` probe endpoints; 22 | * Optional [`prometheus-client`][pc] integration, with process and Tokio Runtime 23 | metrics; 24 | * A default Kubernetes client; 25 | * Graceful shutdown on `SIGTERM` or `SIGINT` signals; 26 | * An HTTPS server (for admission controllers and API extensions) with 27 | certificate reloading; 28 | * A utility for maintaining an index derived from watching one or more 29 | Kubernetes resources types; 30 | * A _requeue_ channel that supports deferring/rescheduling updates (i.e. in case 31 | a write fails); 32 | * And a [`Runtime`][rt] type that ties it all together! 33 | 34 | ### Why not `kube-rs`? 35 | 36 | The [`kube`][krs] crate is great! And in fact, `kubert` builds on `kube`'s 37 | client and runtime modules. This crate, however, captures some of the repeated 38 | patterns we've encountered building controllers for 39 | [Linkerd](https://github.com/linkerd/linkerd2). It doesn't try to hide 40 | `kube`--though it does reduce boilerplate around initializing watches and caches 41 | (reflectors); and it expects you to schedule work via the `tokio` runtime. 42 | 43 | ## Examples 44 | 45 | This repository includes a simple [example application](./examples) that 46 | demonstrates how to use a `kubert::Runtime`. 
47 | 48 | Other examples include: 49 | 50 | * [Linkerd2 policy controller](https://github.com/linkerd/linkerd2/blob/d4543cd86e427b241ce961b50dd83b1738c0b069/policy-controller/src/main.rs) 51 | 52 | ## kubert-prometheus-process 53 | 54 | The `kubert-prometheus-process` crate provides [process metrics][pm] for 55 | prometheus-client. It has no dependencies on kubert, and can be used 56 | independently. 57 | 58 | ## kubert-prometheus-tokio 59 | 60 | The `kubert-prometheus-tokio` crate exposes [Tokio runtime metrics][tm] for 61 | prometheus-client. It also has no dependencies on kubert, and can be used 62 | independently. 63 | 64 | Note that tokio runtime metrics require `RUSTFLAGS="--cfg tokio_unstable"` to be 65 | set at compile-time. 66 | 67 | ## Status 68 | 69 | This crate is still fairly experimental, though it's based on production code 70 | from Linkerd; and we plan to use it in Linkerd moving forward. 71 | 72 | [krs]: https://docs.rs/kube 73 | [pc]: https://docs.rs/prometheus-client 74 | [pm]: https://prometheus.io/docs/instrumenting/writing_clientlibs/#process-metrics 75 | [tm]: https://docs.rs/tokio-metrics 76 | [rt]: https://docs.rs/kubert/latest/kubert/runtime/struct.Runtime.html 77 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [advisories] 2 | db-path = "~/.cargo/advisory-db" 3 | db-urls = ["https://github.com/rustsec/advisory-db"] 4 | ignore = [ 5 | ] 6 | 7 | [licenses] 8 | allow = [ 9 | "Apache-2.0", 10 | "BSD-2-Clause", 11 | "BSD-3-Clause", 12 | "ISC", 13 | "MIT", 14 | "OpenSSL", 15 | "Unicode-3.0", 16 | "Zlib", 17 | ] 18 | confidence-threshold = 0.8 19 | exceptions = [ 20 | { allow = [ 21 | "ISC", 22 | "MIT", 23 | "OpenSSL", 24 | ], name = "ring", version = "*" }, 25 | ] 26 | 27 | [[licenses.clarify]] 28 | name = "ring" 29 | version = "*" 30 | expression = "MIT AND ISC AND OpenSSL" 31 | license-files = [{ path = 
"LICENSE", hash = 0xbd0eed23 }] 32 | 33 | [bans] 34 | skip-tree = [ 35 | { name = "windows-sys" }, 36 | { name = "windows_aarch64_msvc" }, 37 | { name = "windows_i686_gnu" }, 38 | { name = "windows_i686_msvc" }, 39 | { name = "windows_x86_64_gnu" }, 40 | { name = "windows_x86_64_msvc" }, 41 | # tracing-subscriber needs an older regex-automata. 42 | { name = "regex-automata" }, 43 | # `serde_json` and `serde_yaml` depend on incompatible versions of indexmap 44 | { name = "indexmap" }, 45 | # the proc-macro ecosystem is still in the process of migrating from `syn` 46 | # 1.0 to `syn` 2.0 47 | { name = "syn" }, 48 | # `parking-lot-core` and `dirs-next` (transitive deps via `kube-client`) 49 | # depend on incompatible versions of `redox_syscall`. 50 | { name = "redox_syscall" }, 51 | # Until thiserror v2 is widely used. 52 | { name = "thiserror", version = "1" }, 53 | ] 54 | 55 | [sources] 56 | unknown-registry = "deny" 57 | unknown-git = "deny" 58 | allow-registry = ["https://github.com/rust-lang/crates.io-index"] 59 | allow-git = [] 60 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kubert-examples" 3 | version = "0.1.0" 4 | publish = false 5 | edition = "2021" 6 | license = "Apache-2.0" 7 | rust-version = "1.77" 8 | 9 | [package.metadata.release] 10 | release = false 11 | 12 | [features] 13 | default = ["rustls-tls"] 14 | rustls-tls = ["kubert/rustls-tls-aws-lc-rs"] 15 | openssl-tls = ["kubert/openssl-tls", "dep:openssl"] 16 | 17 | [dependencies.kubert] 18 | path = "../kubert" 19 | default-features = false 20 | features = [ 21 | "clap", 22 | "lease", 23 | "prometheus-client", 24 | "runtime", 25 | "runtime-brotli", 26 | "runtime-diagnostics", 27 | ] 28 | 29 | [dependencies.openssl] 30 | version = "0.10.57" 31 | optional = true 32 | features = ["vendored"] 33 | 34 | [dev-dependencies] 35 | anyhow = "1" 36 | 
chrono = { version = "0.4", default-features = false } 37 | futures = { version = "0.3", default-features = false } 38 | maplit = "1" 39 | prometheus-client = "0.23" 40 | rand = "0.9" 41 | regex = "1" 42 | thiserror = "2" 43 | tracing = "0.1" 44 | tracing-subscriber = { version = "0.3", features = ["ansi", "env-filter"] } 45 | 46 | [dev-dependencies.clap] 47 | workspace = true 48 | features = ["derive", "help", "env", "std"] 49 | 50 | [dev-dependencies.k8s-openapi] 51 | workspace = true 52 | features = ["latest"] 53 | 54 | [dev-dependencies.kube] 55 | workspace = true 56 | features = ["client", "derive", "runtime"] 57 | 58 | [dev-dependencies.tokio] 59 | workspace = true 60 | features = ["macros", "parking_lot", "rt", "rt-multi-thread", "time"] 61 | 62 | [[example]] 63 | name = "watch-pods" 64 | path = "watch_pods.rs" 65 | 66 | [[example]] 67 | name = "lease" 68 | path = "lease.rs" 69 | -------------------------------------------------------------------------------- /examples/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/linkerd/dev:v45-rust as build 2 | WORKDIR /kubert 3 | COPY . . 
4 | ENV CARGO_NET_RETRY=10 \ 5 | CARGO_INCREMENTAL=0 \ 6 | RUSTFLAGS="--cfg tokio_unstable" 7 | RUN --mount=type=cache,target=/usr/local/cargo/registry \ 8 | just-cargo fetch 9 | ARG FEATURES="rustls-tls" 10 | RUN --mount=type=cache,target=/usr/local/cargo/registry \ 11 | just-cargo build \ 12 | --package=kubert-examples --examples \ 13 | --no-default-features --features=${FEATURES} 14 | 15 | FROM gcr.io/distroless/cc-debian12:debug 16 | COPY --from=build /kubert/target/debug/examples/watch-pods /watch-pods 17 | ENTRYPOINT ["/watch-pods"] 18 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # kubert examples 2 | 3 | ## [`watch-pods`](./watch_pods.rs) 4 | 5 | A simple Kubernetes example that watches for pod updates and logs them. 6 | 7 | ```text 8 | :; cargo run --example watch-pods -p kubert-examples -- --selector=linkerd.io/control-plane-ns 9 | Compiling kubert-examples v0.1.0 (/workspaces/kubert/examples) 10 | Finished dev [unoptimized + debuginfo] target(s) in 7.23s 11 | Running `target/debug/examples/watch-pods --selector=linkerd.io/control-plane-ns` 12 | 2022-03-02T03:16:12.370463Z INFO pods: watch_pods: added namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 13 | 2022-03-02T03:16:12.380229Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 14 | 2022-03-02T03:16:12.407258Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 15 | 2022-03-02T03:16:12.464362Z INFO pods: watch_pods: added namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 16 | 2022-03-02T03:16:12.486658Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 17 | 2022-03-02T03:16:12.509484Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 18 | 2022-03-02T03:16:12.515244Z INFO pods: watch_pods: added 
namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 19 | 2022-03-02T03:16:12.524817Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 20 | 2022-03-02T03:16:12.547041Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 21 | 2022-03-02T03:16:13.592621Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 22 | 2022-03-02T03:16:13.732357Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 23 | 2022-03-02T03:16:13.762360Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 24 | 2022-03-02T03:16:14.738187Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 25 | 2022-03-02T03:16:15.740861Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 26 | 2022-03-02T03:16:20.602560Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 27 | 2022-03-02T03:16:20.616147Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 28 | 2022-03-02T03:16:21.606533Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-proxy-injector-6c57f585c4-n674t 29 | 2022-03-02T03:16:22.396102Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-identity-7d9c4cd9b8-kpwql 30 | 2022-03-02T03:16:22.744278Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 31 | 2022-03-02T03:16:22.759241Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 32 | 2022-03-02T03:16:23.746871Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 33 | 2022-03-02T03:16:23.760010Z INFO pods: watch_pods: updated namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 34 | 2022-03-02T03:16:23.773358Z INFO pods: watch_pods: updated 
namespace=linkerd name=linkerd-destination-5b6fc7cb9-hn9hr 35 | ``` 36 | -------------------------------------------------------------------------------- /examples/lease.rs: -------------------------------------------------------------------------------- 1 | #![deny(warnings, rust_2018_idioms)] 2 | #![forbid(unsafe_code)] 3 | 4 | use anyhow::Result; 5 | use k8s_openapi::{api::coordination::v1 as coordv1, apimachinery::pkg::apis::meta::v1 as metav1}; 6 | use kube::ResourceExt; 7 | use tokio::time; 8 | 9 | #[derive(Clone, clap::Parser)] 10 | #[clap(version)] 11 | struct Args { 12 | /// The tracing filter used for logs 13 | #[arg(long, env = "KUBERT_EXAMPLE_LOG", default_value = "lease=info,warn")] 14 | log_level: kubert::LogFilter, 15 | 16 | /// The logging format 17 | #[arg(long, default_value = "plain")] 18 | log_format: kubert::LogFormat, 19 | 20 | #[clap(flatten)] 21 | client: kubert::ClientArgs, 22 | 23 | #[clap(flatten)] 24 | admin: kubert::AdminArgs, 25 | 26 | #[arg(long, default_value = "kubert-examples")] 27 | field_manager: String, 28 | 29 | #[arg(short, long, env = "LOGNAME", default_value = "default")] 30 | identity: String, 31 | 32 | #[arg(short, long, default_value = "default")] 33 | namespace: String, 34 | 35 | #[command(subcommand)] 36 | command: Command, 37 | } 38 | 39 | #[derive(Clone, clap::Parser)] 40 | enum Command { 41 | /// Create a Lease 42 | Create { name: String }, 43 | 44 | /// Try to claim a Lease 45 | Claim { 46 | #[arg(long, default_value = "30s")] 47 | duration: Timeout, 48 | 49 | #[arg(long, default_value = "1s")] 50 | renew_grace_period: Timeout, 51 | 52 | name: String, 53 | }, 54 | 55 | /// Get the status of a Lease 56 | Get { name: String }, 57 | 58 | /// Release a lease if it is currently held by the given identity 59 | Vacate { name: String }, 60 | 61 | Run { 62 | #[arg(long, default_value = "30s")] 63 | duration: Timeout, 64 | 65 | #[arg(long, default_value = "1s")] 66 | renew_grace_period: Timeout, 67 | 68 | name: String, 69 | 
}, 70 | } 71 | 72 | #[tokio::main] 73 | async fn main() -> Result<()> { 74 | use clap::Parser; 75 | 76 | let Args { 77 | log_level, 78 | log_format, 79 | client, 80 | admin, 81 | field_manager, 82 | identity, 83 | namespace, 84 | command, 85 | } = Args::parse(); 86 | 87 | let rt = kubert::Runtime::builder() 88 | .with_log(log_level, log_format) 89 | .with_client(client); 90 | 91 | match command { 92 | Command::Create { name } => { 93 | let rt = rt.build().await?; 94 | let api = kube::Api::namespaced(rt.client(), &namespace); 95 | run(rt, async move { 96 | let lease = api 97 | .create( 98 | &Default::default(), 99 | &coordv1::Lease { 100 | metadata: metav1::ObjectMeta { 101 | name: Some(name), 102 | namespace: Some(namespace), 103 | ..Default::default() 104 | }, 105 | ..Default::default() 106 | }, 107 | ) 108 | .await?; 109 | println!("Created lease: {}", lease.name_unchecked()); 110 | Ok::<_, kubert::lease::Error>(0) 111 | }) 112 | .await 113 | } 114 | 115 | Command::Get { name } => { 116 | let rt = rt.build().await?; 117 | let api = kube::Api::namespaced(rt.client(), &namespace); 118 | run(rt, async move { 119 | let lease = kubert::LeaseManager::init(api, name) 120 | .await? 121 | .with_field_manager(field_manager); 122 | match lease.claimed().await { 123 | Some(claim) => print_claim(&claim, &identity), 124 | None => println!("? Unclaimed"), 125 | } 126 | Ok::<_, kubert::lease::Error>(0) 127 | }) 128 | .await 129 | } 130 | 131 | Command::Claim { 132 | duration: Timeout(lease_duration), 133 | renew_grace_period: Timeout(renew_grace_period), 134 | name, 135 | } => { 136 | let rt = rt.build().await?; 137 | let api = kube::Api::namespaced(rt.client(), &namespace); 138 | run(rt, async move { 139 | let params = kubert::lease::ClaimParams { 140 | lease_duration, 141 | renew_grace_period, 142 | }; 143 | 144 | let lease = kubert::LeaseManager::init(api, name) 145 | .await? 
146 | .with_field_manager(field_manager); 147 | let claim = lease.ensure_claimed(&identity, &params).await?; 148 | print_claim(&claim, &identity); 149 | 150 | Ok::<_, kubert::lease::Error>(!claim.is_current_for(&identity) as i32) 151 | }) 152 | .await 153 | } 154 | 155 | Command::Vacate { name } => { 156 | let rt = rt.build().await?; 157 | let api = kube::Api::namespaced(rt.client(), &namespace); 158 | run(rt, async move { 159 | let released = kubert::LeaseManager::init(api, name) 160 | .await? 161 | .with_field_manager(field_manager) 162 | .vacate(&identity) 163 | .await?; 164 | let code = if released { 165 | println!("+ Claim vacated"); 166 | 0 167 | } else { 168 | println!("- Claim not vacated"); 169 | 1 170 | }; 171 | Ok::<_, kubert::lease::Error>(code) 172 | }) 173 | .await 174 | } 175 | 176 | Command::Run { 177 | duration: Timeout(lease_duration), 178 | renew_grace_period: Timeout(renew_grace_period), 179 | name, 180 | } => { 181 | let mut prom = prometheus_client::registry::Registry::default(); 182 | 183 | let claim_state = prometheus_client::metrics::gauge::Gauge::<i64>::default(); 184 | prom.sub_registry_with_prefix("kubert_lease").register( 185 | "claimed", 186 | "Indicates whether this instance is owns the lease", 187 | claim_state.clone(), 188 | ); 189 | 190 | let state_changes = prometheus_client::metrics::counter::Counter::<u64>::default(); 191 | prom.sub_registry_with_prefix("kubert_lease").register( 192 | "claim_changes", 193 | "Counts changes of this process's claim of the lease", 194 | state_changes.clone(), 195 | ); 196 | 197 | let rt = rt 198 | .with_admin(admin.into_builder().with_prometheus(prom)) 199 | .build() 200 | .await?; 201 | let shutdown = rt.shutdown_handle(); 202 | let (mut claims, task) = rt 203 | .spawn_lease(kubert::LeaseParams { 204 | name, 205 | namespace, 206 | field_manager: Some(field_manager.into()), 207 | claimant: identity.clone(), 208 | lease_duration, 209 | renew_grace_period, 210 | }) 211 | .await?; 212 | run(rt, async move {
let mut claimed = false; 214 | loop { 215 | claimed = { 216 | let claim = claims.borrow_and_update(); 217 | print_claim(&claim, &identity); 218 | match (claimed, claim.is_current_for(&identity)) { 219 | (true, true) => true, 220 | (false, false) => false, 221 | (true, false) => { 222 | claim_state.set(0); 223 | state_changes.inc(); 224 | false 225 | } 226 | (false, true) => { 227 | claim_state.set(1); 228 | state_changes.inc(); 229 | true 230 | } 231 | } 232 | }; 233 | 234 | let shutdown = shutdown.clone(); 235 | tokio::select! { 236 | biased; 237 | _ = shutdown.signaled() => { 238 | return Ok(0); 239 | } 240 | res = claims.changed() => { 241 | if res.is_err() { 242 | task.await.expect("task")?; 243 | return Ok(0); 244 | } 245 | } 246 | } 247 | } 248 | }) 249 | .await 250 | } 251 | } 252 | } 253 | 254 | async fn run<F>(rt: kubert::Runtime, fut: F) -> Result<()> 255 | where 256 | F: std::future::Future<Output = Result<i32, kubert::lease::Error>> + Send + 'static, 257 | { 258 | tokio::select! { 259 | // Block the main thread on the shutdown signal. This won't complete 260 | // until the watch stream stops (after pending Pod updates are logged). 261 | // If a second signal is received before the watch stream completes, the 262 | // future fails. 263 | res = rt.run() => res.map_err(Into::into), 264 | 265 | // If the watch stream completes, exit gracefully 266 | res = fut => { 267 | let code = res?; 268 | if code != 0 { 269 | std::process::exit(code); 270 | } 271 | Ok(()) 272 | }, 273 | } 274 | } 275 | 276 | fn print_claim(claim: &kubert::lease::Claim, identity: &str) { 277 | let holder = &claim.holder; 278 | let expiry = claim 279 | .expiry 280 | .to_rfc3339_opts(chrono::SecondsFormat::Secs, true); 281 | 282 | if !claim.is_current() { 283 | println!("!
Expired for {holder} at {expiry}"); 284 | return; 285 | } 286 | 287 | println!( 288 | "{} Claimed by {holder} until {expiry}", 289 | if claim.holder == identity { "+" } else { "-" } 290 | ); 291 | } 292 | 293 | #[derive(Copy, Clone, Debug)] 294 | struct Timeout(time::Duration); 295 | 296 | #[derive(Copy, Clone, Debug, thiserror::Error)] 297 | #[error("invalid duration")] 298 | struct InvalidTimeout; 299 | 300 | impl std::str::FromStr for Timeout { 301 | type Err = InvalidTimeout; 302 | 303 | fn from_str(s: &str) -> Result<Self, Self::Err> { 304 | let re = regex::Regex::new(r"^\s*(\d+)(ms|s|m)?\s*$").expect("duration regex"); 305 | let cap = re.captures(s).ok_or(InvalidTimeout)?; 306 | let magnitude = cap[1].parse().map_err(|_| InvalidTimeout)?; 307 | let t = match cap.get(2).map(|m| m.as_str()) { 308 | None if magnitude == 0 => time::Duration::from_millis(0), 309 | Some("ms") => time::Duration::from_millis(magnitude), 310 | Some("s") => time::Duration::from_secs(magnitude), 311 | Some("m") => time::Duration::from_secs(magnitude * 60), 312 | _ => return Err(InvalidTimeout), 313 | }; 314 | Ok(Self(t)) 315 | } 316 | } 317 | -------------------------------------------------------------------------------- /examples/tests/lease.rs: -------------------------------------------------------------------------------- 1 | #![deny(warnings, rust_2018_idioms)] 2 | 3 | use k8s_openapi::{ 4 | api::coordination::v1 as coordv1, 5 | apimachinery::pkg::apis::meta::v1::{self as metav1}, 6 | }; 7 | use kubert::LeaseManager; 8 | use maplit::{btreemap, convert_args}; 9 | use tokio::time; 10 | 11 | type Api = kube::Api<coordv1::Lease>; 12 | 13 | macro_rules! assert_time_eq { 14 | ($a:expr, $b:expr $(,)?)
=> { 15 | assert_eq!( 16 | $a.to_rfc3339_opts(chrono::SecondsFormat::Millis, true), 17 | $b.to_rfc3339_opts(chrono::SecondsFormat::Millis, true), 18 | ); 19 | }; 20 | } 21 | 22 | #[tokio::test(flavor = "current_thread")] 23 | async fn exclusive() { 24 | let handle = Handle::setup().await; 25 | 26 | // Create a lease instance and claim it. 27 | 28 | let lease0 = handle.init_new().await; 29 | let params = kubert::lease::ClaimParams { 30 | lease_duration: time::Duration::from_secs(3), 31 | ..Default::default() 32 | }; 33 | let claim0 = lease0 34 | .ensure_claimed("alice", &params) 35 | .await 36 | .expect("claim"); 37 | assert!(claim0.is_current_for("alice")); 38 | 39 | // Create another lease instance and try to claim it--the prior lease should 40 | // have precedence. 41 | 42 | let lease1 = handle.init_new().await; 43 | let claim1 = lease1.ensure_claimed("bob", &params).await.expect("claim"); 44 | assert_eq!(claim0.holder, claim1.holder); 45 | assert_eq!(claim0.expiry.timestamp(), claim1.expiry.timestamp()); 46 | assert!(claim0.is_current_for("alice")); 47 | assert!(claim1.is_current_for("alice")); 48 | assert!(!claim0.is_current_for("bob")); 49 | assert!(!claim1.is_current_for("bob")); 50 | 51 | // Inspect the lease resource to verify that it has all expected fields. 52 | let rsrc = handle.get().await; 53 | assert_eq!( 54 | rsrc.holder_identity.as_deref().expect("holderIdentity"), 55 | "alice" 56 | ); 57 | assert_time_eq!( 58 | rsrc.renew_time 59 | .as_ref() 60 | .map(|metav1::MicroTime(t)| t) 61 | .expect("renewTime"), 62 | claim0.expiry - chrono::Duration::from_std(params.lease_duration).unwrap() 63 | ); 64 | // Since we just acquired this, the acquire time and renew time are the 65 | // same.
66 | assert_time_eq!( 67 | rsrc.acquire_time.as_ref().unwrap().0, 68 | rsrc.renew_time.as_ref().unwrap().0 69 | ); 70 | assert_eq!( 71 | time::Duration::from_secs( 72 | rsrc.lease_duration_seconds 73 | .expect("leaseDurationSeconds") 74 | .try_into() 75 | .unwrap() 76 | ), 77 | params.lease_duration 78 | ); 79 | assert_eq!(rsrc.lease_transitions, Some(1)); 80 | 81 | handle.delete().await; 82 | } 83 | 84 | #[tokio::test(flavor = "current_thread")] 85 | async fn expires() { 86 | let handle = Handle::setup().await; 87 | 88 | let lease = handle.init_new().await; 89 | let params = kubert::lease::ClaimParams { 90 | lease_duration: time::Duration::from_secs(3), 91 | ..Default::default() 92 | }; 93 | let claim0 = lease.ensure_claimed("alice", &params).await.expect("claim"); 94 | assert!(claim0.is_current_for("alice")); 95 | 96 | // Wait for the claim to expire. 97 | claim0.expire().await; 98 | 99 | // Claiming with another identity should succeed. 100 | let claim1 = lease.ensure_claimed("bob", &params).await.expect("claim"); 101 | assert!(claim1.is_current_for("bob")); 102 | 103 | // Inspect the lease resource to verify that it has all expected fields. 104 | let rsrc = handle.get().await; 105 | assert_eq!( 106 | rsrc.holder_identity.as_deref().expect("holderIdentity"), 107 | "bob" 108 | ); 109 | assert_time_eq!( 110 | rsrc.renew_time 111 | .as_ref() 112 | .map(|metav1::MicroTime(t)| t) 113 | .expect("renewTime"), 114 | claim1.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 115 | ); 116 | // Since we just acquired this, the acquire time and renew time are the 117 | // same.
118 | assert_eq!(rsrc.acquire_time, rsrc.renew_time); 119 | assert_eq!( 120 | time::Duration::from_secs( 121 | rsrc.lease_duration_seconds 122 | .expect("leaseDurationSeconds") 123 | .try_into() 124 | .unwrap() 125 | ), 126 | params.lease_duration 127 | ); 128 | assert_eq!(rsrc.lease_transitions, Some(2)); 129 | 130 | handle.delete().await; 131 | } 132 | 133 | #[tokio::test(flavor = "current_thread")] 134 | async fn renews() { 135 | let handle = Handle::setup().await; 136 | 137 | let lease = handle.init_new().await; 138 | let params = kubert::lease::ClaimParams { 139 | lease_duration: time::Duration::from_secs(8), 140 | renew_grace_period: time::Duration::from_secs(5), 141 | }; 142 | let claim0 = lease.ensure_claimed("alice", &params).await.expect("claim"); 143 | assert!(claim0.is_current_for("alice")); 144 | 145 | tokio::time::sleep(time::Duration::from_secs(1)).await; 146 | 147 | // Trying to claim again does not change the expiry. 148 | let claim1 = lease.ensure_claimed("alice", &params).await.expect("claim"); 149 | assert_eq!(claim0, claim1); 150 | 151 | // Wait for the claim to be renewable. 152 | claim0.expire_with_grace(params.renew_grace_period).await; 153 | 154 | // Claiming now (before the expiry) should update the expiry. 155 | let claim2 = lease.ensure_claimed("alice", &params).await.expect("claim"); 156 | assert!(claim2.is_current_for("alice")); 157 | assert_ne!(claim2, claim0); 158 | 159 | // Inspect the lease resource to verify that it has all expected fields.
160 | let rsrc = handle.get().await; 161 | assert_eq!( 162 | rsrc.holder_identity.as_deref().expect("holderIdentity"), 163 | "alice" 164 | ); 165 | assert_time_eq!( 166 | rsrc.renew_time 167 | .as_ref() 168 | .map(|metav1::MicroTime(t)| t) 169 | .expect("renewTime"), 170 | claim2.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 171 | ); 172 | assert_time_eq!( 173 | rsrc.acquire_time 174 | .as_ref() 175 | .map(|metav1::MicroTime(t)| t) 176 | .expect("renewTime"), 177 | claim0.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 178 | ); 179 | assert_eq!( 180 | time::Duration::from_secs( 181 | rsrc.lease_duration_seconds 182 | .expect("leaseDurationSeconds") 183 | .try_into() 184 | .unwrap() 185 | ), 186 | params.lease_duration 187 | ); 188 | assert_eq!(rsrc.lease_transitions, Some(1)); 189 | 190 | // Wait for the claim to expire completely. 191 | claim2.expire().await; 192 | 193 | // Create a new lease that does not share internal state and use it to claim the lease for Bob. 194 | let lease1 = handle.init_new().await; 195 | let claim3 = lease1.ensure_claimed("bob", &params).await.expect("claim"); 196 | assert!(claim3.is_current_for("bob")); 197 | 198 | // The original lease must 199 | let claim4 = lease.ensure_claimed("alice", &params).await.expect("claim"); 200 | assert!(claim4.is_current_for("bob")); 201 | 202 | // Inspect the lease resource to verify that it has all expected fields.
203 | let rsrc = handle.get().await; 204 | assert_eq!( 205 | rsrc.holder_identity.as_deref().expect("holderIdentity"), 206 | "bob" 207 | ); 208 | assert_time_eq!( 209 | rsrc.renew_time 210 | .as_ref() 211 | .map(|metav1::MicroTime(t)| t) 212 | .expect("renewTime"), 213 | claim3.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 214 | ); 215 | assert_time_eq!( 216 | rsrc.acquire_time 217 | .as_ref() 218 | .map(|metav1::MicroTime(t)| t) 219 | .expect("renewTime"), 220 | claim3.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 221 | ); 222 | assert_eq!( 223 | time::Duration::from_secs( 224 | rsrc.lease_duration_seconds 225 | .expect("leaseDurationSeconds") 226 | .try_into() 227 | .unwrap() 228 | ), 229 | params.lease_duration 230 | ); 231 | assert_eq!(rsrc.lease_transitions, Some(2)); 232 | 233 | handle.delete().await; 234 | } 235 | 236 | #[tokio::test(flavor = "current_thread")] 237 | async fn vacates() { 238 | let handle = Handle::setup().await; 239 | 240 | let lease = handle.init_new().await; 241 | let params = kubert::lease::ClaimParams { 242 | lease_duration: time::Duration::from_secs(3), 243 | ..Default::default() 244 | }; 245 | let claim = lease.ensure_claimed("id", &params).await.expect("claim"); 246 | assert!(claim.is_current_for("id")); 247 | let released = lease.vacate("id").await.expect("release"); 248 | assert!(released); 249 | 250 | // Inspect the lease resource to verify that it has all expected fields.
251 | let rsrc = handle.get().await; 252 | assert_eq!(rsrc.holder_identity, None,); 253 | assert_eq!(rsrc.renew_time, None,); 254 | assert_eq!(rsrc.acquire_time, None); 255 | assert_eq!(rsrc.lease_duration_seconds, None); 256 | assert_eq!(rsrc.lease_transitions, Some(1)); 257 | 258 | handle.delete().await; 259 | } 260 | 261 | #[tokio::test(flavor = "current_thread")] 262 | async fn vacate_expired_noop() { 263 | let handle = Handle::setup().await; 264 | 265 | let lease = handle.init_new().await; 266 | let params = kubert::lease::ClaimParams { 267 | lease_duration: time::Duration::from_secs(3), 268 | ..Default::default() 269 | }; 270 | let claim = lease.ensure_claimed("id", &params).await.expect("claim"); 271 | assert!(claim.is_current_for("id")); 272 | claim.expire().await; 273 | let released = lease.vacate("id").await.expect("release"); 274 | assert!(!released); 275 | 276 | // Inspect the lease resource to verify that it has all expected fields. 277 | let rsrc = handle.get().await; 278 | assert_eq!(rsrc.holder_identity.as_deref(), Some("id")); 279 | assert_time_eq!( 280 | rsrc.renew_time 281 | .as_ref() 282 | .map(|metav1::MicroTime(t)| t) 283 | .expect("renewTime"), 284 | claim.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 285 | ); 286 | assert_time_eq!( 287 | rsrc.acquire_time 288 | .as_ref() 289 | .map(|metav1::MicroTime(t)| t) 290 | .expect("renewTime"), 291 | claim.expiry - chrono::Duration::from_std(params.lease_duration).unwrap(), 292 | ); 293 | assert_eq!(rsrc.lease_duration_seconds, Some(3)); 294 | assert_eq!(rsrc.lease_transitions, Some(1)); 295 | 296 | handle.delete().await; 297 | } 298 | 299 | // === Utils === 300 | 301 | struct Handle { 302 | api: Api, 303 | name: String, 304 | _guard: tracing::subscriber::DefaultGuard, 305 | } 306 | 307 | impl Handle { 308 | const NS: &'static str = "default"; 309 | const LABEL: &'static str = "kubert.olix0r.net/test"; 310 | 311 | async fn setup() -> Self { 312 | let _guard = Self::init_tracing();
313 | let client = kube::Client::try_default().await.expect("client"); 314 | let api = Api::namespaced(client, Self::NS); 315 | let name = Self::rand_name("kubert-test"); 316 | api.create( 317 | &Default::default(), 318 | &coordv1::Lease { 319 | metadata: metav1::ObjectMeta { 320 | name: Some(name.clone()), 321 | namespace: Some(Self::NS.to_string()), 322 | labels: Some(convert_args!(btreemap!( 323 | Self::LABEL => std::thread::current().name().expect("thread name"), 324 | ))), 325 | ..Default::default() 326 | }, 327 | ..Default::default() 328 | }, 329 | ) 330 | .await 331 | .expect("create lease"); 332 | Handle { api, name, _guard } 333 | } 334 | 335 | async fn init_new(&self) -> LeaseManager { 336 | LeaseManager::init(self.api.clone(), &self.name) 337 | .await 338 | .expect("lease must initialize") 339 | } 340 | 341 | async fn get(&self) -> coordv1::LeaseSpec { 342 | self.api 343 | .get(&self.name) 344 | .await 345 | .expect("get") 346 | .spec 347 | .expect("spec") 348 | } 349 | 350 | async fn delete(self) { 351 | self.api 352 | .delete(&self.name, &Default::default()) 353 | .await 354 | .expect("delete"); 355 | } 356 | 357 | fn rand_name(base: impl std::fmt::Display) -> String { 358 | use rand::Rng; 359 | 360 | struct LowercaseAlphanumeric; 361 | 362 | // Modified from `rand::distributions::Alphanumeric` 363 | // 364 | // Copyright 2018 Developers of the Rand project 365 | // Copyright (c) 2014 The Rust Project Developers 366 | impl rand::distr::Distribution<u8> for LowercaseAlphanumeric { 367 | fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> u8 { 368 | const RANGE: u32 = 26 + 10; 369 | const CHARSET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789"; 370 | loop { 371 | let var = rng.next_u32() >> (32 - 6); 372 | if var < RANGE { 373 | return CHARSET[var as usize]; 374 | } 375 | } 376 | } 377 | } 378 | 379 | let suffix = rand::rng() 380 | .sample_iter(&LowercaseAlphanumeric) 381 | .take(5) 382 | .map(char::from) 383 | .collect::<String>(); 384 | format!("{}-{}", base, suffix) 385 | } 386 |
387 | fn init_tracing() -> tracing::subscriber::DefaultGuard { 388 | tracing::subscriber::set_default( 389 | tracing_subscriber::fmt() 390 | .with_test_writer() 391 | .with_thread_names(true) 392 | // .without_time() 393 | .with_env_filter( 394 | tracing_subscriber::EnvFilter::try_from_default_env().unwrap_or_else(|_| { 395 | "trace,tower=info,hyper=info,kube=info,h2=info" 396 | .parse() 397 | .unwrap() 398 | }), 399 | ) 400 | .finish(), 401 | ) 402 | } 403 | } 404 | -------------------------------------------------------------------------------- /examples/watch_pods.rs: -------------------------------------------------------------------------------- 1 | #![deny(warnings, rust_2018_idioms)] 2 | #![forbid(unsafe_code)] 3 | 4 | use anyhow::{bail, Result}; 5 | use clap::Parser; 6 | use futures::prelude::*; 7 | use k8s_openapi::api::core::v1::Pod; 8 | use kube::{ 9 | runtime::watcher::{self, Event}, 10 | ResourceExt, 11 | }; 12 | use prometheus_client::metrics::{counter::Counter, family::Family, gauge::Gauge}; 13 | use tokio::time; 14 | use tracing::Instrument; 15 | 16 | #[derive(Clone, Parser)] 17 | #[clap(version)] 18 | struct Args { 19 | /// The tracing filter used for logs 20 | #[clap( 21 | long, 22 | env = "KUBERT_EXAMPLE_LOG", 23 | default_value = "watch_pods=info,warn" 24 | )] 25 | log_level: kubert::LogFilter, 26 | 27 | /// The logging format 28 | #[clap(long, default_value = "plain")] 29 | log_format: kubert::LogFormat, 30 | 31 | #[clap(flatten)] 32 | client: kubert::ClientArgs, 33 | 34 | #[clap(flatten)] 35 | admin: kubert::AdminArgs, 36 | 37 | /// Exit after the first update is received 38 | #[clap(long)] 39 | exit: bool, 40 | 41 | /// The amount of time to wait for the first update 42 | #[clap(long, default_value = "10s")] 43 | timeout: Timeout, 44 | 45 | /// An optional pod selector 46 | #[clap(long, short = 'l')] 47 | selector: Option<String>, 48 | } 49 | 50 | #[derive(Clone, Debug)] 51 | struct Metrics { 52 | events_restart: Counter, 53 | events_apply: Counter,
54 | events_delete: Counter, 55 | current_pods: Gauge, 56 | total_pods: Counter, 57 | } 58 | 59 | #[tokio::main] 60 | async fn main() -> Result<()> { 61 | let Args { 62 | log_level, 63 | log_format, 64 | client, 65 | admin, 66 | exit, 67 | timeout: Timeout(timeout), 68 | selector, 69 | } = Args::parse(); 70 | 71 | let mut prom = prometheus_client::registry::Registry::default(); 72 | 73 | // Register application metrics before configuring the admin server. 74 | let metrics = Metrics::register(prom.sub_registry_with_prefix("example_watch_pods")); 75 | let runtime_metrics = kubert::RuntimeMetrics::register(prom.sub_registry_with_prefix("kubert")); 76 | 77 | // Configure a runtime with: 78 | // - a Kubernetes client 79 | // - an admin server with /live, /ready, and /metrics endpoints 80 | // - a tracing (logging) subscriber 81 | let rt = kubert::Runtime::builder() 82 | .with_log(log_level, log_format) 83 | .with_admin(admin.into_builder().with_prometheus(prom)) 84 | .with_metrics(runtime_metrics) 85 | .with_client(client); 86 | 87 | let deadline = time::Instant::now() + timeout; 88 | let mut runtime = match time::timeout_at(deadline, rt.build()).await { 89 | Ok(res) => res?, 90 | Err(_) => bail!("Timed out waiting for Kubernetes client to initialize"), 91 | }; 92 | 93 | // Watch all pods and print changes. 94 | // 95 | // This stream completes when shutdown is signaled; and the admin endpoint does not return ready 96 | // until the first update is received. 97 | tracing::debug!(?selector); 98 | let watcher_config = selector 99 | .iter() 100 | .fold(watcher::Config::default(), |p, l| p.labels(l)); 101 | let pods = runtime.watch_all::<Pod>(watcher_config); 102 | let mut deadline = Some(deadline); 103 | let task = tokio::spawn( 104 | async move { 105 | tokio::pin!(pods); 106 | 107 | // Keep a list of all known pods so we can identify new and deleted pods on restart. 108 | // The watch will restart roughly every 5 minutes.
109 | let mut known = std::collections::HashSet::<(String, String)>::new(); 110 | let mut prior = Default::default(); 111 | while let Some(ev) = init_timeout(deadline.take(), pods.next()).await? { 112 | tracing::trace!(?ev); 113 | match ev { 114 | Event::Init => { 115 | prior = std::mem::take(&mut known); 116 | } 117 | Event::InitApply(pod) => { 118 | metrics.events_restart.inc(); 119 | 120 | let namespace = pod.namespace().unwrap(); 121 | let name = pod.name_unchecked(); 122 | let k = (namespace.clone(), name.clone()); 123 | if prior.remove(&k) { 124 | tracing::debug!(%namespace, %name, "Already exists") 125 | } else { 126 | metrics.current_pods.inc(); 127 | metrics.total_pods.inc(); 128 | tracing::info!(%namespace, %name, "Added") 129 | } 130 | known.insert(k); 131 | } 132 | Event::InitDone => { 133 | tracing::debug!(pods = %known.len(), "Restarted"); 134 | for (namespace, name) in std::mem::take(&mut prior).into_iter() { 135 | metrics.current_pods.dec(); 136 | tracing::info!(%namespace, %name, "Deleted") 137 | } 138 | } 139 | 140 | Event::Apply(pod) => { 141 | metrics.events_apply.inc(); 142 | let namespace = pod.namespace().unwrap(); 143 | let name = pod.name_unchecked(); 144 | if known.insert((namespace.clone(), name.clone())) { 145 | metrics.current_pods.inc(); 146 | metrics.total_pods.inc(); 147 | tracing::info!(%namespace, %name, "Added"); 148 | } else { 149 | tracing::info!(%namespace, %name, "Updated"); 150 | } 151 | } 152 | 153 | Event::Delete(pod) => { 154 | metrics.events_delete.inc(); 155 | let namespace = pod.namespace().unwrap(); 156 | let name = pod.name_unchecked(); 157 | tracing::info!(%namespace, %name, "Deleted"); 158 | if known.remove(&(namespace, name)) { 159 | metrics.current_pods.dec(); 160 | } 161 | } 162 | } 163 | 164 | if exit { 165 | return Ok::<_, anyhow::Error>(()); 166 | } 167 | } 168 | tracing::debug!("completed"); 169 | Ok(()) 170 | } 171 | .instrument(tracing::info_span!("pods")), 172 | ); 173 | 174 | tokio::select! 
{ 175 | // Block the main thread on the shutdown signal. This won't complete until the watch stream 176 | // stops (after pending Pod updates are logged). If a second signal is received before the watch 177 | // stream completes, the future fails. 178 | res = runtime.run() => { 179 | if res.is_err() { 180 | bail!("aborted"); 181 | } 182 | } 183 | 184 | // If the watch stream completes, exit gracefully 185 | res = task => match res { 186 | Err(error) => bail!("spawned task failed: {}", error), 187 | Ok(Err(_)) => bail!("Timed out waiting for the first update"), 188 | Ok(Ok(())) => { 189 | tracing::debug!("watch completed"); 190 | } 191 | }, 192 | } 193 | 194 | Ok(()) 195 | } 196 | 197 | #[derive(Copy, Clone, Debug)] 198 | struct Timeout(time::Duration); 199 | 200 | #[derive(Copy, Clone, Debug, thiserror::Error)] 201 | #[error("invalid duration")] 202 | struct InvalidTimeout; 203 | 204 | impl std::str::FromStr for Timeout { 205 | type Err = InvalidTimeout; 206 | 207 | fn from_str(s: &str) -> Result<Self, Self::Err> { 208 | let re = regex::Regex::new(r"^\s*(\d+)(ms|s|m)?\s*$").expect("duration regex"); 209 | let cap = re.captures(s).ok_or(InvalidTimeout)?; 210 | let magnitude = cap[1].parse().map_err(|_| InvalidTimeout)?; 211 | let t = match cap.get(2).map(|m| m.as_str()) { 212 | None if magnitude == 0 => time::Duration::from_millis(0), 213 | Some("ms") => time::Duration::from_millis(magnitude), 214 | Some("s") => time::Duration::from_secs(magnitude), 215 | Some("m") => time::Duration::from_secs(magnitude * 60), 216 | _ => return Err(InvalidTimeout), 217 | }; 218 | Ok(Self(t)) 219 | } 220 | } 221 | 222 | async fn init_timeout<F: Future<Output = T>, T>(deadline: Option<time::Instant>, future: F) -> Result<T> { 223 | if let Some(deadline) = deadline { 224 | return time::timeout_at(deadline, future).await.map_err(Into::into); 225 | } 226 | 227 | Ok(future.await) 228 | } 229 | 230 | impl Metrics { 231 | fn register(prom: &mut prometheus_client::registry::Registry) -> Self { 232 | let events = Family::<_, Counter>::default(); 233 |
# Depends on utilities from `github.com/linkerd/dev`.
# Build the workspace
-f examples/Dockerfile \ 73 | --tag=kubert-examples:test --output=type=docker \ 74 | {{ if features != "all" { "--build-arg='FEATURES=" + features + "'" } else { "" } }} 75 | 76 | test-cluster-create: 77 | #!/usr/bin/env bash 78 | set -euo pipefail 79 | export K3S_DISABLE='local-storage,traefik,servicelb,metrics-server@server:*' 80 | export K3D_CREATE_ARGS='--no-lb' 81 | just-k3d create 82 | 83 | test-cluster-delete: 84 | @just-k3d delete 85 | 86 | _test-cluster-exists: 87 | @just-k3d ready 88 | 89 | test-cluster-import-examples: build-examples-image _test-cluster-exists 90 | @just-k3d import kubert-examples:test 91 | 92 | _test-sfx := `tr -dc 'a-z0-9' std::io::Result<()> { 59 | let start_time = Instant::now(); 60 | let start_time_from_epoch = SystemTime::now() 61 | .duration_since(UNIX_EPOCH) 62 | .expect("process start time"); 63 | 64 | #[cfg(target_os = "linux")] 65 | let system = linux::System::load()?; 66 | 67 | reg.register_with_unit( 68 | "start_time", 69 | "Time that the process started (in seconds since the UNIX epoch)", 70 | Unit::Seconds, 71 | ConstGauge::new(start_time_from_epoch.as_secs_f64()), 72 | ); 73 | 74 | let clock_time_ts = Gauge::::default(); 75 | reg.register_with_unit( 76 | "clock_time", 77 | "Current system time for this process", 78 | Unit::Seconds, 79 | clock_time_ts, 80 | ); 81 | 82 | reg.register_collector(Box::new(ProcessCollector { 83 | start_time, 84 | #[cfg(target_os = "linux")] 85 | system, 86 | })); 87 | 88 | Ok(()) 89 | } 90 | 91 | #[derive(Debug)] 92 | struct ProcessCollector { 93 | start_time: Instant, 94 | #[cfg(target_os = "linux")] 95 | system: linux::System, 96 | } 97 | 98 | impl Collector for ProcessCollector { 99 | fn encode(&self, mut encoder: DescriptorEncoder<'_>) -> std::fmt::Result { 100 | let uptime = ConstCounter::new( 101 | Instant::now() 102 | .saturating_duration_since(self.start_time) 103 | .as_secs_f64(), 104 | ); 105 | let ue = encoder.encode_descriptor( 106 | "uptime", 107 | "Total time since the process 
// Metric that always reports the current system time on a call to [`get`].
//
// Registered as the `clock_time` gauge: because every read goes through
// [`get`], the exported value is always "now" rather than a stored sample.
#[derive(Copy, Clone, Debug, Default)]
struct ClockMetric;

impl gauge::Atomic for ClockMetric {
    // There is no state to mutate, so every mutator below ignores its
    // argument and simply reports the current time, matching `get`.
    fn inc(&self) -> f64 {
        self.get()
    }

    fn inc_by(&self, _v: f64) -> f64 {
        self.get()
    }

    fn dec(&self) -> f64 {
        self.get()
    }

    fn dec_by(&self, _v: f64) -> f64 {
        self.get()
    }

    fn set(&self, _v: f64) -> f64 {
        self.get()
    }

    // Seconds since the UNIX epoch, truncated to whole seconds via `floor`.
    // If the system clock reads earlier than the epoch, the offset is
    // reported as a negative timestamp (and a warning is logged).
    fn get(&self) -> f64 {
        match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_secs_f64().floor(),
            Err(e) => {
                tracing::warn!(
                    "System time is before the UNIX epoch; reporting negative timestamp"
                );
                -e.duration().as_secs_f64().floor()
            }
        }
    }
}
18 | // 19 | // Copyright The Linkerd Authors 20 | // 21 | // Licensed under the Apache License, Version 2.0 (the "License"); 22 | // you may not use this file except in compliance with the License. 23 | // You may obtain a copy of the License at 24 | // 25 | // http://www.apache.org/licenses/LICENSE-2.0 26 | // 27 | // Unless required by applicable law or agreed to in writing, software 28 | // distributed under the License is distributed on an "AS IS" BASIS, 29 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 30 | // See the License for the specific language governing permissions and 31 | // limitations under the License. 32 | 33 | use libc::{self, pid_t}; 34 | use process::Stat; 35 | use procfs::{ 36 | process::{self, LimitValue, Process}, 37 | ProcResult, 38 | }; 39 | use prometheus_client::{ 40 | collector::Collector, 41 | encoding::{DescriptorEncoder, EncodeMetric}, 42 | metrics::{counter::ConstCounter, gauge::ConstGauge, MetricType}, 43 | registry::Unit, 44 | }; 45 | use std::time::Duration; 46 | use std::{fs, io}; 47 | use tracing::{error, warn}; 48 | 49 | mod netstat; 50 | 51 | use self::netstat::ProcNetstat; 52 | 53 | #[derive(Clone, Debug)] 54 | pub(super) struct System { 55 | page_size: u64, 56 | ms_per_tick: u64, 57 | } 58 | 59 | impl System { 60 | pub fn load() -> std::io::Result { 61 | let page_size = page_size()?; 62 | let ms_per_tick = ms_per_tick()?; 63 | Ok(Self { 64 | page_size, 65 | ms_per_tick, 66 | }) 67 | } 68 | } 69 | 70 | impl Collector for System { 71 | fn encode(&self, mut encoder: DescriptorEncoder<'_>) -> std::fmt::Result { 72 | let stat = match blocking_stat() { 73 | Ok(stat) => stat, 74 | Err(error) => { 75 | tracing::warn!(%error, "Failed to read process stats"); 76 | return Ok(()); 77 | } 78 | }; 79 | 80 | let clock_ticks = stat.utime + stat.stime; 81 | let cpu = 82 | ConstCounter::new(Duration::from_millis(clock_ticks * self.ms_per_tick).as_secs_f64()); 83 | let cpue = encoder.encode_descriptor( 84 | 
"cpu", 85 | "Total user and system CPU time spent in seconds", 86 | Some(&Unit::Seconds), 87 | MetricType::Counter, 88 | )?; 89 | cpu.encode(cpue)?; 90 | 91 | let vm_bytes = ConstGauge::new(stat.vsize as i64); 92 | let vme = encoder.encode_descriptor( 93 | "virtual_memory", 94 | "Virtual memory size in bytes", 95 | Some(&Unit::Bytes), 96 | MetricType::Gauge, 97 | )?; 98 | vm_bytes.encode(vme)?; 99 | 100 | let rss_bytes = ConstGauge::new((stat.rss * self.page_size) as i64); 101 | let rsse = encoder.encode_descriptor( 102 | "resident_memory", 103 | "Resident memory size in bytes", 104 | Some(&Unit::Bytes), 105 | MetricType::Gauge, 106 | )?; 107 | rss_bytes.encode(rsse)?; 108 | 109 | match open_fds(stat.pid) { 110 | Ok(open_fds) => { 111 | let fds = ConstGauge::new(open_fds as i64); 112 | let fdse = encoder.encode_descriptor( 113 | "open_fds", 114 | "Number of open file descriptors", 115 | None, 116 | MetricType::Gauge, 117 | )?; 118 | fds.encode(fdse)?; 119 | } 120 | Err(error) => { 121 | tracing::warn!(%error, "Could not determine open fds"); 122 | } 123 | } 124 | 125 | match max_fds() { 126 | Ok(max_fds) => { 127 | let fds = ConstGauge::new(max_fds as i64); 128 | let fdse = encoder.encode_descriptor( 129 | "max_fds", 130 | "Maximum number of open file descriptors", 131 | None, 132 | MetricType::Gauge, 133 | )?; 134 | fds.encode(fdse)?; 135 | } 136 | Err(error) => { 137 | tracing::warn!(%error, "Could not determine max fds"); 138 | } 139 | } 140 | 141 | // Add network metrics 142 | match ProcNetstat::read(stat.pid) { 143 | Ok(ProcNetstat { ip_ext, .. 
/// Returns the number of milliseconds per scheduler clock tick, derived from
/// `sysconf(_SC_CLK_TCK)`.
///
/// NOTE: `1_000 / clock_ticks_per_sec` is integer division, so a tick rate
/// that does not evenly divide 1000 truncates -- and a rate above 1000 yields
/// 0 -- making CPU-time figures inaccurate. The `warn!` below surfaces any
/// non-standard rate.
fn ms_per_tick() -> io::Result {
    // On Linux, CLK_TCK is ~always `100`, so pure integer division
    // works. This is probably not suitable if we encounter other
    // values.
    let clock_ticks_per_sec = sysconf(libc::_SC_CLK_TCK, "clock ticks per second")?;
    let ms_per_tick = 1_000 / clock_ticks_per_sec;
    if clock_ticks_per_sec != 100 {
        warn!(
            clock_ticks_per_sec,
            ms_per_tick, "Unexpected value; process_cpu_seconds_total may be inaccurate."
        );
    }
    Ok(ms_per_tick)
}
/// Thin checked wrapper around `libc::sysconf`.
///
/// Returns the configuration value as a `u64`, or the last OS error when the
/// call fails; `name` is used only in the error log message.
#[allow(unsafe_code)]
fn sysconf(num: libc::c_int, name: &'static str) -> Result {
    // SAFETY: `libc::sysconf` takes no pointers and has no preconditions; it
    // is sound to invoke with any `c_int` value.
    match unsafe { libc::sysconf(num) } {
        // sysconf signals failure with -1; a value of 0 is never meaningful
        // for the queries made in this module (_SC_PAGESIZE, _SC_CLK_TCK),
        // so anything non-positive is treated as an error. `last_os_error`
        // is read immediately so nothing can clobber `errno` first.
        e if e <= 0 => {
            let error = io::Error::last_os_error();
            error!("error getting {}: {:?}", name, error);
            Err(error)
        }
        val => Ok(val as u64),
    }
}
    /// The `prometheus-client` instruments updated from each `tokio-metrics`
    /// sample; help text for each metric lives at its registration site in
    /// `Runtime::register`.
    #[derive(Debug, Default)]
    struct Metrics {
        // Instantaneous worker-thread count.
        workers: Gauge,
        // Cumulative counts of worker park events.
        park: Counter,
        // Unparks that found no new work.
        noop: Counter,
        // Tasks stolen from other workers.
        steal: Counter,
        // Number of steal operations performed.
        steal_operations: Counter,
        // Remote (off-runtime) schedule operations.
        remote_schedule: Counter,
        // Local (same-worker) schedule operations.
        local_schedule: Counter,
        // Local-queue overflow operations.
        overflow: Counter,
        // Tasks polled across all workers.
        polls: Counter,
        // Time workers spent busy, accumulated in seconds.
        busy: Counter,
        // Depth of the runtime's injection queue (instantaneous).
        injection_queue_depth: Gauge,
        // Combined depth of workers' local queues (instantaneous).
        local_queue_depth: Gauge,
        // Times a worker was forced to yield by budget exhaustion
        // (absolute value in tokio-metrics; updated by delta in `probe`).
        budget_forced_yield: Counter,
        // Times the IO driver was woken.
        io_driver_ready: Counter,
        // TODO poll_count_histogram requires configuration
    }
57 | pub fn register(reg: &mut Registry, runtime: tokio::runtime::Handle) -> Self { 58 | let metrics = Metrics::default(); 59 | 60 | reg.register( 61 | "workers", 62 | "The number of worker threads used by the runtime", 63 | metrics.workers.clone(), 64 | ); 65 | 66 | reg.register( 67 | "park", 68 | "Total number of times worker threads parked", 69 | metrics.park.clone(), 70 | ); 71 | reg.register( 72 | "noop", 73 | "Number of times workers unparked but found no new work", 74 | metrics.noop.clone(), 75 | ); 76 | reg.register( 77 | "steal", 78 | "Number of tasks stolen by workers from others", 79 | metrics.steal.clone(), 80 | ); 81 | reg.register( 82 | "steal_operations", 83 | "Number of times workers stole tasks from other", 84 | metrics.steal_operations.clone(), 85 | ); 86 | 87 | reg.register( 88 | "remote_schedule", 89 | "Total number of remote schedule operations", 90 | metrics.remote_schedule.clone(), 91 | ); 92 | reg.register( 93 | "local_schedule", 94 | "Total number of local schedule operations", 95 | metrics.local_schedule.clone(), 96 | ); 97 | 98 | reg.register( 99 | "overflow", 100 | "Total number of overflow operations", 101 | metrics.overflow.clone(), 102 | ); 103 | reg.register( 104 | "polls", 105 | "The number of tasks that have been polled across all worker threads", 106 | metrics.polls.clone(), 107 | ); 108 | reg.register_with_unit( 109 | "busy", 110 | "Total duration of time when worker threads were busy processing tasks", 111 | Unit::Seconds, 112 | metrics.busy.clone(), 113 | ); 114 | 115 | reg.register( 116 | "injection_queue_depth", 117 | "The number of tasks currently scheduled in the runtime's injection queue", 118 | metrics.injection_queue_depth.clone(), 119 | ); 120 | reg.register( 121 | "local_queue_depth", 122 | "The total number of tasks currently scheduled in workers' local queues", 123 | metrics.local_queue_depth.clone(), 124 | ); 125 | 126 | reg.register( 127 | "budget_forced_yield", 128 | "Number of times a worker thread was forced to 
yield due to budget exhaustion", 129 | metrics.budget_forced_yield.clone(), 130 | ); 131 | reg.register( 132 | "io_driver_ready", 133 | "Number of times the IO driver was woken up", 134 | metrics.io_driver_ready.clone(), 135 | ); 136 | 137 | Self { runtime, metrics } 138 | } 139 | 140 | /// Drives metrics updates for a runtime according to a fixed interval. 141 | pub async fn updated(&self, interval: &mut time::Interval) -> ! { 142 | let mut probes = RuntimeMonitor::new(&self.runtime).intervals(); 143 | loop { 144 | interval.tick().await; 145 | self.metrics.probe(&mut probes); 146 | } 147 | } 148 | } 149 | 150 | impl Metrics { 151 | #[tracing::instrument(skip_all, ret, level = tracing::Level::TRACE)] 152 | fn probe(&self, probes: &mut RuntimeIntervals) { 153 | let probe = probes.next().expect("runtime metrics stream must not end"); 154 | 155 | // Tokio-metrics tracks all of these values as rates so we have 156 | // to turn them back into absolute counters: 157 | self.park.inc_by(probe.total_park_count); 158 | self.noop.inc_by(probe.total_noop_count); 159 | self.steal.inc_by(probe.total_steal_count); 160 | self.steal_operations.inc_by(probe.total_steal_operations); 161 | self.remote_schedule.inc_by(probe.num_remote_schedules); 162 | self.local_schedule.inc_by(probe.total_local_schedule_count); 163 | self.overflow.inc_by(probe.total_overflow_count); 164 | self.polls.inc_by(probe.total_polls_count); 165 | self.busy.inc_by(probe.total_busy_duration.as_secs_f64()); 166 | self.io_driver_ready.inc_by(probe.io_driver_ready_count); 167 | 168 | // Instantaneous gauges: 169 | self.workers.set(probe.workers_count as i64); 170 | self.injection_queue_depth 171 | .set(probe.total_local_queue_depth as i64); 172 | self.local_queue_depth 173 | .set(probe.total_local_queue_depth as i64); 174 | 175 | // Absolute counters need to be incremented by the delta: 176 | if let Some(delta) = probe 177 | .budget_forced_yield_count 178 | .checked_sub(self.budget_forced_yield.get()) 179 | { 180 
| self.budget_forced_yield.inc_by(delta); 181 | } else { 182 | tracing::trace!("budget_forced_yield_count overflow"); 183 | } 184 | } 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /kubert/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kubert" 3 | version = "0.24.0" 4 | edition = "2021" 5 | license = "Apache-2.0" 6 | description = "Kubernetes runtime helpers. Based on kube-rs." 7 | readme = "../README.md" 8 | repository = "https://github.com/olix0r/kubert" 9 | rust-version = "1.81" 10 | keywords = ["kubernetes", "client", "runtime", "server"] 11 | 12 | [features] 13 | rustls-tls = [ 14 | "dep:rustls-pemfile", 15 | "dep:tokio-rustls", 16 | "kube-client?/rustls-tls", 17 | ] 18 | rustls-tls-aws-lc-rs = [ 19 | "rustls-tls", 20 | "tokio-rustls/aws-lc-rs", 21 | "kube-client?/aws-lc-rs", 22 | "kube-client?/rustls-tls", 23 | ] 24 | rustls-tls-ring = [ 25 | "rustls-tls", 26 | "tokio-rustls/ring", 27 | "kube-client?/ring", 28 | "kube-client?/rustls-tls", 29 | ] 30 | openssl-tls = [ 31 | "dep:hyper-openssl", 32 | "dep:once_cell", 33 | "dep:openssl", 34 | "dep:tokio-openssl", 35 | "kube-client?/openssl-tls", 36 | ] 37 | admin = [ 38 | "dep:ahash", 39 | "dep:bytes", 40 | "dep:futures-util", 41 | "dep:http-body-util", 42 | "dep:thiserror", 43 | "dep:tower", 44 | "dep:tracing", 45 | "hyper/http1", 46 | "hyper/server", 47 | "hyper-util/server", 48 | "hyper-util/tokio", 49 | "tokio/sync", 50 | "tower/util", 51 | ] 52 | admin-brotli = ["tower-http/compression-br"] 53 | admin-gzip = ["tower-http/compression-gzip"] 54 | admin-compression = ["admin-brotli", "admin-gzip"] 55 | client = [ 56 | "dep:bytes", 57 | "dep:futures-util", 58 | "dep:hyper", 59 | "dep:kube-client", 60 | "dep:thiserror", 61 | "dep:tower", 62 | "tower/util", 63 | "tower-http/map-response-body", 64 | ] 65 | client-brotli = ["tower-http/decompression-br"] 66 | client-gzip = 
["tower-http/decompression-gzip"] 67 | client-decompression = ["client-brotli", "client-gzip"] 68 | errors = [ 69 | "dep:futures-core", 70 | "dep:futures-util", 71 | "dep:pin-project-lite", 72 | "dep:tracing", 73 | "tokio/time", 74 | ] 75 | index = [ 76 | "dep:ahash", 77 | "dep:futures-core", 78 | "dep:futures-util", 79 | "dep:kube-core", 80 | "dep:kube-runtime", 81 | "dep:tracing", 82 | "dep:parking_lot", 83 | ] 84 | initialized = [ 85 | "dep:futures-core", 86 | "dep:futures-util", 87 | "dep:pin-project-lite", 88 | "tokio/sync", 89 | ] 90 | lease = [ 91 | "dep:backon", 92 | "dep:chrono", 93 | "dep:futures-util", 94 | "dep:hyper", 95 | "dep:k8s-openapi", 96 | "dep:kube-client", 97 | "dep:kube-core", 98 | "dep:serde", 99 | "dep:serde_json", 100 | "dep:thiserror", 101 | "dep:tracing", 102 | "tokio/sync", 103 | ] 104 | log = ["dep:thiserror", "dep:tracing", "dep:tracing-subscriber"] 105 | prometheus-client = [ 106 | "dep:kubert-prometheus-process", 107 | "dep:kubert-prometheus-tokio", 108 | "dep:prometheus-client", 109 | ] 110 | requeue = [ 111 | "dep:futures-core", 112 | "dep:tracing", 113 | "tokio/macros", 114 | "tokio/sync", 115 | "tokio-util/time", 116 | ] 117 | runtime = [ 118 | "dep:kube-core", 119 | "dep:kube-runtime", 120 | "dep:serde", 121 | "dep:thiserror", 122 | "dep:tracing", 123 | "admin", 124 | "client", 125 | "errors", 126 | "initialized", 127 | "log", 128 | "shutdown", 129 | ] 130 | runtime-diagnostics = [ 131 | "dep:chrono", 132 | "dep:serde_json", 133 | "dep:k8s-openapi", 134 | "dep:parking_lot", 135 | "dep:sha2", 136 | "runtime", 137 | ] 138 | runtime-brotli = ["admin-brotli", "client-brotli"] 139 | runtime-gzip = ["admin-gzip", "client-gzip"] 140 | runtime-compression = ["admin-compression", "client-decompression"] 141 | server = [ 142 | "dep:drain", 143 | "dep:rustls-pemfile", 144 | "dep:thiserror", 145 | "dep:tower", 146 | "dep:tracing", 147 | "hyper/http1", 148 | "hyper/http2", 149 | "hyper/server", 150 | "hyper-util/server", 151 | 
"hyper-util/server-auto", 152 | "hyper-util/server-graceful", 153 | "hyper-util/service", 154 | "tokio/fs", 155 | "tokio/macros", 156 | "tokio/net", 157 | "tokio/rt", 158 | ] 159 | server-brotli = ["tower-http/compression-br", "tower-http/decompression-br"] 160 | server-gzip = ["tower-http/compression-gzip", "tower-http/decompression-gzip"] 161 | server-compression = ["server-brotli", "server-gzip"] 162 | shutdown = [ 163 | "dep:drain", 164 | "dep:futures-core", 165 | "dep:thiserror", 166 | "dep:tracing", 167 | "tokio/macros", 168 | "tokio/signal", 169 | ] 170 | 171 | [package.metadata.docs.rs] 172 | features = [ 173 | "rustls-tls", 174 | "openssl-tls", 175 | "admin", 176 | "client", 177 | "errors", 178 | "gzip", 179 | "index", 180 | "initialized", 181 | "lease", 182 | "log", 183 | "prometheus-client", 184 | "requeue", 185 | "runtime", 186 | "runtime-diagnostics", 187 | "server", 188 | "shutdown", 189 | "k8s-openapi/latest", 190 | ] 191 | rustdoc-args = ["--cfg", "docsrs"] 192 | 193 | [lints.rust] 194 | unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tokio_unstable)'] } 195 | 196 | [dependencies] 197 | ahash = { version = "0.8", optional = true } 198 | backon = { version = "1", optional = true, features = ["tokio-sleep"] } 199 | bytes = { version = "1", optional = true } 200 | drain = { version = "0.2.1", optional = true, default-features = false } 201 | chrono = { version = "0.4", optional = true, default-features = false } 202 | futures-core = { version = "0.3", optional = true, default-features = false } 203 | futures-util = { version = "0.3", optional = true, default-features = false } 204 | http-body-util = { version = "0.1", optional = true } 205 | hyper = { workspace = true, optional = true, default-features = false } 206 | hyper-openssl = { workspace = true, optional = true } 207 | hyper-util = { workspace = true, optional = true, default-features = false } 208 | once_cell = { version = "1", optional = true } 209 | openssl = { version = "0.10.57", 
optional = true, default-features = false } 210 | parking_lot = { version = "0.12", optional = true } 211 | pin-project-lite = { version = "0.2", optional = true } 212 | prometheus-client = { workspace = true, optional = true } 213 | rustls-pemfile = { version = "2", optional = true } 214 | serde = { version = "1", optional = true } 215 | serde_json = { version = "1", optional = true } 216 | sha2 = { version = "0.10", optional = true } 217 | thiserror = { version = "2", optional = true } 218 | tokio = { workspace = true, optional = false, default-features = false } 219 | tokio-rustls = { workspace = true, optional = true } 220 | tokio-openssl = { version = "0.6.3", optional = true } 221 | tokio-util = { version = "0.7", optional = true, default-features = false } 222 | tower-http = { version = "0.6.0", optional = true, default-features = false } 223 | tower = { version = "0.5", default-features = false, optional = true } 224 | tracing = { version = "0.1.31", optional = true } 225 | 226 | kubert-prometheus-process = { version = "0.2.0", path = "../kubert-prometheus-process", optional = true } 227 | 228 | [dependencies.clap] 229 | version = "4" 230 | optional = true 231 | default-features = false 232 | features = ["derive", "std"] 233 | 234 | # Not used directly, but required to ensure that the k8s-openapi dependency is considered part of 235 | # the "deps" graph rather than just the "dev-deps" graph 236 | [dependencies.k8s-openapi] 237 | workspace = true 238 | optional = true 239 | 240 | [dependencies.kube-client] 241 | workspace = true 242 | optional = true 243 | features = ["client", "config"] 244 | 245 | [dependencies.kube-core] 246 | workspace = true 247 | optional = true 248 | 249 | [dependencies.kube-runtime] 250 | workspace = true 251 | optional = true 252 | 253 | [dependencies.tracing-subscriber] 254 | version = "0.3.9" 255 | optional = true 256 | default-features = false 257 | features = ["env-filter", "fmt", "json", "smallvec", "tracing-log"] 258 | 259 | 
[target.'cfg(tokio_unstable)'.dependencies.kubert-prometheus-tokio] 260 | version = "0.2.0" 261 | path = "../kubert-prometheus-tokio" 262 | optional = true 263 | features = ["rt"] 264 | 265 | # === Dev === 266 | 267 | [dev-dependencies] 268 | tokio-rustls = { workspace = true, features = ["aws-lc-rs"] } 269 | kube = { workspace = true, features = ["runtime"] } 270 | rcgen = { version = "0.13.0" } 271 | tempfile = "3.8" 272 | tokio-stream = "0.1" 273 | tokio-test = "0.4" 274 | tracing-subscriber = { version = "0.3", features = ["ansi"] } 275 | 276 | [dev-dependencies.k8s-openapi] 277 | workspace = true 278 | features = ["latest"] 279 | 280 | [dev-dependencies.tokio] 281 | workspace = true 282 | features = ["macros", "test-util"] 283 | -------------------------------------------------------------------------------- /kubert/src/admin/diagnostics.rs: -------------------------------------------------------------------------------- 1 | use k8s_openapi::apimachinery::pkg::apis::meta::v1::Time; 2 | use parking_lot::Mutex; 3 | use std::{net::SocketAddr, sync::Arc}; 4 | 5 | #[cfg(feature = "lease")] 6 | mod lease; 7 | mod watch; 8 | 9 | #[cfg(feature = "lease")] 10 | pub(crate) use self::lease::LeaseDiagnostics; 11 | use self::watch::WatchDiagnostics; 12 | 13 | #[derive(Clone, Debug)] 14 | pub(crate) struct Diagnostics { 15 | initial_time: chrono::DateTime, 16 | watches: Arc>>, 17 | #[cfg(feature = "lease")] 18 | leases: Arc>>, 19 | } 20 | 21 | #[derive(Clone, Debug, serde::Serialize)] 22 | #[serde(rename_all = "camelCase")] 23 | struct Summary { 24 | initial_timestamp: Time, 25 | current_timestamp: Time, 26 | 27 | #[serde(skip_serializing_if = "Vec::is_empty")] 28 | watches: Vec, 29 | 30 | #[cfg(feature = "lease")] 31 | #[serde(skip_serializing_if = "Vec::is_empty")] 32 | leases: Vec, 33 | } 34 | 35 | // === impl Diagnostics === 36 | 37 | impl Diagnostics { 38 | pub(super) fn new() -> Self { 39 | Self { 40 | initial_time: chrono::Utc::now(), 41 | watches: 
    /// Creates and tracks diagnostics state for a watch on the given API.
    ///
    /// Only a *weak* reference is retained in `self.watches`, so dropping the
    /// returned `WatchDiagnostics` is what unregisters the watch: dead weak
    /// refs are pruned by `summarize_watches`. The watch is identified in
    /// diagnostics output by its resource URL and, when present, the
    /// `label_selector`.
    pub(crate) fn register_watch(
        &self,
        api: &crate::runtime::Api,
        label_selector: Option<&str>,
    ) -> WatchDiagnostics
    where
        T: kube_core::Resource,
        T::DynamicType: Default,
    {
        let wd = WatchDiagnostics::new(api.resource_url(), label_selector);
        self.watches.lock().push(wd.weak());
        wd
    }
    /// Collect the summaries of the remaining watches, with their resources
    /// sorted by creation.
    ///
    /// `with_resources` controls whether each watch's summary includes its
    /// observed resources (driven by the `?resources` query parameter).
    fn summarize_watches(&self, with_resources: bool) -> Vec {
        let mut refs = self.watches.lock();
        // Clean up any dead weak refs, i.e. of watches that have been dropped.
        refs.retain(|w| w.upgrade().is_some());
        // A watch can still be dropped between the `retain` above and the
        // second `upgrade` here (dropping the owning Arc does not take this
        // lock), so `filter_map`'s `?` re-checks liveness per entry.
        refs.iter()
            .filter_map(|wref| {
                let watch = wref.upgrade()?;
                let state = watch.read();
                Some(state.summary(with_resources))
            })
            .collect()
    }
LeaseStats, 23 | #[serde(skip_serializing_if = "Option::is_none")] 24 | resource_version: Option, 25 | #[serde(skip_serializing_if = "Option::is_none")] 26 | current: Option, 27 | } 28 | 29 | #[derive(Clone, Debug, serde::Serialize)] 30 | #[serde(rename_all = "camelCase")] 31 | struct LeaseStats { 32 | updates: u64, 33 | creation_timestamp: Time, 34 | #[serde(skip_serializing_if = "Option::is_none")] 35 | last_update_timestamp: Option