├── .github ├── dependabot.yml └── workflows │ ├── publish.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── CONTRIBUTING.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── scripts ├── Dockerfile-develop ├── Dockerfile-multiarch ├── ezbake-deployment.yml ├── install-for-cluster.yml ├── install-for-namespace-admin.yml └── install-for-namespace.yml └── src ├── grpc_access.rs ├── ingest.rs ├── k8s.rs ├── main.rs ├── metrics.rs ├── xds.rs └── xds ├── cache.rs ├── delta.rs ├── resources.rs ├── server.rs └── sotw.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "cargo" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Images to GHCR 2 | 3 | env: 4 | REGISTRY: ghcr.io 5 | IMAGE_NAME: ${{ github.repository }} 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | push-store-image: 14 | runs-on: ubuntu-latest 15 | 16 | permissions: 17 | contents: read 18 | packages: write 19 | id-token: write 20 | 21 | steps: 22 | - name: 'Checkout GitHub Action' 23 | uses: actions/checkout@main 24 | 25 | - name: Set up Docker Buildx 26 | uses: docker/setup-buildx-action@v3.7.1 27 | 28 | - name: 'Login to Registry' 29 | uses: docker/login-action@v1 30 | with: 31 | registry: ${{env.REGISTRY}} 32 | username: ${{github.actor}} 33 | password: ${{secrets.GITHUB_TOKEN}} 34 | 35 | - name: 'Build Image' 36 | run: | 37 | docker buildx build --file ./scripts/Dockerfile-multiarch \ 38 | --platform=linux/arm64,linux/amd64 \ 39 | --push \ 40 | --tag ${{env.REGISTRY}}/junction-labs/${{env.IMAGE_NAME}}:latest \ 41 | . 
42 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Publish Github Release 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | sha: 7 | description: Git Commit SHA. Use the latest commit on main if left blank. 8 | type: string 9 | tag: 10 | description: The version tag of the release, of the form "v1.2.3". Use "latest" if it is a dry-run. 11 | type: string 12 | default: latest 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | env: 19 | REGISTRY: ghcr.io 20 | IMAGE_NAME: ${{ github.repository }} 21 | 22 | jobs: 23 | release-artifacts: 24 | runs-on: ubuntu-latest 25 | 26 | permissions: 27 | contents: write 28 | packages: write 29 | id-token: write 30 | 31 | steps: 32 | - name: "Checkout GitHub Action" 33 | uses: actions/checkout@main 34 | with: 35 | ref: ${{ inputs.sha }} 36 | 37 | - name: Set up Docker Buildx 38 | uses: docker/setup-buildx-action@v3.7.1 39 | 40 | - name: "Login to Registry" 41 | uses: docker/login-action@v1 42 | with: 43 | registry: ${{env.REGISTRY}} 44 | username: ${{github.actor}} 45 | password: ${{secrets.GITHUB_TOKEN}} 46 | 47 | - name: "Build and Publish Image" 48 | run: | 49 | docker buildx build --file ./scripts/Dockerfile-multiarch \ 50 | --platform=linux/arm64,linux/amd64 \ 51 | --push \ 52 | --tag ${{env.REGISTRY}}/junction-labs/${{env.IMAGE_NAME}}:${{inputs.tag}} \ 53 | . 54 | 55 | - name: "Update container tag in YAML" 56 | run: | 57 | echo ${{ github.sha }} > release.txt 58 | sed -i 's|ezbake:latest|ezbake:${{inputs.tag}}|g' scripts/install-for-cluster.yml 59 | sed -i 's|ezbake:latest|ezbake:${{inputs.tag}}|g' scripts/install-for-namespace.yml 60 | 61 | - name: Release 62 | if: ${{ ! 
contains(inputs.tag, 'latest') }} 63 | uses: softprops/action-gh-release@c95fe1489396fe8a9eb87c0abf8aa5b2ef267fda # v0.1.15 64 | with: 65 | tag_name: ${{ inputs.tag }} 66 | files: | 67 | release.txt 68 | scripts/install-for-cluster.yml 69 | scripts/install-for-namespace.yml 70 | scripts/install-for-namespace-admin.yml 71 | env: 72 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 73 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | 7 | pull_request: 8 | branches: [ "main" ] 9 | 10 | env: 11 | CARGO_TERM_COLOR: always 12 | rust_stable: stable 13 | rust_min: 1.79 14 | 15 | jobs: 16 | msrv: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - name: "Install Rust @${{ env.rust_min }}" 21 | uses: dtolnay/rust-toolchain@stable 22 | with: 23 | toolchain: ${{ env.rust_min }} 24 | - uses: Swatinem/rust-cache@v2 25 | - name: check 26 | run: cargo check 27 | tests: 28 | runs-on: ubuntu-latest 29 | steps: 30 | - uses: actions/checkout@v4 31 | - name: "Install Rust @${{ env.rust_stable }}" 32 | uses: dtolnay/rust-toolchain@stable 33 | with: 34 | toolchain: ${{ env.rust_stable }} 35 | - uses: Swatinem/rust-cache@v2 36 | - name: test 37 | run: cargo test 38 | clippy: 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - name: "Install Rust @ ${{ env.rust_stable }}" 43 | uses: dtolnay/rust-toolchain@stable 44 | with: 45 | toolchain: ${{ env.rust_stable }} 46 | - uses: Swatinem/rust-cache@v2 47 | - name: clippy 48 | run: cargo clippy --all --tests --all-features --no-deps -- -D warnings 49 | fmt: 50 | runs-on: ubuntu-latest 51 | steps: 52 | - uses: actions/checkout@v4 53 | - name: "Install Rust @ ${{ env.rust_stable }}" 54 | uses: dtolnay/rust-toolchain@stable 55 | with: 56 | toolchain: ${{ env.rust_stable }} 57 | - uses: 
Swatinem/rust-cache@v2 58 | - name: fmt 59 | run: | 60 | if ! rustfmt --check --edition 2021 $(git ls-files '*.rs'); then 61 | echo "rustfmt found un-formatted files" >&2 62 | exit 1 63 | fi 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to ezbake 2 | 3 | Thanks for contributing to the ezbake repo! 4 | 5 | ## Code of Conduct 6 | 7 | All Junction Labs repos adhere to the [Rust Code of Conduct][coc], without exception. 8 | 9 | [coc]: https://www.rust-lang.org/policies/code-of-conduct 10 | 11 | ## Required Dependencies 12 | 13 | This project depends on having a working `rust` toolchain. We currently do not have an 14 | MSRV policy. 15 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ezbake" 3 | version = "0.3.1" 4 | edition = "2021" 5 | rust-version = "1.79" 6 | 7 | [dependencies] 8 | anyhow = "1.0" 9 | clap = { version = "4.5.27", features = ["derive", "wrap_help"] } 10 | crossbeam-skiplist = "0.1" 11 | enum-map = "2.7" 12 | futures = "0.3" 13 | h2 = "0.3" 14 | http = "0.2" 15 | kube = { version = "0.96", features = ["runtime", "client"] } 16 | metrics = "0.24.0" 17 | metrics-exporter-prometheus = { version = "0.16.0", default-features = false, features = [ 18 | "http-listener", 19 | ] } 20 | once_cell = "1.19" 21 | smol_str = "0.3" 22 | thiserror = "2.0" 23 | tokio = { version = "1.38", features = ["full"] } 24 | tokio-stream = "0.1" 25 | tonic = "0.11" 26 | tonic-reflection = "0.11" 27 | tracing = "0.1" 28 | tracing-subscriber = { version = "0.3", features = ["env-filter", 
"json"] } 29 | tower = "0.4" 30 | tower-http = { version = "0.4", features = ["metrics", "trace"] } 31 | xds-api = { version = "0.1", features = ["descriptor"] } 32 | 33 | # directly depend on k8s-openapi to specify the v1_xx feature 34 | k8s-openapi = { version = "0.23", features = ["v1_29"] } 35 | junction-api = { git = "https://github.com/junction-labs/junction-client", features = [ 36 | "kube", 37 | "xds", 38 | ] } 39 | crossbeam = "0.8.4" 40 | svix-ksuid = "0.8.0" 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # `ezbake` 2 | 3 | A simple xDS control-plane for Junction. 4 | 5 | ## What is it? 6 | 7 | [ezbake] is a simple [xDS] control plane for 8 | Junction, which uses the [gateway_api] to support dynamic configuration. 9 | `ezbake` runs in a Kubernetes cluster, watches its running services, and runs 10 | as an xDS control plane to drive the Junction client. 11 | 12 | [ezbake]: https://github.com/junction-labs/ezbake 13 | [gateway_api]: https://gateway-api.sigs.k8s.io/ 14 | [xDS]: https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol 15 | 16 | ### Installing 17 | 18 | The simplest installation is as follows, which first sets up the Kubernetes 19 | gateway API CRD, and then sets up ezbake as a 2 pod deployment in its own 20 | namespace (junction), with permissions to monitor all services, endpoints, and 21 | gateway API config in the cluster. 
22 | 23 | ```bash 24 | kubectl apply -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/experimental-install.yaml 25 | kubectl apply -f https://github.com/junction-labs/ezbake/releases/latest/download/install-for-cluster.yml 26 | ``` 27 | 28 | Now, to communicate with ezbake, all clients will need the `JUNCTION_ADS_SERVER` environment 29 | variable set as follows: 30 | 31 | ```bash 32 | export JUNCTION_ADS_SERVER="grpc://ezbake.junction.svc.cluster.local:8008" 33 | ``` 34 | 35 | > [!NOTE] 36 | > 37 | > `ezbake` returns Pod IPs directly without any NAT, so if your cluster 38 | > isn't configured to allow talking directly to Pod IPs from outside the cluster, 39 | > any client you run outside the cluster **won't be able to connect to any 40 | > backends**. Notably, local clusters created with `k3d`, `kind`, and Docker 41 | > Desktop behave this way. 42 | 43 | 44 | Once you have an `ezbake` running, use [Junction][junction-client] to generate 45 | Routing and Load Balancing configuration that you can apply to this cluster. 46 | `ezbake` will automatically pick up your configuration and distribute it to any 47 | connected client. See the [Getting Started Guide](https://docs.junctionlabs.io/getting-started/) 48 | for more. 49 | 50 | ### Uninstalling 51 | 52 | To uninstall, run `kubectl delete` on the Gateway APIs and the objects that 53 | `ezbake` installed: 54 | 55 | ```bash 56 | kubectl delete -f https://github.com/junction-labs/ezbake/releases/latest/download/install-for-cluster.yml 57 | kubectl delete -f https://github.com/kubernetes-sigs/gateway-api/releases/download/v1.2.0/experimental-install.yaml 58 | ``` 59 | 60 | ### Building from source 61 | 62 | You can build and run `ezbake` locally and connect to a remote Kubernetes, as 63 | long as you have a valid kubeconfig available. `ezbake` checks the standard 64 | Kubernetes configuration paths, and uses your currently set context.
To change 65 | your currently set context, use `kubectl config use-context`. 66 | 67 | To build ezbake, you'll need a working Rust toolchain - we recommend installing 68 | [rustup](https://rustup.rs) if you haven't already. 69 | 70 | Build ezbake locally with `cargo build --release` and run it with `./target/release/ezbake`. 71 | 72 | `ezbake` is a single-binary server. Run `ezbake --help` for full usage information: 73 | 74 | ```text 75 | Usage: ezbake [OPTIONS] 76 | 77 | Options: 78 | --log-pretty 79 | Log in a pretty, human-readable format 80 | 81 | -l, --listen-addr 82 | The address to listen on 83 | 84 | [default: 0.0.0.0:8008] 85 | 86 | --metrics-addr 87 | The address to expose metrics on 88 | 89 | [default: 0.0.0.0:8009] 90 | 91 | --all-namespaces 92 | Watch all namespaces. Defaults to false. 93 | 94 | It's an error to set both --all-namespaces and --namespace. 95 | 96 | --namespace 97 | The namespace to watch. If this option is not set explicitly, ezbake 98 | will watch the namespace set in the kubeconfig's current 99 | context, the namespace specified by the service account the server is 100 | running as, or the `default` namespace. 101 | 102 | It's an error to set both --all-namespaces and --namespace. 103 | 104 | -h, --help 105 | Print help (see a summary with '-h') 106 | 107 | -V, --version 108 | Print version 109 | ``` 110 | 111 | By default, `ezbake` listens on `0.0.0.0:8008`. Don't forget to set up [your 112 | client][junction-client] to look for your server on that address. 113 | 114 | ### Building a container 115 | 116 | ```bash 117 | docker build --tag ezbake --file ./scripts/Dockerfile-develop --load .
118 | ``` 119 | -------------------------------------------------------------------------------- /scripts/Dockerfile-develop: -------------------------------------------------------------------------------- 1 | FROM rust:1.81 AS build 2 | 3 | # create a new empty shell project 4 | RUN USER=root cargo new --bin ezbake 5 | WORKDIR /ezbake 6 | 7 | # copy over manifests 8 | COPY ./Cargo.lock ./Cargo.lock 9 | COPY ./Cargo.toml ./Cargo.toml 10 | 11 | # this build step will cache dependencies 12 | RUN cargo build --release 13 | RUN rm src/*.rs 14 | 15 | # copy source tree and build 16 | COPY ./src ./src 17 | RUN rm ./target/release/deps/ezbake* 18 | RUN cargo build --release 19 | 20 | # our final base 21 | FROM debian:bookworm-slim 22 | COPY --from=build /ezbake/target/release/ezbake . 23 | CMD ["./ezbake"] 24 | -------------------------------------------------------------------------------- /scripts/Dockerfile-multiarch: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 lukemathwalker/cargo-chef:latest-rust-latest AS amd64-chef 2 | FROM --platform=linux/arm64 lukemathwalker/cargo-chef:latest-rust-latest AS arm64-chef 3 | FROM --platform=$BUILDPLATFORM lukemathwalker/cargo-chef:latest-rust-latest AS chef 4 | WORKDIR /app 5 | 6 | FROM chef AS planner 7 | COPY ./Cargo.lock ./Cargo.lock 8 | COPY ./Cargo.toml ./Cargo.toml 9 | COPY ./src ./src 10 | RUN cargo chef prepare --recipe-path recipe.json 11 | 12 | FROM chef AS builder 13 | COPY --from=planner /app/recipe.json recipe.json 14 | 15 | ARG TARGETPLATFORM 16 | ARG TARGETARCH 17 | 18 | # Copy runtime dependencies for specific target platform/architecture 19 | # ARM specific folders 20 | WORKDIR /all-files/linux/arm64/lib/aarch64-linux-gnu 21 | 22 | # AMD64 specific folders 23 | WORKDIR /all-files/linux/amd64/lib/x86_64-linux-gnu 24 | WORKDIR /all-files/linux/amd64/lib64 25 | 26 | # Common folders 27 | WORKDIR /all-files/${TARGETPLATFORM}/etc/ssl/certs 28 | 
WORKDIR /all-files/${TARGETPLATFORM}/app 29 | 30 | # ARM64 31 | COPY --from=arm64-chef \ 32 | /lib/aarch64-linux-gnu/libssl.so.3 \ 33 | /lib/aarch64-linux-gnu/libcrypto.so.3 \ 34 | /lib/aarch64-linux-gnu/libgcc_s.so.1 \ 35 | /lib/aarch64-linux-gnu/libm.so.6 \ 36 | /lib/aarch64-linux-gnu/libc.so.6 \ 37 | /all-files/linux/arm64/lib/aarch64-linux-gnu/ 38 | 39 | COPY --from=arm64-chef \ 40 | /lib/ld-linux-aarch64.so.1 \ 41 | /all-files/linux/arm64/lib 42 | 43 | # AMD64 44 | COPY --from=amd64-chef \ 45 | /lib/x86_64-linux-gnu/libssl.so.3 \ 46 | /lib/x86_64-linux-gnu/libcrypto.so.3 \ 47 | /lib/x86_64-linux-gnu/libgcc_s.so.1 \ 48 | /lib/x86_64-linux-gnu/libm.so.6 \ 49 | /lib/x86_64-linux-gnu/libc.so.6 \ 50 | /all-files/linux/amd64/lib/x86_64-linux-gnu/ 51 | 52 | COPY --from=amd64-chef \ 53 | /lib64/ld-linux-x86-64.so.2 \ 54 | /all-files/linux/amd64/lib64/ 55 | 56 | # Common files - certs 57 | COPY --from=amd64-chef \ 58 | /etc/ssl/certs/ca-certificates.crt \ 59 | /all-files/linux/amd64/etc/ssl/certs/ 60 | COPY --from=arm64-chef \ 61 | /etc/ssl/certs/ca-certificates.crt \ 62 | /all-files/linux/arm64/etc/ssl/certs/ 63 | 64 | WORKDIR /app 65 | 66 | # Install dependencies for cross-compilation and protobuf 67 | RUN dpkg --add-architecture arm64 \ 68 | && apt-get update \ 69 | && apt-get install -y protobuf-compiler \ 70 | g++-aarch64-linux-gnu \ 71 | libc6-dev-arm64-cross \ 72 | libssl-dev:arm64 \ 73 | ca-certificates \ 74 | && rustup target add aarch64-unknown-linux-gnu \ 75 | && rustup toolchain install stable-aarch64-unknown-linux-gnu \ 76 | && rm -rf /var/lib/apt/lists/* 77 | 78 | # Build dependencies - this is the caching Docker layer! 
79 | RUN case ${TARGETARCH} in \ 80 | arm64) PKG_CONFIG_SYSROOT_DIR=/ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc cargo chef cook --target=aarch64-unknown-linux-gnu --release --recipe-path recipe.json ;; \ 81 | amd64) cargo chef cook --release --recipe-path recipe.json ;; \ 82 | *) exit 1 ;; \ 83 | esac 84 | 85 | # Copy the source code 86 | COPY . /app 87 | 88 | # Build application - this layer is rebuilt whenever the source changes 89 | RUN case ${TARGETARCH} in \ 90 | arm64) PKG_CONFIG_SYSROOT_DIR=/ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc cargo build --target=aarch64-unknown-linux-gnu --release ;; \ 91 | amd64) cargo build --release ;; \ 92 | *) exit 1 ;; \ 93 | esac 94 | 95 | # Copy all the dependencies to a separate folder 96 | RUN set -ex; \ 97 | # Determine target (source folder for the binary and env files) 98 | case ${TARGETARCH} in \ 99 | arm64) target='/app/target/aarch64-unknown-linux-gnu/release';; \ 100 | amd64) target='/app/target/release';; \ 101 | *) exit 1 ;; \ 102 | esac; \ 103 | # Copy files from the target folder to app folder 104 | cp $target/ezbake /all-files/${TARGETPLATFORM}/app 105 | 106 | # Create a single layer image 107 | FROM scratch AS runtime 108 | LABEL org.opencontainers.image.source="https://github.com/junction-labs/ezbake" 109 | ARG TARGETPLATFORM 110 | ARG TARGETARCH 111 | WORKDIR /app 112 | COPY --from=builder /all-files/${TARGETPLATFORM} / 113 | ENTRYPOINT ["/app/ezbake"] 114 | -------------------------------------------------------------------------------- /scripts/ezbake-deployment.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: ezbake 6 | namespace: junction 7 | labels: 8 | app: ezbake 9 | spec: 10 | replicas: 2 11 | selector: 12 | matchLabels: 13 | app: ezbake 14 | template: 15 | metadata: 16 | labels: 17 | app: ezbake 18 | spec: 19 | serviceAccountName: junction-account 20
| containers: 21 | - name: ezbake 22 | command: ["./ezbake"] 23 | args: ["--all-namespaces"] 24 | image: ezbake:latest 25 | imagePullPolicy: IfNotPresent 26 | env: 27 | - name: RUST_LOG 28 | value: "ezbake=trace,warn" -------------------------------------------------------------------------------- /scripts/install-for-cluster.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: Namespace 3 | apiVersion: v1 4 | metadata: 5 | name: junction 6 | --- 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | name: junction-account 11 | namespace: junction 12 | --- 13 | apiVersion: rbac.authorization.k8s.io/v1 14 | kind: ClusterRole 15 | metadata: 16 | name: junction-role 17 | namespace: junction 18 | rules: 19 | - apiGroups: [""] 20 | resources: ["services"] 21 | verbs: ["get", "watch", "list"] 22 | - apiGroups: ["gateway.networking.k8s.io"] 23 | resources: ["httproutes", "grpcroutes"] 24 | verbs: ["get", "watch", "list"] 25 | - apiGroups: ["discovery.k8s.io"] 26 | resources: ["endpointslices"] 27 | verbs: ["get", "watch", "list"] 28 | - apiGroups: ["gateway.networking.k8s.io"] 29 | resources: ["httproutes/status", "grpcroutes/status"] 30 | verbs: ["patch"] 31 | --- 32 | apiVersion: rbac.authorization.k8s.io/v1 33 | kind: ClusterRoleBinding 34 | metadata: 35 | name: junction-role-binding 36 | namespace: junction 37 | roleRef: 38 | apiGroup: rbac.authorization.k8s.io 39 | kind: ClusterRole 40 | name: junction-role 41 | subjects: 42 | - kind: ServiceAccount 43 | name: junction-account 44 | namespace: junction 45 | --- 46 | apiVersion: apps/v1 47 | kind: Deployment 48 | metadata: 49 | name: ezbake 50 | namespace: junction 51 | labels: 52 | app: ezbake 53 | spec: 54 | replicas: 2 55 | selector: 56 | matchLabels: 57 | app: ezbake 58 | template: 59 | metadata: 60 | labels: 61 | app: ezbake 62 | spec: 63 | serviceAccountName: junction-account 64 | containers: 65 | - name: ezbake 66 | command: ["./ezbake"] 67 | args: 
["--all-namespaces"] 68 | image: ghcr.io/junction-labs/junction-labs/ezbake:latest 69 | imagePullPolicy: Always 70 | env: 71 | - name: RUST_LOG 72 | value: "ezbake=trace,warn" 73 | --- 74 | apiVersion: v1 75 | kind: Service 76 | metadata: 77 | name: ezbake 78 | namespace: junction 79 | spec: 80 | type: ClusterIP 81 | selector: 82 | app: ezbake 83 | ports: 84 | - port: 8008 85 | targetPort: 8008 86 | name: grpc 87 | -------------------------------------------------------------------------------- /scripts/install-for-namespace-admin.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: v1 3 | kind: ServiceAccount 4 | metadata: 5 | name: junction-account 6 | namespace: foo 7 | --- 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: Role 10 | metadata: 11 | name: junction-role 12 | namespace: foo 13 | rules: 14 | - apiGroups: [""] 15 | resources: ["services"] 16 | verbs: ["get", "watch", "list"] 17 | - apiGroups: ["gateway.networking.k8s.io"] 18 | resources: ["httproutes", "grpcroutes"] 19 | verbs: ["get", "watch", "list"] 20 | - apiGroups: ["discovery.k8s.io"] 21 | resources: ["endpointslices"] 22 | verbs: ["get", "watch", "list"] 23 | - apiGroups: ["gateway.networking.k8s.io"] 24 | resources: ["httproutes/status", "grpcroutes/status"] 25 | verbs: ["patch"] 26 | --- 27 | apiVersion: rbac.authorization.k8s.io/v1 28 | kind: RoleBinding 29 | metadata: 30 | name: junction-role-binding 31 | namespace: foo 32 | roleRef: 33 | apiGroup: rbac.authorization.k8s.io 34 | kind: Role # binds the namespaced Role defined in this manifest; a namespace-only install creates no ClusterRole 35 | name: junction-role 36 | subjects: 37 | - kind: ServiceAccount 38 | name: junction-account 39 | namespace: foo 40 | -------------------------------------------------------------------------------- /scripts/install-for-namespace.yml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: apps/v1 3 | kind: Deployment 4 | metadata: 5 | name: ezbake 6 | namespace: foo 7 | labels: 8 |
app: ezbake 9 | spec: 10 | replicas: 2 11 | selector: 12 | matchLabels: 13 | app: ezbake 14 | template: 15 | metadata: 16 | labels: 17 | app: ezbake 18 | spec: 19 | containers: 20 | - name: ezbake 21 | command: ["./ezbake"] 22 | image: ghcr.io/junction-labs/junction-labs/ezbake:latest 23 | imagePullPolicy: Always 24 | env: 25 | - name: RUST_LOG 26 | value: "ezbake=trace,warn" 27 | --- 28 | apiVersion: v1 29 | kind: Service 30 | metadata: 31 | name: ezbake 32 | namespace: foo 33 | spec: 34 | type: ClusterIP 35 | selector: 36 | app: ezbake 37 | ports: 38 | - port: 8008 39 | targetPort: 8008 40 | name: grpc 41 | -------------------------------------------------------------------------------- /src/grpc_access.rs: -------------------------------------------------------------------------------- 1 | //! GRPC Access logging. 2 | //! 3 | //! This module groups together all of the access logging stuff we do so that 4 | //! it's extremely easy to enable/disable together. Setting 5 | //! `ezbake::grpc_access=LEVEL` adjusts all of the logging for ezbake - no more 6 | //! remembering that tower_http needs to change log levels. 7 | 8 | use std::time::Duration; 9 | 10 | use tower_http::classify::GrpcFailureClass; 11 | use tracing::{info_span, Span}; 12 | use xds_api::pb::envoy::service::discovery::v3::{ 13 | DeltaDiscoveryRequest, DeltaDiscoveryResponse, DiscoveryResponse, 14 | }; 15 | 16 | macro_rules! 
layer { 17 | () => { 18 | tower_http::trace::TraceLayer::new_for_grpc() 19 | .make_span_with(crate::grpc_access::make_span) 20 | .on_request(crate::grpc_access::on_request) 21 | .on_response(crate::grpc_access::on_response) 22 | .on_eos(crate::grpc_access::on_eos) 23 | .on_failure(crate::grpc_access::on_failure) 24 | }; 25 | } 26 | pub(crate) use layer; 27 | use xds_api::pb::envoy::service::discovery::v3::DiscoveryRequest; 28 | 29 | pub(crate) fn make_span(request: &http::Request) -> Span { 30 | info_span!( 31 | "grpc", 32 | method = %request.method(), 33 | uri = %request.uri(), 34 | version = ?request.version() 35 | ) 36 | } 37 | 38 | pub(crate) fn on_request(_request: &http::Request, _span: &Span) { 39 | tracing::info!("request-recieved") 40 | } 41 | 42 | pub(crate) fn on_response(_response: &http::Response, latency: Duration, _span: &Span) { 43 | let latency_us = latency.as_micros(); 44 | tracing::debug!(%latency_us, "response-sent") 45 | } 46 | 47 | pub(crate) fn on_eos(trailers: Option<&http::HeaderMap>, stream_duration: Duration, _span: &Span) { 48 | let status_code = trailers 49 | .and_then(tonic::Status::from_header_map) 50 | .map(|status| status.code()); 51 | 52 | let duration_us = stream_duration.as_micros(); 53 | tracing::debug!(?status_code, %duration_us, "end-of-stream") 54 | } 55 | 56 | pub(crate) fn on_failure( 57 | failure_classification: GrpcFailureClass, 58 | latency: Duration, 59 | _span: &Span, 60 | ) { 61 | let duration_us = latency.as_micros(); 62 | tracing::warn!( 63 | classification = %failure_classification, 64 | %duration_us, 65 | "failed" 66 | ) 67 | } 68 | 69 | pub(crate) fn xds_discovery_request(request: &DiscoveryRequest) { 70 | tracing::info!( 71 | v = request.version_info, 72 | n = request.response_nonce, 73 | ty = request.type_url, 74 | r = ?request.resource_names, 75 | error_code = request.error_detail.as_ref().map(|e| e.code), 76 | error_message = request.error_detail.as_ref().map(|e| &e.message), 77 | "DiscoveryRequest", 78 | ); 
79 | } 80 | 81 | pub(crate) fn xds_discovery_response(response: &DiscoveryResponse) { 82 | tracing::info!( 83 | v = response.version_info, 84 | n = response.nonce, 85 | ty = response.type_url, 86 | r_count = response.resources.len(), 87 | "DiscoveryResponse", 88 | ); 89 | } 90 | pub(crate) fn xds_delta_discovery_request(request: &DeltaDiscoveryRequest) { 91 | tracing::info!( 92 | n = request.response_nonce, 93 | ty = request.type_url, 94 | r = ?request.resource_names_subscribe, 95 | init = ?request.initial_resource_versions, 96 | error_code = request.error_detail.as_ref().map(|e| e.code), 97 | error_message = request.error_detail.as_ref().map(|e| &e.message), 98 | "DeltaDiscoveryRequest", 99 | ); 100 | } 101 | 102 | pub(crate) fn xds_delta_discovery_response(response: &DeltaDiscoveryResponse) { 103 | tracing::info!( 104 | n = response.nonce, 105 | ty = response.type_url, 106 | added = response.resources.len(), 107 | removed = response.removed_resources.len(), 108 | "DiscoveryResponse", 109 | ); 110 | } 111 | -------------------------------------------------------------------------------- /src/k8s.rs: -------------------------------------------------------------------------------- 1 | use futures::TryStreamExt; 2 | use junction_api::kube::gateway_api::apis::experimental::httproutes::HTTPRoute; 3 | use junction_api::kube::k8s_openapi::{ 4 | self, 5 | api::{core::v1::Service, discovery::v1::EndpointSlice}, 6 | serde::Deserialize, 7 | }; 8 | use kube::{ 9 | runtime::{ 10 | self, 11 | reflector::{self, store::Writer, ObjectRef, Store}, 12 | watcher, WatchStreamExt, 13 | }, 14 | Resource, ResourceExt as _, 15 | }; 16 | use std::time::Duration; 17 | use std::{collections::HashSet, fmt::Debug, future::Future, sync::Arc, time::Instant}; 18 | use tokio::sync::broadcast; 19 | use tracing::{debug, trace}; 20 | 21 | pub(crate) trait KubeResource: 22 | Clone + Debug + for<'de> Deserialize<'de> + Resource + Send + Sync + 'static 23 | { 24 | type ParentRef: KubeResource; 25 | 26 | 
fn static_kind() -> &'static str; 27 | 28 | fn parent_refs(&self) -> Vec>; 29 | 30 | fn modify(&mut self); 31 | 32 | fn has_changed(&self, other: &Self) -> bool; 33 | } 34 | 35 | macro_rules! check_changed { 36 | ($old:expr, $new:expr) => { 37 | if $old != $new { 38 | return true; 39 | } 40 | }; 41 | } 42 | 43 | const LAST_APPLIED_CONFIG: &str = "kubectl.kubernetes.io/last-applied-configuration"; 44 | 45 | impl KubeResource for HTTPRoute { 46 | type ParentRef = Service; 47 | 48 | fn static_kind() -> &'static str { 49 | "HTTPRoute" 50 | } 51 | 52 | fn parent_refs(&self) -> Vec> { 53 | let mut parents = vec![]; 54 | 55 | for parent_ref in self.spec.parent_refs.iter().flatten() { 56 | if !matches!(parent_ref.kind.as_deref(), Some("Service")) { 57 | continue; 58 | } 59 | let namespace = parent_ref 60 | .namespace 61 | .as_ref() 62 | .or(self.metadata.namespace.as_ref()); 63 | 64 | if let Some(namespace) = namespace { 65 | parents.push(ObjectRef::new(&parent_ref.name).within(namespace)); 66 | } 67 | } 68 | 69 | parents 70 | } 71 | 72 | fn modify(&mut self) { 73 | self.annotations_mut().remove(LAST_APPLIED_CONFIG); 74 | self.managed_fields_mut().clear(); 75 | self.status = None; 76 | } 77 | 78 | fn has_changed(&self, _other: &Self) -> bool { 79 | // TODO: HTTPRoute and friends don't implement PartialEq/Eq, so it's 80 | // hard to check anything meaningful here. always rebuild for now and 81 | // deal with too many updates. 
82 | // 83 | // https://github.com/kube-rs/gateway-api-rs/pull/53 84 | true 85 | } 86 | } 87 | 88 | impl KubeResource for Service { 89 | type ParentRef = Service; 90 | 91 | fn static_kind() -> &'static str { 92 | ::KIND 93 | } 94 | 95 | fn parent_refs(&self) -> Vec> { 96 | Vec::new() 97 | } 98 | 99 | fn modify(&mut self) { 100 | self.annotations_mut().remove(LAST_APPLIED_CONFIG); 101 | self.managed_fields_mut().clear(); 102 | self.status = None; 103 | } 104 | 105 | fn has_changed(&self, other: &Self) -> bool { 106 | check_changed!(self.metadata.labels, other.metadata.labels); 107 | check_changed!(self.metadata.annotations, other.metadata.annotations); 108 | check_changed!(self.spec, other.spec); 109 | 110 | false 111 | } 112 | } 113 | 114 | impl KubeResource for EndpointSlice { 115 | type ParentRef = Service; 116 | 117 | fn static_kind() -> &'static str { 118 | ::KIND 119 | } 120 | 121 | fn parent_refs(&self) -> Vec> { 122 | let Some(svc_namespace) = self.metadata.namespace.as_ref() else { 123 | return Vec::new(); 124 | }; 125 | let Some(svc_name) = self.labels().get("kubernetes.io/service-name") else { 126 | return Vec::new(); 127 | }; 128 | 129 | vec![ObjectRef::new(svc_name).within(svc_namespace)] 130 | } 131 | 132 | fn modify(&mut self) { 133 | self.annotations_mut().remove(LAST_APPLIED_CONFIG); 134 | self.managed_fields_mut().clear(); 135 | } 136 | 137 | fn has_changed(&self, other: &Self) -> bool { 138 | check_changed!(self.metadata.labels, other.metadata.labels); 139 | check_changed!(self.ports, other.ports); 140 | // FIXME: this doesn't check for ordering changes. not sure how often those happen. 141 | check_changed!(self.endpoints, other.endpoints); 142 | 143 | false 144 | } 145 | } 146 | 147 | pub(crate) type ChangedObjects = Arc>>; 148 | 149 | /// A wrapper around an [ObjectRef] that includes references to any of the 150 | /// objects parent refs. 151 | /// 152 | /// Only the wrapped [ObjectRef] is used for Eq/Hash/Cmp. 
153 | #[derive(Debug)] 154 | pub(crate) struct RefAndParents { 155 | pub obj: ObjectRef, 156 | pub parents: Vec>, 157 | } 158 | 159 | impl PartialEq for RefAndParents { 160 | fn eq(&self, other: &Self) -> bool { 161 | self.obj == other.obj 162 | } 163 | } 164 | 165 | impl Eq for RefAndParents {} 166 | 167 | impl std::hash::Hash for RefAndParents { 168 | fn hash(&self, state: &mut H) { 169 | self.obj.hash(state); 170 | } 171 | } 172 | 173 | impl RefAndParents { 174 | fn from_obj(o: &K) -> Self { 175 | let obj = ObjectRef::from_obj(o); 176 | let parents = o.parent_refs(); 177 | Self { obj, parents } 178 | } 179 | } 180 | 181 | pub(crate) struct Watch { 182 | pub store: Store, 183 | pub changes: broadcast::Sender>, 184 | } 185 | 186 | pub(crate) fn watch( 187 | api: kube::Api, 188 | debounce_duration: Duration, 189 | ) -> ( 190 | Watch, 191 | impl Future> + Send + 'static, 192 | ) { 193 | let (store, writer) = reflector::store(); 194 | let (change_tx, _change_rx) = broadcast::channel(10); 195 | 196 | ( 197 | Watch { 198 | store: store.clone(), 199 | changes: change_tx.clone(), 200 | }, 201 | run_watch(api, store, writer, change_tx, debounce_duration), 202 | ) 203 | } 204 | 205 | async fn run_watch( 206 | api: kube::Api, 207 | store: Store, 208 | mut writer: Writer, 209 | changes: broadcast::Sender>, 210 | debounce_duration: Duration, 211 | ) -> Result<(), watcher::Error> { 212 | let stream = runtime::watcher(api, runtime::watcher::Config::default().any_semantic()) 213 | .default_backoff() 214 | .modify(T::modify); 215 | 216 | let mut stream = std::pin::pin!(stream); 217 | 218 | debug!(kind = T::static_kind(), "watch starting"); 219 | let mut debounce = None; 220 | let mut changed: HashSet<_> = HashSet::new(); 221 | loop { 222 | tokio::select! 
{ 223 | biased; 224 | 225 | _ = sleep_until(&debounce) => { 226 | if !changed.is_empty() { 227 | let to_send: ChangedObjects<_> = Arc::new(std::mem::take(&mut changed)); 228 | if changes.send(to_send).is_err() { 229 | debug!(kind = T::static_kind(), "watch ended: all recievers dropped"); 230 | break; 231 | }; 232 | } 233 | debounce.take(); 234 | } 235 | event = stream.try_next() => { 236 | // return the error if the stream dies, continue if there's no next item. 237 | let Some(event) = event? else { 238 | continue 239 | }; 240 | handle_watch_event(&event, &mut changed, &mut debounce, &store, debounce_duration); 241 | writer.apply_watcher_event(&event); 242 | }, 243 | } 244 | } 245 | 246 | debug!(kind = T::static_kind(), "watch exiting"); 247 | Ok(()) 248 | } 249 | 250 | pub(crate) fn is_api_not_found(e: &watcher::Error) -> bool { 251 | matches!( 252 | e, 253 | watcher::Error::InitialListFailed(kube::Error::Api(e)) if e.code == 404, 254 | ) 255 | } 256 | 257 | fn handle_watch_event( 258 | event: &watcher::Event, 259 | changed: &mut HashSet>, 260 | debounce: &mut Option, 261 | store: &Store, 262 | debounce_duration: Duration, 263 | ) { 264 | match &event { 265 | // on apply, compare with the currently cached version of 266 | // the object and only mark it if there's a meaningful 267 | // change. 268 | watcher::Event::Apply(new_obj) => { 269 | let new_ref = RefAndParents::from_obj(new_obj); 270 | let old_obj = store.get(&new_ref.obj); 271 | let has_changed = old_obj.map_or(true, |obj| obj.has_changed(new_obj)); 272 | 273 | trace!(kind = T::static_kind(), event = "apply", obj = %new_ref.obj); 274 | 275 | if has_changed { 276 | changed.insert(new_ref); 277 | debounce.get_or_insert_with(|| Instant::now() + debounce_duration); 278 | } 279 | } 280 | // On delete, mark the object as changed and send the ref along. 
281 | watcher::Event::Delete(obj) => { 282 | let obj_ref = RefAndParents::from_obj(obj); 283 | trace!(kind = T::static_kind(), event = "delete", obj = %obj_ref.obj); 284 | 285 | changed.insert(obj_ref); 286 | debounce.get_or_insert_with(|| Instant::now() + debounce_duration); 287 | } 288 | // On init, we mark everything in the store as changed and pause the 289 | // debounce timer until InitDone by setting it to None. All objects are 290 | // marked changed so we can see which objects may have been deleted. 291 | // This is probably worse than buffering the entire init and doing a 292 | // diff with the Store. 293 | // 294 | // While init is happening, any InitApply event gets treated like an 295 | // apply, but shouldn't reset the debounce timer. 296 | // 297 | // When init is done, unpause the debounce timer. 298 | // 299 | // NOTE: this works because this is called BEFORE events are applied 300 | // to the watcher. 301 | watcher::Event::Init => { 302 | trace!(kind = T::static_kind(), "watch restarted"); 303 | for obj in store.state() { 304 | changed.insert(RefAndParents::from_obj(&obj)); 305 | } 306 | debounce.take(); 307 | } 308 | watcher::Event::InitApply(new_obj) => { 309 | let new_ref = RefAndParents::from_obj(new_obj); 310 | changed.insert(new_ref); 311 | } 312 | watcher::Event::InitDone => { 313 | debounce.get_or_insert_with(Instant::now); 314 | } 315 | } 316 | } 317 | 318 | async fn sleep_until(deadline: &Option) { 319 | match deadline { 320 | Some(d) => tokio::time::sleep_until((*d).into()).await, 321 | None => futures::future::pending().await, 322 | } 323 | } 324 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::{future::Future, time::Duration}; 2 | 3 | use clap::{Args, Parser}; 4 | use ingest::IngestIndex; 5 | use junction_api::kube::k8s_openapi::{ 6 | self, 7 | api::{core::v1::Service, 
discovery::v1::EndpointSlice}, 8 | }; 9 | use tonic::{server::NamedService, transport::Server}; 10 | use tracing_subscriber::EnvFilter; 11 | use xds::{AdsServer, SnapshotWriter}; 12 | use xds_api::pb::envoy::service::{ 13 | cluster::v3::cluster_discovery_service_server::ClusterDiscoveryServiceServer, 14 | discovery::v3::aggregated_discovery_service_server::AggregatedDiscoveryServiceServer, 15 | endpoint::v3::endpoint_discovery_service_server::EndpointDiscoveryServiceServer, 16 | listener::v3::listener_discovery_service_server::ListenerDiscoveryServiceServer, 17 | route::v3::route_discovery_service_server::RouteDiscoveryServiceServer, 18 | status::v3::client_status_discovery_service_server::ClientStatusDiscoveryServiceServer, 19 | }; 20 | 21 | mod grpc_access; 22 | mod ingest; 23 | mod k8s; 24 | mod xds; 25 | 26 | pub(crate) mod metrics; 27 | 28 | // TODO: multi-cluster 29 | // TODO: actually figure out metrics/logs/etc. would be nice to have a flag that 30 | // dumps XDS requests in a chrome trace format or something. 31 | 32 | /// an ez service discovery server 33 | #[derive(Parser, Debug)] 34 | #[command(version)] 35 | struct CliArgs { 36 | /// Log in a pretty, human-readable format. 37 | #[arg(long)] 38 | log_pretty: bool, 39 | 40 | /// The address to listen on. 41 | #[arg(long, short, default_value = "0.0.0.0:8008")] 42 | listen_addr: String, 43 | 44 | /// The address to expose metrics on. 45 | #[arg(long, default_value = "0.0.0.0:8009")] 46 | metrics_addr: String, 47 | 48 | #[command(flatten)] 49 | namespace_args: NamespaceArgs, 50 | } 51 | 52 | #[derive(Args, Debug)] 53 | #[group(multiple = false)] 54 | struct NamespaceArgs { 55 | /// Watch all namespaces. Defaults to false. 56 | /// 57 | /// It's an error to set both --all-namespaces and --namespace. 58 | #[arg(long)] 59 | all_namespaces: bool, 60 | 61 | /// The namespace to watch. 
If this option is not set explicitly, ezbake 62 | /// will watch the namespace set in the kubeconfig's current context, 63 | /// the namespace specified by the service account the server is running as, 64 | /// or the `default` namespace. 65 | /// 66 | /// It's an error to set both --all-namespaces and --namespace. 67 | #[arg(long)] 68 | namespace: Option, 69 | } 70 | 71 | #[tokio::main] 72 | async fn main() { 73 | let args = CliArgs::parse(); 74 | setup_tracing(args.log_pretty); 75 | 76 | if let Err(e) = crate::metrics::install_prom(&args.metrics_addr) { 77 | tracing::error!( 78 | "invalid metrics addr: '{addr}': {e}", 79 | addr = args.metrics_addr 80 | ); 81 | std::process::exit(1); 82 | } 83 | 84 | let client = kube::Client::try_default().await.unwrap(); 85 | let index = crate::ingest::index(); 86 | let (cache, writer) = xds::snapshot(index.cache_callbacks()); 87 | 88 | let ingest = ingest( 89 | index, 90 | &client, 91 | args.namespace_args.all_namespaces, 92 | args.namespace_args.namespace.as_deref(), 93 | writer, 94 | ); 95 | let serve = serve(&args.listen_addr, cache); 96 | 97 | std::process::exit(tokio::select! { 98 | biased; 99 | res = ingest => { 100 | tracing::error!(err = ?res, "ingest exited unexpectedly"); 101 | 1 102 | }, 103 | res = serve => { 104 | tracing::error!(err = ?res, "server exited unexpectedly"); 105 | 2 106 | } 107 | _ = handle_signals() => 0, 108 | }) 109 | } 110 | 111 | async fn handle_signals() -> std::io::Result<()> { 112 | use tokio::signal::unix::{signal, SignalKind}; 113 | 114 | // is this awkward to write out? yes. is this better than the equivalent 115 | // with futures::future::select_all? also yes. 116 | let mut sigterm = signal(SignalKind::terminate())?; 117 | let mut sigint = signal(SignalKind::interrupt())?; 118 | 119 | tokio::select! { 120 | _ = sigterm.recv() => (), 121 | _ = sigint.recv() => (), 122 | } 123 | 124 | Ok(()) 125 | } 126 | 127 | /// Set up a tracing exporter. 
128 | /// 129 | /// This is here and not in grpc_access because tracing covers much more surface area 130 | /// than just grpc. 131 | fn setup_tracing(log_pretty: bool) { 132 | let default_log_filter = "ezbake=info" 133 | .parse() 134 | .expect("default log filter must be valid"); 135 | let builder = tracing_subscriber::fmt() 136 | .with_env_filter( 137 | EnvFilter::builder() 138 | .with_default_directive(default_log_filter) 139 | .from_env_lossy(), 140 | ) 141 | .with_target(true); 142 | 143 | if log_pretty { 144 | // don't use .pretty(), it's too pretty 145 | builder.init(); 146 | } else { 147 | builder 148 | .json() 149 | .flatten_event(true) 150 | // TODO: we're not really emitting events from deeply nested spans 151 | // often, and the span list is redundant with the current span. 152 | // omit it for now. 153 | .with_span_list(false) 154 | .init(); 155 | } 156 | } 157 | 158 | async fn serve(addr: &str, cache: xds::SnapshotCache) -> anyhow::Result<()> { 159 | // tonic server structs have a ::NAME string that we register with 160 | // the reflection server so that reflection only shows what we're 161 | // implementing, instead of EVERY single xDS api. 162 | // 163 | // for whatever reason, this means that we have to explicitly re-register 164 | // the reflection service name. BUT we can't refer to the type without 165 | // knowing the generic, which is hidden, so we can't call ::NAME on the 166 | // reflection service. 167 | macro_rules! server_with_reflection { 168 | ($ads_server:expr => [$($service_type:tt),* $(,)?] $(,)?) 
=> {{ 169 | let reflection = tonic_reflection::server::Builder::configure() 170 | .register_encoded_file_descriptor_set(xds_api::FILE_DESCRIPTOR_SET) 171 | .with_service_name("grpc.reflection.v1alpha.ServerReflection"); 172 | 173 | let mut server = Server::builder().layer(grpc_access::layer!()); 174 | 175 | $( 176 | let svc = $service_type::new($ads_server.clone()); 177 | let reflection = reflection.with_service_name($service_type::::NAME); 178 | let server = server.add_service(svc); 179 | )* 180 | 181 | let server = server.add_service(reflection.build()?); 182 | server 183 | }}; 184 | } 185 | 186 | let ads = xds::AdsServer::new(cache); 187 | let server = server_with_reflection!( 188 | ads => [ 189 | AggregatedDiscoveryServiceServer, 190 | ListenerDiscoveryServiceServer, 191 | RouteDiscoveryServiceServer, 192 | ClusterDiscoveryServiceServer, 193 | EndpointDiscoveryServiceServer, 194 | ClientStatusDiscoveryServiceServer, 195 | ], 196 | ); 197 | let server = server.serve(addr.parse()?); 198 | 199 | server.await?; 200 | Ok(()) 201 | } 202 | 203 | async fn ingest( 204 | index: IngestIndex, 205 | client: &kube::Client, 206 | all_namespaces: bool, 207 | namespace: Option<&str>, 208 | writer: SnapshotWriter, 209 | ) -> anyhow::Result<()> { 210 | // watch Gateway API routes 211 | // 212 | // the watches here need a little bit of extra error handling, in case the APIs 213 | // are not installed, installed at an incompatible version, or someone removes 214 | // a CRD at a weird time. 215 | let (route_watch, run_route_watch) = k8s::watch( 216 | kube_api(client, all_namespaces, namespace), 217 | Duration::from_secs(2), 218 | ); 219 | let run_route_watch = async { 220 | match run_route_watch.await { 221 | Err(e) if k8s::is_api_not_found(&e) => { 222 | tracing::info!("HTTPRoute API not found. 
Continuing without Gateway APIs"); 223 | Ok(()) 224 | } 225 | v => v, 226 | } 227 | }; 228 | // watch Services and EndpointSlices 229 | let (svc_watch, run_svc_watch) = k8s::watch::( 230 | kube_api(client, all_namespaces, namespace), 231 | Duration::from_secs(2), 232 | ); 233 | let (slice_watch, run_slice_watch) = k8s::watch::( 234 | kube_api(client, all_namespaces, namespace), 235 | Duration::from_secs(2), 236 | ); 237 | 238 | // ingest::run should 239 | tokio::spawn(ingest::run( 240 | index, 241 | writer, 242 | svc_watch, 243 | route_watch, 244 | slice_watch, 245 | )); 246 | tokio::try_join!( 247 | spawn_watch(run_route_watch), 248 | spawn_watch(run_slice_watch), 249 | spawn_watch(run_svc_watch), 250 | )?; 251 | 252 | Ok(()) 253 | } 254 | 255 | fn kube_api(client: &kube::Client, all_namespaces: bool, namespace: Option<&str>) -> kube::Api 256 | where 257 | K: kube::Resource, 258 | ::DynamicType: Default, 259 | { 260 | match (all_namespaces, namespace) { 261 | (true, _) => kube::Api::all(client.clone()), 262 | (_, Some(namespace)) => kube::Api::namespaced(client.clone(), namespace), 263 | _ => kube::Api::default_namespaced(client.clone()), 264 | } 265 | } 266 | 267 | async fn spawn_watch(watch: F) -> anyhow::Result<()> 268 | where 269 | F: Future> + Send + 'static, 270 | E: std::error::Error + Send + Sync + 'static, 271 | { 272 | let handle = tokio::spawn(watch); 273 | 274 | match handle.await { 275 | Ok(Ok(val)) => Ok(val), 276 | Ok(Err(e)) => Err(e.into()), 277 | Err(e) => Err(e.into()), 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /src/metrics.rs: -------------------------------------------------------------------------------- 1 | use std::{net::SocketAddr, time::Instant}; 2 | 3 | use metrics::{describe_counter, describe_gauge, Gauge, Histogram, IntoF64}; 4 | use metrics_exporter_prometheus::PrometheusBuilder; 5 | 6 | /// Install and start a prometheus http exporter listening on `metrics_addr` and 7 | /// 
defines all metrics. 8 | pub(crate) fn install_prom(metrics_addr: &str) -> anyhow::Result<()> { 9 | let metrics_addr: SocketAddr = metrics_addr.parse()?; 10 | 11 | // it's 2024 prometheus still makes you define your own damn buckets for 12 | // histograms. 13 | // 14 | // start with an exponential series of bounds, starting at 250 micros and 15 | // see how it goes. 16 | const US_PER_SEC: f64 = 1000000.0; 17 | let buckets: Vec = (0..16) 18 | .map(|i| (2u32.pow(i) as f64) * 250.0 / US_PER_SEC) 19 | .collect(); 20 | 21 | let builder = PrometheusBuilder::new() 22 | .with_http_listener(metrics_addr) 23 | .set_buckets(&buckets) 24 | .expect("invalid bucket settings. this is a bug"); 25 | builder.install()?; 26 | 27 | describe_metrics(); 28 | 29 | Ok(()) 30 | } 31 | 32 | // TODO: This sucks, metrics are defined all over the place. Make metric names 33 | // and/or values module-level variables with actual symbols or even just pass 34 | // the Arc around. 35 | fn describe_metrics() { 36 | describe_timer!( 37 | "ingest_time", 38 | "Time to convert a batch of Kubernetes resources to xDS (seconds)", 39 | ); 40 | describe_gauge!( 41 | "ads.active_connections", 42 | "The number of currently active ADS connections", 43 | ); 44 | describe_counter!("ads.rx", "The total number of ADS messages sent"); 45 | describe_counter!("ads.tx", "The total number of ADS messages received"); 46 | } 47 | 48 | /// Increments a gauge by the given amount, then decrements it when the returned 49 | /// guard goes out of scope. 50 | /// 51 | /// ```no_run 52 | /// // the gauge starts at zero (or undefined!) 53 | /// { 54 | /// // increment the gauge to 2 for the duration of this block 55 | /// let _gauge = scoped_gauge!("my_cool_gauge", 2); 56 | /// do_cool_stuff(); 57 | /// do_more_cool_stuff(); 58 | /// } 59 | /// // after the gage drops, the guard resets to zero 60 | /// ``` 61 | macro_rules! 
scoped_gauge { 62 | ($name:expr) => { 63 | crate::metrics::scoped_gauge!($name, 1) 64 | }; 65 | ($name:expr, $inc:expr) => {{ 66 | let g = ::metrics::gauge!($name); 67 | g.increment($inc); 68 | crate::metrics::IncGuard::new(g, -$inc) 69 | }}; 70 | } 71 | pub(crate) use scoped_gauge; 72 | 73 | /// Describe a timer. Shorthand for `describe_histogram!(name, Unit::Seconds, 74 | /// description)` so you don't have to remember what units timers are in. 75 | macro_rules! describe_timer { 76 | ($name:expr, $description:expr $(,)?) => {{ 77 | ::metrics::describe_histogram!($name, ::metrics::Unit::Seconds, $description) 78 | }}; 79 | } 80 | pub(crate) use describe_timer; 81 | 82 | /// Creates a timer that runs until it goes out of scope. Timed values are 83 | /// tracked with a metrics histogram, with durations recorded as an f64 number 84 | /// of seconds. 85 | /// 86 | /// ```no_run 87 | /// let _timer = scoped_timer!( 88 | /// "my-timer", "label_one" => "value", "label_two" => "another-value" 89 | /// ); 90 | /// 91 | /// // do stuff and the timer runs to the end of the current scope 92 | /// ``` 93 | macro_rules! scoped_timer { 94 | ($name:expr $(, $label_key:expr $(=> $label_value:expr)?)* $(,)?) => {{ 95 | let hist = ::metrics::histogram!($name $(, $label_key $(=> $label_value)?)*); 96 | crate::metrics::TimerGuard::new_at(hist, std::time::Instant::now()) 97 | }}; 98 | } 99 | pub(crate) use scoped_timer; 100 | 101 | /// An RAII guard that applies a stored adjustment to a gauge on drop. 102 | /// 103 | /// Created with [scoped_gauge], which stores the negated increment. 104 | pub(crate) struct IncGuard { 105 | gauge: Gauge, 106 | value: f64, 107 | } 108 | 109 | impl IncGuard { 110 | pub(crate) fn new<T: IntoF64>(gauge: Gauge, value: T) -> Self { 111 | let value = value.into_f64(); 112 | Self { gauge, value } 113 | } 114 | } 115 | 116 | impl Drop for IncGuard { 117 | fn drop(&mut self) { 118 | self.gauge.increment(self.value); 119 | } 120 | } 121 | 122 | /// An RAII timer guard that records its duration on drop. 
123 | /// 124 | /// Created with [scoped_timer]. 125 | pub(crate) struct TimerGuard { 126 | hist: Histogram, 127 | started_at: Instant, 128 | } 129 | 130 | impl TimerGuard { 131 | pub(crate) fn new_at(hist: Histogram, started_at: Instant) -> Self { 132 | Self { hist, started_at } 133 | } 134 | } 135 | 136 | impl Drop for TimerGuard { 137 | fn drop(&mut self) { 138 | self.hist.record(self.started_at.elapsed().as_secs_f64()); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/xds.rs: -------------------------------------------------------------------------------- 1 | mod cache; 2 | mod delta; 3 | mod resources; 4 | mod server; 5 | mod sotw; 6 | 7 | use std::collections::BTreeSet; 8 | 9 | pub(crate) use cache::{ 10 | snapshot, ResourceSnapshot, SnapshotCache, SnapshotCallback, SnapshotWriter, 11 | }; 12 | pub(crate) use resources::ResourceType; 13 | pub(crate) use server::AdsServer; 14 | 15 | use xds_api::pb::envoy::service::discovery::v3::{DeltaDiscoveryRequest, DiscoveryRequest}; 16 | 17 | #[inline] 18 | pub(crate) fn is_nack(r: &DiscoveryRequest) -> bool { 19 | r.error_detail.is_some() 20 | } 21 | 22 | #[inline] 23 | pub(crate) fn is_delta_nack(r: &DeltaDiscoveryRequest) -> bool { 24 | r.error_detail.is_some() 25 | } 26 | 27 | /// A set of XDS resource names for tracking connection state. 28 | /// 29 | /// LDS and CDS have some extra-special wildcard handling that requires 30 | /// differentiating between two different wildcard states to preserve backwards 31 | /// compatibility. 
32 | /// 33 | /// https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#how-the-client-specifies-what-resources-to-return 34 | #[derive(Clone, Debug, PartialEq, Eq)] 35 | enum ResourceNames { 36 | EmptyWildcard, 37 | Wildcard(BTreeSet), 38 | Explicit(BTreeSet), 39 | } 40 | 41 | impl Default for ResourceNames { 42 | fn default() -> Self { 43 | Self::EmptyWildcard 44 | } 45 | } 46 | 47 | impl FromIterator for ResourceNames { 48 | fn from_iter>(iter: T) -> Self { 49 | let mut inner = BTreeSet::new(); 50 | let mut wildcard = false; 51 | 52 | for name in iter { 53 | if name == "*" { 54 | wildcard = true; 55 | } else { 56 | inner.insert(name); 57 | } 58 | } 59 | 60 | if wildcard { 61 | Self::Wildcard(inner) 62 | } else { 63 | Self::Explicit(inner) 64 | } 65 | } 66 | } 67 | 68 | impl ResourceNames { 69 | fn from_names(previous: &Self, names: Vec) -> Self { 70 | if names.is_empty() && matches!(previous, Self::EmptyWildcard) { 71 | Self::EmptyWildcard 72 | } else { 73 | Self::from_iter(names) 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/xds/cache.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::{BTreeMap, BTreeSet}, 3 | str::FromStr, 4 | sync::Arc, 5 | }; 6 | 7 | use crossbeam::atomic::AtomicCell; 8 | use crossbeam_skiplist::SkipMap; 9 | use enum_map::EnumMap; 10 | use svix_ksuid::{Ksuid, KsuidLike}; 11 | use tokio::sync::broadcast::{self, error::RecvError}; 12 | use tracing::warn; 13 | use xds_api::pb::google::protobuf; 14 | 15 | use crate::xds::resources::ResourceType; 16 | 17 | /// A set of updates/deletes to apply to a [SnapshotCache]. 
18 | pub(crate) struct ResourceSnapshot { 19 | resources: EnumMap>>, 20 | touch: EnumMap>, 21 | } 22 | 23 | impl std::fmt::Debug for ResourceSnapshot { 24 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 25 | let mut dbg_struct = f.debug_struct("ResourceSnapshot"); 26 | for (resource_type, resources) in &self.resources { 27 | let keys: Vec<_> = resources.keys().collect(); 28 | dbg_struct.field(resource_type.type_url(), &keys); 29 | } 30 | dbg_struct.finish() 31 | } 32 | } 33 | 34 | impl ResourceSnapshot { 35 | pub(crate) fn new() -> Self { 36 | Self { 37 | resources: EnumMap::default(), 38 | touch: EnumMap::default(), 39 | } 40 | } 41 | 42 | pub(crate) fn is_empty(&self) -> bool { 43 | self.resources.values().all(|m| m.is_empty()) 44 | } 45 | 46 | pub(crate) fn update_counts(&self) -> EnumMap { 47 | let mut counts = EnumMap::default(); 48 | 49 | for (resource_type, resources) in &self.resources { 50 | counts[resource_type] = resources.values().filter(|v| v.is_some()).count() 51 | } 52 | 53 | counts 54 | } 55 | 56 | pub(crate) fn delete_counts(&self) -> EnumMap { 57 | let mut counts = EnumMap::default(); 58 | 59 | for (resource_type, resources) in &self.resources { 60 | counts[resource_type] = resources.values().filter(|v| v.is_none()).count() 61 | } 62 | 63 | counts 64 | } 65 | 66 | pub(crate) fn touch_counts(&self) -> EnumMap { 67 | let mut counts = EnumMap::default(); 68 | for (resource_type, names) in &self.touch { 69 | counts[resource_type] = names.len(); 70 | } 71 | counts 72 | } 73 | 74 | pub(crate) fn insert_update( 75 | &mut self, 76 | resource_type: ResourceType, 77 | name: String, 78 | proto: protobuf::Any, 79 | ) { 80 | self.resources[resource_type].insert(name, Some(proto)); 81 | } 82 | 83 | pub(crate) fn insert_delete(&mut self, resource_type: ResourceType, name: String) { 84 | self.resources[resource_type].insert(name, None); 85 | } 86 | 87 | pub(crate) fn touch(&mut self, resource_type: ResourceType, name: String) { 88 | 
self.touch[resource_type].insert(name); 89 | } 90 | 91 | #[cfg(test)] 92 | pub(crate) fn updates_and_deletes( 93 | &self, 94 | resource_type: ResourceType, 95 | ) -> (Vec, Vec) { 96 | let resources = &self.resources[resource_type]; 97 | 98 | resources 99 | .keys() 100 | .cloned() 101 | .partition(|k| resources.get(k).is_some_and(|v| v.is_some())) 102 | } 103 | } 104 | 105 | // NOTE: this uses a ksuid for now. no idea if that's good or not long term, but 106 | // it's an easy way to get something relatively unique and roughly ordered. 107 | #[derive(Clone, Copy, PartialEq, Eq)] 108 | pub(crate) struct ResourceVersion(Ksuid); 109 | 110 | impl ResourceVersion { 111 | /// Create a new ResourceVersion from a u64. Returns `None` if the value is 112 | /// zero. 113 | pub(crate) fn new() -> Self { 114 | Self(Ksuid::new(None, None)) 115 | } 116 | } 117 | 118 | impl Default for ResourceVersion { 119 | fn default() -> Self { 120 | Self::new() 121 | } 122 | } 123 | 124 | impl std::fmt::Debug for ResourceVersion { 125 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 126 | std::fmt::Display::fmt(self, f) 127 | } 128 | } 129 | 130 | impl std::fmt::Display for ResourceVersion { 131 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 132 | f.write_fmt(format_args!("{id}", id = self.0)) 133 | } 134 | } 135 | 136 | #[derive(Debug, thiserror::Error)] 137 | pub(crate) enum ParseVersionError { 138 | #[error("failed to parse resource version")] 139 | ParseError, 140 | } 141 | 142 | impl FromStr for ResourceVersion { 143 | type Err = ParseVersionError; 144 | 145 | fn from_str(s: &str) -> Result { 146 | Ksuid::from_str(s) 147 | .map(Self) 148 | .map_err(|_| ParseVersionError::ParseError) 149 | } 150 | } 151 | 152 | #[derive(Debug)] 153 | pub(crate) struct VersionCounter; 154 | 155 | impl VersionCounter { 156 | pub(crate) fn new() -> Self { 157 | Self 158 | } 159 | 160 | pub(crate) fn next(&self) -> ResourceVersion { 161 | ResourceVersion::new() 162 
| } 163 | } 164 | 165 | pub(crate) type Entry<'a> = crossbeam_skiplist::map::Entry<'a, String, VersionedProto>; 166 | 167 | pub(crate) struct VersionedProto { 168 | pub version: ResourceVersion, 169 | pub proto: protobuf::Any, 170 | } 171 | 172 | impl std::fmt::Debug for VersionedProto { 173 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 174 | f.debug_struct("VersionedProto") 175 | .field("version", &self.version) 176 | .field("proto", &"...") 177 | .finish() 178 | } 179 | } 180 | 181 | pub(crate) trait SnapshotCallback { 182 | fn call(&self, writer: SnapshotWriter, resource_type: ResourceType, resource_name: &str); 183 | } 184 | 185 | /// Create a new [SnapshotCache] and a paired [SnapshotWriter]. The writer is 186 | /// the only way to safely write to this cache - don't drop it if you need to 187 | /// write. 188 | pub(crate) fn snapshot( 189 | resource_callbacks: impl IntoIterator< 190 | Item = (ResourceType, Box), 191 | >, 192 | ) -> (SnapshotCache, SnapshotWriter) { 193 | let mut callbacks = EnumMap::default(); 194 | for (rtype, cb) in resource_callbacks { 195 | callbacks[rtype] = Some(cb) 196 | } 197 | 198 | let inner = Arc::new(SnapshotCacheInner { 199 | version: VersionCounter::new(), 200 | typed: Default::default(), 201 | callbacks, 202 | // 12 = 3 cache updates for each of 4 types of resource type. 203 | // 204 | // this is completely arbitrary. 205 | notifications: broadcast::Sender::new(12), 206 | }); 207 | 208 | ( 209 | SnapshotCache { 210 | inner: inner.clone(), 211 | }, 212 | SnapshotWriter { inner }, 213 | ) 214 | } 215 | 216 | /// A read handle into a versioned cache of xDS snapshots. A cache can have any 217 | /// number of concurrent readers. 218 | /// 219 | /// This handle is cheaply cloneable, for sharing the cache between multiple 220 | /// tasks or threads. 
221 | #[derive(Clone)] 222 | pub(crate) struct SnapshotCache { 223 | inner: Arc, 224 | } 225 | 226 | struct SnapshotCacheInner { 227 | version: VersionCounter, 228 | typed: EnumMap, 229 | callbacks: EnumMap>>, 230 | notifications: broadcast::Sender, 231 | } 232 | 233 | #[derive(Default)] 234 | struct ResourceCache { 235 | version: AtomicCell, 236 | resources: SkipMap, 237 | } 238 | 239 | impl SnapshotCache { 240 | pub fn version(&self, resource_type: ResourceType) -> ResourceVersion { 241 | self.inner.typed[resource_type].version.load() 242 | } 243 | 244 | pub fn get(&self, resource_type: ResourceType, resource_name: &str) -> Option { 245 | // fast path: resource exists 246 | if let Some(e) = self.inner.typed[resource_type].resources.get(resource_name) { 247 | return Some(e); 248 | } 249 | 250 | // slow path: try to compute the resource, allow updating the cache in the callback. 251 | if let Some(cb) = &self.inner.callbacks[resource_type] { 252 | cb.call( 253 | SnapshotWriter { 254 | inner: self.inner.clone(), 255 | }, 256 | resource_type, 257 | resource_name, 258 | ) 259 | } 260 | 261 | self.inner.typed[resource_type].resources.get(resource_name) 262 | } 263 | 264 | pub fn len(&self, resource_type: ResourceType) -> usize { 265 | self.inner.typed[resource_type].resources.len() 266 | } 267 | 268 | pub fn iter( 269 | &self, 270 | resource_type: ResourceType, 271 | ) -> crossbeam_skiplist::map::Iter { 272 | self.inner.typed[resource_type].resources.iter() 273 | } 274 | 275 | pub fn changes(&self) -> SnapshotChange { 276 | SnapshotChange { 277 | _inner: self.inner.clone(), 278 | notifications: self.inner.notifications.subscribe(), 279 | } 280 | } 281 | } 282 | 283 | /// A handle to a cache that notifies on cache change. 284 | pub(crate) struct SnapshotChange { 285 | notifications: broadcast::Receiver, 286 | // hold a reference to inner to guarantee the sender half of the 287 | // channel can't drop. 
288 | _inner: Arc, 289 | } 290 | 291 | impl SnapshotChange { 292 | pub async fn changed(&mut self) -> ResourceType { 293 | loop { 294 | match self.notifications.recv().await { 295 | Ok(rtype) => return rtype, 296 | Err(RecvError::Lagged(n)) => { 297 | warn!( 298 | dropped_notifications = %n, 299 | "cache subscription fell behind." 300 | ); 301 | } 302 | Err(RecvError::Closed) => panic!( 303 | "snapshot subscription dropped. this is a bug in ezbake, please report it" 304 | ), 305 | } 306 | } 307 | } 308 | } 309 | 310 | /// A write handle to a versioned cache of xDS. Write handles cannot be created 311 | /// directly, and can only be obtained from creating a new cache with 312 | /// [snapshot_cache]. 313 | /// 314 | /// There should be at most one write handle to a cache at any time. 315 | pub(crate) struct SnapshotWriter { 316 | inner: Arc, 317 | } 318 | 319 | impl SnapshotWriter { 320 | pub(crate) fn update(&mut self, snapshot: ResourceSnapshot) -> ResourceVersion { 321 | let version = self.inner.version.next(); 322 | let mut notify: EnumMap = EnumMap::default(); 323 | 324 | for (resource_type, updates) in snapshot.resources { 325 | let cache = &self.inner.typed[resource_type]; 326 | 327 | let mut changed = false; 328 | for (name, update) in updates { 329 | match update { 330 | Some(proto) => { 331 | let proto = VersionedProto { version, proto }; 332 | cache.resources.insert(name, proto); 333 | } 334 | None => { 335 | cache.resources.remove(&name); 336 | } 337 | } 338 | 339 | changed = true; 340 | } 341 | 342 | if changed { 343 | // update the cache version AFTER updating all individual resource 344 | // versions so that once you can see the snapshot version change, 345 | // changes to all resources are visible as well. 
346 | cache.version.store(version); 347 | notify[resource_type] = true; 348 | } 349 | } 350 | 351 | for (resource_type, names) in snapshot.touch { 352 | let cache = &self.inner.typed[resource_type]; 353 | 354 | for name in names { 355 | if let Some(entry) = cache.resources.get(&name) { 356 | let proto = entry.value().proto.clone(); 357 | cache 358 | .resources 359 | .insert(name, VersionedProto { version, proto }); 360 | notify[resource_type] = true; 361 | } 362 | } 363 | } 364 | 365 | for (resource_type, should_notify) in notify { 366 | if should_notify { 367 | // ignore the error. it just means there's nothing to do 368 | let _ = self.inner.notifications.send(resource_type); 369 | } 370 | } 371 | 372 | version 373 | } 374 | } 375 | 376 | #[cfg(test)] 377 | mod test { 378 | use super::SnapshotCache; 379 | 380 | #[test] 381 | fn assert_cache_send_sync() { 382 | assert_send::(); 383 | assert_sync::(); 384 | } 385 | 386 | fn assert_send() {} 387 | fn assert_sync() {} 388 | } 389 | -------------------------------------------------------------------------------- /src/xds/delta.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeMap, BTreeSet}; 2 | 3 | use enum_map::EnumMap; 4 | use smol_str::{SmolStr, ToSmolStr}; 5 | use xds_api::pb::envoy::{ 6 | config::core::v3 as xds_node, 7 | service::discovery::v3::{DeltaDiscoveryRequest, DeltaDiscoveryResponse, Resource}, 8 | }; 9 | 10 | use crate::xds::is_delta_nack; 11 | 12 | use super::{cache::VersionedProto, server::SubInfo, ResourceType, SnapshotCache}; 13 | 14 | // NOTES: two big things we can do to make this saner and probably also allocate 15 | // less: 16 | // 17 | // - parse ResourceVersions on incoming messages. 18 | // - store xds Resources in cache already. there's nothing (yet?) that gets 19 | // changed on each response that would involve setting a new resource. 
20 | 21 | #[derive(Debug, thiserror::Error)] 22 | pub(crate) enum ConnectionError { 23 | #[error("missing node info")] 24 | MisingNode, 25 | 26 | #[error("invalid request: {0}")] 27 | InvalidRequest(anyhow::Error), 28 | } 29 | 30 | impl ConnectionError { 31 | pub(crate) fn into_status(self) -> tonic::Status { 32 | tonic::Status::invalid_argument(self.to_string()) 33 | } 34 | } 35 | pub(crate) struct AdsConnection { 36 | node: xds_node::Node, 37 | nonce: u64, 38 | snapshot: SnapshotCache, 39 | subscriptions: EnumMap>, 40 | } 41 | 42 | #[derive(Clone, Debug, Default)] 43 | pub(crate) struct AdsSubscription { 44 | // true iff this is a wildcard subscription. 45 | // 46 | // we currently ignore wildcards in sending responses, but since this is 47 | // a pretty key part of the protocol we track and check it. 48 | is_wildcard: bool, 49 | 50 | /// the nonce of the last reseponse sent 51 | last_sent_nonce: Option, 52 | 53 | /// the last nonce a client successfully ACK'd 54 | last_ack_nonce: Option, 55 | 56 | /// whether or not the client applied the last response 57 | applied: bool, 58 | 59 | /// the set of resources the client is subcribed to. this is be a superset 60 | /// of the keyset of sent - it includes anything we've sent to the client, 61 | /// and any resources the client is interested in that don't exist yet. 62 | subscribed: BTreeSet, 63 | 64 | // the last version of each resource sent back to the client 65 | sent: BTreeMap, 66 | 67 | // whether or not the cache has changed and resources need to 68 | // be rescanned. 69 | changed: bool, 70 | 71 | // the set of resources that need an update, whether or not the version of 72 | // the resources in cache has changed. 73 | sync: BTreeSet, 74 | 75 | // the set of names that have been removed while in wildcard mode. 
if the 76 | // removed resource is part of the wildcard, we have to force re-send it, 77 | // otherwise we do another round of remove/ack with the client 78 | remove_wildcard: BTreeSet, 79 | } 80 | 81 | impl AdsConnection { 82 | pub(crate) fn from_initial_request( 83 | request: &mut DeltaDiscoveryRequest, 84 | snapshot: SnapshotCache, 85 | ) -> Result { 86 | match request.node.take() { 87 | Some(node) => Ok(Self { 88 | nonce: 0, 89 | node, 90 | snapshot, 91 | subscriptions: EnumMap::default(), 92 | }), 93 | None => Err(ConnectionError::MisingNode), 94 | } 95 | } 96 | 97 | #[cfg(test)] 98 | fn test_new(node: xds_node::Node, snapshot: SnapshotCache) -> Self { 99 | Self { 100 | nonce: 0, 101 | node, 102 | snapshot, 103 | subscriptions: EnumMap::default(), 104 | } 105 | } 106 | 107 | pub(crate) fn node(&self) -> &xds_node::Node { 108 | &self.node 109 | } 110 | 111 | pub(crate) fn sent(&self) -> EnumMap { 112 | let mut sent = EnumMap::default(); 113 | 114 | for (rtype, sub) in &self.subscriptions { 115 | let Some(sub) = sub else { 116 | continue; 117 | }; 118 | sent[rtype] = SubInfo { 119 | applied: sub.applied, 120 | sent: sub.sent.clone(), 121 | } 122 | } 123 | 124 | sent 125 | } 126 | 127 | pub(crate) fn ads_responses(&mut self) -> Vec { 128 | let mut responses = Vec::with_capacity(ResourceType::all().len()); 129 | for rtype in ResourceType::all() { 130 | responses.extend(self.ads_response_for(*rtype)); 131 | } 132 | responses 133 | } 134 | 135 | fn ads_response_for(&mut self, rtype: ResourceType) -> Option { 136 | let sub = self.subscriptions[rtype].as_mut()?; 137 | 138 | // get and clear subscription state. we should no longer have to touch 139 | // the subscription. 140 | let mut sync = std::mem::take(&mut sub.sync); 141 | let remove_wildcard = std::mem::take(&mut sub.remove_wildcard); 142 | let changed = sub.changed; 143 | sub.changed = false; 144 | 145 | // if the sub is marked as changed, scan the set of sent items to see 146 | // any of them need updating. 
pull these all from the pending set so 147 | // they're not updated twice. 148 | // 149 | // tracks the set of updated and removed items because we're iterating over 150 | // sub.sent and can't modify it in place. 151 | let mut to_update = BTreeMap::new(); 152 | let mut to_remove = BTreeSet::new(); 153 | 154 | let mut resources = vec![]; 155 | let mut removed_resources = vec![]; 156 | 157 | // TODO: actually check to see if there are wildcard resources for this 158 | // node. right now we assume there are never any. 159 | // 160 | // if there are, we have to combine them with the sent map somehow and 161 | // have to do the remove check differently. 162 | removed_resources.extend(remove_wildcard.into_iter().map(|s| s.into())); 163 | 164 | if changed { 165 | // TODO: include wildcard resources here 166 | for name in &sub.subscribed { 167 | // if we're already sending an update because the version 168 | // changed, we don't need to do it again, and if we're forcing 169 | // this, just do it anyway. 
170 | let force = sync.remove(name); 171 | let last_version = sub.sent.get(name); 172 | match self.snapshot.get(rtype, name) { 173 | Some(entry) => { 174 | let VersionedProto { version, proto } = entry.value(); 175 | 176 | if force || Some(&version.to_smolstr()) != last_version { 177 | resources.push(Resource { 178 | name: name.to_string(), 179 | version: version.to_string(), 180 | resource: Some(proto.clone()), 181 | ..Default::default() 182 | }); 183 | to_update.insert(name.clone(), version.to_smolstr()); 184 | } 185 | } 186 | None => { 187 | removed_resources.push(name.to_string()); 188 | to_remove.insert(name.clone()); 189 | } 190 | } 191 | } 192 | } 193 | 194 | // grab all pending names and send em as well 195 | for name in sync { 196 | match self.snapshot.get(rtype, &name) { 197 | Some(entry) => { 198 | let name = entry.key(); 199 | let VersionedProto { version, proto } = entry.value(); 200 | 201 | resources.push(Resource { 202 | name: name.to_string(), 203 | version: version.to_string(), 204 | resource: Some(proto.clone()), 205 | ..Default::default() 206 | }); 207 | to_update.insert(name.to_smolstr(), version.to_smolstr()); 208 | } 209 | None => { 210 | removed_resources.push(name.to_string()); 211 | to_remove.insert(name); 212 | } 213 | } 214 | } 215 | 216 | // update subscriptions in one go 217 | for (k, v) in to_update { 218 | sub.sent.insert(k, v); 219 | } 220 | for k in to_remove { 221 | sub.sent.remove(&k); 222 | } 223 | 224 | // don't send noop reponses 225 | if resources.is_empty() && removed_resources.is_empty() { 226 | return None; 227 | } 228 | 229 | // there's fundamentally a consistency issue here - since the snapshot can 230 | // change out from under us, we don't have a single version number that 231 | // truly represents the state of everything right now. just pick the highest 232 | // version number at the end for now - this is only intended for debugging 233 | // anyway. 
234 | let snapshot_version = self.snapshot.version(rtype).to_string(); 235 | 236 | let nonce = next_nonce(&mut self.nonce); 237 | sub.last_sent_nonce = Some(nonce.clone()); 238 | 239 | Some(DeltaDiscoveryResponse { 240 | type_url: rtype.type_url().to_string(), 241 | nonce: nonce.to_string(), 242 | system_version_info: snapshot_version, 243 | resources, 244 | removed_resources, 245 | ..Default::default() 246 | }) 247 | } 248 | 249 | pub(crate) fn handle_snapshot_update(&mut self, changed_type: ResourceType) { 250 | let Some(sub) = &mut self.subscriptions[changed_type] else { 251 | return; 252 | }; 253 | sub.changed = true; 254 | } 255 | 256 | pub(crate) fn handle_ads_request( 257 | &mut self, 258 | mut request: DeltaDiscoveryRequest, 259 | ) -> Result<(), ConnectionError> { 260 | let Some(rtype) = ResourceType::from_type_url(&request.type_url) else { 261 | return Ok(()); 262 | }; 263 | 264 | // TODO: validate the request. the client verifies that *responses* 265 | // can't have duplicate resource names in add/remove, but there's no 266 | // explicit stipulation on what the client can send in a *request*. 267 | // it's probably safe to assume that we should be able to assume the 268 | // same - it's nonsensical to do otherwise. see: 269 | // https://github.com/envoyproxy/envoy/blob/2674bd9f5dfbfce3db55c4ed8c4c4aeda4b97823/test/extensions/config_subscription/grpc/delta_subscription_state_test.cc#L1153 270 | if false { 271 | todo!("request validation"); 272 | } 273 | 274 | let sub = match &mut self.subscriptions[rtype] { 275 | // handle the initial request for this resource type. 276 | None => { 277 | // create a new sub 278 | let sub = self.subscriptions[rtype].get_or_insert_with(Default::default); 279 | 280 | // set initial resource versions and mark the sub as having state 281 | // changed so the next set of responses compares sent versions and 282 | // actual versions. 
283 | let initial_resource_versions = 284 | std::mem::take(&mut request.initial_resource_versions); 285 | for (name, version) in initial_resource_versions { 286 | let name = name.to_smolstr(); 287 | let version = version.to_smolstr(); 288 | sub.subscribed.insert(name.clone()); 289 | sub.sent.insert(name, version); 290 | } 291 | sub.changed = true; 292 | sub.applied = true; 293 | 294 | // check to see if this is a new wildcard sub. 295 | if request.resource_names_subscribe.is_empty() { 296 | sub.is_wildcard = true; 297 | } 298 | 299 | sub 300 | } 301 | // on any subsequent, check that initial_resource_versions is empty, 302 | // handle ACK/NACK bookeeping, and then return the sub. 303 | Some(sub) => { 304 | if !request.initial_resource_versions.is_empty() { 305 | return Err(ConnectionError::InvalidRequest(anyhow::anyhow!( 306 | "initial_resource_versions may only be set on initial requests" 307 | ))); 308 | } 309 | 310 | // check the nonce to see if this is an ACK/NACK. this is more 311 | // than go-control-plane seems to do. it sets nonces but 312 | // basically ignores them. delta/v3/server.go 313 | // 314 | // https://github.com/envoyproxy/go-control-plane/blob/main/pkg/server/delta/v3/server.go#L86-L124 315 | if let Some(request_nonce) = nonempty_then(&request.response_nonce, SmolStr::new) { 316 | if Some(&request_nonce) == sub.last_sent_nonce.as_ref() { 317 | match is_delta_nack(&request) { 318 | //ACK 319 | false => { 320 | sub.applied = true; 321 | sub.last_ack_nonce = Some(request_nonce); 322 | } 323 | // NACK 324 | true => { 325 | sub.applied = false; 326 | } 327 | } 328 | } 329 | } 330 | 331 | sub 332 | } 333 | }; 334 | 335 | // on subscribing, we register a name as pending *even if* it's already in the 336 | // sent set with the same version as is in cache, per the protocol. 
337 | for name in request.resource_names_subscribe { 338 | if name == "*" { 339 | sub.is_wildcard = true; 340 | continue; 341 | } 342 | let name = name.to_smolstr(); 343 | sub.remove_wildcard.remove(&name); 344 | sub.subscribed.insert(name.clone()); 345 | sub.sync.insert(name); 346 | } 347 | // on unsubscribing, clear out all of the state for this name. if the 348 | // sub is currently in wildcard mode, toss it in the pile for special 349 | // handling on the next outgoing message. 350 | for name in request.resource_names_unsubscribe { 351 | let name = name.to_smolstr(); 352 | sub.subscribed.remove(&name); 353 | sub.sync.remove(&name); 354 | sub.sent.remove(&name); 355 | 356 | if rtype.supports_wildcard() && sub.is_wildcard { 357 | sub.remove_wildcard.insert(name); 358 | } 359 | } 360 | 361 | Ok(()) 362 | } 363 | } 364 | 365 | // not a method because borrowck is silly 366 | fn next_nonce(nonce: &mut u64) -> SmolStr { 367 | *nonce = nonce.wrapping_add(1); 368 | nonce.to_smolstr() 369 | } 370 | 371 | fn nonempty_then<'a, F, T>(s: &'a str, f: F) -> Option 372 | where 373 | F: FnOnce(&'a str) -> T, 374 | { 375 | if s.is_empty() { 376 | None 377 | } else { 378 | Some(f(s)) 379 | } 380 | } 381 | 382 | #[cfg(test)] 383 | mod test { 384 | 385 | use crate::xds::cache::ResourceVersion; 386 | use crate::xds::{ResourceSnapshot, SnapshotWriter}; 387 | 388 | use super::*; 389 | use xds_api::pb::envoy::config::core::v3::{self as xds_core}; 390 | use xds_api::pb::google::protobuf; 391 | 392 | macro_rules! 
request { 393 | ($rypte:expr) => { 394 | request($rypte, None, vec![], vec![], vec![], None) 395 | }; 396 | ($rypte:expr, init = $init:expr) => { 397 | request($rypte, None, $init, vec![], vec![], None) 398 | }; 399 | ($rypte:expr, add = $add:expr) => { 400 | request($rypte, None, vec![], $add, vec![], None) 401 | }; 402 | ($rypte:expr, n = $nonce:expr, add = $add:expr) => { 403 | request($rypte, Some($nonce), vec![], $add, vec![], None) 404 | }; 405 | ($rypte:expr, init = $init:expr, add = $add:expr) => { 406 | request($rypte, None, $init, $add, vec![], None) 407 | }; 408 | ($rypte:expr, remove = $remove:expr) => { 409 | request($rypte, None, vec![], vec![], $remove, None) 410 | }; 411 | ($rypte:expr, n = $nonce:expr) => { 412 | request($rypte, Some($nonce), vec![], vec![], vec![], None) 413 | }; 414 | ($rypte:expr, n = $nonce:expr, err = $err:expr) => { 415 | request($rypte, $nonce, vec![], vec![], vec![], Some($err)) 416 | }; 417 | ($rypte:expr, n = $nonce:expr, init = $init:expr, add = $add:expr, remove = $remove:expr) => { 418 | request($rypte, $nonce, $init, $add, $remove, None) 419 | }; 420 | } 421 | 422 | #[test] 423 | fn test_xds_init_no_data() { 424 | let (_, snapshot) = new_snapshot([]); 425 | let node = xds_core::Node { 426 | id: "test-node".to_string(), 427 | ..Default::default() 428 | }; 429 | 430 | // with new wildcard subscription should not respond 431 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 432 | conn.handle_ads_request(request!(ResourceType::Listener)) 433 | .unwrap(); 434 | // with an explicit subscription to missing, should NACK 435 | conn.handle_ads_request(request!(ResourceType::Cluster, add = vec!["example.com"])) 436 | .unwrap(); 437 | // with initial versions, should respond with a removal 438 | conn.handle_ads_request(request!( 439 | ResourceType::RouteConfiguration, 440 | init = vec![("bar.com", "v2")] 441 | )) 442 | .unwrap(); 443 | // new empty non-wildcard subscription, shouldn't respond. 
technically invalid? 444 | conn.handle_ads_request(request!(ResourceType::ClusterLoadAssignment)) 445 | .unwrap(); 446 | 447 | // should generate a CDS not-found and an RDS delete 448 | let responses = conn.ads_responses(); 449 | assert_eq!(responses.len(), 2); 450 | assert_eq!(responses[0].type_url, ResourceType::Cluster.type_url()); 451 | assert_eq!( 452 | responses[0].removed_resources, 453 | vec!["example.com".to_string()] 454 | ); 455 | assert_eq!( 456 | responses[1].type_url, 457 | ResourceType::RouteConfiguration.type_url() 458 | ); 459 | assert_eq!(responses[1].removed_resources, vec!["bar.com".to_string()]); 460 | } 461 | 462 | #[test] 463 | fn test_xds_init_with_data() { 464 | let node = xds_core::Node { 465 | id: "test-node".to_string(), 466 | ..Default::default() 467 | }; 468 | 469 | let (version, snapshot) = new_snapshot([ 470 | (ResourceType::Listener, vec!["nginx.example.com"]), 471 | ( 472 | ResourceType::Cluster, 473 | vec![ 474 | "nginx.default.svc.cluster.local:80", 475 | "nginx-staging.default.svc.cluster.local:80", 476 | ], 477 | ), 478 | ( 479 | ResourceType::ClusterLoadAssignment, 480 | vec![ 481 | "nginx.default.svc.cluster.local:80", 482 | "nginx-staging.default.svc.cluster.local:80", 483 | ], 484 | ), 485 | ]); 486 | 487 | // all four types get init requests out of order. 
488 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 489 | // when all names and versions match, nothing to do 490 | conn.handle_ads_request(request!( 491 | ResourceType::ClusterLoadAssignment, 492 | init = vec![ 493 | ("nginx.default.svc.cluster.local:80", &version.to_string(),), 494 | ( 495 | "nginx-staging.default.svc.cluster.local:80", 496 | &version.to_string(), 497 | ), 498 | ] 499 | )) 500 | .unwrap(); 501 | // only one name matches, should get one resource back 502 | conn.handle_ads_request(request!( 503 | ResourceType::Cluster, 504 | init = vec![ 505 | ("nginx.default.svc.cluster.local:80", &version.to_string()), 506 | ("nginx-staging.default.svc.cluster.local:80", "1111.2222"), 507 | ] 508 | )) 509 | .unwrap(); 510 | // first message is a new resource request, but also includes initial 511 | // version that matches. should be sent anyway. 512 | conn.handle_ads_request(request!( 513 | ResourceType::Listener, 514 | init = vec![("nginx.example.com", &version.to_string()),], 515 | add = vec!["nginx.example.com"] 516 | )) 517 | .unwrap(); 518 | // empty message should continue to do nothing 519 | conn.handle_ads_request(request!(ResourceType::RouteConfiguration, init = vec![])) 520 | .unwrap(); 521 | 522 | let responses = conn.ads_responses(); 523 | 524 | // responses are ordered! this should correspond to ResourceType::all() with 525 | // any missing types omitted. 
526 | let rtypes: Vec = responses 527 | .iter() 528 | .filter_map(|r| ResourceType::from_type_url(&r.type_url)) 529 | .collect(); 530 | assert_eq!(&rtypes, &[ResourceType::Cluster, ResourceType::Listener,],); 531 | 532 | // CDS should respond with a single message containing only the out of 533 | // date resource 534 | assert_eq!(responses[0].resources.len(), 1); 535 | assert_eq!( 536 | responses[0].resources[0].name, 537 | "nginx-staging.default.svc.cluster.local:80" 538 | ); 539 | 540 | // LDS response should contain the subscribed resource 541 | assert_eq!(responses[1].resources.len(), 1); 542 | assert_eq!(responses[1].resources[0].name, "nginx.example.com"); 543 | } 544 | 545 | #[test] 546 | fn test_lds_ack() { 547 | let node = xds_core::Node { 548 | id: "test-node".to_string(), 549 | ..Default::default() 550 | }; 551 | 552 | let (_, snapshot) = new_snapshot([ 553 | (ResourceType::Listener, vec!["nginx.example.com"]), 554 | ( 555 | ResourceType::Cluster, 556 | vec![ 557 | "nginx.default.svc.cluster.local:80", 558 | "nginx-staging.default.svc.cluster.local:80", 559 | ], 560 | ), 561 | ( 562 | ResourceType::ClusterLoadAssignment, 563 | vec![ 564 | "nginx.default.svc.cluster.local:80", 565 | "nginx-staging.default.svc.cluster.local:80", 566 | ], 567 | ), 568 | ]); 569 | 570 | // send a request 571 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 572 | conn.handle_ads_request(request!( 573 | ResourceType::Listener, 574 | add = vec!["nginx.example.com"] 575 | )) 576 | .unwrap(); 577 | 578 | let resp = conn.ads_responses(); 579 | assert_eq!(resp.len(), 1); 580 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 581 | assert_eq!(resp[0].resources.len(), 1); 582 | 583 | // handle an ACK 584 | conn.handle_ads_request(request!(ResourceType::Listener, n = &resp[0].nonce)) 585 | .unwrap(); 586 | 587 | let resp = conn.ads_responses(); 588 | assert!(resp.is_empty()); 589 | 590 | // track the ACK state 591 | let sub = 
conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 592 | assert!( 593 | matches!( 594 | sub, 595 | AdsSubscription { 596 | last_ack_nonce: Some(n1), 597 | last_sent_nonce: Some(n2), 598 | applied: true, 599 | .. 600 | } if n1 == n2, 601 | ), 602 | "should track the ACK in the subscription: sub={sub:?}", 603 | ); 604 | } 605 | 606 | #[test] 607 | fn test_lds_nack() { 608 | let node = xds_core::Node { 609 | id: "test-node".to_string(), 610 | ..Default::default() 611 | }; 612 | 613 | let (_, snapshot) = new_snapshot([ 614 | (ResourceType::Listener, vec!["nginx.example.com"]), 615 | ( 616 | ResourceType::Cluster, 617 | vec![ 618 | "nginx.default.svc.cluster.local:80", 619 | "nginx-staging.default.svc.cluster.local:80", 620 | ], 621 | ), 622 | ( 623 | ResourceType::ClusterLoadAssignment, 624 | vec![ 625 | "nginx.default.svc.cluster.local:80", 626 | "nginx-staging.default.svc.cluster.local:80", 627 | ], 628 | ), 629 | ]); 630 | 631 | // send a request 632 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 633 | conn.handle_ads_request(request!( 634 | ResourceType::Listener, 635 | add = vec!["nginx.example.com"] 636 | )) 637 | .unwrap(); 638 | 639 | let resp = conn.ads_responses(); 640 | assert_eq!(resp.len(), 1); 641 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 642 | assert_eq!(resp[0].resources.len(), 1); 643 | 644 | // handle an NACK 645 | conn.handle_ads_request(request!( 646 | ResourceType::Listener, 647 | n = Some(&resp[0].nonce), 648 | err = "you can't cut back on funding, you will regret this" 649 | )) 650 | .unwrap(); 651 | 652 | assert!(conn.ads_responses().is_empty()); 653 | 654 | // should track the NACK 655 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 656 | assert!( 657 | matches!( 658 | sub, 659 | AdsSubscription { 660 | last_ack_nonce: None, 661 | last_sent_nonce: Some(_), 662 | applied: false, 663 | .. 
664 | } 665 | ), 666 | "should track the NACK in the subscription: sub={sub:?}", 667 | ); 668 | } 669 | 670 | #[test] 671 | fn test_lds_remove_subscription() { 672 | let node = xds_core::Node { 673 | id: "test-node".to_string(), 674 | ..Default::default() 675 | }; 676 | let (_, snapshot) = new_snapshot([(ResourceType::Listener, vec!["nginx.example.com"])]); 677 | 678 | // send a request for a new subscription 679 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 680 | conn.handle_ads_request(request!( 681 | ResourceType::Listener, 682 | add = vec!["nginx.example.com"] 683 | )) 684 | .unwrap(); 685 | 686 | // should respond with the data and treat the resource as subscribed. 687 | let resp = conn.ads_responses(); 688 | assert_eq!(resp.len(), 1); 689 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 690 | assert_eq!(resp[0].resources.len(), 1); 691 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 692 | assert!(sub.sent.contains_key("nginx.example.com")); 693 | 694 | // handle the ACK 695 | conn.handle_ads_request(request!(ResourceType::Listener, n = &resp[0].nonce)) 696 | .unwrap(); 697 | let resp = conn.ads_responses(); 698 | assert!(resp.is_empty()); 699 | 700 | // send an unsubcribe to the server 701 | conn.handle_ads_request(request!( 702 | ResourceType::Listener, 703 | remove = vec!["nginx.example.com"] 704 | )) 705 | .unwrap(); 706 | 707 | // server should not generate a response, but should unsubscribe the 708 | // client. 
709 | assert!(conn.ads_responses().is_empty()); 710 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 711 | assert!(!sub.sent.contains_key("nginx.example.com")); 712 | } 713 | 714 | #[test] 715 | fn test_lds_remove_subscription_wildcard() { 716 | let node = xds_core::Node { 717 | id: "test-node".to_string(), 718 | ..Default::default() 719 | }; 720 | let (_, snapshot) = new_snapshot([(ResourceType::Listener, vec!["nginx.example.com"])]); 721 | 722 | // send a request for a new subscription 723 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 724 | conn.handle_ads_request(request!( 725 | ResourceType::Listener, 726 | add = vec!["*", "nginx.example.com"] 727 | )) 728 | .unwrap(); 729 | 730 | // should respond with the data and treat the resource as subscribed as well as part 731 | // of the wildcard. 732 | let resp = conn.ads_responses(); 733 | assert_eq!(resp.len(), 1); 734 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 735 | assert_eq!(resp[0].resources.len(), 1); 736 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 737 | assert!(sub.is_wildcard); 738 | assert!(sub.sent.contains_key("nginx.example.com")); 739 | 740 | // handle the ACK 741 | conn.handle_ads_request(request!(ResourceType::Listener, n = &resp[0].nonce)) 742 | .unwrap(); 743 | let resp = conn.ads_responses(); 744 | assert!(resp.is_empty()); 745 | 746 | // send an unsubcribe to the server 747 | conn.handle_ads_request(request!( 748 | ResourceType::Listener, 749 | remove = vec!["nginx.example.com"] 750 | )) 751 | .unwrap(); 752 | 753 | // server should generate a response to indicate that the resource was not part 754 | // of the wildcard. 
/// Subscribing to a Listener missing from the snapshot yields a removal
/// notice; once the resource is written, the next snapshot update delivers it.
#[test]
fn test_lds_not_found() {
    const MISSING: &str = "new.example.com";

    let (_, snapshot, mut writer) =
        new_snapshot_with_writer([(ResourceType::Listener, vec!["nginx.example.com"])]);
    let node = xds_core::Node {
        id: "test-node".to_string(),
        ..Default::default()
    };

    // subscribe to a name the snapshot does not contain
    let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone());
    conn.handle_ads_request(request!(ResourceType::Listener, add = vec![MISSING]))
        .unwrap();

    // the server reports the name as removed (i.e. not found)
    let not_found = conn.ads_responses();
    assert_eq!(not_found.len(), 1);
    assert_eq!(not_found[0].type_url, ResourceType::Listener.type_url());
    assert_eq!(not_found[0].removed_resources, vec![MISSING]);

    // publish the resource and tell the connection that Listeners changed
    let mut update = ResourceSnapshot::new();
    update.insert_update(ResourceType::Listener, MISSING.to_string(), anything());
    let next_version = writer.update(update);
    conn.handle_snapshot_update(ResourceType::Listener);

    // the new resource arrives, stamped with the new version
    let delivered = conn.ads_responses();
    assert_eq!(delivered.len(), 1);
    assert_eq!(delivered[0].type_url, ResourceType::Listener.type_url());
    assert_eq!(delivered[0].resources[0].name, MISSING);
    assert_eq!(delivered[0].resources[0].version, next_version.to_string());
}
"nginx.default.svc.cluster.local:80", 815 | "nginx-staging.default.svc.cluster.local:80", 816 | ], 817 | ), 818 | ( 819 | ResourceType::ClusterLoadAssignment, 820 | vec![ 821 | "nginx.default.svc.cluster.local:80", 822 | "nginx-staging.default.svc.cluster.local:80", 823 | ], 824 | ), 825 | ]); 826 | 827 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 828 | conn.handle_ads_request(request!( 829 | ResourceType::Cluster, 830 | init = vec![("nginx.default.svc.cluster.local:80", "old-version-number")] 831 | )) 832 | .unwrap(); 833 | 834 | // should update the cluster 835 | let resp = conn.ads_responses(); 836 | assert_eq!(resp.len(), 1); 837 | assert_eq!(resp[0].type_url, ResourceType::Cluster.type_url()); 838 | assert_eq!(resp[0].resources.len(), 1); 839 | assert_eq!( 840 | resp[0].resources[0].name, 841 | "nginx.default.svc.cluster.local:80" 842 | ); 843 | 844 | // first ACK changes the subscription, which should generate a respoonse. 845 | conn.handle_ads_request(request!( 846 | ResourceType::Cluster, 847 | n = &resp[0].nonce, 848 | add = vec!["nginx-staging.default.svc.cluster.local:80"] 849 | )) 850 | .unwrap(); 851 | 852 | let resp = conn.ads_responses(); 853 | assert_eq!(resp.len(), 1); 854 | assert_eq!(resp[0].type_url, ResourceType::Cluster.type_url()); 855 | assert_eq!(resp[0].resources.len(), 1); 856 | assert_eq!( 857 | resp[0].resources[0].name, 858 | "nginx-staging.default.svc.cluster.local:80" 859 | ); 860 | 861 | // second ack shouldn't change anything 862 | conn.handle_ads_request(request!(ResourceType::Cluster, n = &resp[0].nonce)) 863 | .unwrap(); 864 | assert!(conn.ads_responses().is_empty()); 865 | 866 | let sub = conn.subscriptions[ResourceType::Cluster].as_ref().unwrap(); 867 | assert!( 868 | matches!( 869 | sub, 870 | AdsSubscription { 871 | last_ack_nonce: Some(n1), 872 | last_sent_nonce: Some(n2), 873 | applied: true, 874 | .. 
/// Removing one of two acknowledged EDS resources is silent and leaves the
/// subscription in its ACKed state.
#[test]
fn test_eds_remove_subscription() {
    const PROD: &str = "nginx.default.svc.cluster.local:80";
    const STAGING: &str = "nginx-staging.default.svc.cluster.local:80";

    let (_, snapshot) = new_snapshot([
        (ResourceType::Listener, vec!["nginx.example.com"]),
        (ResourceType::Cluster, vec![PROD, STAGING]),
        (ResourceType::ClusterLoadAssignment, vec![PROD, STAGING]),
    ]);
    let node = xds_core::Node {
        id: "test-node".to_string(),
        ..Default::default()
    };

    // subscribing to both load assignments produces a single response that
    // carries both resources.
    let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone());
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        add = vec![PROD, STAGING]
    ))
    .unwrap();

    let initial = conn.ads_responses();
    assert_eq!(initial.len(), 1);
    assert_eq!(initial[0].resources.len(), 2);

    // acknowledge it; the ACK is silent
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        n = &initial[0].nonce
    ))
    .unwrap();
    assert!(conn.ads_responses().is_empty());

    // drop the staging assignment; the server has nothing to say about it
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        remove = vec![STAGING]
    ))
    .unwrap();
    assert!(conn.ads_responses().is_empty());

    // the last sent nonce is still the acknowledged one
    let sub = conn.subscriptions[ResourceType::ClusterLoadAssignment]
        .as_ref()
        .unwrap();
    let acked = matches!(
        sub,
        AdsSubscription {
            last_ack_nonce: Some(n1),
            last_sent_nonce: Some(n2),
            applied: true,
            ..
        } if n1 == n2
    );
    assert!(
        acked,
        "should track the ACK in the subscription: sub={sub:?}",
    );
}
996 | let mut snapshot = ResourceSnapshot::new(); 997 | snapshot.insert_update( 998 | ResourceType::ClusterLoadAssignment, 999 | "nginx-next.default.svc.cluster.local:80".to_string(), 1000 | anything(), 1001 | ); 1002 | let next_version = writer.update(snapshot); 1003 | conn.handle_snapshot_update(ResourceType::ClusterLoadAssignment); 1004 | 1005 | let resp = conn.ads_responses(); 1006 | assert_eq!(resp.len(), 1); 1007 | assert_eq!( 1008 | resp[0].type_url, 1009 | ResourceType::ClusterLoadAssignment.type_url(), 1010 | ); 1011 | assert_eq!( 1012 | resp[0].resources[0].name, 1013 | "nginx-next.default.svc.cluster.local:80", 1014 | ); 1015 | assert_eq!(resp[0].resources[0].version, next_version.to_string()); 1016 | } 1017 | 1018 | #[test] 1019 | fn test_eds_add_remove_add() { 1020 | let node = xds_core::Node { 1021 | id: "test-node".to_string(), 1022 | ..Default::default() 1023 | }; 1024 | 1025 | let (version, snapshot) = new_snapshot([ 1026 | (ResourceType::Listener, vec!["nginx.example.com"]), 1027 | ( 1028 | ResourceType::Cluster, 1029 | vec![ 1030 | "nginx.default.svc.cluster.local:80", 1031 | "nginx-staging.default.svc.cluster.local:80", 1032 | ], 1033 | ), 1034 | ( 1035 | ResourceType::ClusterLoadAssignment, 1036 | vec![ 1037 | "nginx.default.svc.cluster.local:80", 1038 | "nginx-staging.default.svc.cluster.local:80", 1039 | ], 1040 | ), 1041 | ]); 1042 | 1043 | // Initial EDS connection 1044 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 1045 | conn.handle_ads_request(request!( 1046 | ResourceType::ClusterLoadAssignment, 1047 | init = vec![("nginx.default.svc.cluster.local:80", &version.to_string())] 1048 | )) 1049 | .unwrap(); 1050 | 1051 | assert!(conn.ads_responses().is_empty()); 1052 | 1053 | // add, remove, and add again before we get a chance to reply 1054 | conn.handle_ads_request(request!( 1055 | ResourceType::ClusterLoadAssignment, 1056 | add = vec!["nginx-staging.default.svc.cluster.local:80"] 1057 | )) 1058 | .unwrap(); 1059 
/// A remove / add / remove burst for an already-known EDS resource nets out
/// to a removal, which must not generate any response.
#[test]
fn test_eds_remove_add_remove() {
    const PROD: &str = "nginx.default.svc.cluster.local:80";
    const STAGING: &str = "nginx-staging.default.svc.cluster.local:80";

    let (version, snapshot) = new_snapshot([
        (ResourceType::Listener, vec!["nginx.example.com"]),
        (ResourceType::Cluster, vec![PROD, STAGING]),
        (ResourceType::ClusterLoadAssignment, vec![PROD, STAGING]),
    ]);
    let node = xds_core::Node {
        id: "test-node".to_string(),
        ..Default::default()
    };

    // reconnect claiming the current version of the prod assignment, so the
    // server has nothing to send.
    let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone());
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        init = vec![(PROD, &version.to_string())]
    ))
    .unwrap();

    assert!(conn.ads_responses().is_empty());

    // remove, add, and remove again before any reply is collected
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        remove = vec![PROD]
    ))
    .unwrap();
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        add = vec![PROD]
    ))
    .unwrap();
    conn.handle_ads_request(request!(
        ResourceType::ClusterLoadAssignment,
        remove = vec![PROD]
    ))
    .unwrap();

    // the net effect is a removal, which is never answered
    assert_eq!(conn.ads_responses(), vec![]);
}
1181 | let mut snapshot = ResourceSnapshot::new(); 1182 | snapshot.insert_update( 1183 | ResourceType::ClusterLoadAssignment, 1184 | "nginx.default.svc.cluster.local:80".to_string(), 1185 | anything(), 1186 | ); 1187 | let next_version = writer.update(snapshot); 1188 | 1189 | conn.handle_snapshot_update(ResourceType::ClusterLoadAssignment); 1190 | 1191 | // should return a single response for the changed resource 1192 | let resp = conn.ads_responses(); 1193 | assert_eq!(resp.len(), 1); 1194 | assert_eq!( 1195 | resp[0].type_url, 1196 | ResourceType::ClusterLoadAssignment.type_url(), 1197 | ); 1198 | assert_eq!( 1199 | resp[0].resources[0].name, 1200 | "nginx.default.svc.cluster.local:80", 1201 | ); 1202 | assert_eq!(resp[0].resources[0].version, next_version.to_string()); 1203 | } 1204 | 1205 | fn new_snapshot( 1206 | data: impl IntoIterator)>, 1207 | ) -> (ResourceVersion, SnapshotCache) { 1208 | let (version, cache, _writer) = new_snapshot_with_writer(data); 1209 | (version, cache) 1210 | } 1211 | 1212 | fn new_snapshot_with_writer( 1213 | data: impl IntoIterator)>, 1214 | ) -> (ResourceVersion, SnapshotCache, SnapshotWriter) { 1215 | let mut snapshot = ResourceSnapshot::new(); 1216 | for (rtype, names) in data { 1217 | for name in names { 1218 | snapshot.insert_update(rtype, name.to_string(), anything()); 1219 | } 1220 | } 1221 | 1222 | let (cache, mut writer) = crate::xds::snapshot([]); 1223 | let version = writer.update(snapshot); 1224 | 1225 | (version, cache, writer) 1226 | } 1227 | 1228 | fn request( 1229 | rtype: ResourceType, 1230 | response_nonce: Option<&str>, 1231 | init: Vec<(&str, &str)>, 1232 | add: Vec<&str>, 1233 | remove: Vec<&str>, 1234 | error: Option<&str>, 1235 | ) -> DeltaDiscoveryRequest { 1236 | let type_url = rtype.type_url().to_string(); 1237 | let response_nonce = response_nonce.map(|s| s.to_string()).unwrap_or_default(); 1238 | let resource_names_subscribe = add.into_iter().map(|n| n.to_string()).collect(); 1239 | let 
resource_names_unsubscribe = remove.into_iter().map(|n| n.to_string()).collect(); 1240 | let initial_resource_versions = init 1241 | .into_iter() 1242 | .map(|(k, v)| (k.to_string(), v.to_string())) 1243 | .collect(); 1244 | 1245 | let error_detail = error.map(|s| xds_api::pb::google::rpc::Status { 1246 | code: tonic::Code::InvalidArgument.into(), 1247 | message: s.to_string(), 1248 | ..Default::default() 1249 | }); 1250 | 1251 | DeltaDiscoveryRequest { 1252 | type_url, 1253 | response_nonce, 1254 | initial_resource_versions, 1255 | resource_names_subscribe, 1256 | resource_names_unsubscribe, 1257 | error_detail, 1258 | ..Default::default() 1259 | } 1260 | } 1261 | 1262 | fn anything() -> protobuf::Any { 1263 | protobuf::Any { 1264 | type_url: "type_url".to_string(), 1265 | value: vec![], 1266 | } 1267 | } 1268 | } 1269 | -------------------------------------------------------------------------------- /src/xds/resources.rs: -------------------------------------------------------------------------------- 1 | use xds_api::WellKnownTypes; 2 | 3 | #[derive(Clone, Copy, Debug, PartialEq, Eq, enum_map::Enum)] 4 | pub(crate) enum ResourceType { 5 | Listener, 6 | RouteConfiguration, 7 | Cluster, 8 | ClusterLoadAssignment, 9 | } 10 | 11 | impl ResourceType { 12 | fn from_wkt(wkt: WellKnownTypes) -> Option { 13 | match wkt { 14 | WellKnownTypes::Listener => Some(Self::Listener), 15 | WellKnownTypes::RouteConfiguration => Some(Self::RouteConfiguration), 16 | WellKnownTypes::Cluster => Some(Self::Cluster), 17 | WellKnownTypes::ClusterLoadAssignment => Some(Self::ClusterLoadAssignment), 18 | _ => None, 19 | } 20 | } 21 | 22 | fn as_wkt(&self) -> WellKnownTypes { 23 | match self { 24 | ResourceType::Listener => WellKnownTypes::Listener, 25 | ResourceType::RouteConfiguration => WellKnownTypes::RouteConfiguration, 26 | ResourceType::Cluster => WellKnownTypes::Cluster, 27 | ResourceType::ClusterLoadAssignment => WellKnownTypes::ClusterLoadAssignment, 28 | } 29 | } 30 | 31 | 
pub(crate) const fn supports_wildcard(&self) -> bool { 32 | matches!(self, ResourceType::Cluster | ResourceType::Listener) 33 | } 34 | 35 | /// Return a slice of all resource types, ordered according to Envoy's preferred 36 | /// make-before-break ordering. 37 | /// 38 | /// See 39 | pub(crate) const fn all() -> &'static [Self] { 40 | &[ 41 | Self::Cluster, 42 | Self::ClusterLoadAssignment, 43 | Self::Listener, 44 | Self::RouteConfiguration, 45 | ] 46 | } 47 | 48 | pub(crate) fn type_url(&self) -> &'static str { 49 | self.as_wkt().type_url() 50 | } 51 | 52 | pub(crate) fn from_type_url(type_url: &str) -> Option { 53 | WellKnownTypes::from_type_url(type_url).and_then(Self::from_wkt) 54 | } 55 | 56 | pub(crate) fn group_responses(&self) -> bool { 57 | matches!(self, ResourceType::Listener | ResourceType::Cluster) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/xds/server.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeMap; 2 | use std::{net::SocketAddr, pin::Pin, sync::Arc}; 3 | 4 | use crossbeam_skiplist::SkipMap; 5 | use enum_map::EnumMap; 6 | use futures::Stream; 7 | use metrics::counter; 8 | use smol_str::SmolStr; 9 | use tokio_stream::wrappers::ReceiverStream; 10 | use tonic::{Request, Response, Status, Streaming}; 11 | use tracing::{info, trace, warn, Span}; 12 | use xds_api::pb::envoy::config::core::v3 as xds_node; 13 | use xds_api::pb::envoy::service::{ 14 | cluster::v3::cluster_discovery_service_server::ClusterDiscoveryService, 15 | discovery::v3::{ 16 | aggregated_discovery_service_server::AggregatedDiscoveryService, DeltaDiscoveryRequest, 17 | DeltaDiscoveryResponse, DiscoveryRequest, DiscoveryResponse, 18 | }, 19 | endpoint::v3::endpoint_discovery_service_server::EndpointDiscoveryService, 20 | listener::v3::listener_discovery_service_server::ListenerDiscoveryService, 21 | 
route::v3::route_discovery_service_server::RouteDiscoveryService, 22 | status::v3::{ 23 | client_config::GenericXdsConfig, 24 | client_status_discovery_service_server::ClientStatusDiscoveryService, ClientConfig, 25 | ClientStatusRequest, ClientStatusResponse, ConfigStatus, 26 | }, 27 | }; 28 | 29 | use crate::{ 30 | grpc_access, 31 | xds::{ResourceType, SnapshotCache}, 32 | }; 33 | 34 | use super::{delta, sotw}; 35 | 36 | #[derive(Clone)] 37 | pub(crate) struct AdsServer { 38 | cache: SnapshotCache, 39 | stats: ConnectionSnapshot, 40 | } 41 | 42 | impl AdsServer { 43 | pub(crate) fn new(cache: SnapshotCache) -> Self { 44 | Self { 45 | cache, 46 | stats: Default::default(), 47 | } 48 | } 49 | 50 | fn fetch( 51 | &self, 52 | resource_type: ResourceType, 53 | request: Request, 54 | ) -> Result, Status> { 55 | let request = request.into_inner(); 56 | 57 | grpc_access::xds_discovery_request(&request); 58 | 59 | let snapshot_version = self.cache.version(resource_type); 60 | let request_version = request.version_info.parse().ok(); 61 | if request_version == Some(snapshot_version) { 62 | // TODO: delay/long-poll here? this is what go-control-plane does, but it's odd 63 | return Err(Status::cancelled("already up to date")); 64 | } 65 | 66 | let mut resources = Vec::with_capacity(request.resource_names.len()); 67 | if request.resource_names.is_empty() { 68 | for e in self.cache.iter(resource_type) { 69 | resources.push(e.value().proto.clone()); 70 | } 71 | } else { 72 | for name in &request.resource_names { 73 | if let Some(e) = self.cache.get(resource_type, name) { 74 | resources.push(e.value().proto.clone()) 75 | } 76 | } 77 | }; 78 | 79 | let response = DiscoveryResponse { 80 | version_info: snapshot_version.to_string(), 81 | resources, 82 | ..Default::default() 83 | }; 84 | grpc_access::xds_discovery_response(&response); 85 | 86 | Ok(Response::new(response)) 87 | } 88 | } 89 | 90 | macro_rules! 
try_send { 91 | ($ch:expr, $value:expr) => { 92 | if let Err(_) = $ch.send($value).await { 93 | tracing::debug!("channel closed unexpectedly"); 94 | return; 95 | } 96 | }; 97 | } 98 | 99 | #[allow(unused, dead_code)] 100 | #[tracing::instrument( 101 | level = "info", 102 | skip_all, 103 | fields( 104 | remote_addr = tracing::field::Empty, 105 | node_id = tracing::field::Empty, 106 | node_cluster = tracing::field::Empty, 107 | ) 108 | )] 109 | async fn stream_delta_ads( 110 | snapshot: SnapshotCache, 111 | conn_stats: ConnectionStats, 112 | mut requests: Streaming, 113 | send_response: tokio::sync::mpsc::Sender>, 114 | ) { 115 | let _conn_active = crate::metrics::scoped_gauge!("delta_ads.active_connections", 1); 116 | 117 | // ?remote_addr shows us Some(_) when an addr is present and %remote_addr 118 | // doesn't compile. this is annoying but do it anyway. 119 | if let Some(addr) = &conn_stats.key.remote_addr { 120 | Span::current().record("remote_addr", addr.to_string()); 121 | } 122 | 123 | // save a handle to the snapshot to watch for changes 124 | let mut snapshot_changes = snapshot.changes(); 125 | 126 | macro_rules! send_xds { 127 | ($chan:expr, $message:expr) => { 128 | grpc_access::xds_delta_discovery_response(&$message); 129 | try_send!($chan, Ok($message)); 130 | counter!("delta_ads.tx").increment(1); 131 | }; 132 | } 133 | 134 | macro_rules! recv_xds { 135 | ($message:expr) => { 136 | match $message { 137 | Ok(Some(msg)) => { 138 | grpc_access::xds_delta_discovery_request(&msg); 139 | counter!("delta_ads.rx").increment(1); 140 | msg 141 | }, 142 | // the stream has ended 143 | Ok(None) => return, 144 | // the connection is hosed, just bail 145 | Err(e) if io_source(&e).is_some() => { 146 | trace!(err = %e, "closing connection: ignoring io error"); 147 | return; 148 | }, 149 | // something actually went wrong! 
150 | Err(e) => { 151 | warn!(err = %e, "an unexpected error occurred, closing the connection"); 152 | return; 153 | }, 154 | } 155 | } 156 | } 157 | 158 | let mut initial_request = recv_xds!(requests.message().await); 159 | let mut conn = match delta::AdsConnection::from_initial_request(&mut initial_request, snapshot) 160 | { 161 | Ok(conn) => conn, 162 | Err(e) => { 163 | info!(err = %e, "refusing connection: invalid initial request"); 164 | try_send!(send_response, Err(e.into_status())); 165 | return; 166 | } 167 | }; 168 | 169 | let node = conn.node(); 170 | let current_span = Span::current(); 171 | current_span.record("node_id", &node.id); 172 | current_span.record("node_cluster", &node.cluster); 173 | conn_stats.update_node(node); 174 | 175 | conn.handle_ads_request(initial_request); 176 | 177 | let responses = conn.ads_responses(); 178 | for response in responses { 179 | send_xds!(send_response, response); 180 | } 181 | conn_stats.update_subs(conn.sent()); 182 | 183 | loop { 184 | // TODO: figure out how to coalesce multiple messages in a reasonable 185 | // way here. the right thing is something like "await the first one and 186 | // then try to pull out the same events a second-nth time", and to not 187 | // use a timer since the timer granularity is so low (like 1ms on 188 | // tokio?). 189 | tokio::select! 
{ 190 | biased; 191 | 192 | resource_type = snapshot_changes.changed() => { 193 | conn.handle_snapshot_update(resource_type); 194 | }, 195 | request = requests.message() => { 196 | let request = recv_xds!(request); 197 | if let Err(e) = conn.handle_ads_request(request) { 198 | info!(node = ?conn.node(), err = %e, "closing connection: invalid request"); 199 | try_send!(send_response, Err(e.into_status())); 200 | return; 201 | } 202 | } 203 | } 204 | 205 | let responses = conn.ads_responses(); 206 | if !responses.is_empty() { 207 | for response in responses { 208 | send_xds!(send_response, response); 209 | } 210 | conn_stats.update_subs(conn.sent()); 211 | } 212 | } 213 | } 214 | 215 | #[tracing::instrument( 216 | level = "info", 217 | skip_all, 218 | fields( 219 | remote_addr = tracing::field::Empty, 220 | node_id = tracing::field::Empty, 221 | node_cluster = tracing::field::Empty, 222 | ) 223 | )] 224 | async fn stream_ads( 225 | snapshot: SnapshotCache, 226 | conn_stats: ConnectionStats, 227 | mut requests: Streaming, 228 | send_response: tokio::sync::mpsc::Sender>, 229 | ) { 230 | let _conn_active = crate::metrics::scoped_gauge!("ads.active_connections", 1); 231 | 232 | // ?remote_addr shows us Some(_) when an addr is present and %remote_addr 233 | // doesn't compile. this is annoying but do it anyway. 234 | if let Some(addr) = &conn_stats.key.remote_addr { 235 | Span::current().record("remote_addr", addr.to_string()); 236 | } 237 | 238 | macro_rules! send_xds { 239 | ($chan:expr, $message:expr) => { 240 | grpc_access::xds_discovery_response(&$message); 241 | try_send!($chan, Ok($message)); 242 | counter!("ads.tx").increment(1); 243 | }; 244 | } 245 | 246 | macro_rules! 
recv_xds { 247 | ($message:expr) => { 248 | match $message { 249 | Ok(Some(msg)) => { 250 | grpc_access::xds_discovery_request(&msg); 251 | counter!("ads.rx").increment(1); 252 | msg 253 | }, 254 | // the stream has ended 255 | Ok(None) => return, 256 | // the connection is hosed, just bail 257 | Err(e) if io_source(&e).is_some() => { 258 | trace!(err = %e, "closing connection: ignoring io error"); 259 | return; 260 | }, 261 | // something actually went wrong! 262 | Err(e) => { 263 | warn!(err = %e, "an unexpected error occurred, closing the connection"); 264 | return; 265 | }, 266 | } 267 | } 268 | } 269 | 270 | // save a handle to the snapshot to watch for changes 271 | let mut snapshot_changes = snapshot.changes(); 272 | 273 | // pull the Node out of the initial request and add the current node info to 274 | // the current span so we can forget about it for the rest of the stream. 275 | let mut initial_request = recv_xds!(requests.message().await); 276 | let mut conn = match sotw::AdsConnection::from_initial_request(&mut initial_request, snapshot) { 277 | Ok(conn) => conn, 278 | Err(e) => { 279 | info!(err = %e, "refusing connection: invalid initial request"); 280 | try_send!(send_response, Err(e.into_status())); 281 | return; 282 | } 283 | }; 284 | 285 | let node = conn.node(); 286 | let current_span = Span::current(); 287 | current_span.record("node_id", &node.id); 288 | current_span.record("node_cluster", &node.cluster); 289 | 290 | // first round of message handling. 291 | // 292 | // this is *almost* identical to handling any subsequent message, but there 293 | // are no interrupts from snapshot updates that we might have to handle yet. 
294 | let (_, responses) = match conn.handle_ads_request(initial_request) { 295 | Ok((rty, res)) => (rty, res), 296 | Err(e) => { 297 | info!(node = ?conn.node(), err = %e, "closing connection: invalid request"); 298 | try_send!(send_response, Err(e.into_status())); 299 | return; 300 | } 301 | }; 302 | for response in responses { 303 | send_xds!(send_response, response); 304 | } 305 | conn_stats.update_subs(conn.sent()); 306 | 307 | // respond to either an incoming request or a snapshot update until the client 308 | // goes away. 309 | loop { 310 | let (_, responses) = tokio::select! { 311 | resource_type = snapshot_changes.changed() => { 312 | (Some(resource_type), conn.handle_snapshot_update(resource_type)) 313 | }, 314 | request = requests.message() => { 315 | let message = recv_xds!(request); 316 | 317 | match conn.handle_ads_request(message) { 318 | Ok((rty, res)) => (rty, res), 319 | Err(e) => { 320 | info!(node = ?conn.node(), err = %e, "closing connection: invalid request"); 321 | try_send!(send_response, Err(e.into_status())); 322 | return; 323 | }, 324 | } 325 | }, 326 | }; 327 | 328 | for response in responses { 329 | send_xds!(send_response, response); 330 | } 331 | conn_stats.update_subs(conn.sent()); 332 | } 333 | } 334 | 335 | fn io_source(status: &Status) -> Option<&std::io::Error> { 336 | let mut err: &(dyn std::error::Error + 'static) = status; 337 | 338 | loop { 339 | if let Some(e) = err.downcast_ref::() { 340 | return Some(e); 341 | } 342 | 343 | if let Some(e) = err.downcast_ref::().and_then(|e| e.get_io()) { 344 | return Some(e); 345 | } 346 | 347 | err = err.source()?; 348 | } 349 | } 350 | 351 | type SotwResponseStream = Pin> + Send>>; 352 | type DeltaResponseStream = 353 | Pin> + Send>>; 354 | 355 | #[tonic::async_trait] 356 | impl AggregatedDiscoveryService for AdsServer { 357 | type StreamAggregatedResourcesStream = SotwResponseStream; 358 | type DeltaAggregatedResourcesStream = DeltaResponseStream; 359 | 360 | async fn 
/// Implement one per-resource-type discovery service for `AdsServer`.
///
/// Only the unary fetch method is functional; it delegates to
/// `AdsServer::fetch` for the given `ResourceType` variant. The streaming and
/// delta methods always return `unimplemented`, pointing clients at ADS.
macro_rules! impl_fetch_api {
    (impl $trait:ty => $resource_type:ident { type $sotw_stream:ident; type $delta_stream:ident; fn $fetch:ident; fn $stream:ident; fn $delta:ident;}) => {
        #[tonic::async_trait]
        impl $trait for AdsServer {
            type $sotw_stream = SotwResponseStream;
            type $delta_stream = DeltaResponseStream;

            async fn $fetch(
                &self,
                request: Request<DiscoveryRequest>,
            ) -> Result<Response<DiscoveryResponse>, Status> {
                self.fetch(ResourceType::$resource_type, request)
            }

            async fn $stream(
                &self,
                _request: Request<Streaming<DiscoveryRequest>>,
            ) -> Result<Response<Self::$sotw_stream>, Status> {
                // name the resource type actually being served, rather than
                // reporting "EDS" for every service.
                Err(Status::unimplemented(concat!(
                    "ezbake does not support streaming ",
                    stringify!($resource_type),
                    " discovery. please use ADS",
                )))
            }

            async fn $delta(
                &self,
                _request: Request<Streaming<DeltaDiscoveryRequest>>,
            ) -> std::result::Result<Response<Self::$delta_stream>, Status> {
                Err(Status::unimplemented(concat!(
                    "ezbake does not support incremental ",
                    stringify!($resource_type),
                    " discovery. please use ADS",
                )))
            }
        }
    };
}
{ 465 | impl EndpointDiscoveryService => ClusterLoadAssignment { 466 | 467 | type StreamEndpointsStream; 468 | type DeltaEndpointsStream; 469 | 470 | fn fetch_endpoints; 471 | fn stream_endpoints; 472 | fn delta_endpoints; 473 | } 474 | } 475 | 476 | type ClientStatusResponsestream = 477 | Pin> + Send>>; 478 | 479 | #[tonic::async_trait] 480 | impl ClientStatusDiscoveryService for AdsServer { 481 | type StreamClientStatusStream = ClientStatusResponsestream; 482 | 483 | async fn stream_client_status( 484 | &self, 485 | _request: Request>, 486 | ) -> Result, Status> { 487 | return Err(Status::unimplemented( 488 | "streaming client status is not supported", 489 | )); 490 | } 491 | 492 | async fn fetch_client_status( 493 | &self, 494 | request: Request, 495 | ) -> Result, Status> { 496 | let request = request.into_inner(); 497 | if !request.node_matchers.is_empty() { 498 | return Err(Status::invalid_argument("node_matchers are unsupported")); 499 | } 500 | 501 | let mut config = vec![]; 502 | for conn_stats in self.stats.iter() { 503 | let mut generic_xds_configs = vec![]; 504 | 505 | let node = conn_stats.node().clone(); 506 | for (rtype, subs) in conn_stats.subscriptions() { 507 | generic_xds_configs.extend(subs.to_generic_xds_config(rtype)); 508 | } 509 | 510 | config.push(ClientConfig { 511 | node: Some(node), 512 | generic_xds_configs, 513 | ..Default::default() 514 | }); 515 | } 516 | 517 | Ok(Response::new(ClientStatusResponse { config })) 518 | } 519 | } 520 | 521 | // an Arc'd map of connection stats 522 | #[derive(Clone, Default)] 523 | struct ConnectionSnapshot { 524 | connections: Arc>, 525 | } 526 | 527 | impl ConnectionSnapshot { 528 | fn new_connection( 529 | &self, 530 | proto: &'static str, 531 | remote_addr: Option, 532 | local_addr: Option, 533 | ) -> ConnectionStats { 534 | ConnectionStats { 535 | connections: self.connections.clone(), 536 | key: ConnKey { 537 | protocol: proto, 538 | remote_addr, 539 | local_addr, 540 | }, 541 | } 542 | } 543 | 544 
| fn iter(&self) -> impl Iterator + '_ { 545 | self.connections.iter().map(ConnectionSnapshotEntry) 546 | } 547 | } 548 | 549 | pub(crate) struct ConnectionSnapshotEntry<'a>( 550 | crossbeam_skiplist::map::Entry<'a, ConnKey, ConnInfo>, 551 | ); 552 | 553 | impl ConnectionSnapshotEntry<'_> { 554 | pub(crate) fn node(&self) -> &xds_node::Node { 555 | &self.0.value().node 556 | } 557 | 558 | pub(crate) fn subscriptions(&self) -> &EnumMap { 559 | &self.0.value().subscriptions 560 | } 561 | } 562 | 563 | // an RAII stats counter for an individual connection 564 | struct ConnectionStats { 565 | connections: Arc>, 566 | key: ConnKey, 567 | } 568 | 569 | impl Drop for ConnectionStats { 570 | fn drop(&mut self) { 571 | self.connections.remove(&self.key); 572 | } 573 | } 574 | 575 | // a key to uniquely identify a connection 576 | #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] 577 | struct ConnKey { 578 | protocol: &'static str, 579 | remote_addr: Option, 580 | local_addr: Option, 581 | } 582 | 583 | impl ConnectionStats { 584 | fn update_node(&self, node: &xds_node::Node) { 585 | let subscriptions = match self.connections.get(&self.key) { 586 | Some(e) => e.value().subscriptions.clone(), 587 | None => Default::default(), 588 | }; 589 | 590 | self.connections.insert( 591 | self.key.clone(), 592 | ConnInfo { 593 | node: node.clone(), 594 | subscriptions, 595 | }, 596 | ); 597 | } 598 | 599 | fn update_subs(&self, subscriptions: EnumMap) { 600 | let node = self 601 | .connections 602 | .get(&self.key) 603 | .map(|e| e.value().node.clone()) 604 | .unwrap_or_default(); 605 | 606 | self.connections.insert( 607 | self.key.clone(), 608 | ConnInfo { 609 | node, 610 | subscriptions, 611 | }, 612 | ); 613 | } 614 | } 615 | 616 | // protocol-based connection info. agnostic to sotw vs. delta so we can use csds 617 | // for both. 
618 | struct ConnInfo { 619 | node: xds_node::Node, 620 | subscriptions: EnumMap, 621 | } 622 | 623 | #[derive(Clone, Debug, Default)] 624 | pub(crate) struct SubInfo { 625 | pub(crate) applied: bool, 626 | pub(crate) sent: BTreeMap, 627 | } 628 | 629 | impl SubInfo { 630 | fn to_generic_xds_config(&self, rtype: ResourceType) -> Vec { 631 | let type_url = rtype.type_url(); 632 | let config_status = match self.applied { 633 | true => ConfigStatus::Synced, 634 | false => ConfigStatus::Error, 635 | }; 636 | let configs = self.sent.iter().map(|(name, version)| { 637 | let name = name.to_string(); 638 | let version_info = version.to_string(); 639 | GenericXdsConfig { 640 | type_url: type_url.to_string(), 641 | name, 642 | version_info, 643 | config_status: config_status.into(), 644 | ..Default::default() 645 | } 646 | }); 647 | configs.collect() 648 | } 649 | } 650 | -------------------------------------------------------------------------------- /src/xds/sotw.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeMap, str::FromStr}; 2 | 3 | use enum_map::EnumMap; 4 | use smol_str::{SmolStr, ToSmolStr}; 5 | use tracing::trace; 6 | use xds_api::pb::envoy::{ 7 | config::core::v3 as xds_node, 8 | service::discovery::v3::{DiscoveryRequest, DiscoveryResponse}, 9 | }; 10 | 11 | use crate::xds::resources::ResourceType; 12 | use crate::xds::{cache::SnapshotCache, is_nack}; 13 | 14 | use super::{cache::ResourceVersion, server::SubInfo, ResourceNames}; 15 | 16 | #[derive(Debug, thiserror::Error)] 17 | pub(crate) enum ConnectionError { 18 | #[error("missing node info")] 19 | MisingNode, 20 | 21 | #[error("invalid request: {0}")] 22 | InvalidRequest(anyhow::Error), 23 | } 24 | 25 | impl ConnectionError { 26 | pub(crate) fn into_status(self) -> tonic::Status { 27 | tonic::Status::invalid_argument(self.to_string()) 28 | } 29 | } 30 | 31 | /// The state of a subscription to an resource type, managed as part of an 32 | /// 
[AdsConnection]. 33 | #[derive(Clone, Debug, Default)] 34 | pub(crate) struct AdsSubscription { 35 | /// the names that the client is subscribed to 36 | names: ResourceNames, 37 | 38 | /// the version of the last response sent 39 | pub(crate) last_sent_version: Option, 40 | 41 | /// the nonce of the last reseponse sent 42 | pub(crate) last_sent_nonce: Option, 43 | 44 | /// the last version of each resource sent back to the client 45 | /// 46 | /// this isn't used to verify what resources were sent for LDS/CDS, but is 47 | /// kept to track state for CSDS. 48 | pub(crate) sent: BTreeMap, 49 | 50 | /// whether or not the client applied the last response 51 | pub(crate) applied: bool, 52 | 53 | /// the last version a client successfully ACK'd 54 | pub(crate) last_ack_version: Option, 55 | 56 | /// the last nonce a client successfully ACK'd 57 | pub(crate) last_ack_nonce: Option, 58 | } 59 | 60 | pub(crate) struct AdsConnection { 61 | node: xds_node::Node, 62 | nonce: u64, 63 | snapshot: SnapshotCache, 64 | subscriptions: EnumMap>, 65 | } 66 | 67 | impl AdsConnection { 68 | pub(crate) fn from_initial_request( 69 | request: &mut DiscoveryRequest, 70 | snapshot: SnapshotCache, 71 | ) -> Result { 72 | match request.node.take() { 73 | Some(node) => Ok(Self { 74 | nonce: 0, 75 | node, 76 | snapshot, 77 | subscriptions: EnumMap::default(), 78 | }), 79 | None => Err(ConnectionError::MisingNode), 80 | } 81 | } 82 | 83 | #[cfg(test)] 84 | fn test_new(node: xds_node::Node, snapshot: SnapshotCache) -> Self { 85 | Self { 86 | nonce: 0, 87 | node, 88 | snapshot, 89 | subscriptions: EnumMap::default(), 90 | } 91 | } 92 | 93 | pub(crate) fn node(&self) -> &xds_node::Node { 94 | &self.node 95 | } 96 | 97 | pub(crate) fn sent(&self) -> EnumMap { 98 | let mut sent = EnumMap::default(); 99 | 100 | for (rtype, sub) in &self.subscriptions { 101 | let Some(sub) = sub else { 102 | continue; 103 | }; 104 | sent[rtype] = SubInfo { 105 | applied: sub.applied, 106 | sent: sub.sent.clone(), 107 | 
} 108 | } 109 | 110 | sent 111 | } 112 | 113 | pub(crate) fn handle_ads_request( 114 | &mut self, 115 | request: DiscoveryRequest, 116 | ) -> Result<(Option, Vec), ConnectionError> { 117 | macro_rules! empty_response { 118 | () => { 119 | Ok((None, Vec::new())) 120 | }; 121 | } 122 | 123 | let Some(rtype) = ResourceType::from_type_url(&request.type_url) else { 124 | return Ok((None, Vec::new())); 125 | }; 126 | 127 | let sub = self.subscriptions[rtype].get_or_insert_with(AdsSubscription::default); 128 | 129 | // pull the request version and nonce and immediately verify that the 130 | // requests is not stale. bail out if it is. 131 | // 132 | // NOTE: should we actually validate that these are ostensibly resource 133 | // versions/nonces we produced? they're checked for matching but that's 134 | // it - is there a real benefit to telling a client it's behaving badly? 135 | let request_version = nonempty_then(&request.version_info, ResourceVersion::from_str) 136 | .transpose() 137 | .map_err(|e| ConnectionError::InvalidRequest(e.into()))?; 138 | let request_nonce = nonempty_then(&request.response_nonce, SmolStr::new); 139 | if request_nonce != sub.last_sent_nonce { 140 | trace!( 141 | v = request.version_info, 142 | n = request.response_nonce, 143 | ty = request.type_url, 144 | r = ?request.resource_names, 145 | last_sent_nonce = %sub.last_sent_nonce.as_ref().unwrap_or(&"".to_smolstr()), 146 | "ignoring stale request", 147 | ); 148 | return empty_response!(); 149 | } 150 | 151 | // if this isn't the initial request on a stream, update some state 152 | if request_nonce.is_some() { 153 | if is_nack(&request) { 154 | sub.applied = false; 155 | } else { 156 | sub.applied = true; 157 | sub.last_ack_nonce = request_nonce; 158 | // clone_from is here because clippy 159 | sub.last_ack_version.clone_from(&request_version); 160 | } 161 | } 162 | 163 | // updates should always go out if the version requested by the client 164 | // isn't the current version. 
165 | let out_of_date = request_version != Some(self.snapshot.version(rtype)); 166 | 167 | // update the current subscription's resource names. if the names have 168 | // changed replace the current connection's names. send an update if 169 | // names change at all. 170 | let resource_names = ResourceNames::from_names(&sub.names, request.resource_names); 171 | let names_changed = sub.names != resource_names; 172 | if names_changed { 173 | sub.names = resource_names; 174 | } 175 | 176 | let mut responses = Vec::new(); 177 | if out_of_date || names_changed { 178 | if rtype.group_responses() { 179 | responses = sub.sotw_update(&self.snapshot, &mut self.nonce, rtype) 180 | } else { 181 | responses = sub.incremental_update(&self.snapshot, &mut self.nonce, rtype); 182 | } 183 | } 184 | 185 | Ok((Some(rtype), responses)) 186 | } 187 | 188 | pub(crate) fn handle_snapshot_update( 189 | &mut self, 190 | changed_type: ResourceType, 191 | ) -> Vec { 192 | let Some(sub) = &mut self.subscriptions[changed_type] else { 193 | return Vec::new(); 194 | }; 195 | 196 | trace!( 197 | sub_last_sent_version = ?sub.last_sent_version, 198 | snapshot_version = ?self.snapshot.version(changed_type), 199 | ?changed_type, 200 | "snapshot updated", 201 | ); 202 | if sub.last_sent_version == Some(self.snapshot.version(changed_type)) { 203 | return Vec::new(); 204 | } 205 | 206 | if changed_type.group_responses() { 207 | sub.sotw_update(&self.snapshot, &mut self.nonce, changed_type) 208 | } else { 209 | sub.incremental_update(&self.snapshot, &mut self.nonce, changed_type) 210 | } 211 | } 212 | } 213 | 214 | fn nonempty_then<'a, F, T>(s: &'a str, f: F) -> Option 215 | where 216 | F: FnOnce(&'a str) -> T, 217 | { 218 | if s.is_empty() { 219 | None 220 | } else { 221 | Some(f(s)) 222 | } 223 | } 224 | 225 | impl AdsSubscription { 226 | // TODO: don't return an update if nothing has changed! 
227 | fn sotw_update( 228 | &mut self, 229 | snapshot: &SnapshotCache, 230 | nonce: &mut u64, 231 | rtype: ResourceType, 232 | ) -> Vec { 233 | if snapshot.len(rtype) == 0 { 234 | return Vec::new(); 235 | } 236 | let snapshot_version = snapshot.version(rtype); 237 | let iter = snapshot_iter(rtype, &self.names, snapshot); 238 | let (size_hint, _) = iter.size_hint(); 239 | let mut resources = Vec::with_capacity(size_hint); 240 | 241 | for entry in iter { 242 | self.sent 243 | .insert(entry.key().to_smolstr(), entry.value().version.to_smolstr()); 244 | resources.push(entry.value().proto.clone()); 245 | } 246 | 247 | let nonce = next_nonce(nonce); 248 | self.last_sent_nonce = Some(nonce.clone()); 249 | self.last_sent_version = Some(snapshot_version); 250 | 251 | let version_info = snapshot_version.to_string(); 252 | vec![DiscoveryResponse { 253 | type_url: rtype.type_url().to_string(), 254 | version_info, 255 | nonce: nonce.to_string(), 256 | resources, 257 | ..Default::default() 258 | }] 259 | } 260 | 261 | fn incremental_update( 262 | &mut self, 263 | snapshot: &SnapshotCache, 264 | nonce: &mut u64, 265 | rtype: ResourceType, 266 | ) -> Vec { 267 | // grab the snapshot version ahead of time in case there's a concurrent 268 | // update while we're sending. better to be a little behind than a 269 | // little ahead. 
270 | let snapshot_version = snapshot.version(rtype); 271 | let iter = snapshot_iter(rtype, &self.names, snapshot); 272 | let (size_hint, _) = iter.size_hint(); 273 | 274 | let mut last_nonce = None; 275 | let mut responses = Vec::with_capacity(size_hint); 276 | for entry in iter { 277 | let name = entry.key(); 278 | let resource = entry.value(); 279 | let resource_version = resource.version.to_smolstr(); 280 | 281 | if self.sent.get(name.as_str()) != Some(&resource_version) { 282 | self.sent.insert(entry.key().to_smolstr(), resource_version); 283 | responses.push(DiscoveryResponse { 284 | type_url: rtype.type_url().to_string(), 285 | version_info: snapshot_version.to_string(), 286 | nonce: next_nonce(nonce).to_string(), 287 | resources: vec![resource.proto.clone()], 288 | ..Default::default() 289 | }); 290 | last_nonce = Some(*nonce); 291 | } 292 | } 293 | 294 | if let Some(last_nonce) = last_nonce { 295 | self.last_sent_version = Some(snapshot_version); 296 | self.last_sent_nonce = Some(last_nonce.to_smolstr()); 297 | } 298 | 299 | responses 300 | } 301 | } 302 | 303 | #[inline] 304 | fn next_nonce(nonce: &mut u64) -> SmolStr { 305 | *nonce = nonce.wrapping_add(1); 306 | nonce.to_smolstr() 307 | } 308 | 309 | fn snapshot_iter<'n, 's>( 310 | resource_type: ResourceType, 311 | names: &'n ResourceNames, 312 | snapshot: &'s SnapshotCache, 313 | ) -> SnapshotIter<'n, 's> { 314 | match names { 315 | ResourceNames::EmptyWildcard | ResourceNames::Wildcard(_) => { 316 | SnapshotIter::Wildcard(snapshot.iter(resource_type)) 317 | } 318 | ResourceNames::Explicit(names) => { 319 | SnapshotIter::Explicit(resource_type, names.iter(), snapshot) 320 | } 321 | } 322 | } 323 | 324 | enum SnapshotIter<'n, 's> { 325 | Wildcard(crossbeam_skiplist::map::Iter<'s, String, crate::xds::cache::VersionedProto>), 326 | Explicit( 327 | ResourceType, 328 | std::collections::btree_set::Iter<'n, String>, 329 | &'s SnapshotCache, 330 | ), 331 | } 332 | 333 | impl<'s> Iterator for SnapshotIter<'_, 
's> { 334 | type Item = crate::xds::cache::Entry<'s>; 335 | 336 | #[allow(clippy::while_let_on_iterator)] 337 | fn next(&mut self) -> Option { 338 | match self { 339 | SnapshotIter::Wildcard(entries) => entries.next(), 340 | SnapshotIter::Explicit(rtype, names, snapshot) => { 341 | while let Some(name) = names.next() { 342 | if let Some(entry) = snapshot.get(*rtype, name) { 343 | return Some(entry); 344 | } 345 | } 346 | None 347 | } 348 | } 349 | } 350 | } 351 | 352 | #[cfg(test)] 353 | mod test { 354 | 355 | use crate::xds::{ResourceSnapshot, SnapshotWriter}; 356 | 357 | use super::*; 358 | use xds_api::pb::envoy::config::core::v3::{self as xds_core}; 359 | use xds_api::pb::google::protobuf; 360 | 361 | #[test] 362 | fn test_xds_init_no_data() { 363 | let snapshot = new_snapshot([]); 364 | let node = xds_core::Node { 365 | id: "test-node".to_string(), 366 | ..Default::default() 367 | }; 368 | 369 | // LDS and CDS should respond with no data 370 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 371 | let (_, responses) = conn 372 | .handle_ads_request(discovery_request(ResourceType::Listener, "", "", vec![])) 373 | .unwrap(); 374 | assert!(responses.is_empty()); 375 | 376 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 377 | let (_, responses) = conn 378 | .handle_ads_request(discovery_request(ResourceType::Cluster, "", "", vec![])) 379 | .unwrap(); 380 | assert!(responses.is_empty()); 381 | 382 | // EDS should return nothing 383 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 384 | let (_, responses) = conn 385 | .handle_ads_request(discovery_request( 386 | ResourceType::ClusterLoadAssignment, 387 | "", 388 | "", 389 | vec![], 390 | )) 391 | .unwrap(); 392 | assert!(responses.is_empty(), "EDS returns an no responses"); 393 | } 394 | 395 | #[test] 396 | fn test_xds_init_with_data() { 397 | let node = xds_core::Node { 398 | id: "test-node".to_string(), 399 | ..Default::default() 400 | }; 
401 | 402 | let snapshot = new_snapshot([ 403 | (ResourceType::Listener, vec![(121, "default/nginx")]), 404 | ( 405 | ResourceType::Cluster, 406 | vec![ 407 | (123, "default/nginx/cluster"), 408 | (127, "default/nginx-staging/cluster"), 409 | ], 410 | ), 411 | ( 412 | ResourceType::ClusterLoadAssignment, 413 | vec![ 414 | (124, "default/nginx/endpoints"), 415 | (125, "default/nginx-staging/endpoints"), 416 | ], 417 | ), 418 | ]); 419 | 420 | // LDS should respond with a single message containing one resource 421 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 422 | let (_, resp) = conn 423 | .handle_ads_request(discovery_request(ResourceType::Listener, "", "", vec![])) 424 | .unwrap(); 425 | assert_eq!(resp.len(), 1); 426 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 427 | assert_eq!(resp[0].resources.len(), 1); 428 | 429 | // CDS shoudl respond with a single message containing both resources 430 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 431 | let (_, resp) = conn 432 | .handle_ads_request(discovery_request(ResourceType::Cluster, "", "", vec![])) 433 | .unwrap(); 434 | assert_eq!(resp.len(), 1); 435 | assert_eq!(resp[0].type_url, ResourceType::Cluster.type_url()); 436 | assert_eq!(resp[0].resources.len(), 2); 437 | 438 | // EDS should only fetch the requested resource 439 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 440 | let (_, resp) = conn 441 | .handle_ads_request(discovery_request( 442 | ResourceType::ClusterLoadAssignment, 443 | "", 444 | "", 445 | vec!["default/nginx/endpoints", "default/nginx-staging/endpoints"], 446 | )) 447 | .unwrap(); 448 | assert_eq!(resp.len(), 2); 449 | assert_eq!( 450 | resp[0].type_url, 451 | ResourceType::ClusterLoadAssignment.type_url() 452 | ); 453 | assert_eq!(resp[0].resources.len(), 1); 454 | } 455 | 456 | #[test] 457 | fn test_lds_ack() { 458 | let node = xds_core::Node { 459 | id: "test-node".to_string(), 460 | 
..Default::default() 461 | }; 462 | 463 | let snapshot = new_snapshot([ 464 | (ResourceType::Listener, vec![(121, "default/nginx")]), 465 | ( 466 | ResourceType::Cluster, 467 | vec![ 468 | (123, "default/nginx/cluster"), 469 | (127, "default/nginx-staging/cluster"), 470 | ], 471 | ), 472 | ( 473 | ResourceType::ClusterLoadAssignment, 474 | vec![ 475 | (125, "default/nginx-staging/endpoints"), 476 | (124, "default/nginx/endpoints"), 477 | ], 478 | ), 479 | ]); 480 | 481 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 482 | let (_, resp) = conn 483 | .handle_ads_request(discovery_request(ResourceType::Listener, "", "", vec![])) 484 | .unwrap(); 485 | assert_eq!(resp.len(), 1); 486 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 487 | assert_eq!(resp[0].resources.len(), 1); 488 | 489 | // handle an ACK 490 | let (_, resp) = conn 491 | .handle_ads_request(discovery_ack( 492 | ResourceType::Listener, 493 | &resp[0].version_info, 494 | &resp[0].nonce, 495 | vec![], 496 | )) 497 | .unwrap(); 498 | assert!(resp.is_empty()); 499 | 500 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 501 | assert!( 502 | matches!( 503 | sub, 504 | AdsSubscription { 505 | last_ack_version: Some(_), 506 | last_ack_nonce: Some(_), 507 | applied: true, 508 | .. 
509 | }, 510 | ), 511 | "should track the ACK in the subscription: sub={sub:?}", 512 | ); 513 | } 514 | 515 | #[test] 516 | fn test_lds_nack() { 517 | let node = xds_core::Node { 518 | id: "test-node".to_string(), 519 | ..Default::default() 520 | }; 521 | 522 | let snapshot = new_snapshot([ 523 | (ResourceType::Listener, vec![(121, "default/nginx")]), 524 | ( 525 | ResourceType::Cluster, 526 | vec![ 527 | (123, "default/nginx/cluster"), 528 | (127, "default/nginx-staging/cluster"), 529 | ], 530 | ), 531 | ( 532 | ResourceType::ClusterLoadAssignment, 533 | vec![ 534 | (125, "default/nginx-staging/endpoints"), 535 | (124, "default/nginx/endpoints"), 536 | ], 537 | ), 538 | ]); 539 | 540 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 541 | let (_, resp) = conn 542 | .handle_ads_request(discovery_request(ResourceType::Listener, "", "", vec![])) 543 | .unwrap(); 544 | assert_eq!(resp.len(), 1); 545 | assert_eq!(resp[0].type_url, ResourceType::Listener.type_url()); 546 | assert_eq!(resp[0].resources.len(), 1); 547 | 548 | // handle an ACK 549 | let (_, resp) = conn 550 | .handle_ads_request(discovery_nack( 551 | ResourceType::Listener, 552 | &resp[0].version_info, 553 | &resp[0].nonce, 554 | vec![], 555 | "you can't cut back on funding, you will regret this", 556 | )) 557 | .unwrap(); 558 | assert!(resp.is_empty()); 559 | 560 | let sub = conn.subscriptions[ResourceType::Listener].as_ref().unwrap(); 561 | assert!( 562 | matches!( 563 | sub, 564 | AdsSubscription { 565 | last_ack_version: None, 566 | last_ack_nonce: None, 567 | applied: false, 568 | .. 
569 | } 570 | ), 571 | "should track the NACK in the subscription: sub={sub:?}", 572 | ); 573 | } 574 | 575 | #[test] 576 | fn test_cds_handle_ack_as_update() { 577 | let node = xds_core::Node { 578 | id: "test-node".to_string(), 579 | ..Default::default() 580 | }; 581 | 582 | let snapshot = new_snapshot([ 583 | (ResourceType::Listener, vec![(121, "default/nginx")]), 584 | ( 585 | ResourceType::Cluster, 586 | vec![ 587 | (123, "default/nginx/cluster"), 588 | (127, "default/nginx-staging/cluster"), 589 | ], 590 | ), 591 | ( 592 | ResourceType::ClusterLoadAssignment, 593 | vec![ 594 | (125, "default/nginx-staging/endpoints"), 595 | (124, "default/nginx/endpionts"), 596 | ], 597 | ), 598 | ]); 599 | 600 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 601 | let (_, resp) = conn 602 | .handle_ads_request(discovery_request(ResourceType::Cluster, "", "", vec![])) 603 | .unwrap(); 604 | assert_eq!(resp.len(), 1); 605 | assert_eq!(resp[0].type_url, ResourceType::Cluster.type_url()); 606 | assert_eq!(resp[0].resources.len(), 2); 607 | 608 | // first ACK changes the subscriptions 609 | // 610 | // this should generate a new response, because Clusters are SoTW for 611 | // update and need to send a response that removes one of the resources. 612 | let (_, resp) = conn 613 | .handle_ads_request(discovery_ack( 614 | ResourceType::Cluster, 615 | &resp[0].version_info, 616 | &resp[0].nonce, 617 | vec!["default/nginx-staging/cluster"], 618 | )) 619 | .unwrap(); 620 | assert_eq!(resp.len(), 1, "should send back a SotW response"); 621 | assert_eq!(resp[0].type_url, ResourceType::Cluster.type_url()); 622 | assert_eq!( 623 | resp[0].resources.len(), 624 | 1, 625 | "response should include a single cluster" 626 | ); 627 | 628 | let sub = conn.subscriptions[ResourceType::Cluster].as_ref().unwrap(); 629 | assert!( 630 | matches!( 631 | sub, 632 | AdsSubscription { 633 | last_ack_version: Some(_), 634 | last_ack_nonce: Some(_), 635 | applied: true, 636 | .. 
637 | }, 638 | ), 639 | "should track the ACK in the subscription: sub={sub:?}", 640 | ); 641 | 642 | // second ACK shouldn't generate anything else, there's nothing to do 643 | // because the subscription hasn't changed. 644 | let (_, resp) = conn 645 | .handle_ads_request(discovery_ack( 646 | ResourceType::Cluster, 647 | &resp[0].version_info, 648 | &resp[0].nonce, 649 | vec!["default/nginx-staging/cluster"], 650 | )) 651 | .unwrap(); 652 | assert!(resp.is_empty()); 653 | } 654 | 655 | #[test] 656 | fn test_eds_ack() { 657 | let node = xds_core::Node { 658 | id: "test-node".to_string(), 659 | ..Default::default() 660 | }; 661 | 662 | let snapshot = new_snapshot([( 663 | ResourceType::ClusterLoadAssignment, 664 | vec![ 665 | (123, "default/nginx/endpoints"), 666 | (124, "default/something-else/endpoints"), 667 | ], 668 | )]); 669 | 670 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 671 | let (_, resp) = conn 672 | .handle_ads_request(discovery_request( 673 | ResourceType::ClusterLoadAssignment, 674 | "", 675 | "", 676 | vec!["default/nginx/endpoints"], 677 | )) 678 | .unwrap(); 679 | 680 | // should return a single response for the resource in cache 681 | assert_eq!(resp.len(), 1); 682 | assert_eq!( 683 | resp[0].type_url, 684 | ResourceType::ClusterLoadAssignment.type_url(), 685 | "should be an EDS resource", 686 | ); 687 | assert_eq!( 688 | resp[0].version_info, 689 | snapshot 690 | .version(ResourceType::ClusterLoadAssignment) 691 | .to_string(), 692 | ); 693 | let next_version = &resp[0].version_info; 694 | let next_nonce = &resp[0].nonce; 695 | 696 | // when the client ACKs the first response, it shouldn't change the state of the connection 697 | let (_, resp) = conn 698 | .handle_ads_request(discovery_request( 699 | ResourceType::ClusterLoadAssignment, 700 | next_version, 701 | next_nonce, 702 | vec!["default/nginx/endpoints"], 703 | )) 704 | .unwrap(); 705 | 706 | assert!(resp.is_empty()); 707 | 708 | let sub = 
conn.subscriptions[ResourceType::ClusterLoadAssignment] 709 | .as_ref() 710 | .unwrap(); 711 | assert_eq!( 712 | sub.last_sent_nonce, 713 | Some(next_nonce.to_smolstr()), 714 | "nonce should not change" 715 | ); 716 | } 717 | 718 | #[test] 719 | fn test_eds_update_remove_subscription() { 720 | let node = xds_core::Node { 721 | id: "test-node".to_string(), 722 | ..Default::default() 723 | }; 724 | 725 | let snapshot = new_snapshot([ 726 | (ResourceType::Listener, vec![(121, "default/nginx")]), 727 | ( 728 | ResourceType::Cluster, 729 | vec![ 730 | (123, "default/nginx/cluster"), 731 | (127, "default/nginx-staging/cluster"), 732 | ], 733 | ), 734 | ( 735 | ResourceType::ClusterLoadAssignment, 736 | vec![ 737 | (125, "default/nginx-staging/endpoints"), 738 | (124, "default/nginx/endpionts"), 739 | ], 740 | ), 741 | ]); 742 | 743 | // Initial EDS connection should return a a message for each EDS resource. 744 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 745 | let (_, resp) = conn 746 | .handle_ads_request(discovery_request( 747 | ResourceType::ClusterLoadAssignment, 748 | "", 749 | "", 750 | vec![], 751 | )) 752 | .unwrap(); 753 | 754 | assert_eq!(resp.len(), 2); 755 | assert!( 756 | resp.iter() 757 | .all(|msg| msg.type_url == ResourceType::ClusterLoadAssignment.type_url()), 758 | "should be EDS resources", 759 | ); 760 | assert!( 761 | resp.iter().all(|msg| msg.resources.len() == 1), 762 | "should contain a single response", 763 | ); 764 | let last_version = &resp[1].version_info; 765 | let last_nonce = &resp[1].nonce; 766 | 767 | // first ACK changes the subscriptions. shouldn't generate a response because 768 | // EDS doesn't require full sotw updates. 
769 | let (_, resp) = conn 770 | .handle_ads_request(discovery_ack( 771 | ResourceType::ClusterLoadAssignment, 772 | last_version, 773 | last_nonce, 774 | vec!["default/nginx-staging/endpoints"], 775 | )) 776 | .unwrap(); 777 | assert_eq!(resp.len(), 0, "nothing has changed, shouldn't do anything"); 778 | 779 | let sub = conn.subscriptions[ResourceType::ClusterLoadAssignment] 780 | .as_ref() 781 | .unwrap(); 782 | assert!( 783 | matches!( 784 | sub, 785 | AdsSubscription { 786 | names: ResourceNames::Explicit(_), 787 | last_ack_version: Some(_), 788 | last_ack_nonce: Some(_), 789 | applied: true, 790 | .. 791 | } 792 | ), 793 | "should track the ACK in the subscription: sub={sub:?}", 794 | ); 795 | assert_eq!( 796 | sub.last_sent_nonce, 797 | Some(last_nonce.to_smolstr()), 798 | "should still have the last nonce: sub={sub:#?}", 799 | ); 800 | 801 | // second ACK shouldn't generate anything else, there's nothing to do 802 | // because the subscription hasn't changed. 803 | // 804 | // connection state shouldn't change 805 | let (_, resp) = conn 806 | .handle_ads_request(discovery_ack( 807 | ResourceType::ClusterLoadAssignment, 808 | last_version, 809 | last_nonce, 810 | vec!["default/nginx-staging/endpoints"], 811 | )) 812 | .unwrap(); 813 | assert!(resp.is_empty()); 814 | 815 | let sub = conn.subscriptions[ResourceType::ClusterLoadAssignment] 816 | .as_ref() 817 | .unwrap(); 818 | assert_eq!(sub.last_sent_nonce, Some(last_nonce.to_smolstr())); 819 | } 820 | 821 | #[test] 822 | fn test_eds_update_add_subscription() { 823 | let node = xds_core::Node { 824 | id: "test-node".to_string(), 825 | ..Default::default() 826 | }; 827 | 828 | let snapshot = new_snapshot([ 829 | (ResourceType::Listener, vec![(121, "default/nginx")]), 830 | ( 831 | ResourceType::Cluster, 832 | vec![ 833 | (123, "default/nginx/cluster"), 834 | (127, "default/nginx-staging/cluster"), 835 | ], 836 | ), 837 | ( 838 | ResourceType::ClusterLoadAssignment, 839 | vec![ 840 | (125, 
"default/nginx-staging/endpoints"), 841 | (124, "default/nginx/endpoints"), 842 | ], 843 | ), 844 | ]); 845 | 846 | let mut conn = AdsConnection::test_new(node.clone(), snapshot.clone()); 847 | let (_, resp) = conn 848 | .handle_ads_request(discovery_request( 849 | ResourceType::ClusterLoadAssignment, 850 | "", 851 | "", 852 | vec!["default/nginx/endpoints"], 853 | )) 854 | .unwrap(); 855 | 856 | // should return a single response 857 | assert_eq!(resp.len(), 1); 858 | assert!( 859 | resp.iter() 860 | .all(|msg| msg.type_url == ResourceType::ClusterLoadAssignment.type_url()), 861 | "should be EDS resources", 862 | ); 863 | assert!( 864 | resp.iter().all(|msg| msg.resources.len() == 1), 865 | "should contain a single response", 866 | ); 867 | 868 | let next_version = &resp[0].version_info; 869 | let next_nonce = &resp[0].nonce; 870 | 871 | // should ignore a stale incoming request 872 | let (_, resp) = conn 873 | .handle_ads_request(discovery_ack( 874 | ResourceType::ClusterLoadAssignment, 875 | "", 876 | "", 877 | vec![ 878 | "default/nginx-staging/endpoints", 879 | "default/nginx/endpoints", 880 | "stale/stale/stale", 881 | "stale/staler/stalest", 882 | ], 883 | )) 884 | .unwrap(); 885 | assert!(resp.is_empty()); 886 | 887 | // accept the ACK and generate no respones. last_sent_nonce shouldn't change or anything. 888 | let (_, resp) = conn 889 | .handle_ads_request(discovery_ack( 890 | ResourceType::ClusterLoadAssignment, 891 | next_version, 892 | next_nonce, 893 | vec!["default/nginx/endpoints"], 894 | )) 895 | .unwrap(); 896 | assert!(resp.is_empty()); 897 | assert_eq!( 898 | conn.subscriptions[ResourceType::ClusterLoadAssignment] 899 | .as_ref() 900 | .unwrap() 901 | .last_sent_nonce, 902 | Some(next_nonce.to_smolstr()) 903 | ); 904 | 905 | // should handle the next request as a subscription update. incremental 906 | // means returning only the data for new names. 
let (_, resp) = conn
    .handle_ads_request(discovery_ack(
        ResourceType::ClusterLoadAssignment,
        next_version,
        next_nonce,
        vec!["default/nginx-staging/endpoints", "default/nginx/endpoints"],
    ))
    .unwrap();
assert_eq!(resp.len(), 1);
assert!(
    resp.iter()
        .all(|msg| msg.type_url == ResourceType::ClusterLoadAssignment.type_url()),
    "should be EDS resources",
);
assert!(
    resp.iter().all(|msg| msg.resources.len() == 1),
    "should contain a single response",
);
}

/// An ACK of the first EDS response must produce no new responses and must
/// leave `last_sent_nonce` untouched, even when the snapshot has been updated
/// (with a resource this connection never asked for) between the response and
/// the ACK.
#[test]
fn test_eds_snapshot_update_during_ack() {
    let node = xds_core::Node {
        id: "test-node".to_string(),
        ..Default::default()
    };

    let (cache, mut writer) = new_snapshot_with_writer([
        (ResourceType::Listener, vec![(123, "default/nginx")]),
        (
            ResourceType::Cluster,
            vec![
                (123, "default/nginx/cluster"),
                (123, "default/nginx-staging/cluster"),
            ],
        ),
        (
            ResourceType::ClusterLoadAssignment,
            vec![(127, "default/nginx/endpoints")],
        ),
    ]);

    let mut conn = AdsConnection::test_new(node.clone(), cache.clone());
    let (_, resp) = conn
        .handle_ads_request(discovery_request(
            ResourceType::ClusterLoadAssignment,
            "",
            "",
            vec!["default/nginx/endpoints"],
        ))
        .unwrap();

    // the one subscribed resource is already in the cache, so exactly one
    // EDS response comes back
    assert_eq!(resp.len(), 1);
    assert_eq!(
        resp[0].type_url,
        ResourceType::ClusterLoadAssignment.type_url(),
        "should be an EDS resource",
    );
    let next_version = &resp[0].version_info;
    let next_nonce = &resp[0].nonce;

    // publish a new snapshot that adds an endpoint the connection is not
    // subscribed to
    let mut update = ResourceSnapshot::new();
    update.insert_update(
        ResourceType::ClusterLoadAssignment,
        "some-endpoints".to_string(),
        anything(),
    );
    writer.update(update);

    // when the client ACKs the first response, it shouldn't change the state
    // of the connection
    let (_, resp) = conn
        .handle_ads_request(discovery_ack(
            ResourceType::ClusterLoadAssignment,
            next_version,
            next_nonce,
            vec!["default/nginx/endpoints"],
        ))
        .unwrap();

    assert!(resp.is_empty());

    let sub = conn.subscriptions[ResourceType::ClusterLoadAssignment]
        .as_ref()
        .unwrap();
    assert_eq!(
        sub.last_sent_nonce,
        Some(next_nonce.to_smolstr()),
        "nonce should not change"
    );
}

/// Builds a populated [`SnapshotCache`] for tests, discarding the writer.
///
/// See [`new_snapshot_with_writer`] for the meaning of `data`.
// NOTE(review): the `Item` type was restored from how the body uses it; confirm
// the `'static` lifetime against the original source.
fn new_snapshot(
    data: impl IntoIterator<Item = (ResourceType, Vec<(u64, &'static str)>)>,
) -> SnapshotCache {
    let (cache, _writer) = new_snapshot_with_writer(data);
    cache
}

/// Builds a [`SnapshotCache`] together with its [`SnapshotWriter`], pre-loaded
/// with one placeholder resource (see [`anything`]) per `(version, name)` pair.
///
/// For each resource type the pairs are sorted by their `u64` version before
/// insertion. The version numbers are not passed to the snapshot; they only
/// decide the order in which the names are inserted.
// NOTE(review): the `Item` type was restored from how the body uses it; confirm
// the `'static` lifetime against the original source.
fn new_snapshot_with_writer(
    data: impl IntoIterator<Item = (ResourceType, Vec<(u64, &'static str)>)>,
) -> (SnapshotCache, SnapshotWriter) {
    let mut snapshot = ResourceSnapshot::new();
    for (rtype, mut names) in data {
        names.sort_by_key(|(version, _)| *version);
        for (_version, name) in names {
            snapshot.insert_update(rtype, name.to_string(), anything());
        }
    }

    let (cache, mut writer) = crate::xds::snapshot([]);
    writer.update(snapshot);

    (cache, writer)
}

/// Builds a `DiscoveryRequest` for `rtype` naming `names`, with the given
/// `version_info` and `response_nonce`. All other fields are left at their
/// defaults.
fn discovery_request(
    rtype: ResourceType,
    version_info: &str,
    response_nonce: &str,
    names: Vec<&'static str>,
) -> DiscoveryRequest {
    // A request and an ACK are built from exactly the same fields; the two
    // helpers exist so that call sites can state their intent.
    discovery_ack(rtype, version_info, response_nonce, names)
}

/// Builds a `DiscoveryRequest` that echoes `version_info` and `response_nonce`
/// back for `rtype`, naming `names`, with no `error_detail` set.
fn discovery_ack(
    rtype: ResourceType,
    version_info: &str,
    response_nonce: &str,
    names: Vec<&str>,
) -> DiscoveryRequest {
    DiscoveryRequest {
        type_url: rtype.type_url().to_string(),
        resource_names: names.into_iter().map(|name| name.to_string()).collect(),
        version_info: version_info.to_string(),
        response_nonce: response_nonce.to_string(),
        ..Default::default()
    }
}

/// Builds the same request as [`discovery_ack`], plus an `error_detail` status
/// carrying `InvalidArgument` and the given message.
fn discovery_nack(
    rtype: ResourceType,
    version_info: &str,
    response_nonce: &str,
    names: Vec<&str>,
    error_detail: &str,
) -> DiscoveryRequest {
    DiscoveryRequest {
        error_detail: Some(xds_api::pb::google::rpc::Status {
            code: tonic::Code::InvalidArgument.into(),
            message: error_detail.to_string(),
            ..Default::default()
        }),
        ..discovery_ack(rtype, version_info, response_nonce, names)
    }
}

/// Returns a placeholder `Any` with a dummy type URL and an empty payload, for
/// tests that only care about resource names.
fn anything() -> protobuf::Any {
    protobuf::Any {
        type_url: "type_url".to_string(),
        value: vec![],
    }
}
}
--------------------------------------------------------------------------------