├── .codespell.ignorewords ├── .codespell.skip ├── .editorconfig ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature_request.md │ ├── non--crash-security--bug.md │ └── other.md ├── PULL_REQUEST_TEMPLATE.md ├── dependabot.yml └── workflows │ ├── docker_builds_template.yaml │ ├── pr_style_check.yaml │ ├── release.yaml │ ├── style.yaml │ ├── tests.yaml │ └── trivy.yaml ├── .gitignore ├── .golangci.yml ├── .licenserc.yaml ├── .testcoverage.yml ├── .trivyignore ├── .yamllint ├── .yamllint.ignore ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── Dockerfile ├── GOALS.md ├── LICENSE ├── Makefile ├── Makefile.tools.mk ├── README.md ├── RELEASES.md ├── api └── v1alpha1 │ ├── api.go │ ├── doc.go │ ├── registry.go │ ├── status.go │ └── zz_generated.deepcopy.go ├── cmd ├── aigw │ ├── ai-gateway-default-resources.yaml │ ├── envoy-gateway-config.yaml │ ├── main.go │ ├── main_test.go │ ├── run.go │ ├── run_test.go │ ├── testdata │ │ ├── translate_basic.in.yaml │ │ ├── translate_basic.out.yaml │ │ └── translate_nonairesources.yaml │ ├── translate.go │ └── translate_test.go ├── controller │ ├── main.go │ └── main_test.go └── extproc │ ├── main.go │ └── mainlib │ ├── main.go │ └── main_test.go ├── docs └── proposals │ ├── 001-ai-gateway-proposal │ ├── control_plane.png │ ├── data_plane.png │ └── proposal.md │ └── 002-inference-gateway-support │ └── proposal.md ├── examples ├── basic │ ├── README.md │ └── basic.yaml ├── extproc_custom_metrics │ ├── README.md │ └── main.go ├── extproc_custom_router │ ├── README.md │ └── main.go ├── monitoring │ ├── README.md │ └── monitoring.yaml ├── provider_fallback │ ├── README.md │ ├── base.yaml │ └── fallback.yaml └── token_ratelimit │ ├── README.md │ └── token_ratelimit.yaml ├── filterapi ├── filterconfig.go ├── filterconfig_test.go └── x │ └── x.go ├── go.mod ├── go.sum ├── internal ├── apischema │ ├── awsbedrock │ │ └── awsbedrock.go │ └── openai │ │ ├── openai.go │ │ └── openai_test.go ├── controller │ ├── 
ai_gateway_route.go │ ├── ai_gateway_route_test.go │ ├── ai_service_backend.go │ ├── ai_service_backend_test.go │ ├── backend_security_policy.go │ ├── backend_security_policy_test.go │ ├── controller.go │ ├── controller_test.go │ ├── gateway.go │ ├── gateway_mutator.go │ ├── gateway_mutator_test.go │ ├── gateway_test.go │ ├── rotators │ │ ├── aws_common.go │ │ ├── aws_common_test.go │ │ ├── aws_oidc_rotator.go │ │ ├── aws_oidc_rotator_test.go │ │ ├── azure_token_rotator.go │ │ ├── azure_token_rotator_test.go │ │ ├── common.go │ │ └── common_test.go │ ├── secret.go │ ├── secret_test.go │ └── tokenprovider │ │ ├── azure_client_secret_token_provider.go │ │ ├── azure_client_secret_token_provider_test.go │ │ ├── azure_token_provider.go │ │ ├── azure_token_provider_test.go │ │ ├── oidc_token_provider.go │ │ ├── oidc_token_provider_test.go │ │ ├── token_provider.go │ │ ├── token_provider_test.go │ │ ├── util.go │ │ └── util_test.go ├── extensionserver │ ├── extensionserver.go │ └── extensionserver_test.go ├── extproc │ ├── backendauth │ │ ├── api_key.go │ │ ├── api_key_test.go │ │ ├── auth.go │ │ ├── auth_test.go │ │ ├── aws.go │ │ ├── aws_test.go │ │ ├── azure.go │ │ └── azure_test.go │ ├── chatcompletion_processor.go │ ├── chatcompletion_processor_test.go │ ├── mocks_test.go │ ├── models_processor.go │ ├── models_processor_test.go │ ├── processor.go │ ├── processor_test.go │ ├── router │ │ ├── router.go │ │ └── router_test.go │ ├── server.go │ ├── server_test.go │ ├── translator │ │ ├── openai_awsbedrock.go │ │ ├── openai_awsbedrock_test.go │ │ ├── openai_azureopenai.go │ │ ├── openai_azureopenai_test.go │ │ ├── openai_openai.go │ │ ├── openai_openai_test.go │ │ ├── translator.go │ │ └── translator_test.go │ ├── watcher.go │ └── watcher_test.go ├── llmcostcel │ ├── cel.go │ └── cel_test.go ├── metrics │ ├── chat_completion_metrics.go │ ├── chat_completion_metrics_test.go │ └── genai.go ├── testing │ ├── credentials_context.go │ └── eventchan.go └── version │ └── 
version.go ├── manifests ├── charts │ ├── ai-gateway-crds-helm │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── aigateway.envoyproxy.io_aigatewayroutes.yaml │ │ │ ├── aigateway.envoyproxy.io_aiservicebackends.yaml │ │ │ └── aigateway.envoyproxy.io_backendsecuritypolicies.yaml │ │ └── values.yaml │ └── ai-gateway-helm │ │ ├── .helmignore │ │ ├── Chart.yaml │ │ ├── templates │ │ ├── NOTES.txt │ │ ├── _helpers.tpl │ │ ├── admission_webhook.yaml │ │ ├── deployment.yaml │ │ ├── service.yaml │ │ └── serviceaccount.yaml │ │ └── values.yaml └── envoy-gateway-config │ ├── config.yaml │ ├── rbac.yaml │ └── redis.yaml ├── netlify.toml ├── pre-commit.sh ├── site ├── README.md ├── blog │ ├── 2024-10-18-introducing-envoy-ai-gw.md │ ├── 2024-11-14-kubecon-end-user-keynote.md │ ├── 2025-02-25-first-ai-gw-release.md │ ├── authors.yml │ └── tags.yml ├── crd-ref-docs │ ├── config-core.yaml │ └── templates │ │ ├── README │ │ ├── gv_details.tpl │ │ ├── gv_list.tpl │ │ ├── type.tpl │ │ └── type_members.tpl ├── docs │ ├── api │ │ └── api.mdx │ ├── capabilities │ │ ├── fallback.md │ │ ├── index.md │ │ ├── metrics.md │ │ ├── upstream-auth.mdx │ │ └── usage-based-ratelimiting.md │ ├── cli │ │ ├── index.md │ │ ├── installation.md │ │ ├── run.md │ │ └── translate.md │ ├── concepts │ │ ├── architecture │ │ │ ├── control-plane.md │ │ │ ├── data-plane.md │ │ │ ├── index.md │ │ │ └── system-architecture.md │ │ ├── index.md │ │ └── resources.md │ ├── getting-started │ │ ├── basic-usage.md │ │ ├── connect-providers │ │ │ ├── aws-bedrock.md │ │ │ ├── azure-openai.md │ │ │ ├── index.md │ │ │ └── openai.md │ │ ├── index.md │ │ ├── installation.md │ │ └── prerequisites.md │ ├── index.md │ └── terminology.md ├── docusaurus.config.ts ├── package-lock.json ├── package.json ├── sidebars.ts ├── src │ ├── components │ │ ├── ApiField.tsx │ │ └── HomepageFeatures │ │ │ ├── index.tsx │ │ │ └── styles.module.css │ ├── css │ │ └── custom.css │ ├── pages │ │ ├── index.tsx │ │ └── release-notes.md │ └── theme │ │ └── 
MDXComponents.tsx ├── static │ ├── .nojekyll │ ├── diagrams │ │ ├── upstream-auth.drawio │ │ └── upstream-auth.png │ └── img │ │ ├── 1.png │ │ ├── 2.png │ │ ├── 3.png │ │ ├── ai-gateway.svg │ │ ├── ai-gw-logo.svg │ │ ├── blog │ │ └── 0.1-release-image.png │ │ ├── control_plane.png │ │ ├── data_plane.png │ │ ├── favicon.ico │ │ ├── logo-white.svg │ │ ├── logo.svg │ │ └── social-card-envoy-ai-gw.png ├── tsconfig.json ├── versioned_docs │ └── version-0.1 │ │ ├── api │ │ └── api.mdx │ │ ├── capabilities │ │ ├── index.md │ │ └── usage-based-ratelimiting.md │ │ ├── concepts │ │ ├── architecture │ │ │ ├── control-plane.md │ │ │ ├── data-plane.md │ │ │ ├── index.md │ │ │ └── system-architecture.md │ │ ├── index.md │ │ └── resources.md │ │ ├── getting-started │ │ ├── basic-usage.md │ │ ├── connect-providers │ │ │ ├── aws-bedrock.md │ │ │ ├── index.md │ │ │ └── openai.md │ │ ├── index.md │ │ ├── installation.md │ │ └── prerequisites.md │ │ ├── index.md │ │ └── terminology.md ├── versioned_sidebars │ └── version-0.1-sidebars.json └── versions.json └── tests ├── README.md ├── controller └── controller_test.go ├── crdcel ├── main_test.go └── testdata │ ├── aigatewayroutes │ ├── basic.yaml │ ├── llmcosts.yaml │ ├── no_target_refs.yaml │ ├── non_openai_schema.yaml │ ├── unknown_schema.yaml │ └── unsupported_match.yaml │ ├── aiservicebackends │ ├── basic-eg-backend-aws.yaml │ ├── basic-eg-backend-azure.yaml │ ├── basic.yaml │ └── unknown_schema.yaml │ └── backendsecuritypolicies │ ├── aws_credential_file.yaml │ ├── aws_oidc.yaml │ ├── azure_credentials_missing_client_id.yaml │ ├── azure_credentials_missing_tenant_id.yaml │ ├── azure_missing_auth.yaml │ ├── azure_multiple_auth.yaml │ ├── azure_oidc.yaml │ ├── azure_valid_credentials.yaml │ ├── basic.yaml │ ├── missing_type.yaml │ ├── multiple_security_policies.yaml │ └── unknown_provider.yaml ├── e2e ├── basic_test.go ├── e2e_test.go ├── init │ └── testupstream │ │ └── manifest.yaml ├── provider_fallback_test.go ├── testdata │ └── 
translation_testupstream.yaml ├── token_ratelimit_test.go └── translation_testupstream_test.go ├── extproc ├── custom_extproc_test.go ├── envoy.yaml ├── extproc_test.go ├── real_providers_test.go ├── testdata │ ├── server.crt │ └── server.key └── testupstream_test.go └── internal ├── envtest.go └── testupstreamlib ├── testupstream.go └── testupstream ├── main.go └── main_test.go /.codespell.ignorewords: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/.codespell.ignorewords -------------------------------------------------------------------------------- /.codespell.skip: -------------------------------------------------------------------------------- 1 | ./manifests/charts/ai-gateway-helm/crds/* 2 | ./site/package-lock.json 3 | ./site/package.json 4 | ./site/node_modules/* 5 | ./site/build/* 6 | ./site/npm-debug.log* 7 | ./site/yarn-debug.log* 8 | ./site/yarn-error.log* 9 | ./go.mod 10 | ./go.sum 11 | ./tests/e2e/logs 12 | *_for_tests.yaml 13 | ./tests/extproc/testdata/server.* 14 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | trim_trailing_whitespace = true 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: "Crash bug" 4 | url: https://github.com/envoyproxy/envoy/security/policy 5 | about: "Please file any crash bug with envoy-security@googlegroups.com." 
6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: enhancement,triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | *Description*: 11 | >Describe the desired behavior, what scenario it enables and how it 12 | would be used. 13 | 14 | [optional *Relevant Links*:] 15 | >Any extra documentation required to understand the issue. 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/non--crash-security--bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug 3 | about: Bugs 4 | title: '' 5 | labels: bug,triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | *Description*: 11 | >What issue is being seen? Describe what should be happening instead of 12 | the bug, for example: Envoy should not crash, the expected value isn't 13 | returned, etc. 14 | 15 | *Repro steps*: 16 | > Include sample requests, environment, etc. All data and inputs 17 | required to reproduce the bug. 18 | 19 | >**Note**: If there are privacy concerns, sanitize the data prior to 20 | sharing. 21 | 22 | *Environment*: 23 | >Include the environment like gateway version, envoy version and so on. 24 | 25 | *Logs*: 26 | >Include the access logs and the Envoy logs. 27 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/other.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Other 3 | about: Questions, design proposals, tech debt, etc. 4 | title: '' 5 | labels: triage 6 | assignees: '' 7 | 8 | --- 9 | 10 | *Description*: 11 | >Describe the issue. 12 | 13 | [optional *Relevant Links*:] 14 | >Any extra documentation required to understand the issue. 
15 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **Commit Message** 2 | 3 | 13 | 14 | **Related Issues/PRs (if applicable)** 15 | 16 | 25 | 26 | **Special notes for reviewers (if applicable)** 27 | 28 | 36 | 37 | 38 | 59 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: npm 9 | directory: site 10 | schedule: 11 | interval: weekly 12 | 13 | - package-ecosystem: gomod 14 | directories: 15 | - "/" 16 | schedule: 17 | interval: weekly 18 | groups: 19 | k8s.io: 20 | patterns: 21 | - "k8s.io/*" 22 | golang.org: 23 | patterns: 24 | - "golang.org/*" 25 | ignore: 26 | - dependency-name: "github.com/envoyproxy/gateway" 27 | -------------------------------------------------------------------------------- /.github/workflows/docker_builds_template.yaml: -------------------------------------------------------------------------------- 1 | name: Docker Builds Template 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | docker_builds: 8 | name: ${{ matrix.command_name }} 9 | runs-on: ubuntu-latest 10 | strategy: 11 | matrix: 12 | include: 13 | - command_name: "controller" 14 | cmd_path_prefix: cmd 15 | - command_name: "extproc" 16 | cmd_path_prefix: cmd 17 | - command_name: "testupstream" 18 | cmd_path_prefix: tests/internal/testupstreamlib 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: 
actions/setup-go@v5 22 | with: 23 | cache: false 24 | go-version-file: go.mod 25 | - uses: actions/cache@v4 26 | with: 27 | path: | 28 | ~/.cache/go-build 29 | ~/.cache/golangci-lint 30 | ~/go/pkg/mod 31 | ~/go/bin 32 | key: build-container-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }} 33 | 34 | - uses: docker/setup-buildx-action@v3 35 | 36 | - name: Set up QEMU 37 | uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 38 | 39 | - name: Set up Docker buildx 40 | id: buildx 41 | uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1 42 | 43 | - name: Login into DockerHub 44 | uses: docker/login-action@v3 45 | with: 46 | username: ${{ vars.DOCKERHUB_USERNAME }} 47 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 48 | 49 | # Push images for the push events, e.g. when a new tag is pushed as well as PR merges. 50 | # * Only use the tag if the event is a tag event, otherwise use "latest". 51 | # * Build for both amd64 and arm64 platforms. 52 | - name: Build and Push Image 53 | run: | 54 | if [[ "$GITHUB_REF" == refs/tags/* ]]; then 55 | TAG="${GITHUB_REF#refs/tags/}" 56 | else 57 | TAG="latest" 58 | fi 59 | make docker-build.${{ matrix.command_name }} CMD_PATH_PREFIX=${{ matrix.cmd_path_prefix }} ENABLE_MULTI_PLATFORMS=true TAG=$TAG DOCKER_BUILD_ARGS="--push" 60 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - 'v[0-9]+.[0-9]+.[0-9]+**' # Ex. 
v0.2.0 v0.2.1-rc2 6 | 7 | jobs: 8 | docker_push: 9 | name: Push Docker Images 10 | uses: ./.github/workflows/docker_builds_template.yaml 11 | secrets: inherit 12 | 13 | release: 14 | needs: [docker_push] 15 | name: Release 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Set HELM_CHART_VERSION and TAG envs 19 | run: | 20 | TAG="${GITHUB_REF#refs/tags/}" 21 | echo "HELM_CHART_VERSION=${TAG}" >> $GITHUB_ENV 22 | echo "HELM_CHART_VERSION_WITHOUT_V=$(echo ${TAG#v})" >> $GITHUB_ENV 23 | echo "TAG=${TAG}" >> $GITHUB_ENV 24 | 25 | # To include the helm chart in the release artifact, we build and push it here instead of the separate job. 26 | - uses: actions/checkout@v4 27 | - name: Login into DockerHub 28 | uses: docker/login-action@v3 29 | with: 30 | username: ${{ vars.DOCKERHUB_USERNAME }} 31 | password: ${{ secrets.DOCKERHUB_PASSWORD }} 32 | - name: Push Helm chart 33 | run: | 34 | make helm-push HELM_CHART_VERSION=${HELM_CHART_VERSION} 35 | make helm-push HELM_CHART_VERSION=${HELM_CHART_VERSION_WITHOUT_V} 36 | 37 | - name: Create a release candidate 38 | if: ${{ contains(github.ref, '-rc') }} 39 | run: | 40 | gh release create $TAG --prerelease --title $TAG --notes "Release candidate" ./out/ai-gateway-crds-helm-${TAG}.tgz ./out/ai-gateway-helm-${TAG}.tgz 41 | env: 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | 44 | - name: Create a stable release 45 | if: ${{ !contains(github.ref, '-rc') }} 46 | run: | 47 | gh release create $TAG --draft --title $TAG --notes "To be written by the release manager" ./out/ai-gateway-crds-helm-${TAG}.tgz ./out/ai-gateway-helm-${TAG}.tgz 48 | env: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | -------------------------------------------------------------------------------- /.github/workflows/style.yaml: -------------------------------------------------------------------------------- 1 | name: Style 2 | on: 3 | pull_request: 4 | branches: 5 | - main 6 | push: 7 | branches: 8 | - main 9 | 10 | permissions: 11 | contents: read 
12 | 13 | jobs: 14 | style: 15 | name: Check 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-go@v5 20 | with: 21 | cache: false 22 | go-version-file: go.mod 23 | - uses: actions/cache@v4 24 | with: 25 | path: | 26 | ~/.cache/go-build 27 | ~/.cache/golangci-lint 28 | ~/go/pkg/mod 29 | ~/go/bin 30 | key: code-style-check-${{ hashFiles('**/go.mod', '**/go.sum', '**/Makefile') }} 31 | - name: Ensure `make precommit` is executed 32 | run: make check 33 | -------------------------------------------------------------------------------- /.github/workflows/trivy.yaml: -------------------------------------------------------------------------------- 1 | name: Trivy 2 | 3 | on: 4 | push: 5 | branches: 6 | - "main" 7 | schedule: 8 | - cron: '0 20 * * *' 9 | 10 | permissions: 11 | contents: read 12 | 13 | jobs: 14 | image-scan: 15 | permissions: 16 | contents: read # Required for actions/checkout to fetch code 17 | name: Scan ${{ matrix.target.command_name }} Image 18 | runs-on: ubuntu-22.04 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | target: 23 | - command_name: "controller" 24 | - command_name: "extproc" 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: Build ${{ matrix.target.command_name }} Docker image 30 | run: | 31 | make docker-build.${{ matrix.target.command_name }} TAG=${{ github.sha }} 32 | 33 | - name: Run Trivy vulnerability scanner for ${{ matrix.target.command_name }} 34 | uses: aquasecurity/trivy-action@master 35 | with: 36 | image-ref: docker.io/envoyproxy/ai-gateway-${{ matrix.target.command_name }}:${{ github.sha }} 37 | format: 'table' 38 | severity: 'CRITICAL,HIGH,MEDIUM,LOW' 39 | vuln-type: 'os,library' 40 | exit-code: '1' # Fail workflow on detected vulnerabilities 41 | ignore-unfixed: true # Ignore unfixed vulnerabilities 42 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | *.kube 2 | .bin 3 | out/ 4 | .DS_Store 5 | .terraform 6 | .idea 7 | *for_tests.yaml 8 | .vscode 9 | .makerc 10 | 11 | # This is the placeholder for the access log file during extproc tests. 12 | ACCESS_LOG_PATH 13 | 14 | tests/e2e/logs 15 | 16 | # Files and directories to ignore in the site directory 17 | # dependencies 18 | site/node_modules 19 | 20 | # production 21 | site/build 22 | 23 | # generated files 24 | site/.docusaurus 25 | site/.cache-loader 26 | 27 | # misc 28 | site/.DS_Store 29 | site/.env.local 30 | site/.env.development.local 31 | site/.env.test.local 32 | site/.env.production.local 33 | 34 | site/npm-debug.log* 35 | site/yarn-debug.log* 36 | site/yarn-error.log* 37 | site/static/.DS_Store 38 | site/temp 39 | -------------------------------------------------------------------------------- /.licenserc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | header: 7 | license: 8 | spdx-id: Apache-2.0 9 | copyright-owner: Envoy AI Gateway Authors 10 | content: | 11 | Copyright Envoy AI Gateway Authors 12 | SPDX-License-Identifier: Apache-2.0 13 | The full text of the Apache license is available in the LICENSE file at 14 | the root of the repo. 
15 | paths-ignore: 16 | - site 17 | - out 18 | - '.github' 19 | - '.bin' 20 | - '**/go.mod' 21 | - '**/go.sum' 22 | - '**/*.md' 23 | - '**/*.json' 24 | - '**/*.txt' 25 | - '**/*.hcl' 26 | - '**/.gitignore' 27 | - '**/.helmignore' 28 | - .trivyignore 29 | - '.codespell.*' 30 | - .editorconfig 31 | - .golangci.yml 32 | - .testcoverage.yml 33 | - '.yamllint*' 34 | - LICENSE 35 | - CODEOWNERS 36 | - pre-commit.sh 37 | - tests/extproc/testdata/server.key 38 | - tests/extproc/testdata/server.crt 39 | -------------------------------------------------------------------------------- /.testcoverage.yml: -------------------------------------------------------------------------------- 1 | # This is the configuration file for https://github.com/vladopajic/go-test-coverage 2 | 3 | profile: ./out/go-test-coverage.out 4 | local-prefix: "github.com/envoyproxy/ai-gateway/" 5 | 6 | threshold: 7 | file: 70 8 | # TODO: increase to 90. 9 | package: 81 10 | # TODO: increase to 95. 11 | total: 83 12 | 13 | exclude: 14 | paths: 15 | # Examples are not part of the main codebase. 16 | - ^examples/ 17 | # Main functions are always tested with integration tests. 18 | - cmd/ 19 | # Generated code should not be tested. 20 | - zz_generated.deepcopy.go 21 | # This is the test library. 22 | - tests/internal/envtest.go 23 | # TODO: Remove this exclusion. 24 | - internal/controller/controller.go 25 | -------------------------------------------------------------------------------- /.trivyignore: -------------------------------------------------------------------------------- 1 | # Ignore Envoy Gateway CVEs. We are using the latest EG main branch, which appears as a weird "0.5.0-rc1.0-*" convention in go.mod. 2 | # So, these CVEs are false positives. 
3 | CVE-2025-24030 4 | CVE-2025-25294 5 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | ignore-from-file: [.gitignore, .yamllint.ignore] 4 | 5 | rules: 6 | braces: 7 | min-spaces-inside: 0 8 | max-spaces-inside: 0 9 | min-spaces-inside-empty: -1 10 | max-spaces-inside-empty: -1 11 | brackets: 12 | min-spaces-inside: 0 13 | max-spaces-inside: 1 14 | min-spaces-inside-empty: -1 15 | max-spaces-inside-empty: -1 16 | colons: 17 | max-spaces-before: 0 18 | max-spaces-after: 1 19 | commas: 20 | max-spaces-before: 1 21 | min-spaces-after: 1 22 | max-spaces-after: 1 23 | comments: 24 | level: error 25 | require-starting-space: true 26 | min-spaces-from-content: 2 27 | comments-indentation: 28 | level: warning 29 | document-end: disable 30 | document-start: disable 31 | empty-lines: 32 | max: 2 33 | max-start: 0 34 | max-end: 1 35 | empty-values: 36 | forbid-in-block-mappings: false 37 | forbid-in-flow-mappings: true 38 | hyphens: 39 | max-spaces-after: 1 40 | indentation: 41 | spaces: 2 42 | indent-sequences: consistent # be consistent: don't mix indentation styles in one file. 
43 | check-multi-line-strings: false 44 | key-duplicates: enable 45 | key-ordering: disable 46 | new-line-at-end-of-file: enable 47 | new-lines: 48 | type: unix 49 | trailing-spaces: enable 50 | truthy: 51 | check-keys: false # GitHub Actions uses "on:" as a key 52 | level: warning 53 | -------------------------------------------------------------------------------- /.yamllint.ignore: -------------------------------------------------------------------------------- 1 | manifests/charts/ai-gateway-helm 2 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @envoyproxy/ai-gateway-maintainers 2 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | Envoy AI Gateway follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/main/code-of-conduct.md). 4 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | FROM gcr.io/distroless/static-debian11:nonroot 7 | ARG COMMAND_NAME 8 | ARG TARGETOS 9 | ARG TARGETARCH 10 | 11 | COPY ./out/${COMMAND_NAME}-${TARGETOS}-${TARGETARCH} /app 12 | 13 | USER nonroot:nonroot 14 | ENTRYPOINT ["/app"] 15 | -------------------------------------------------------------------------------- /GOALS.md: -------------------------------------------------------------------------------- 1 | 2 | # Envoy AI Gateway GOALS.md 3 | 4 | ## Envoy AI Gateway Goals 5 | 6 | The high-level goal of the Envoy AI Gateway project is to facilitate seamless communication between application clients and multiple Generative AI (GenAI) services by leveraging Envoy Gateway. 7 | 8 | This open-source project aims to reduce integration complexity for developers and provide a secure, scalable solution for handling GenAI-specific traffic routing. 9 | 10 | Envoy AI Gateway will offer a flexible and simple API for configuring GenAI traffic handling with Envoy, leveraging Envoy Gateway. 11 | 12 | ## Objectives 13 | 14 | ### Enable GenAI traffic handling with Envoy 15 | 16 | Envoy AI Gateway leverages Envoy Gateway and Envoy Proxy to handle GenAI traffic. The Envoy AI Gateway will provide control plane extensions, where appropriate, to the Envoy Gateway API to define routing rules for handling traffic to Generative AI services. 17 | 18 | ### Easy Setup 19 | 20 | Envoy AI Gateway will simplify the process of setting up an AI Gateway to manage traffic to and from GenAI services. 21 | 22 | Envoy AI Gateway enables Platform Engineers to provide a Gateway solution that lets application developers focus on leveraging GenAI for feature development. 23 | 24 | * **Preset Envoy Gateway Configurations:** Default configurations that simplify setup of routing to GenAI Services, making it accessible to application developers. 25 | * **Leveraging Envoy:** The project aims to leverage the functionality of the Envoy Gateway control plane and the Envoy Proxy data plane. 
26 | 27 | ## Non-Objectives 28 | 29 | * **Disruption of Existing Envoy Patterns:** This project is an additive layer designed to expand use cases for Envoy Proxy and Envoy Gateway without changing existing deployment or control patterns. 30 | 31 | ## Personas 32 | 33 | ### Application Developer 34 | 35 | Focuses on implementing Generative AI services in applications. This user requires a simple and effective way to manage traffic and authentication with AI services. 36 | 37 | ### Infrastructure Administrator 38 | 39 | Responsible for provisioning and maintaining Envoy AI Gateway infrastructure. They need straightforward tools and API support to configure and monitor AI traffic flows securely and at scale. 40 | -------------------------------------------------------------------------------- /Makefile.tools.mk: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | LOCALBIN ?= $(shell pwd)/.bin 7 | $(LOCALBIN): 8 | mkdir -p $(LOCALBIN) 9 | 10 | CODESPELL = $(LOCALBIN)/.venv/codespell@v2.3.0/bin/codespell 11 | YAMLLINT = $(LOCALBIN)/.venv/yamllint@1.35.1/bin/yamllint 12 | 13 | .bin/.venv/%: 14 | mkdir -p $(@D) 15 | python3 -m venv $@ 16 | $@/bin/pip3 install $$(echo $* | sed 's/@/==/') 17 | 18 | $(CODESPELL): .bin/.venv/codespell@v2.3.0 19 | 20 | $(YAMLLINT): .bin/.venv/yamllint@1.35.1 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Envoy AI Gateway 2 | Envoy AI Gateway is an open source project for using [Envoy Gateway](https://github.com/envoyproxy/gateway) to handle request traffic from application clients to Generative AI services. 
3 | 4 | ## Contact 5 | 6 | * Slack: Join the [Envoy Slack workspace][] if you're not already a member. Otherwise, use the 7 | [Envoy AI Gateway channel][] to start collaborating with the community. 8 | 9 | ## Get Involved 10 | 11 | We adhere to the [CNCF Code of conduct][Code of conduct] 12 | 13 | The Envoy AI Gateway team and community members meet every Thursday. 14 | Please register for the meeting, add agenda points, and get involved. The 15 | meeting details are available in the [public document][meeting]. 16 | 17 | To contribute to the project via pull requests, please read the [CONTRIBUTING.md](CONTRIBUTING.md) file 18 | which includes information on how to build and test the project. 19 | 20 | ## Background 21 | 22 | The proposal of using Envoy Gateway as a [Cloud Native LLM Gateway][Cloud Native LLM Gateway] inspired the initiation of this project. 23 | 24 | 25 | [meeting]: https://docs.google.com/document/d/10e1sfsF-3G3Du5nBHGmLjXw5GVMqqCvFDqp_O65B0_w/edit?tab=t.0 26 | [Envoy Slack workspace]: https://communityinviter.com/apps/envoyproxy/envoy 27 | [Envoy AI Gateway channel]: https://envoyproxy.slack.com/archives/C07Q4N24VAA 28 | [Code of conduct]: https://github.com/cncf/foundation/blob/main/code-of-conduct.md 29 | [Cloud Native LLM Gateway]: https://docs.google.com/document/d/1FQN_hGhTNeoTgV5Jj16ialzaSiAxC0ozxH1D9ngCVew/edit?tab=t.0#heading=h.uuu99yemq4eo 30 | -------------------------------------------------------------------------------- /api/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | // Package v1alpha1 contains API schema definitions for the aigateway.envoyproxy.io 7 | // API group. 
8 | // 9 | // +kubebuilder:object:generate=true 10 | // +groupName=aigateway.envoyproxy.io 11 | package v1alpha1 12 | -------------------------------------------------------------------------------- /api/v1alpha1/registry.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package v1alpha1 7 | 8 | import ( 9 | "k8s.io/apimachinery/pkg/runtime/schema" 10 | "sigs.k8s.io/controller-runtime/pkg/scheme" 11 | ) 12 | 13 | func init() { 14 | SchemeBuilder.Register(&AIGatewayRoute{}, &AIGatewayRouteList{}) 15 | SchemeBuilder.Register(&AIServiceBackend{}, &AIServiceBackendList{}) 16 | SchemeBuilder.Register(&BackendSecurityPolicy{}, &BackendSecurityPolicyList{}) 17 | } 18 | 19 | const GroupName = "aigateway.envoyproxy.io" 20 | 21 | var ( 22 | // schemeGroupVersion is group version used to register these objects 23 | schemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha1"} 24 | 25 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 26 | SchemeBuilder = &scheme.Builder{GroupVersion: schemeGroupVersion} 27 | 28 | // AddToScheme adds the types in this group-version to the given scheme. 29 | AddToScheme = SchemeBuilder.AddToScheme 30 | ) 31 | -------------------------------------------------------------------------------- /api/v1alpha1/status.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package v1alpha1 7 | 8 | import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 9 | 10 | const ( 11 | // ConditionTypeAccepted is a condition type for the reconciliation result 12 | // where resources are accepted. 13 | ConditionTypeAccepted = "Accepted" 14 | // ConditionTypeNotAccepted is a condition type for the reconciliation result 15 | // where resources are not accepted. 16 | ConditionTypeNotAccepted = "NotAccepted" 17 | ) 18 | 19 | // AIGatewayRouteStatus contains the conditions by the reconciliation result. 20 | type AIGatewayRouteStatus struct { 21 | // Conditions is the list of conditions by the reconciliation result. 22 | // Currently, at most one condition is set. 23 | // 24 | // Known .status.conditions.type are: "Accepted", "NotAccepted". 25 | Conditions []metav1.Condition `json:"conditions,omitempty"` 26 | } 27 | 28 | // AIServiceBackendStatus contains the conditions by the reconciliation result. 29 | type AIServiceBackendStatus struct { 30 | // Conditions is the list of conditions by the reconciliation result. 31 | // Currently, at most one condition is set. 32 | // 33 | // Known .status.conditions.type are: "Accepted", "NotAccepted". 34 | Conditions []metav1.Condition `json:"conditions,omitempty"` 35 | } 36 | 37 | // BackendSecurityPolicyStatus contains the conditions by the reconciliation result. 38 | type BackendSecurityPolicyStatus struct { 39 | // Conditions is the list of conditions by the reconciliation result. 40 | // Currently, at most one condition is set. 41 | // 42 | // Known .status.conditions.type are: "Accepted", "NotAccepted". 
43 | Conditions []metav1.Condition `json:"conditions,omitempty"` 44 | } 45 | -------------------------------------------------------------------------------- /cmd/aigw/envoy-gateway-config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: gateway.envoyproxy.io/v1alpha1 7 | kind: EnvoyGateway 8 | gateway: 9 | controllerName: gateway.envoyproxy.io/gatewayclass-controller 10 | provider: 11 | type: Custom 12 | custom: 13 | resource: 14 | type: File 15 | file: 16 | paths: ["PLACEHOLDER_TMPDIR"] 17 | infrastructure: 18 | type: Host 19 | host: {} 20 | logging: 21 | level: 22 | default: error 23 | extensionApis: 24 | enableBackend: true 25 | extensionManager: 26 | hooks: 27 | xdsTranslator: 28 | post: 29 | - VirtualHost 30 | - Translation 31 | service: 32 | fqdn: 33 | hostname: localhost 34 | port: 1061 35 | -------------------------------------------------------------------------------- /cmd/aigw/testdata/translate_nonairesources.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | --- 7 | apiVersion: v1 8 | kind: Service 9 | metadata: 10 | name: something 11 | spec: 12 | selector: 13 | app: something 14 | ports: 15 | - protocol: TCP 16 | port: 80 17 | targetPort: 8080 18 | type: ClusterIP 19 | --- 20 | apiVersion: v1 21 | kind: Secret 22 | metadata: 23 | name: something 24 | type: Opaque 25 | stringData: 26 | something: foo 27 | -------------------------------------------------------------------------------- /cmd/extproc/main.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package main 7 | 8 | import ( 9 | "context" 10 | "log" 11 | "os" 12 | "os/signal" 13 | "syscall" 14 | 15 | "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" 16 | ) 17 | 18 | func main() { 19 | ctx, cancel := context.WithCancel(context.Background()) 20 | signalsChan := make(chan os.Signal, 1) 21 | signal.Notify(signalsChan, syscall.SIGINT, syscall.SIGTERM) 22 | go func() { 23 | <-signalsChan 24 | cancel() 25 | }() 26 | if err := mainlib.Main(ctx, os.Args[1:], os.Stderr); err != nil { 27 | log.Fatalf("error: %v", err) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /docs/proposals/001-ai-gateway-proposal/control_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/docs/proposals/001-ai-gateway-proposal/control_plane.png -------------------------------------------------------------------------------- /docs/proposals/001-ai-gateway-proposal/data_plane.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/docs/proposals/001-ai-gateway-proposal/data_plane.png -------------------------------------------------------------------------------- /examples/basic/README.md: -------------------------------------------------------------------------------- 1 | This contains the basic example manifest to create an Envoy Gateway that handles 2 | traffic for both OpenAI and AWS Bedrock at the same time. 3 | -------------------------------------------------------------------------------- /examples/extproc_custom_metrics/README.md: -------------------------------------------------------------------------------- 1 | This example shows how to insert a custom chat completion metrics implementation in the custom 2 | external processor using the `filterapi` package. 3 | -------------------------------------------------------------------------------- /examples/extproc_custom_metrics/main.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package main 7 | 8 | import ( 9 | "context" 10 | "log" 11 | "log/slog" 12 | "os" 13 | "os/signal" 14 | "syscall" 15 | 16 | "go.opentelemetry.io/otel/attribute" 17 | "go.opentelemetry.io/otel/metric" 18 | 19 | "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" 20 | "github.com/envoyproxy/ai-gateway/filterapi" 21 | "github.com/envoyproxy/ai-gateway/filterapi/x" 22 | ) 23 | 24 | // This demonstrates how to build a custom chat completion metrics for the external processor. 25 | func main() { 26 | // Initializes the custom chat completion metrics. 27 | x.NewCustomChatCompletionMetrics = newCustomChatCompletionMetrics 28 | 29 | // Executes the main function of the external processor.
30 | ctx, cancel := context.WithCancel(context.Background()) 31 | signalsChan := make(chan os.Signal, 1) 32 | signal.Notify(signalsChan, syscall.SIGINT, syscall.SIGTERM) 33 | go func() { 34 | <-signalsChan 35 | cancel() 36 | }() 37 | if err := mainlib.Main(ctx, os.Args[1:], os.Stderr); err != nil { 38 | log.Fatalf("error: %v", err) 39 | } 40 | } 41 | 42 | // newCustomChatCompletionMetrics implements [x.NewCustomChatCompletionMetrics]. 43 | func newCustomChatCompletionMetrics(meter metric.Meter) x.ChatCompletionMetrics { 44 | return &myCustomChatCompletionMetrics{ 45 | meter: meter, 46 | logger: slog.New(slog.NewTextHandler(os.Stdout, nil)), 47 | } 48 | } 49 | 50 | // myCustomChatCompletionMetrics implements [x.ChatCompletionMetrics]. 51 | type myCustomChatCompletionMetrics struct { 52 | meter metric.Meter 53 | logger *slog.Logger 54 | } 55 | 56 | func (m *myCustomChatCompletionMetrics) StartRequest(headers map[string]string) { 57 | m.logger.Info("StartRequest", "headers", headers) 58 | } 59 | 60 | func (m *myCustomChatCompletionMetrics) SetModel(model string) { 61 | m.logger.Info("SetModel", "model", model) 62 | } 63 | 64 | func (m *myCustomChatCompletionMetrics) SetBackend(backend *filterapi.Backend) { 65 | m.logger.Info("SetBackend", "backend", backend.Name) 66 | } 67 | 68 | func (m *myCustomChatCompletionMetrics) RecordTokenUsage(_ context.Context, inputTokens, outputTokens, totalTokens uint32, _ ...attribute.KeyValue) { 69 | m.logger.Info("RecordTokenUsage", "inputTokens", inputTokens, "outputTokens", outputTokens, "totalTokens", totalTokens) 70 | } 71 | 72 | func (m *myCustomChatCompletionMetrics) RecordRequestCompletion(_ context.Context, success bool, _ ...attribute.KeyValue) { 73 | m.logger.Info("RecordRequestCompletion", "success", success) 74 | } 75 | 76 | func (m *myCustomChatCompletionMetrics) RecordTokenLatency(_ context.Context, tokens uint32, _ ...attribute.KeyValue) { 77 | m.logger.Info("RecordTokenLatency", "tokens", tokens) 78 | } 79 | 
-------------------------------------------------------------------------------- /examples/extproc_custom_router/README.md: -------------------------------------------------------------------------------- 1 | This example shows how to insert a custom router in the custom external processor using `filterapi` package. 2 | -------------------------------------------------------------------------------- /examples/extproc_custom_router/main.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package main 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "os" 12 | "os/signal" 13 | "syscall" 14 | 15 | "github.com/envoyproxy/ai-gateway/cmd/extproc/mainlib" 16 | "github.com/envoyproxy/ai-gateway/filterapi" 17 | "github.com/envoyproxy/ai-gateway/filterapi/x" 18 | ) 19 | 20 | // newCustomRouter implements [x.NewCustomRouter]. 21 | func newCustomRouter(defaultRouter x.Router, config *filterapi.Config) x.Router { 22 | // You can poke the current configuration of the routes, and the list of backends 23 | // specified in the AIGatewayRoute.Rules, etc. 24 | return &myCustomRouter{config: config, defaultRouter: defaultRouter} 25 | } 26 | 27 | // myCustomRouter implements [x.Router]. 28 | type myCustomRouter struct { 29 | config *filterapi.Config 30 | defaultRouter x.Router 31 | } 32 | 33 | // Calculate implements [x.Router.Calculate]. 34 | func (m *myCustomRouter) Calculate(headers map[string]string) (backend filterapi.RouteRuleName, err error) { 35 | // Simply logs the headers and delegates the calculation to the default router. 
36 | modelName, ok := headers[m.config.ModelNameHeaderKey] 37 | if !ok { 38 | panic("model name not found in the headers") 39 | } 40 | fmt.Printf("model name: %s\n", modelName) 41 | return m.defaultRouter.Calculate(headers) 42 | } 43 | 44 | // This demonstrates how to build a custom router for the external processor. 45 | func main() { 46 | // Initializes the custom router. 47 | x.NewCustomRouter = newCustomRouter 48 | // Executes the main function of the external processor. 49 | ctx, cancel := context.WithCancel(context.Background()) 50 | signalsChan := make(chan os.Signal, 1) 51 | signal.Notify(signalsChan, syscall.SIGINT, syscall.SIGTERM) 52 | go func() { 53 | <-signalsChan 54 | cancel() 55 | }() 56 | // Report the error returned by mainlib.Main and exit non-zero instead of silently discarding it. 57 | if err := mainlib.Main(ctx, os.Args[1:], os.Stderr); err != nil { 58 | fmt.Fprintf(os.Stderr, "error: %v\n", err) 59 | os.Exit(1) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /examples/monitoring/README.md: -------------------------------------------------------------------------------- 1 | This `monitoring.yaml` file is a Kubernetes manifest file that deploys a Prometheus server that scrapes metrics from Envoy Gateway pods where AI Gateway filter is enabled. 2 | -------------------------------------------------------------------------------- /examples/provider_fallback/README.md: -------------------------------------------------------------------------------- 1 | This example demonstrates how to configure the "provider" fallback per routing rule. 2 | Specifically, this configures AIGatewayRoute to route requests to an always failing backend and then fallback to a healthy AWS Bedrock backend. 3 | The fallback behavior is achieved with [`BackendTrafficPolicy` API](https://gateway.envoyproxy.io/contributions/design/backend-traffic-policy/) of Envoy Gateway which is attached to a generated HTTPRoute.
4 | -------------------------------------------------------------------------------- /examples/provider_fallback/fallback.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | --- 7 | apiVersion: gateway.envoyproxy.io/v1alpha1 8 | kind: BackendTrafficPolicy 9 | metadata: 10 | name: passive-health-check 11 | spec: 12 | targetRefs: 13 | - group: gateway.networking.k8s.io 14 | kind: HTTPRoute 15 | # The HTTPRoute generated by the AIGatewayRoute has the same name as the AIGatewayRoute. 16 | name: provider-fallback 17 | retry: 18 | numRetries: 5 19 | perRetry: 20 | backOff: 21 | baseInterval: 100ms 22 | maxInterval: 10s 23 | timeout: 30s 24 | retryOn: 25 | httpStatusCodes: 26 | - 500 27 | triggers: 28 | - connect-failure 29 | - retriable-status-codes 30 | healthCheck: 31 | passive: 32 | baseEjectionTime: 30s 33 | interval: 2s 34 | maxEjectionPercent: 100 35 | consecutive5XxErrors: 1 36 | consecutiveGatewayErrors: 0 37 | consecutiveLocalOriginFailures: 1 38 | splitExternalLocalOriginErrors: false 39 | -------------------------------------------------------------------------------- /examples/token_ratelimit/README.md: -------------------------------------------------------------------------------- 1 | This example demonstrates how to use the token rate limit feature of the AI Gateway. 2 | This utilizes the Global Rate Limit API of Envoy Gateway combined with the 3 | AI Gateway's `llmRequestCosts` configuration to capture the consumed tokens 4 | of each request. 
5 | -------------------------------------------------------------------------------- /filterapi/x/x.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | // Package x is an experimental package that provides the customizability of the AI Gateway filter. 7 | package x 8 | 9 | import ( 10 | "context" 11 | "errors" 12 | 13 | "go.opentelemetry.io/otel/attribute" 14 | "go.opentelemetry.io/otel/metric" 15 | 16 | "github.com/envoyproxy/ai-gateway/filterapi" 17 | ) 18 | 19 | // NewCustomRouter is the function to create a custom router over the default router. 20 | // This is nil by default and can be set by the custom build of external processor. 21 | var NewCustomRouter NewCustomRouterFn 22 | 23 | // ErrNoMatchingRule is the error the router function must return if there is no matching rule. 24 | var ErrNoMatchingRule = errors.New("no matching rule found") 25 | 26 | // NewCustomRouterFn is the function signature for [NewCustomRouter]. 27 | // 28 | // It accepts the extproc config passed to the AI Gateway filter and returns a [Router]. 29 | // This is called when the new configuration is loaded. 30 | // 31 | // The defaultRouter can be used to delegate the calculation to the default router implementation. 32 | type NewCustomRouterFn func(defaultRouter Router, config *filterapi.Config) Router 33 | 34 | // Router is the interface for the router. 35 | // 36 | // Router must be goroutine-safe as it is shared across multiple requests. 37 | type Router interface { 38 | // Calculate determines the route to route to based on the request headers. 39 | // 40 | // The request headers include the populated [filterapi.Config.ModelNameHeaderKey] 41 | // with the parsed model name based on the [filterapi.Config] given to the NewCustomRouterFn.
42 | // 43 | // Returns the selected route rule name and the error if any. 44 | Calculate(requestHeaders map[string]string) (route filterapi.RouteRuleName, err error) 45 | } 46 | 47 | // NewCustomChatCompletionMetrics is the function to create a custom chat completion AI Gateway metrics over 48 | // the default metrics. This is nil by default and can be set by the custom build of external processor. 49 | var NewCustomChatCompletionMetrics NewCustomChatCompletionMetricsFn 50 | 51 | // NewCustomChatCompletionMetricsFn is the function to create a custom chat completion AI Gateway metrics. 52 | type NewCustomChatCompletionMetricsFn func(meter metric.Meter) ChatCompletionMetrics 53 | 54 | // ChatCompletionMetrics is the interface for the chat completion AI Gateway metrics. 55 | type ChatCompletionMetrics interface { 56 | // StartRequest initializes timing for a new request. 57 | StartRequest(headers map[string]string) 58 | // SetModel sets the model of the request. This is usually called after parsing the request body. 59 | SetModel(model string) 60 | // SetBackend sets the selected backend when the routing decision has been made. This is usually called 61 | // after parsing the request body to determine the model and invoke the routing logic. 62 | SetBackend(backend *filterapi.Backend) 63 | 64 | // RecordTokenUsage records token usage metrics. 65 | RecordTokenUsage(ctx context.Context, inputTokens, outputTokens, totalTokens uint32, extraAttrs ...attribute.KeyValue) 66 | // RecordRequestCompletion records latency metrics for the entire request. 67 | RecordRequestCompletion(ctx context.Context, success bool, extraAttrs ...attribute.KeyValue) 68 | // RecordTokenLatency records latency metrics for token generation.
69 | RecordTokenLatency(ctx context.Context, tokens uint32, extraAttrs ...attribute.KeyValue) 70 | } 71 | -------------------------------------------------------------------------------- /internal/controller/gateway_mutator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package controller 7 | 8 | import ( 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | "go.uber.org/zap/zapcore" 13 | corev1 "k8s.io/api/core/v1" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | fake2 "k8s.io/client-go/kubernetes/fake" 16 | ctrl "sigs.k8s.io/controller-runtime" 17 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 18 | gwapiv1a2 "sigs.k8s.io/gateway-api/apis/v1alpha2" 19 | 20 | aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" 21 | ) 22 | 23 | func TestGatewayMutator_Default(t *testing.T) { 24 | fakeClient := requireNewFakeClientWithIndexes(t) 25 | fakeKube := fake2.NewClientset() 26 | ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{Development: true, Level: zapcore.DebugLevel}))) 27 | g := newGatewayMutator( 28 | fakeClient, fakeKube, ctrl.Log, "docker.io/envoyproxy/ai-gateway-extproc:latest", 29 | "info", "envoy-gateway-system", "/tmp/extproc.sock", 30 | ) 31 | pod := &corev1.Pod{ 32 | ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "test-namespace"}, 33 | Spec: corev1.PodSpec{ 34 | Containers: []corev1.Container{{Name: "envoy"}}, 35 | }, 36 | } 37 | err := fakeClient.Create(t.Context(), &aigv1a1.AIGatewayRoute{ 38 | ObjectMeta: metav1.ObjectMeta{Name: "test-gateway", Namespace: "test-namespace"}, 39 | Spec: aigv1a1.AIGatewayRouteSpec{}, 40 | }) 41 | require.NoError(t, err) 42 | err = g.Default(t.Context(), pod) 43 | require.NoError(t, err) 44 | } 45 | 46 | func TestGatewayMutator_mutatePod(t 
*testing.T) { 47 | fakeClient := requireNewFakeClientWithIndexes(t) 48 | fakeKube := fake2.NewClientset() 49 | ctrl.SetLogger(zap.New(zap.UseFlagOptions(&zap.Options{Development: true, Level: zapcore.DebugLevel}))) 50 | g := newGatewayMutator( 51 | fakeClient, fakeKube, ctrl.Log, "docker.io/envoyproxy/ai-gateway-extproc:latest", 52 | "info", "envoy-gateway-system", "/tmp/extproc.sock", 53 | ) 54 | 55 | const gwName, gwNamespace = "test-gateway", "test-namespace" 56 | err := fakeClient.Create(t.Context(), &aigv1a1.AIGatewayRoute{ 57 | ObjectMeta: metav1.ObjectMeta{Name: gwName, Namespace: gwNamespace}, 58 | Spec: aigv1a1.AIGatewayRouteSpec{ 59 | TargetRefs: []gwapiv1a2.LocalPolicyTargetReferenceWithSectionName{ 60 | { 61 | LocalPolicyTargetReference: gwapiv1a2.LocalPolicyTargetReference{ 62 | Name: gwName, Kind: "Gateway", Group: "gateway.networking.k8s.io", 63 | }, 64 | }, 65 | }, 66 | Rules: []aigv1a1.AIGatewayRouteRule{ 67 | {BackendRefs: []aigv1a1.AIGatewayRouteRuleBackendRef{{Name: "apple"}}}, 68 | }, 69 | APISchema: aigv1a1.VersionedAPISchema{Name: aigv1a1.APISchemaOpenAI, Version: "v1"}, 70 | FilterConfig: &aigv1a1.AIGatewayFilterConfig{}, 71 | }, 72 | }) 73 | require.NoError(t, err) 74 | 75 | pod := &corev1.Pod{ 76 | ObjectMeta: metav1.ObjectMeta{Name: "test-pod", Namespace: "test-namespace"}, 77 | Spec: corev1.PodSpec{ 78 | Containers: []corev1.Container{{Name: "envoy"}}, 79 | }, 80 | } 81 | err = g.mutatePod(t.Context(), pod, gwName, gwNamespace) 82 | require.NoError(t, err) 83 | } 84 | -------------------------------------------------------------------------------- /internal/controller/rotators/aws_common_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package rotators 7 | 8 | import ( 9 | "fmt" 10 | "testing" 11 | 12 | "github.com/aws/aws-sdk-go-v2/aws" 13 | "github.com/stretchr/testify/require" 14 | corev1 "k8s.io/api/core/v1" 15 | ) 16 | 17 | func TestNewSTSClient(t *testing.T) { 18 | stsClient := NewSTSClient(aws.Config{Region: "us-west-2"}) 19 | require.NotNil(t, stsClient) 20 | } 21 | 22 | func TestFormatAWSCredentialsFile(t *testing.T) { 23 | profile := "default" 24 | accessKey := "AKIAXXXXXXXXXXXXXXXX" 25 | secretKey := "XXXXXXXXXXXXXXXXXXXX" 26 | sessionToken := "XXXXXXXXXXXXXXXXXXXX" 27 | region := "us-west-2" 28 | credentials := awsCredentials{ 29 | profile: profile, 30 | accessKeyID: accessKey, 31 | secretAccessKey: secretKey, 32 | sessionToken: sessionToken, 33 | region: region, 34 | } 35 | 36 | awsCred := fmt.Sprintf("[%s]\naws_access_key_id = %s\naws_secret_access_key = %s\naws_session_token = %s\nregion = %s\n", profile, accessKey, 37 | secretKey, sessionToken, region) 38 | 39 | require.Equal(t, awsCred, formatAWSCredentialsFile(&awsCredentialsFile{credentials})) 40 | } 41 | 42 | func TestUpdateAWSCredentialsInSecret(t *testing.T) { 43 | secret := &corev1.Secret{} 44 | 45 | credentials := awsCredentials{ 46 | profile: "default", 47 | accessKeyID: "accessKey", 48 | secretAccessKey: "secretKey", 49 | sessionToken: "sessionToken", 50 | region: "region", 51 | } 52 | 53 | updateAWSCredentialsInSecret(secret, &awsCredentialsFile{credentials}) 54 | require.Len(t, secret.Data, 1) 55 | 56 | val, ok := secret.Data[AwsCredentialsKey] 57 | require.True(t, ok) 58 | require.NotEmpty(t, val) 59 | } 60 | -------------------------------------------------------------------------------- /internal/controller/rotators/common.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package rotators 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "time" 12 | 13 | corev1 "k8s.io/api/core/v1" 14 | apierrors "k8s.io/apimachinery/pkg/api/errors" 15 | "sigs.k8s.io/controller-runtime/pkg/client" 16 | ) 17 | 18 | // ExpirationTimeAnnotationKey is exported for testing purposes within the controller. 19 | const ExpirationTimeAnnotationKey = "rotators/expiration-time" 20 | 21 | const rotatorSecretNamePrefix = "ai-eg-bsp" // #nosec G101 22 | 23 | // Rotator defines the interface for rotating provider credential. 24 | type Rotator interface { 25 | // IsExpired checks if the provider credentials needs to be renewed. 26 | IsExpired(preRotationExpirationTime time.Time) bool 27 | // GetPreRotationTime gets the time when the credentials need to be renewed. 28 | GetPreRotationTime(ctx context.Context) (time.Time, error) 29 | // Rotate will update the credential secret file with new credentials and return expiration time. 30 | Rotate(ctx context.Context) (time.Time, error) 31 | } 32 | 33 | // LookupSecret retrieves an existing secret. 34 | func LookupSecret(ctx context.Context, k8sClient client.Client, namespace, name string) (*corev1.Secret, error) { 35 | secret := &corev1.Secret{} 36 | if err := k8sClient.Get(ctx, client.ObjectKey{ 37 | Namespace: namespace, 38 | Name: name, 39 | }, secret); err != nil { 40 | if apierrors.IsNotFound(err) { 41 | return nil, err 42 | } 43 | return nil, fmt.Errorf("failed to get secret: %w", err) 44 | } 45 | return secret, nil 46 | } 47 | 48 | // updateExpirationSecretAnnotation will set the expiration time of credentials set in secret annotation. 
49 | func updateExpirationSecretAnnotation(secret *corev1.Secret, updateTime time.Time) { 50 | if secret.Annotations == nil { 51 | secret.Annotations = make(map[string]string) 52 | } 53 | secret.Annotations[ExpirationTimeAnnotationKey] = updateTime.Format(time.RFC3339) 54 | } 55 | 56 | // GetExpirationSecretAnnotation will get the expiration time of credentials set in secret annotation. 57 | func GetExpirationSecretAnnotation(secret *corev1.Secret) (time.Time, error) { 58 | expirationTime, ok := secret.Annotations[ExpirationTimeAnnotationKey] 59 | if !ok { 60 | return time.Time{}, fmt.Errorf("secret %s/%s missing %s annotation", secret.Namespace, secret.Name, ExpirationTimeAnnotationKey) 61 | } 62 | expiration, err := time.Parse(time.RFC3339, expirationTime) 63 | if err != nil { 64 | return time.Time{}, fmt.Errorf("failed to parse expiration time annotation: %w", err) 65 | } 66 | return expiration, nil 67 | } 68 | 69 | // IsBufferedTimeExpired checks if the expired time minus duration buffer is before the current time. 70 | func IsBufferedTimeExpired(buffer time.Duration, expirationTime time.Time) bool { 71 | return expirationTime.Add(-buffer).Before(time.Now()) 72 | } 73 | 74 | // GetBSPSecretName will return the bspName with rotator prefix. 75 | func GetBSPSecretName(bspName string) string { 76 | return fmt.Sprintf("%s-%s", rotatorSecretNamePrefix, bspName) 77 | } 78 | -------------------------------------------------------------------------------- /internal/controller/secret.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package controller 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | 12 | "github.com/go-logr/logr" 13 | corev1 "k8s.io/api/core/v1" 14 | apierrors "k8s.io/apimachinery/pkg/api/errors" 15 | "k8s.io/client-go/kubernetes" 16 | ctrl "sigs.k8s.io/controller-runtime" 17 | "sigs.k8s.io/controller-runtime/pkg/client" 18 | "sigs.k8s.io/controller-runtime/pkg/event" 19 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 20 | 21 | aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" 22 | ) 23 | 24 | // secretController implements reconcile.TypedReconciler for corev1.Secret. 25 | type secretController struct { 26 | client client.Client 27 | kubeClient kubernetes.Interface 28 | logger logr.Logger 29 | backendSecurityPolicyEventChan chan event.GenericEvent 30 | } 31 | 32 | // NewSecretController creates a new reconcile.TypedReconciler[reconcile.Request] for corev1.Secret. 33 | func NewSecretController(client client.Client, kubeClient kubernetes.Interface, 34 | logger logr.Logger, backendSecurityPolicyEventChan chan event.GenericEvent, 35 | ) reconcile.TypedReconciler[reconcile.Request] { 36 | return &secretController{ 37 | client: client, 38 | kubeClient: kubeClient, 39 | logger: logger, 40 | backendSecurityPolicyEventChan: backendSecurityPolicyEventChan, 41 | } 42 | } 43 | 44 | // Reconcile implements the reconcile.Reconciler for corev1.Secret. 
45 | func (c *secretController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 46 | var secret corev1.Secret 47 | if err := c.client.Get(ctx, req.NamespacedName, &secret); err != nil { 48 | if apierrors.IsNotFound(err) { 49 | return ctrl.Result{}, nil 50 | } 51 | return ctrl.Result{}, err 52 | } 53 | c.logger.Info("Reconciling Secret", "namespace", req.Namespace, "name", req.Name) 54 | if err := c.syncSecret(ctx, req.Namespace, req.Name); err != nil { 55 | return ctrl.Result{}, err 56 | } 57 | return ctrl.Result{}, nil 58 | } 59 | 60 | // syncSecret syncs the state of all resource referencing the given secret. 61 | func (c *secretController) syncSecret(ctx context.Context, namespace, name string) error { 62 | var backendSecurityPolicies aigv1a1.BackendSecurityPolicyList 63 | err := c.client.List(ctx, &backendSecurityPolicies, 64 | client.MatchingFields{ 65 | k8sClientIndexSecretToReferencingBackendSecurityPolicy: backendSecurityPolicyKey(namespace, name), 66 | }, 67 | ) 68 | if err != nil { 69 | return fmt.Errorf("failed to list BackendSecurityPolicyList: %w", err) 70 | } 71 | for i := range backendSecurityPolicies.Items { 72 | backendSecurityPolicy := &backendSecurityPolicies.Items[i] 73 | c.logger.Info("Syncing BackendSecurityPolicy", 74 | "namespace", backendSecurityPolicy.Namespace, "name", backendSecurityPolicy.Name) 75 | c.backendSecurityPolicyEventChan <- event.GenericEvent{Object: backendSecurityPolicy} 76 | } 77 | return nil 78 | } 79 | -------------------------------------------------------------------------------- /internal/controller/secret_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package controller 7 | 8 | import ( 9 | "sort" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | corev1 "k8s.io/api/core/v1" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | "k8s.io/apimachinery/pkg/types" 16 | fake2 "k8s.io/client-go/kubernetes/fake" 17 | ctrl "sigs.k8s.io/controller-runtime" 18 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 19 | gwapiv1 "sigs.k8s.io/gateway-api/apis/v1" 20 | 21 | aigv1a1 "github.com/envoyproxy/ai-gateway/api/v1alpha1" 22 | internaltesting "github.com/envoyproxy/ai-gateway/internal/testing" 23 | ) 24 | 25 | func TestSecretController_Reconcile(t *testing.T) { 26 | eventCh := internaltesting.NewControllerEventChan[*aigv1a1.BackendSecurityPolicy]() 27 | fakeClient := requireNewFakeClientWithIndexes(t) 28 | c := NewSecretController(fakeClient, fake2.NewClientset(), ctrl.Log, eventCh.Ch) 29 | 30 | err := fakeClient.Create(t.Context(), &corev1.Secret{ 31 | ObjectMeta: metav1.ObjectMeta{Name: "mysecret", Namespace: "default"}, 32 | StringData: map[string]string{"key": "value"}, 33 | }) 34 | require.NoError(t, err) 35 | 36 | // Create a bsp that references the secret. 
37 | originals := []*aigv1a1.BackendSecurityPolicy{ 38 | { 39 | ObjectMeta: metav1.ObjectMeta{Name: "foo", Namespace: "default"}, 40 | Spec: aigv1a1.BackendSecurityPolicySpec{ 41 | Type: aigv1a1.BackendSecurityPolicyTypeAPIKey, 42 | APIKey: &aigv1a1.BackendSecurityPolicyAPIKey{SecretRef: &gwapiv1.SecretObjectReference{Name: "mysecret"}}, 43 | }, 44 | }, 45 | { 46 | ObjectMeta: metav1.ObjectMeta{Name: "bar", Namespace: "default"}, 47 | Spec: aigv1a1.BackendSecurityPolicySpec{ 48 | Type: aigv1a1.BackendSecurityPolicyTypeAWSCredentials, 49 | AWSCredentials: &aigv1a1.BackendSecurityPolicyAWSCredentials{ 50 | Region: "us-west-2", 51 | CredentialsFile: &aigv1a1.AWSCredentialsFile{SecretRef: &gwapiv1.SecretObjectReference{Name: "mysecret"}}, 52 | }, 53 | }, 54 | }, 55 | } 56 | for _, bsp := range originals { 57 | require.NoError(t, fakeClient.Create(t.Context(), bsp)) 58 | } 59 | 60 | _, err = c.Reconcile(t.Context(), reconcile.Request{NamespacedName: types.NamespacedName{ 61 | Namespace: "default", Name: "mysecret", 62 | }}) 63 | require.NoError(t, err) 64 | actual := eventCh.RequireItemsEventually(t, len(originals)) 65 | sort.Slice(actual, func(i, j int) bool { 66 | return actual[i].Name < actual[j].Name 67 | }) 68 | sort.Slice(originals, func(i, j int) bool { 69 | return originals[i].Name < originals[j].Name 70 | }) 71 | require.Equal(t, originals, actual) 72 | 73 | // Test the case where the Secret is being deleted. 
74 | err = fakeClient.Delete(t.Context(), &corev1.Secret{ 75 | ObjectMeta: metav1.ObjectMeta{Name: "mysecret", Namespace: "default"}, 76 | }) 77 | require.NoError(t, err) 78 | _, err = c.Reconcile(t.Context(), reconcile.Request{NamespacedName: types.NamespacedName{ 79 | Namespace: "default", Name: "mysecret", 80 | }}) 81 | require.NoError(t, err) 82 | } 83 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/azure_client_secret_token_provider.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | 14 | "github.com/Azure/azure-sdk-for-go/sdk/azcore" 15 | "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" 16 | "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 17 | ) 18 | 19 | // azureTokenProvider is a provider implements TokenProvider interface for Azure access tokens. 20 | type azureClientSecretTokenProvider struct { 21 | credential *azidentity.ClientSecretCredential 22 | tokenOption policy.TokenRequestOptions 23 | } 24 | 25 | // NewAzureClientSecretTokenProvider creates a new TokenProvider with the given tenant ID, client ID, client secret, and token request options. 
26 | func NewAzureClientSecretTokenProvider(tenantID, clientID, clientSecret string, tokenOption policy.TokenRequestOptions) (TokenProvider, error) { 27 | clientOptions := GetClientSecretCredentialOptions() 28 | credential, err := azidentity.NewClientSecretCredential(tenantID, clientID, clientSecret, clientOptions) 29 | if err != nil { 30 | return nil, err 31 | } 32 | return &azureClientSecretTokenProvider{credential: credential, tokenOption: tokenOption}, nil 33 | } 34 | 35 | // GetToken implements TokenProvider.GetToken method to retrieve an Azure access token and its expiration time. 36 | func (a *azureClientSecretTokenProvider) GetToken(ctx context.Context) (TokenExpiry, error) { 37 | azureToken, err := a.credential.GetToken(ctx, a.tokenOption) 38 | if err != nil { 39 | return TokenExpiry{}, err 40 | } 41 | return TokenExpiry{Token: azureToken.Token, ExpiresAt: azureToken.ExpiresOn}, nil 42 | } 43 | 44 | func GetClientSecretCredentialOptions() *azidentity.ClientSecretCredentialOptions { 45 | if azureProxyURL := os.Getenv("AI_GATEWAY_AZURE_PROXY_URL"); azureProxyURL != "" { 46 | proxyURL, err := url.Parse(azureProxyURL) 47 | if err == nil { 48 | customTransport := &http.Transport{Proxy: http.ProxyURL(proxyURL)} 49 | customHTTPClient := &http.Client{Transport: customTransport} 50 | return &azidentity.ClientSecretCredentialOptions{ 51 | ClientOptions: azcore.ClientOptions{ 52 | Transport: customHTTPClient, 53 | }, 54 | } 55 | } 56 | } 57 | return nil 58 | } 59 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/azure_client_secret_token_provider_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "net/http" 11 | "testing" 12 | 13 | "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestNewAzureClientSecretTokenProvider(t *testing.T) { 18 | _, err := NewAzureClientSecretTokenProvider("tenantID", "clientID", "", policy.TokenRequestOptions{}) 19 | require.Error(t, err) 20 | require.Contains(t, err.Error(), "secret can't be empty string") 21 | } 22 | 23 | func TestNewAzureClientSecretTokenProvider_GetToken(t *testing.T) { 24 | t.Run("missing azure scope", func(t *testing.T) { 25 | provider, err := NewAzureClientSecretTokenProvider("tenantID", "clientID", "clientSecret", policy.TokenRequestOptions{}) 26 | require.NoError(t, err) 27 | 28 | tokenExpiry, err := provider.GetToken(context.Background()) 29 | require.Error(t, err) 30 | require.Contains(t, err.Error(), "ClientSecretCredential.GetToken() requires at least one scope") 31 | require.Empty(t, tokenExpiry.Token) 32 | require.True(t, tokenExpiry.ExpiresAt.IsZero()) 33 | }) 34 | 35 | t.Run("invalid azure credential info", func(t *testing.T) { 36 | scopes := []string{"some-azure-scope"} 37 | provider, err := NewAzureClientSecretTokenProvider("invalidTenantID", "invalidClientID", "invalidClientSecret", policy.TokenRequestOptions{Scopes: scopes}) 38 | require.NoError(t, err) 39 | 40 | _, err = provider.GetToken(context.Background()) 41 | require.Error(t, err) 42 | require.Contains(t, err.Error(), "Tenant 'invalidtenantid' not found. 
Check to make sure you have the correct tenant ID and are signing into the correct cloud.") 43 | }) 44 | 45 | t.Run("azure proxy url", func(t *testing.T) { 46 | // Set environment variable for the test 47 | mockProxyURL := "http://localhost:8888" 48 | t.Setenv("AI_GATEWAY_AZURE_PROXY_URL", mockProxyURL) 49 | 50 | opts := GetClientSecretCredentialOptions() 51 | 52 | require.NotNil(t, opts) 53 | require.NotNil(t, opts.ClientOptions.Transport) 54 | 55 | // Assert that the transport has a proxy set 56 | transport, ok := opts.ClientOptions.Transport.(*http.Client) 57 | require.True(t, ok) 58 | require.NotNil(t, transport.Transport) 59 | 60 | // Check the proxy URL (optional, deeper inspection) 61 | innerTransport, ok := transport.Transport.(*http.Transport) 62 | require.True(t, ok) 63 | require.NotNil(t, innerTransport.Proxy) 64 | 65 | req, _ := http.NewRequest("GET", "http://example.com", nil) 66 | proxyFunc := innerTransport.Proxy 67 | proxyURL, err := proxyFunc(req) 68 | require.NoError(t, err) 69 | require.Equal(t, "http://localhost:8888", proxyURL.String()) 70 | }) 71 | } 72 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/azure_token_provider.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "net/http" 11 | "net/url" 12 | "os" 13 | 14 | "github.com/Azure/azure-sdk-for-go/sdk/azcore" 15 | "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" 16 | "github.com/Azure/azure-sdk-for-go/sdk/azidentity" 17 | ) 18 | 19 | // azureTokenProvider is a provider implements TokenProvider interface for Azure access tokens. 
20 | type azureTokenProvider struct { 21 | credential *azidentity.ClientAssertionCredential 22 | tokenOption policy.TokenRequestOptions 23 | } 24 | 25 | // NewAzureTokenProvider creates a new TokenProvider with the given tenant ID, client ID, tokenProvider, and token request options. 26 | func NewAzureTokenProvider(_ context.Context, tenantID, clientID string, tokenProvider TokenProvider, tokenOption policy.TokenRequestOptions) (TokenProvider, error) { 27 | clientOptions := GetClientAssertionCredentialOptions() 28 | credential, err := azidentity.NewClientAssertionCredential(tenantID, clientID, func(ctx context.Context) (string, error) { 29 | token, err := tokenProvider.GetToken(ctx) 30 | if err != nil { 31 | return "", err 32 | } 33 | return token.Token, nil 34 | }, clientOptions) 35 | if err != nil { 36 | return nil, err 37 | } 38 | return &azureTokenProvider{credential: credential, tokenOption: tokenOption}, nil 39 | } 40 | 41 | // GetToken implements TokenProvider.GetToken method to retrieve an Azure access token and its expiration time. 
42 | func (a *azureTokenProvider) GetToken(ctx context.Context) (TokenExpiry, error) { 43 | azureToken, err := a.credential.GetToken(ctx, a.tokenOption) 44 | if err != nil { 45 | return TokenExpiry{}, err 46 | } 47 | return TokenExpiry{Token: azureToken.Token, ExpiresAt: azureToken.ExpiresOn}, nil 48 | } 49 | 50 | func GetClientAssertionCredentialOptions() *azidentity.ClientAssertionCredentialOptions { 51 | if azureProxyURL := os.Getenv("AI_GATEWAY_AZURE_PROXY_URL"); azureProxyURL != "" { 52 | proxyURL, err := url.Parse(azureProxyURL) 53 | if err == nil { 54 | customTransport := &http.Transport{Proxy: http.ProxyURL(proxyURL)} 55 | customHTTPClient := &http.Client{Transport: customTransport} 56 | return &azidentity.ClientAssertionCredentialOptions{ 57 | ClientOptions: azcore.ClientOptions{ 58 | Transport: customHTTPClient, 59 | }, 60 | } 61 | } 62 | } 63 | return nil 64 | } 65 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/azure_token_provider_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "net/http" 11 | "testing" 12 | "time" 13 | 14 | "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | func TestNewAzureTokenProvider(t *testing.T) { 19 | mockProvider := NewMockTokenProvider("mock-token", time.Now().Add(1*time.Hour), nil) 20 | _, err := NewAzureTokenProvider(t.Context(), "tenantID", "clientID", mockProvider, policy.TokenRequestOptions{}) 21 | require.NoError(t, err) 22 | } 23 | 24 | func TestAzureTokenProvider_GetToken(t *testing.T) { 25 | t.Run("missing azure scope", func(t *testing.T) { 26 | mockProvider := NewMockTokenProvider("mock-token", time.Now().Add(1*time.Hour), nil) 27 | provider, err := NewAzureTokenProvider(t.Context(), "tenantID", "clientID", mockProvider, policy.TokenRequestOptions{}) 28 | require.NoError(t, err) 29 | 30 | tokenExpiry, err := provider.GetToken(context.Background()) 31 | require.Error(t, err) 32 | require.Contains(t, err.Error(), "ClientAssertionCredential.GetToken() requires at least one scope") 33 | require.Empty(t, tokenExpiry.Token) 34 | require.True(t, tokenExpiry.ExpiresAt.IsZero()) 35 | }) 36 | 37 | t.Run("invalid azure credential info", func(t *testing.T) { 38 | scopes := []string{"some-azure-scope"} 39 | mockProvider := NewMockTokenProvider("mock-token", time.Now().Add(1*time.Hour), nil) 40 | provider, err := NewAzureTokenProvider(t.Context(), "invalidTenantID", "invalidClientID", mockProvider, policy.TokenRequestOptions{Scopes: scopes}) 41 | require.NoError(t, err) 42 | 43 | _, err = provider.GetToken(context.Background()) 44 | require.Error(t, err) 45 | require.Contains(t, err.Error(), "Tenant 'invalidtenantid' not found. 
Check to make sure you have the correct tenant ID and are signing into the correct cloud.") 46 | }) 47 | 48 | t.Run("azure proxy url", func(t *testing.T) { 49 | // Set environment variable for the test 50 | mockProxyURL := "http://localhost:8888" 51 | t.Setenv("AI_GATEWAY_AZURE_PROXY_URL", mockProxyURL) 52 | 53 | opts := GetClientAssertionCredentialOptions() 54 | 55 | require.NotNil(t, opts) 56 | require.NotNil(t, opts.ClientOptions.Transport) 57 | 58 | // Assert that the transport has a proxy set 59 | transport, ok := opts.ClientOptions.Transport.(*http.Client) 60 | require.True(t, ok) 61 | require.NotNil(t, transport.Transport) 62 | 63 | // Check the proxy URL (optional, deeper inspection) 64 | innerTransport, ok := transport.Transport.(*http.Transport) 65 | require.True(t, ok) 66 | require.NotNil(t, innerTransport.Proxy) 67 | 68 | req, _ := http.NewRequest("GET", "http://example.com", nil) 69 | proxyFunc := innerTransport.Proxy 70 | proxyURL, err := proxyFunc(req) 71 | require.NoError(t, err) 72 | require.Equal(t, "http://localhost:8888", proxyURL.String()) 73 | }) 74 | } 75 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/token_provider.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "time" 11 | ) 12 | 13 | const ( 14 | // clientSecretKey is key used to store Azure and OIDC client secret in Kubernetes secrets. 15 | clientSecretKey = "client-secret" 16 | ) 17 | 18 | // TokenExpiry represents a token and its expiration time. 19 | type TokenExpiry struct { 20 | Token string // The token string. 21 | ExpiresAt time.Time // The expiration time of the token. 
22 | } 23 | 24 | // TokenProvider is an interface for retrieving tokens. 25 | type TokenProvider interface { 26 | // GetToken retrieves a token and its expiration time. 27 | GetToken(ctx context.Context) (TokenExpiry, error) 28 | } 29 | 30 | // mockTokenProvider is used for unit tests to allow passing in a token string and expiry. 31 | type mockTokenProvider struct { 32 | token string // The mock token string. 33 | expiresAt time.Time // The mock expiration time. 34 | err error // The error to return when GetToken is called. 35 | } 36 | 37 | // GetToken implements TokenProvider.GetToken method to get mock access token and err if any. 38 | func (m *mockTokenProvider) GetToken(_ context.Context) (TokenExpiry, error) { 39 | return TokenExpiry{m.token, m.expiresAt}, m.err 40 | } 41 | 42 | // NewMockTokenProvider creates a new MockTokenProvider with the given token, expiration time, and error. 43 | func NewMockTokenProvider(mockToken string, mockExpireAt time.Time, err error) TokenProvider { 44 | mockProvider := mockTokenProvider{ 45 | token: mockToken, 46 | expiresAt: mockExpireAt, 47 | err: err, 48 | } 49 | return &mockProvider 50 | } 51 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/token_provider_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "testing" 12 | "time" 13 | 14 | "github.com/stretchr/testify/require" 15 | ) 16 | 17 | func TestMockTokenProvider_GetToken(t *testing.T) { 18 | t.Run("successful token retrieval", func(t *testing.T) { 19 | mockProvider := NewMockTokenProvider("mock-token", time.Now().Add(1*time.Hour), nil) 20 | ctx := context.Background() 21 | tokenExpiry, err := mockProvider.GetToken(ctx) 22 | require.NoError(t, err) 23 | require.Equal(t, "mock-token", tokenExpiry.Token) 24 | require.False(t, tokenExpiry.ExpiresAt.IsZero()) 25 | }) 26 | 27 | t.Run("failed token retrieval", func(t *testing.T) { 28 | mockProvider := NewMockTokenProvider("", time.Time{}, fmt.Errorf("failed to get token")) 29 | 30 | ctx := context.Background() 31 | _, err := mockProvider.GetToken(ctx) 32 | require.Error(t, err) 33 | require.Equal(t, "failed to get token", err.Error()) 34 | }) 35 | } 36 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/util.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | 12 | corev1 "k8s.io/api/core/v1" 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | ) 15 | 16 | // GetClientSecret retrieves the client secret from a Kubernetes secret. 
17 | func GetClientSecret(ctx context.Context, cl client.Client, secretRef *corev1.SecretReference) (string, error) { 18 | secret := &corev1.Secret{} 19 | if err := cl.Get(ctx, client.ObjectKey{ 20 | Namespace: secretRef.Namespace, 21 | Name: secretRef.Name, 22 | }, secret); err != nil { 23 | return "", fmt.Errorf("failed to get client secret: %w", err) 24 | } 25 | 26 | clientSecret, ok := secret.Data[clientSecretKey] 27 | if !ok { 28 | return "", fmt.Errorf("failed to get client secret: no secret data found using key '%s' in secret name '%s' and namespace '%s", clientSecretKey, secretRef.Name, secretRef.Namespace) 29 | } 30 | return string(clientSecret), nil 31 | } 32 | -------------------------------------------------------------------------------- /internal/controller/tokenprovider/util_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package tokenprovider 7 | 8 | import ( 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | corev1 "k8s.io/api/core/v1" 13 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 14 | "k8s.io/apimachinery/pkg/runtime" 15 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 16 | ) 17 | 18 | func TestGetClientSecret(t *testing.T) { 19 | scheme := runtime.NewScheme() 20 | scheme.AddKnownTypes(corev1.SchemeGroupVersion, 21 | &corev1.Secret{}, 22 | ) 23 | cl := fake.NewClientBuilder().WithScheme(scheme).Build() 24 | 25 | secretName, secretNamespace := "secret", "secret-ns" 26 | 27 | secret, err := GetClientSecret(t.Context(), cl, &corev1.SecretReference{ 28 | Name: secretName, 29 | Namespace: secretNamespace, 30 | }) 31 | require.Error(t, err) 32 | require.Empty(t, secret) 33 | 34 | err = cl.Create(t.Context(), &corev1.Secret{ 35 | ObjectMeta: metav1.ObjectMeta{ 36 | Name: secretName, 37 | Namespace: secretNamespace, 38 | }, 39 | Immutable: nil, 40 | Data: map[string][]byte{ 41 | "client-secret": []byte("client-secret"), 42 | }, 43 | StringData: nil, 44 | Type: "", 45 | }) 46 | require.NoError(t, err) 47 | 48 | secret, err = GetClientSecret(t.Context(), cl, &corev1.SecretReference{ 49 | Name: secretName, 50 | Namespace: secretNamespace, 51 | }) 52 | require.NoError(t, err) 53 | require.Equal(t, "client-secret", secret) 54 | } 55 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/api_key.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package backendauth 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "strings" 12 | 13 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 14 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 15 | 16 | "github.com/envoyproxy/ai-gateway/filterapi" 17 | ) 18 | 19 | // apiKeyHandler implements [Handler] for api key authz. 20 | type apiKeyHandler struct { 21 | apiKey string 22 | } 23 | 24 | func newAPIKeyHandler(auth *filterapi.APIKeyAuth) (Handler, error) { 25 | return &apiKeyHandler{apiKey: strings.TrimSpace(auth.Key)}, nil 26 | } 27 | 28 | // Do implements [Handler.Do]. 29 | // 30 | // Extracts the api key from the local file and set it as an authorization header. 31 | func (a *apiKeyHandler) Do(_ context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, _ *extprocv3.BodyMutation) error { 32 | requestHeaders["Authorization"] = fmt.Sprintf("Bearer %s", a.apiKey) 33 | headerMut.SetHeaders = append(headerMut.SetHeaders, &corev3.HeaderValueOption{ 34 | Header: &corev3.HeaderValue{Key: "Authorization", RawValue: []byte(requestHeaders["Authorization"])}, 35 | }) 36 | return nil 37 | } 38 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/api_key_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package backendauth 7 | 8 | import ( 9 | "testing" 10 | 11 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 12 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 13 | "github.com/stretchr/testify/require" 14 | 15 | "github.com/envoyproxy/ai-gateway/filterapi" 16 | ) 17 | 18 | func TestNewAPIKeyHandler(t *testing.T) { 19 | auth := filterapi.APIKeyAuth{Key: "test \n"} 20 | handler, err := newAPIKeyHandler(&auth) 21 | require.NoError(t, err) 22 | require.NotNil(t, handler) 23 | // apiKey should be trimmed. 24 | require.Equal(t, "test", handler.(*apiKeyHandler).apiKey) 25 | } 26 | 27 | func TestApiKeyHandler_Do(t *testing.T) { 28 | auth := filterapi.APIKeyAuth{Key: "test"} 29 | handler, err := newAPIKeyHandler(&auth) 30 | require.NoError(t, err) 31 | require.NotNil(t, handler) 32 | 33 | requestHeaders := map[string]string{":method": "POST"} 34 | headerMut := &extprocv3.HeaderMutation{ 35 | SetHeaders: []*corev3.HeaderValueOption{ 36 | {Header: &corev3.HeaderValue{ 37 | Key: ":path", 38 | Value: "/model/some-random-model/converse", 39 | }}, 40 | }, 41 | } 42 | bodyMut := &extprocv3.BodyMutation{ 43 | Mutation: &extprocv3.BodyMutation_Body{ 44 | Body: []byte(`{"messages": [{"role": "user", "content": [{"text": "Say this is a test!"}]}]}`), 45 | }, 46 | } 47 | err = handler.Do(t.Context(), requestHeaders, headerMut, bodyMut) 48 | require.NoError(t, err) 49 | 50 | bearerToken, ok := requestHeaders["Authorization"] 51 | require.True(t, ok) 52 | require.Equal(t, "Bearer test", bearerToken) 53 | 54 | require.Len(t, headerMut.SetHeaders, 2) 55 | require.Equal(t, "Authorization", headerMut.SetHeaders[1].Header.Key) 56 | require.Equal(t, []byte("Bearer test"), headerMut.SetHeaders[1].Header.GetRawValue()) 57 | } 58 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/auth.go: -------------------------------------------------------------------------------- 1 | // 
Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package backendauth 7 | 8 | import ( 9 | "context" 10 | "errors" 11 | 12 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 13 | 14 | "github.com/envoyproxy/ai-gateway/filterapi" 15 | ) 16 | 17 | // Handler is the interface that deals with the backend auth for a specific backend. 18 | // 19 | // TODO: maybe this can be just "post-transformation" handler, as it is not really only about auth. 20 | type Handler interface { 21 | // Do performs the backend auth, and make changes to the request headers and body mutations. 22 | Do(ctx context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, bodyMut *extprocv3.BodyMutation) error 23 | } 24 | 25 | // NewHandler returns a new implementation of [Handler] based on the configuration. 26 | func NewHandler(ctx context.Context, config *filterapi.BackendAuth) (Handler, error) { 27 | switch { 28 | case config.AWSAuth != nil: 29 | return newAWSHandler(ctx, config.AWSAuth) 30 | case config.APIKey != nil: 31 | return newAPIKeyHandler(config.APIKey) 32 | case config.AzureAuth != nil: 33 | return newAzureHandler(config.AzureAuth) 34 | default: 35 | return nil, errors.New("no backend auth handler found") 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/auth_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package backendauth 7 | 8 | import ( 9 | "testing" 10 | 11 | "github.com/stretchr/testify/require" 12 | 13 | "github.com/envoyproxy/ai-gateway/filterapi" 14 | ) 15 | 16 | func TestNewHandler(t *testing.T) { 17 | for _, tt := range []struct { 18 | name string 19 | config *filterapi.BackendAuth 20 | }{ 21 | { 22 | name: "AWSAuth", 23 | config: &filterapi.BackendAuth{AWSAuth: &filterapi.AWSAuth{ 24 | Region: "us-west-2", CredentialFileLiteral: ` 25 | [default] 26 | aws_access_key_id = test 27 | aws_secret_access_key = test 28 | `, 29 | }}, 30 | }, 31 | { 32 | name: "APIKey", 33 | config: &filterapi.BackendAuth{ 34 | APIKey: &filterapi.APIKeyAuth{Key: "TEST"}, 35 | }, 36 | }, 37 | { 38 | name: "AzureAuth", 39 | config: &filterapi.BackendAuth{ 40 | AzureAuth: &filterapi.AzureAuth{AccessToken: "some-access-token"}, 41 | }, 42 | }, 43 | } { 44 | t.Run(tt.name, func(t *testing.T) { 45 | _, err := NewHandler(t.Context(), tt.config) 46 | require.NoError(t, err) 47 | }) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/aws_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package backendauth 7 | 8 | import ( 9 | "sync" 10 | "testing" 11 | 12 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 13 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 14 | "github.com/stretchr/testify/require" 15 | 16 | "github.com/envoyproxy/ai-gateway/filterapi" 17 | ) 18 | 19 | func TestNewAWSHandler(t *testing.T) { 20 | handler, err := newAWSHandler(t.Context(), &filterapi.AWSAuth{}) 21 | require.NoError(t, err) 22 | require.NotNil(t, handler) 23 | } 24 | 25 | func TestAWSHandler_Do(t *testing.T) { 26 | awsFileBody := "[default]\nAWS_ACCESS_KEY_ID=test\nAWS_SECRET_ACCESS_KEY=secret\n" 27 | credentialFileHandler, err := newAWSHandler(t.Context(), &filterapi.AWSAuth{ 28 | CredentialFileLiteral: awsFileBody, 29 | Region: "us-east-1", 30 | }) 31 | require.NoError(t, err) 32 | 33 | // Handler.Do is called concurrently, so we test it with 100 goroutines to ensure it is thread-safe. 34 | var wg sync.WaitGroup 35 | wg.Add(100) 36 | for range 100 { 37 | go func() { 38 | defer wg.Done() 39 | requestHeaders := map[string]string{":method": "POST"} 40 | headerMut := &extprocv3.HeaderMutation{ 41 | SetHeaders: []*corev3.HeaderValueOption{ 42 | {Header: &corev3.HeaderValue{ 43 | Key: ":path", 44 | Value: "/model/some-random-model/converse", 45 | }}, 46 | }, 47 | } 48 | bodyMut := &extprocv3.BodyMutation{ 49 | Mutation: &extprocv3.BodyMutation_Body{ 50 | Body: []byte(`{"messages": [{"role": "user", "content": [{"text": "Say this is a test!"}]}]}`), 51 | }, 52 | } 53 | err := credentialFileHandler.Do(t.Context(), requestHeaders, headerMut, bodyMut) 54 | require.NoError(t, err) 55 | 56 | // Ensures that the headers are set. 
57 | headers := map[string]string{} 58 | for _, h := range headerMut.SetHeaders { 59 | headers[h.Header.Key] = h.Header.Value 60 | } 61 | require.Contains(t, headers, "X-Amz-Date") 62 | require.Contains(t, headers, "Authorization") 63 | }() 64 | } 65 | 66 | wg.Wait() 67 | } 68 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/azure.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package backendauth 7 | 8 | import ( 9 | "context" 10 | "fmt" 11 | "strings" 12 | 13 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 14 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 15 | 16 | "github.com/envoyproxy/ai-gateway/filterapi" 17 | ) 18 | 19 | type azureHandler struct { 20 | azureAccessToken string 21 | } 22 | 23 | func newAzureHandler(auth *filterapi.AzureAuth) (Handler, error) { 24 | return &azureHandler{azureAccessToken: strings.TrimSpace(auth.AccessToken)}, nil 25 | } 26 | 27 | // Do implements [Handler.Do]. 28 | // 29 | // Extracts the azure access token from the local file and set it as an authorization header. 
30 | func (a *azureHandler) Do(_ context.Context, requestHeaders map[string]string, headerMut *extprocv3.HeaderMutation, _ *extprocv3.BodyMutation) error { 31 | requestHeaders["Authorization"] = fmt.Sprintf("Bearer %s", a.azureAccessToken) 32 | headerMut.SetHeaders = append(headerMut.SetHeaders, &corev3.HeaderValueOption{ 33 | Header: &corev3.HeaderValue{Key: "Authorization", RawValue: []byte(requestHeaders["Authorization"])}, 34 | }) 35 | return nil 36 | } 37 | -------------------------------------------------------------------------------- /internal/extproc/backendauth/azure_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package backendauth 7 | 8 | import ( 9 | "testing" 10 | 11 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 12 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 13 | "github.com/stretchr/testify/require" 14 | 15 | "github.com/envoyproxy/ai-gateway/filterapi" 16 | ) 17 | 18 | func TestNewAzureHandler(t *testing.T) { 19 | auth := filterapi.AzureAuth{AccessToken: " some-access-token \n"} 20 | handler, err := newAzureHandler(&auth) 21 | require.NoError(t, err) 22 | require.NotNil(t, handler) 23 | 24 | require.Equal(t, "some-access-token", handler.(*azureHandler).azureAccessToken) 25 | } 26 | 27 | func TestNewAzureHandler_Do(t *testing.T) { 28 | auth := filterapi.AzureAuth{AccessToken: "some-access-token"} 29 | handler, err := newAzureHandler(&auth) 30 | require.NoError(t, err) 31 | require.NotNil(t, handler) 32 | 33 | requestHeaders := map[string]string{":method": "POST"} 34 | headerMut := &extprocv3.HeaderMutation{ 35 | SetHeaders: []*corev3.HeaderValueOption{ 36 | { 37 | Header: &corev3.HeaderValue{ 38 | Key: ":path", 39 | Value: 
"/model/some-random-model/chat/completion", 40 | }, 41 | }, 42 | }, 43 | } 44 | bodyMut := &extprocv3.BodyMutation{ 45 | Mutation: &extprocv3.BodyMutation_Body{ 46 | Body: []byte(`{"messages": [{"role": "user", "content": [{"text": "Say this is a test!"}]}]}`), 47 | }, 48 | } 49 | 50 | err = handler.Do(t.Context(), requestHeaders, headerMut, bodyMut) 51 | require.NoError(t, err) 52 | 53 | bearerToken, ok := requestHeaders["Authorization"] 54 | require.True(t, ok) 55 | require.Equal(t, "Bearer some-access-token", bearerToken) 56 | 57 | require.Len(t, headerMut.SetHeaders, 2) 58 | require.Equal(t, "Authorization", headerMut.SetHeaders[1].Header.Key) 59 | require.Equal(t, []byte("Bearer some-access-token"), headerMut.SetHeaders[1].Header.GetRawValue()) 60 | } 61 | -------------------------------------------------------------------------------- /internal/extproc/models_processor_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package extproc 7 | 8 | import ( 9 | "encoding/json" 10 | "log/slog" 11 | "testing" 12 | "time" 13 | 14 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 15 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 16 | typev3 "github.com/envoyproxy/go-control-plane/envoy/type/v3" 17 | "github.com/stretchr/testify/require" 18 | 19 | "github.com/envoyproxy/ai-gateway/internal/apischema/openai" 20 | ) 21 | 22 | func TestModels_ProcessRequestHeaders(t *testing.T) { 23 | now := time.Now() 24 | cfg := &processorConfig{declaredModels: []model{ 25 | { 26 | name: "openai", 27 | ownedBy: "openai", 28 | createdAt: now, 29 | }, 30 | { 31 | name: "aws-bedrock", 32 | ownedBy: "aws", 33 | createdAt: now, 34 | }, 35 | }} 36 | p, err := NewModelsProcessor(cfg, nil, slog.Default(), false) 37 | require.NoError(t, err) 38 | res, err := p.ProcessRequestHeaders(t.Context(), &corev3.HeaderMap{ 39 | Headers: []*corev3.HeaderValue{{Key: "foo", Value: "bar"}}, 40 | }) 41 | require.NoError(t, err) 42 | 43 | ir, ok := res.Response.(*extprocv3.ProcessingResponse_ImmediateResponse) 44 | require.True(t, ok) 45 | require.Equal(t, typev3.StatusCode(200), ir.ImmediateResponse.Status.Code) 46 | require.Equal(t, uint32(0), ir.ImmediateResponse.GrpcStatus.Status) 47 | 48 | respHeaders := headers(ir.ImmediateResponse.Headers.SetHeaders) 49 | require.Equal(t, "application/json", respHeaders["content-type"]) 50 | 51 | var models openai.ModelList 52 | require.NoError(t, json.Unmarshal(ir.ImmediateResponse.Body, &models)) 53 | require.Equal(t, "list", models.Object) 54 | require.Len(t, models.Data, len(cfg.declaredModels)) 55 | for i, m := range cfg.declaredModels { 56 | require.Equal(t, "model", models.Data[i].Object) 57 | require.Equal(t, m.name, models.Data[i].ID) 58 | require.Equal(t, now.Unix(), time.Time(models.Data[i].Created).Unix()) 59 | require.Equal(t, m.ownedBy, models.Data[i].OwnedBy) 60 | } 61 | } 62 | 63 | func 
TestModels_UnimplementedMethods(t *testing.T) { 64 | p := &modelsProcessor{} 65 | _, err := p.ProcessRequestBody(t.Context(), &extprocv3.HttpBody{}) 66 | require.ErrorIs(t, err, errUnexpectedCall) 67 | _, err = p.ProcessResponseHeaders(t.Context(), &corev3.HeaderMap{}) 68 | require.ErrorIs(t, err, errUnexpectedCall) 69 | _, err = p.ProcessResponseBody(t.Context(), &extprocv3.HttpBody{}) 70 | require.ErrorIs(t, err, errUnexpectedCall) 71 | } 72 | 73 | func headers(in []*corev3.HeaderValueOption) map[string]string { 74 | h := make(map[string]string) 75 | for _, v := range in { 76 | h[v.Header.Key] = string(v.Header.RawValue) 77 | } 78 | return h 79 | } 80 | -------------------------------------------------------------------------------- /internal/extproc/processor_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package extproc 7 | 8 | import ( 9 | "testing" 10 | 11 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func Test_passThroughProcessor(t *testing.T) { // This is mostly for coverage. 
16 | p := passThroughProcessor{} 17 | resp, err := p.ProcessRequestHeaders(t.Context(), nil) 18 | require.NoError(t, err) 19 | require.NotNil(t, resp) 20 | _, ok := resp.Response.(*extprocv3.ProcessingResponse_RequestHeaders) 21 | require.True(t, ok) 22 | 23 | resp, err = p.ProcessRequestBody(t.Context(), nil) 24 | require.NoError(t, err) 25 | require.NotNil(t, resp) 26 | _, ok = resp.Response.(*extprocv3.ProcessingResponse_RequestBody) 27 | require.True(t, ok) 28 | 29 | resp, err = p.ProcessResponseHeaders(t.Context(), nil) 30 | require.NoError(t, err) 31 | require.NotNil(t, resp) 32 | _, ok = resp.Response.(*extprocv3.ProcessingResponse_ResponseHeaders) 33 | require.True(t, ok) 34 | 35 | resp, err = p.ProcessResponseBody(t.Context(), nil) 36 | require.NoError(t, err) 37 | require.NotNil(t, resp) 38 | _, ok = resp.Response.(*extprocv3.ProcessingResponse_ResponseBody) 39 | require.True(t, ok) 40 | } 41 | -------------------------------------------------------------------------------- /internal/extproc/router/router.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package router 7 | 8 | import ( 9 | "github.com/envoyproxy/ai-gateway/filterapi" 10 | "github.com/envoyproxy/ai-gateway/filterapi/x" 11 | ) 12 | 13 | // router implements [x.Router]. 14 | type router struct { 15 | rules []filterapi.RouteRule 16 | } 17 | 18 | // New creates a new [x.Router] implementation for the given config. 19 | func New(config *filterapi.Config, newCustomFn x.NewCustomRouterFn) (x.Router, error) { 20 | r := &router{rules: config.Rules} 21 | if newCustomFn != nil { 22 | customRouter := newCustomFn(r, config) 23 | return customRouter, nil 24 | } 25 | return r, nil 26 | } 27 | 28 | // Calculate implements [x.Router.Calculate]. 
29 | func (r *router) Calculate(headers map[string]string) (name filterapi.RouteRuleName, err error) { 30 | var rule *filterapi.RouteRule 31 | outer: 32 | for i := range r.rules { 33 | _rule := &r.rules[i] 34 | for j := range _rule.Headers { 35 | hdr := &_rule.Headers[j] 36 | v, ok := headers[string(hdr.Name)] 37 | // Currently, we only do the exact matching. 38 | if ok && v == hdr.Value { 39 | rule = _rule 40 | break outer 41 | } 42 | } 43 | } 44 | if rule == nil { 45 | return "", x.ErrNoMatchingRule 46 | } 47 | return rule.Name, nil 48 | } 49 | -------------------------------------------------------------------------------- /internal/extproc/router/router_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package router 7 | 8 | import ( 9 | "sync" 10 | "sync/atomic" 11 | "testing" 12 | 13 | "github.com/stretchr/testify/require" 14 | 15 | "github.com/envoyproxy/ai-gateway/filterapi" 16 | "github.com/envoyproxy/ai-gateway/filterapi/x" 17 | ) 18 | 19 | // dummyCustomRouter implements [x.Router]. 20 | type dummyCustomRouter struct{ called bool } 21 | 22 | // Calculate implements [x.Router.Calculate]. 23 | func (c *dummyCustomRouter) Calculate(map[string]string) (filterapi.RouteRuleName, error) { 24 | c.called = true 25 | return "", nil 26 | } 27 | 28 | func TestRouter_NewRouter_Custom(t *testing.T) { 29 | r, err := New(&filterapi.Config{}, func(defaultRouter x.Router, _ *filterapi.Config) x.Router { 30 | require.NotNil(t, defaultRouter) 31 | _, ok := defaultRouter.(*router) 32 | require.True(t, ok) // Checking if the default router is correctly passed. 
33 | return &dummyCustomRouter{} 34 | }) 35 | require.NoError(t, err) 36 | _, ok := r.(*dummyCustomRouter) 37 | require.True(t, ok) 38 | 39 | _, err = r.Calculate(nil) 40 | require.NoError(t, err) 41 | require.True(t, r.(*dummyCustomRouter).called) 42 | } 43 | 44 | func TestRouter_Calculate(t *testing.T) { 45 | _r, err := New(&filterapi.Config{ 46 | Rules: []filterapi.RouteRule{ 47 | { 48 | Name: "cat", 49 | Headers: []filterapi.HeaderMatch{ 50 | {Name: "x-some-random-non-model-header", Value: "dog"}, 51 | }, 52 | }, 53 | { 54 | Name: "foo", 55 | Headers: []filterapi.HeaderMatch{ 56 | {Name: "x-model-name", Value: "llama3.3333"}, 57 | }, 58 | }, 59 | { 60 | Name: "baz", 61 | Headers: []filterapi.HeaderMatch{ 62 | {Name: "x-model-name", Value: "o1"}, 63 | }, 64 | }, 65 | { 66 | Name: "openai", 67 | Headers: []filterapi.HeaderMatch{ 68 | {Name: "x-model-name", Value: "gpt4.4444"}, 69 | }, 70 | }, 71 | }, 72 | }, nil) 73 | require.NoError(t, err) 74 | r, ok := _r.(*router) 75 | require.True(t, ok) 76 | 77 | t.Run("no matching rule", func(t *testing.T) { 78 | _, err := r.Calculate(map[string]string{"x-model-name": "something-quirky"}) 79 | require.Error(t, err) 80 | }) 81 | t.Run("matching rule - single backend choice", func(t *testing.T) { 82 | b, err := r.Calculate(map[string]string{"x-model-name": "gpt4.4444"}) 83 | require.NoError(t, err) 84 | require.Equal(t, filterapi.RouteRuleName("openai"), b) 85 | }) 86 | t.Run("first match win", func(t *testing.T) { 87 | b, err := r.Calculate(map[string]string{"x-some-random-non-model-header": "dog", "x-model-name": "llama3.3333"}) 88 | require.NoError(t, err) 89 | require.Equal(t, filterapi.RouteRuleName("cat"), b) 90 | }) 91 | 92 | t.Run("concurrent access", func(t *testing.T) { 93 | var wg sync.WaitGroup 94 | wg.Add(1000) 95 | 96 | var count atomic.Int32 97 | for range 1000 { 98 | go func() { 99 | defer wg.Done() 100 | b, err := r.Calculate(map[string]string{"x-model-name": "llama3.3333"}) 101 | require.NoError(t, err) 102 
| require.NotNil(t, b) 103 | count.Add(1) 104 | }() 105 | } 106 | wg.Wait() 107 | require.Greater(t, count.Load(), int32(200)) 108 | }) 109 | } 110 | -------------------------------------------------------------------------------- /internal/extproc/translator/openai_azureopenai.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package translator 7 | 8 | import ( 9 | "fmt" 10 | "strconv" 11 | 12 | corev3 "github.com/envoyproxy/go-control-plane/envoy/config/core/v3" 13 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 14 | 15 | "github.com/envoyproxy/ai-gateway/internal/apischema/openai" 16 | ) 17 | 18 | // NewChatCompletionOpenAIToAzureOpenAITranslator implements [Factory] for OpenAI to Azure OpenAI translations. 19 | // Except RequestBody method requires modification to satisfy Microsoft Azure OpenAI spec 20 | // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions, other interface methods 21 | // are identical to NewChatCompletionOpenAIToOpenAITranslator's interface implementations. 22 | func NewChatCompletionOpenAIToAzureOpenAITranslator(apiVersion string) OpenAIChatCompletionTranslator { 23 | return &openAIToAzureOpenAITranslatorV1ChatCompletion{apiVersion: apiVersion} 24 | } 25 | 26 | type openAIToAzureOpenAITranslatorV1ChatCompletion struct { 27 | apiVersion string 28 | openAIToOpenAITranslatorV1ChatCompletion 29 | } 30 | 31 | func (o *openAIToAzureOpenAITranslatorV1ChatCompletion) RequestBody(raw []byte, req *openai.ChatCompletionRequest, onRetry bool) ( 32 | headerMutation *extprocv3.HeaderMutation, bodyMutation *extprocv3.BodyMutation, err error, 33 | ) { 34 | // Assume deployment_id is same as model name. 
35 | pathTemplate := "/openai/deployments/%s/chat/completions?api-version=%s" 36 | headerMutation = &extprocv3.HeaderMutation{ 37 | SetHeaders: []*corev3.HeaderValueOption{ 38 | {Header: &corev3.HeaderValue{ 39 | Key: ":path", 40 | RawValue: []byte(fmt.Sprintf(pathTemplate, req.Model, o.apiVersion)), 41 | }}, 42 | }, 43 | } 44 | if req.Stream { 45 | o.stream = true 46 | } 47 | 48 | // On retry, the path might have changed to a different provider. So, this will ensure that the path is always set to OpenAI. 49 | if onRetry { 50 | headerMutation.SetHeaders = append(headerMutation.SetHeaders, &corev3.HeaderValueOption{Header: &corev3.HeaderValue{ 51 | Key: "content-length", 52 | RawValue: []byte(strconv.Itoa(len(raw))), 53 | }}) 54 | bodyMutation = &extprocv3.BodyMutation{ 55 | Mutation: &extprocv3.BodyMutation_Body{Body: raw}, 56 | } 57 | } 58 | return 59 | } 60 | -------------------------------------------------------------------------------- /internal/extproc/translator/openai_azureopenai_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package translator 7 | 8 | import ( 9 | "fmt" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | 14 | "github.com/envoyproxy/ai-gateway/internal/apischema/openai" 15 | ) 16 | 17 | func TestOpenAIToAzureOpenAITranslatorV1ChatCompletion_RequestBody(t *testing.T) { 18 | t.Run("valid body", func(t *testing.T) { 19 | for _, stream := range []bool{true, false} { 20 | t.Run(fmt.Sprintf("stream=%t", stream), func(t *testing.T) { 21 | originalReq := &openai.ChatCompletionRequest{Model: "foo-bar-ai", Stream: stream} 22 | 23 | o := &openAIToAzureOpenAITranslatorV1ChatCompletion{apiVersion: "some-version"} 24 | hm, bm, err := o.RequestBody(nil, originalReq, false) 25 | require.Nil(t, bm) 26 | require.NoError(t, err) 27 | require.Equal(t, stream, o.stream) 28 | require.NotNil(t, hm) 29 | 30 | require.Equal(t, ":path", hm.SetHeaders[0].Header.Key) 31 | require.Equal(t, "/openai/deployments/foo-bar-ai/chat/completions?api-version=some-version", string(hm.SetHeaders[0].Header.RawValue)) 32 | }) 33 | } 34 | }) 35 | } 36 | -------------------------------------------------------------------------------- /internal/extproc/translator/translator_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 
5 | 6 | package translator 7 | 8 | import ( 9 | "testing" 10 | 11 | extprocv3 "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestIsGoodStatusCode(t *testing.T) { 16 | for _, s := range []int{200, 201, 299} { 17 | require.True(t, isGoodStatusCode(s)) 18 | } 19 | for _, s := range []int{100, 300, 400, 500} { 20 | require.False(t, isGoodStatusCode(s)) 21 | } 22 | } 23 | 24 | func TestSetContentLength(t *testing.T) { 25 | hm := &extprocv3.HeaderMutation{} 26 | setContentLength(hm, nil) 27 | require.Len(t, hm.SetHeaders, 1) 28 | require.Equal(t, "0", string(hm.SetHeaders[0].Header.RawValue)) 29 | 30 | hm = &extprocv3.HeaderMutation{} 31 | setContentLength(hm, []byte("body")) 32 | require.Len(t, hm.SetHeaders, 1) 33 | require.Equal(t, "4", string(hm.SetHeaders[0].Header.RawValue)) 34 | } 35 | -------------------------------------------------------------------------------- /internal/llmcostcel/cel.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | // Package llmcostcel provides functions to create and evaluate CEL programs to calculate costs. 7 | // 8 | // This exists as a separate package to be used both in the controller to validate the expression 9 | // and in the external processor to evaluate the expression. 
10 | package llmcostcel 11 | 12 | import ( 13 | "fmt" 14 | 15 | "github.com/google/cel-go/cel" 16 | ) 17 | 18 | const ( 19 | celModelNameKey = "model" 20 | celBackendKey = "backend" 21 | celInputTokensKey = "input_tokens" 22 | celOutputTokensKey = "output_tokens" 23 | celTotalTokensKey = "total_tokens" 24 | ) 25 | 26 | var env *cel.Env 27 | 28 | func init() { 29 | var err error 30 | env, err = cel.NewEnv( 31 | cel.Variable(celModelNameKey, cel.StringType), 32 | cel.Variable(celBackendKey, cel.StringType), 33 | cel.Variable(celInputTokensKey, cel.UintType), 34 | cel.Variable(celOutputTokensKey, cel.UintType), 35 | cel.Variable(celTotalTokensKey, cel.UintType), 36 | ) 37 | if err != nil { 38 | panic(fmt.Sprintf("cannot create CEL environment: %v", err)) 39 | } 40 | } 41 | 42 | // NewProgram creates a new CEL program from the given expression. 43 | func NewProgram(expr string) (prog cel.Program, err error) { 44 | ast, issues := env.Compile(expr) 45 | if issues != nil && issues.Err() != nil { 46 | err = issues.Err() 47 | return nil, fmt.Errorf("cannot compile CEL expression: %w", err) 48 | } 49 | prog, err = env.Program(ast) 50 | if err != nil { 51 | return nil, fmt.Errorf("cannot create CEL program: %w", err) 52 | } 53 | 54 | // Sanity check by evaluating the expression with some dummy values. 55 | _, err = EvaluateProgram(prog, "dummy", "dummy", 0, 0, 0) 56 | if err != nil { 57 | return nil, fmt.Errorf("failed to evaluate CEL expression: %w", err) 58 | } 59 | return prog, nil 60 | } 61 | 62 | // EvaluateProgram evaluates the given CEL program with the given variables. 
63 | func EvaluateProgram(prog cel.Program, modelName, backend string, inputTokens, outputTokens, totalTokens uint32) (uint64, error) { 64 | out, _, err := prog.Eval(map[string]interface{}{ 65 | celModelNameKey: modelName, 66 | celBackendKey: backend, 67 | celInputTokensKey: inputTokens, 68 | celOutputTokensKey: outputTokens, 69 | celTotalTokensKey: totalTokens, 70 | }) 71 | if err != nil || out == nil { 72 | return 0, fmt.Errorf("failed to evaluate CEL expression: %w", err) 73 | } 74 | 75 | switch out.Type() { 76 | case cel.IntType: 77 | result := out.Value().(int64) 78 | if result < 0 { 79 | return 0, fmt.Errorf("CEL expression result is negative (%d)", result) 80 | } 81 | return uint64(result), nil 82 | case cel.UintType: 83 | return out.Value().(uint64), nil 84 | default: 85 | return 0, fmt.Errorf("CEL expression result is not an integer, got %v", out.Type()) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /internal/llmcostcel/cel_test.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package llmcostcel 7 | 8 | import ( 9 | "sync" 10 | "testing" 11 | 12 | "github.com/stretchr/testify/require" 13 | ) 14 | 15 | func TestNewProgram(t *testing.T) { 16 | t.Run("invalid", func(t *testing.T) { 17 | _, err := NewProgram("1 +") 18 | require.Error(t, err) 19 | }) 20 | t.Run("int", func(t *testing.T) { 21 | _, err := NewProgram("1 + 1") 22 | require.NoError(t, err) 23 | }) 24 | t.Run("uint", func(t *testing.T) { 25 | _, err := NewProgram("uint(1) + uint(1)") 26 | require.NoError(t, err) 27 | }) 28 | t.Run("variables", func(t *testing.T) { 29 | prog, err := NewProgram("model == 'cool_model' ? 
input_tokens * output_tokens : total_tokens") 30 | require.NoError(t, err) 31 | v, err := EvaluateProgram(prog, "cool_model", "cool_backend", 100, 2, 3) 32 | require.NoError(t, err) 33 | require.Equal(t, uint64(200), v) 34 | 35 | v, err = EvaluateProgram(prog, "not_cool_model", "cool_backend", 100, 2, 3) 36 | require.NoError(t, err) 37 | require.Equal(t, uint64(3), v) 38 | }) 39 | 40 | t.Run("uint", func(t *testing.T) { 41 | _, err := NewProgram("uint(1)-uint(1200)") 42 | require.ErrorContains(t, err, "failed to evaluate CEL expression: failed to evaluate CEL expression: unsigned integer overflow") 43 | }) 44 | 45 | t.Run("ensure concurrency safety", func(t *testing.T) { 46 | // Ensure that the program can be evaluated concurrently. 47 | var wg sync.WaitGroup 48 | wg.Add(100) 49 | for i := 0; i < 100; i++ { 50 | go func() { 51 | defer wg.Done() 52 | _, err := NewProgram("model == 'cool_model' ? input_tokens * output_tokens : total_tokens") 53 | require.NoError(t, err) 54 | }() 55 | } 56 | wg.Wait() 57 | }) 58 | } 59 | 60 | func TestEvaluateProgram(t *testing.T) { 61 | t.Run("signed integer negative", func(t *testing.T) { 62 | prog, err := NewProgram("int(input_tokens) - int(output_tokens)") 63 | require.NoError(t, err) 64 | _, err = EvaluateProgram(prog, "cool_model", "cool_backend", 100, 2000, 3) 65 | require.ErrorContains(t, err, "CEL expression result is negative (-1900)") 66 | }) 67 | t.Run("unsigned integer overflow", func(t *testing.T) { 68 | prog, err := NewProgram("input_tokens - output_tokens") 69 | require.NoError(t, err) 70 | _, err = EvaluateProgram(prog, "cool_model", "cool_backend", 100, 2000, 3) 71 | require.ErrorContains(t, err, "failed to evaluate CEL expression: unsigned integer overflow") 72 | }) 73 | t.Run("ensure concurrency safety", func(t *testing.T) { 74 | prog, err := NewProgram("model == 'cool_model' ? 
input_tokens * output_tokens : total_tokens") 75 | require.NoError(t, err) 76 | 77 | // Ensure that the program can be evaluated concurrently. 78 | var wg sync.WaitGroup 79 | wg.Add(100) 80 | for i := 0; i < 100; i++ { 81 | go func() { 82 | defer wg.Done() 83 | v, err := EvaluateProgram(prog, "cool_model", "cool_backend", 100, 2, 3) 84 | require.NoError(t, err) 85 | require.Equal(t, uint64(200), v) 86 | }() 87 | } 88 | wg.Wait() 89 | }) 90 | } 91 | -------------------------------------------------------------------------------- /internal/testing/eventchan.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package internaltesting 7 | 8 | import ( 9 | "context" 10 | "testing" 11 | "time" 12 | 13 | "sigs.k8s.io/controller-runtime/pkg/client" 14 | "sigs.k8s.io/controller-runtime/pkg/event" 15 | ) 16 | 17 | // NewControllerEventChanImpl is a test implementation of the controller event channels that are used in 18 | // the cross-controller communication. 19 | type NewControllerEventChanImpl[T client.Object] struct { 20 | Ch chan event.GenericEvent 21 | } 22 | 23 | // NewControllerEventChan creates a new SyncFnImpl. 24 | func NewControllerEventChan[T client.Object]() *NewControllerEventChanImpl[T] { 25 | return &NewControllerEventChanImpl[T]{Ch: make(chan event.GenericEvent, 100)} 26 | } 27 | 28 | // RequireItemsEventually returns a copy of the items. 
29 | func (s *NewControllerEventChanImpl[T]) RequireItemsEventually(t *testing.T, exp int) []T { 30 | ctx, cancel := context.WithTimeout(t.Context(), 5*time.Second) 31 | defer cancel() 32 | var ret []T 33 | for len(ret) < exp { 34 | select { 35 | case <-ctx.Done(): 36 | t.Fatalf("timed out waiting for %d items, got %d", exp, len(ret)) 37 | case item := <-s.Ch: 38 | ret = append(ret, item.Object.(T)) 39 | default: 40 | } 41 | } 42 | return ret 43 | } 44 | -------------------------------------------------------------------------------- /internal/version/version.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package version 7 | 8 | // Version is the current version of build. This is populated by the Go linker. 9 | var Version = "dev" 10 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-crds-helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: v2 7 | name: ai-gateway-crds-helm 8 | description: The Helm chart for Envoy AI Gateway CRD 9 | type: application 10 | 11 | # Note: in general version means the version of the chart, not the version of the application. 12 | # version must be a semver version number like 0.0.1-rc1, 1.0.0, etc., vs appVersion can be anything. 13 | # 14 | # Since we release both the chart and the application together from the same repo, we don't need to differentiate 15 | # between the two versions and use the release tag for both. 
However, on main branch, we use "latest" as the appVersion 16 | # to follow the convention of container images vs v0.0.0-latest for the chart version. 17 | version: v0.0.0-latest 18 | appVersion: "latest" 19 | icon: https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/heads/main/site/static/img/logo.svg 20 | 21 | maintainers: 22 | - name: envoy-ai-gateway-maintainers 23 | url: https://github.com/envoyproxy/ai-gateway/blob/main/CODEOWNERS 24 | 25 | keywords: 26 | - gateway-api 27 | - envoyproxy 28 | - envoy-gateway 29 | - eg 30 | - ai-gateway 31 | - ai 32 | 33 | home: https://aigateway.envoyproxy.io/ 34 | 35 | sources: 36 | - https://github.com/envoyproxy/ai-gateway 37 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-crds-helm/values.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: v2 7 | name: ai-gateway-helm 8 | description: The Helm chart for Envoy AI Gateway 9 | type: application 10 | 11 | # Note: in general version means the version of the chart, not the version of the application. 12 | # version must be a semver version number like 0.0.1-rc1, 1.0.0, etc., vs appVersion can be anything. 13 | # 14 | # Since we release both the chart and the application together from the same repo, we don't need to differentiate 15 | # between the two versions and use the release tag for both. However, on main branch, we use "latest" as the appVersion 16 | # to follow the convention of container images vs v0.0.0-latest for the chart version. 
17 | version: v0.0.0-latest 18 | appVersion: "latest" 19 | icon: https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/heads/main/site/static/img/logo.svg 20 | 21 | maintainers: 22 | - name: envoy-ai-gateway-maintainers 23 | url: https://github.com/envoyproxy/ai-gateway/blob/main/CODEOWNERS 24 | 25 | keywords: 26 | - gateway-api 27 | - envoyproxy 28 | - envoy-gateway 29 | - eg 30 | - ai-gateway 31 | - ai 32 | 33 | home: https://aigateway.envoyproxy.io/ 34 | 35 | sources: 36 | - https://github.com/envoyproxy/ai-gateway 37 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/templates/NOTES.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/manifests/charts/ai-gateway-helm/templates/NOTES.txt -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {* 2 | Copyright Envoy AI Gateway Authors 3 | SPDX-License-Identifier: Apache-2.0 4 | The full text of the Apache license is available in the LICENSE file at 5 | the root of the repo. 6 | *} 7 | 8 | {{/* 9 | Expand the name of the chart. 10 | */}} 11 | {{- define "ai-gateway-helm.name" -}} 12 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} 13 | {{- end }} 14 | 15 | {{/* 16 | Create a default fully qualified app name. 17 | We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 18 | If release name contains chart name it will be used as a full name. 
19 | */}} 20 | {{- define "ai-gateway-helm.controller.fullname" -}} 21 | {{- if .Values.controller.fullnameOverride }} 22 | {{- .Values.controller.fullnameOverride | trunc 63 | trimSuffix "-" }} 23 | {{- else }} 24 | {{- $name := default .Chart.Name .Values.controller.nameOverride }} 25 | {{- if contains $name .Release.Name }} 26 | {{- .Release.Name | trunc 63 | trimSuffix "-" }} 27 | {{- else }} 28 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} 29 | {{- end }} 30 | {{- end }} 31 | {{- end }} 32 | 33 | {{/* 34 | Create chart name and version as used by the chart label. 35 | */}} 36 | {{- define "ai-gateway-helm.chart" -}} 37 | {{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} 38 | {{- end }} 39 | 40 | {{/* 41 | Common labels 42 | */}} 43 | {{- define "ai-gateway-helm.labels" -}} 44 | helm.sh/chart: {{ include "ai-gateway-helm.chart" . }} 45 | {{ include "ai-gateway-helm.controller.selectorLabels" . }} 46 | {{- if .Chart.AppVersion }} 47 | app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} 48 | {{- end }} 49 | app.kubernetes.io/managed-by: {{ .Release.Service }} 50 | {{- end }} 51 | 52 | {{/* 53 | Selector labels 54 | */}} 55 | {{- define "ai-gateway-helm.controller.selectorLabels" -}} 56 | app.kubernetes.io/name: {{ include "ai-gateway-helm.name" . }} 57 | app.kubernetes.io/instance: {{ .Release.Name }} 58 | {{- end }} 59 | 60 | {{/* 61 | Create the name of the service account to use 62 | */}} 63 | {{- define "ai-gateway-helm.controller.serviceAccountName" -}} 64 | {{- if .Values.controller.serviceAccount.create }} 65 | {{- default (include "ai-gateway-helm.controller.fullname" .) 
.Values.controller.serviceAccount.name }} 66 | {{- else }} 67 | {{- default "default" .Values.controller.serviceAccount.name }} 68 | {{- end }} 69 | {{- end }} 70 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/templates/service.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: v1 7 | kind: Service 8 | metadata: 9 | name: {{ include "ai-gateway-helm.controller.fullname" . }} 10 | labels: 11 | {{- include "ai-gateway-helm.labels" . | nindent 4 }} 12 | spec: 13 | type: {{ .Values.controller.service.type }} 14 | {{- with .Values.controller.service.ports }} 15 | ports: 16 | {{- toYaml . | nindent 4 }} 17 | {{- end }} 18 | selector: 19 | {{- include "ai-gateway-helm.controller.selectorLabels" . | nindent 4 }} 20 | -------------------------------------------------------------------------------- /manifests/charts/ai-gateway-helm/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | {{ if .Values.controller.serviceAccount.create -}} 7 | apiVersion: v1 8 | kind: ServiceAccount 9 | metadata: 10 | name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} 11 | labels: 12 | {{- include "ai-gateway-helm.labels" . | nindent 4 }} 13 | {{- with .Values.controller.serviceAccount.annotations }} 14 | annotations: 15 | {{- toYaml . 
| nindent 4 }} 16 | {{- end }} 17 | --- 18 | apiVersion: rbac.authorization.k8s.io/v1 19 | kind: ClusterRole 20 | metadata: 21 | name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} 22 | rules: 23 | - apiGroups: [""] 24 | resources: 25 | - secrets 26 | - pods # TODO: this can be limited to EG system namespace, not the cluster level. 27 | verbs: 28 | - '*' 29 | - apiGroups: ["apps"] 30 | resources: 31 | - deployments # TODO: this can be limited to EG system namespace, not the cluster level. 32 | verbs: 33 | - '*' 34 | - apiGroups: 35 | - inference.networking.x-k8s.io 36 | resources: 37 | - '*' 38 | verbs: 39 | - '*' 40 | - apiGroups: 41 | - gateway.networking.k8s.io 42 | resources: 43 | - '*' 44 | verbs: 45 | - '*' 46 | - apiGroups: 47 | - aigateway.envoyproxy.io 48 | resources: 49 | - '*' 50 | verbs: 51 | - '*' 52 | - apiGroups: 53 | - gateway.envoyproxy.io 54 | resources: 55 | - '*' 56 | verbs: 57 | - '*' 58 | - apiGroups: 59 | - coordination.k8s.io 60 | resources: 61 | - leases 62 | verbs: 63 | - get 64 | - watch 65 | - list 66 | - create 67 | - update 68 | - apiGroups: 69 | - "" 70 | resources: 71 | - events 72 | verbs: 73 | - create 74 | - patch 75 | - apiGroups: 76 | - admissionregistration.k8s.io 77 | resources: 78 | - mutatingwebhookconfigurations 79 | verbs: 80 | - get 81 | - list 82 | - watch 83 | - apiGroups: 84 | - admissionregistration.k8s.io 85 | resources: 86 | - mutatingwebhookconfigurations 87 | resourceNames: 88 | - 'envoy-ai-gateway-gateway-pod-mutator.{{ .Release.Namespace }}' 89 | verbs: 90 | - update 91 | - patch 92 | --- 93 | apiVersion: rbac.authorization.k8s.io/v1 94 | kind: ClusterRoleBinding 95 | metadata: 96 | name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} 97 | roleRef: 98 | apiGroup: rbac.authorization.k8s.io 99 | kind: ClusterRole 100 | name: {{ include "ai-gateway-helm.controller.serviceAccountName" . 
}} 101 | subjects: 102 | - kind: ServiceAccount 103 | name: {{ include "ai-gateway-helm.controller.serviceAccountName" . }} 104 | namespace: '{{ .Release.Namespace }}' 105 | {{- end }} 106 | -------------------------------------------------------------------------------- /manifests/envoy-gateway-config/config.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: v1 7 | kind: ConfigMap 8 | metadata: 9 | name: envoy-gateway-config 10 | namespace: "envoy-gateway-system" 11 | labels: 12 | helm.sh/chart: gateway-helm-v0.0.0-latest 13 | app.kubernetes.io/name: gateway-helm 14 | app.kubernetes.io/instance: eg 15 | app.kubernetes.io/version: "latest" 16 | app.kubernetes.io/managed-by: Helm 17 | data: 18 | envoy-gateway.yaml: | 19 | apiVersion: gateway.envoyproxy.io/v1alpha1 20 | kind: EnvoyGateway 21 | gateway: 22 | controllerName: gateway.envoyproxy.io/gatewayclass-controller 23 | logging: 24 | level: 25 | default: info 26 | provider: 27 | kubernetes: 28 | rateLimitDeployment: 29 | patch: 30 | type: StrategicMerge 31 | value: 32 | spec: 33 | template: 34 | spec: 35 | containers: 36 | - imagePullPolicy: IfNotPresent 37 | name: envoy-ratelimit 38 | image: docker.io/envoyproxy/ratelimit:60d8e81b 39 | type: Kubernetes 40 | extensionApis: 41 | enableEnvoyPatchPolicy: true 42 | enableBackend: true 43 | extensionManager: 44 | hooks: 45 | xdsTranslator: 46 | post: 47 | - VirtualHost 48 | - Translation 49 | service: 50 | fqdn: 51 | hostname: ai-gateway-controller.envoy-ai-gateway-system.svc.cluster.local 52 | port: 1063 53 | rateLimit: 54 | backend: 55 | type: Redis 56 | redis: 57 | url: redis.redis-system.svc.cluster.local:6379 58 | --- 59 | -------------------------------------------------------------------------------- 
/manifests/envoy-gateway-config/rbac.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | --- 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: ClusterRole 9 | metadata: 10 | name: list-ai-gateway-controller 11 | rules: 12 | - apiGroups: 13 | - "aigateway.envoyproxy.io" 14 | resources: 15 | - "aigatewayroutes" 16 | - "aiservicebackends" 17 | - "backendsecuritypolicies" 18 | verbs: 19 | - "get" 20 | - "list" 21 | - "watch" 22 | --- 23 | apiVersion: rbac.authorization.k8s.io/v1 24 | kind: ClusterRoleBinding 25 | metadata: 26 | name: list-ai-gateway-controller 27 | roleRef: 28 | apiGroup: rbac.authorization.k8s.io 29 | kind: ClusterRole 30 | name: list-ai-gateway-controller 31 | subjects: 32 | - kind: ServiceAccount 33 | name: envoy-gateway 34 | namespace: envoy-gateway-system 35 | --- 36 | -------------------------------------------------------------------------------- /manifests/envoy-gateway-config/redis.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | # This is a simple example of a Redis deployment that is used 7 | # by the default Envoy Gateway setting in config.yaml. 8 | # 9 | # This is only necessary if you want to use the rate limit feature. 
10 | --- 11 | kind: Namespace 12 | apiVersion: v1 13 | metadata: 14 | name: redis-system 15 | --- 16 | apiVersion: v1 17 | kind: Service 18 | metadata: 19 | name: redis 20 | namespace: redis-system 21 | labels: 22 | app: redis 23 | spec: 24 | ports: 25 | - name: redis 26 | port: 6379 27 | selector: 28 | app: redis 29 | --- 30 | apiVersion: apps/v1 31 | kind: Deployment 32 | metadata: 33 | name: redis 34 | namespace: redis-system 35 | spec: 36 | replicas: 1 37 | selector: 38 | matchLabels: 39 | app: redis 40 | template: 41 | metadata: 42 | labels: 43 | app: redis 44 | spec: 45 | containers: 46 | - image: redis:alpine 47 | imagePullPolicy: IfNotPresent 48 | name: redis 49 | ports: 50 | - name: redis 51 | containerPort: 6379 52 | restartPolicy: Always 53 | -------------------------------------------------------------------------------- /netlify.toml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | [build] 7 | base = "site/" 8 | publish = "build/" 9 | command = "npm run build" 10 | 11 | [context.deploy-preview] 12 | base = "site/" 13 | publish = "build/" 14 | command = "npm run build" 15 | 16 | [[headers]] 17 | for = "/*" 18 | [headers.values] 19 | X-Frame-Options = "DENY" 20 | X-Content-Type-Options = "nosniff" 21 | Referrer-Policy = "strict-origin-when-cross-origin" 22 | Permissions-Policy = "camera=(), microphone=(), geolocation=()" 23 | Strict-Transport-Security = "max-age=31536000; includeSubDomains" 24 | Content-Security-Policy = """ 25 | default-src 'self'; 26 | frame-src 'self' https://www.youtube.com https://app.netlify.com; 27 | script-src 'self' 'unsafe-inline' 'unsafe-eval' https://www.googletagmanager.com; 28 | style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; 29 | img-src 'self' data: https: blob:; 30 | font-src 'self' data: https://fonts.gstatic.com; 31 | connect-src 'self' https:; 32 | manifest-src 'self'; 33 | media-src 'self'; 34 | worker-src 'self' blob:; 35 | frame-ancestors 'none'; 36 | """ 37 | -------------------------------------------------------------------------------- /pre-commit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | make precommit 3 | -------------------------------------------------------------------------------- /site/README.md: -------------------------------------------------------------------------------- 1 | # Website 2 | 3 | This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator. 4 | 5 | ### Local Development 6 | 7 | ### Requirements {#requirements} 8 | 9 | - [Node.js](https://nodejs.org/en/download/) version 18.0 or above (which can be checked by running `node -v`). You can use [nvm](https://github.com/nvm-sh/nvm) to manage multiple Node versions installed on a single machine. 10 | - When installing Node.js, you are recommended to check all checkboxes related to dependencies. 
11 | 12 | ### Install 13 | 14 | ``` 15 | npm install 16 | ``` 17 | 18 | ### Run locally 19 | 20 | #### NPX 21 | ``` 22 | npx docusaurus start 23 | ``` 24 | 25 | #### NPM 26 | ``` 27 | npm run start 28 | ``` 29 | 30 | #### **When to Use Which?** 31 | - Use npx docusaurus start: 32 | - For quick tests or temporary runs without installing the Docusaurus CLI. 33 | - If you want to use the latest version of Docusaurus globally. 34 | - Use npm run start: 35 | - For consistent and reproducible builds, ensuring you use the local version of Docusaurus. 36 | - In your development workflow, where the start script is part of your project setup. 37 | -------------------------------------------------------------------------------- /site/blog/2024-11-14-kubecon-end-user-keynote.md: -------------------------------------------------------------------------------- 1 | --- 2 | slug: kubecon-end-user-keynote-2024 3 | title: End User Keynote at KubeCon 2024 4 | authors: [missberg, alexagriffith] 5 | tags: [news, presentations] 6 | --- 7 | 8 | At KubeCon North America 2024, Alexa Griffith had the opportunity to present the End User Keynote on **Centralizing & Simplifying Enterprise AI Workflows with Envoy AI Gateway**. 9 | 10 | 11 | 12 | 13 | 14 | ## About the presentation 15 | 16 | As Generative AI reshapes the industry, the demands on AI platforms have rapidly evolved. Organizations now require centralized infrastructure to manage and optimize access to self-trained, open source, and commercial AI models at scale. 17 | 18 | In this talk, we introduce the Envoy AI Gateway, a collaborative open source effort led by engineers from [Bloomberg](https://www.bloomberg.com/company/what-we-do/engineering-cto/) and [Tetrate](https://tetrate.io). 19 | 20 | Learn how the Envoy AI Gateway, which is built atop Envoy Gateway and Envoy Proxy, provides a unified, scalable solution for model access, usage limiting, and upstream authorization. 
21 | -------------------------------------------------------------------------------- /site/blog/authors.yml: -------------------------------------------------------------------------------- 1 | missberg: 2 | name: Erica Hughberg 3 | title: Envoy AI Gateway Maintainer - Tetrate 4 | url: https://github.com/missBerg 5 | image_url: https://github.com/missBerg.png 6 | page: true 7 | socials: 8 | github: missBerg 9 | linkedin: https://www.linkedin.com/in/ericahughberg/ 10 | alexagriffith: 11 | name: Alexa Griffith 12 | title: Senior Software Engineer, Bloomberg 13 | url: https://github.com/alexagriffith 14 | image_url: https://github.com/alexagriffith.png 15 | page: true 16 | socials: 17 | github: alexagriffith 18 | linkedin: https://www.linkedin.com/in/alexa-griffith/ 19 | dansun: 20 | name: Dan Sun 21 | title: Envoy AI Gateway Maintainer - Bloomberg 22 | url: https://github.com/yuzisun 23 | image_url: https://github.com/yuzisun.png 24 | page: true 25 | socials: 26 | github: yuzisun 27 | linkedin: https://www.linkedin.com/in/dan-sun-44a21b5/ 28 | aaronchoo: 29 | name: Aaron Choo 30 | title: Envoy AI Gateway Maintainer - Bloomberg 31 | url: https://github.com/aabchoo 32 | image_url: https://github.com/aabchoo.png 33 | page: true 34 | socials: 35 | github: https://github.com/aabchoo 36 | linkedin: https://www.linkedin.com/in/aaron-choo/ 37 | yaoweng: 38 | name: Yao Weng 39 | title: Envoy AI Gateway Maintainer - Bloomberg 40 | url: https://github.com/wengyao04 41 | image_url: https://github.com/wengyao04.png 42 | page: true 43 | socials: 44 | github: https://github.com/wengyao04 45 | linkedin: https://www.linkedin.com/in/yao-weng-ab091442/ 46 | mathetake: 47 | name: Takeshi Yoneda 48 | title: Envoy AI Gateway Maintainer - Tetrate 49 | url: https://github.com/mathetake 50 | image_url: https://github.com/mathetake.png 51 | page: true 52 | socials: 53 | github: https://github.com/mathetake 54 | linkedin: https://www.linkedin.com/in/mathetake/ 55 | 
-------------------------------------------------------------------------------- /site/blog/tags.yml: -------------------------------------------------------------------------------- 1 | news: 2 | label: News 3 | permalink: /news 4 | description: Project News 5 | presentations: 6 | label: Presentations 7 | permalink: /presentations 8 | description: Project Presentations 9 | releases: 10 | label: Releases 11 | permalink: /releases 12 | description: Project Releases 13 | -------------------------------------------------------------------------------- /site/crd-ref-docs/config-core.yaml: -------------------------------------------------------------------------------- 1 | processor: 2 | # RE2 regular expressions describing types that should be excluded from the generated documentation. 3 | ignoreTypes: 4 | - "(EnvoyProxy)List$" 5 | # RE2 regular expressions describing type fields that should be excluded from the generated documentation. 6 | ignoreFields: 7 | - "TypeMeta$" 8 | customMarkers: 9 | - name: "notImplementedHide" 10 | target: "field" 11 | 12 | render: 13 | # Version of Kubernetes to use when generating links to Kubernetes API documentation. 
14 | kubernetesVersion: 1.29 15 | knownTypes: 16 | - name: BackendObjectReference 17 | package: sigs.k8s.io/gateway-api/apis/v1 18 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1.BackendObjectReference 19 | - name: SecretObjectReference 20 | package: sigs.k8s.io/gateway-api/apis/v1 21 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1.SecretObjectReference 22 | - name: LocalPolicyTargetReferenceWithSectionName 23 | package: sigs.k8s.io/gateway-api/apis/v1alpha2 24 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1alpha2.LocalPolicyTargetReferenceWithSectionName 25 | - name: LocalPolicyTargetReference 26 | package: sigs.k8s.io/gateway-api/apis/v1alpha2 27 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1alpha2.LocalPolicyTargetReference 28 | - name: Duration 29 | package: sigs.k8s.io/gateway-api/apis/v1 30 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1.Duration 31 | - name: PolicyStatus 32 | package: sigs.k8s.io/gateway-api/apis/v1alpha2 33 | link: https://gateway-api.sigs.k8s.io/reference/spec/#gateway.networking.k8s.io/v1alpha2.PolicyStatus 34 | - name: ExtProc 35 | package: github.com/envoyproxy/gateway/api/v1alpha1 36 | link: https://gateway.envoyproxy.io/docs/api/extension_types/#extproc 37 | - name: OIDC 38 | package: github.com/envoyproxy/gateway/api/v1alpha1 39 | link: https://gateway.envoyproxy.io/docs/api/extension_types/#oidc 40 | 41 | navigation: 42 | includeTOC: true 43 | includeBackToTopLinks: true 44 | -------------------------------------------------------------------------------- /site/crd-ref-docs/templates/README: -------------------------------------------------------------------------------- 1 | Original from https://github.com/elastic/crd-ref-docs/tree/9a3105b6ca763ea03393fa6f396046ad7b5d4e38/templates/markdown 2 | 
-------------------------------------------------------------------------------- /site/crd-ref-docs/templates/gv_details.tpl: -------------------------------------------------------------------------------- 1 | {{- define "gvDetails" -}} 2 | {{- $gv := . -}} 3 | 4 | ## {{ $gv.GroupVersionString }} 5 | 6 | {{ $gv.Doc }} 7 | 8 | {{- if $gv.Kinds }} 9 | ## Resource Kinds 10 | 11 | ### Available Kinds 12 | {{- range $gv.SortedKinds }} 13 | - {{ $gv.TypeForKind . | markdownRenderTypeLink }} 14 | {{- end }} 15 | 16 | ### Kind Definitions 17 | {{- range $gv.SortedKinds }} 18 | {{- $type := $gv.TypeForKind . }} 19 | {{ template "type" $type }} 20 | {{- end }} 21 | {{- end }} 22 | 23 | {{- if $gv.Types }} 24 | ## Supporting Types 25 | 26 | ### Available Types 27 | {{- range $gv.SortedTypes }} 28 | {{- $type := . }} 29 | {{- $isKind := false }} 30 | {{- range $gv.Kinds }} 31 | {{- if eq . $type.Name }} 32 | {{- $isKind = true }} 33 | {{- end }} 34 | {{- end }} 35 | {{- if not $isKind }} 36 | - {{ markdownRenderTypeLink . }} 37 | {{- end }} 38 | {{- end }} 39 | 40 | ### Type Definitions 41 | {{- range $gv.SortedTypes }} 42 | {{- $type := . }} 43 | {{- $isKind := false }} 44 | {{- range $gv.Kinds }} 45 | {{- if eq . $type.Name }} 46 | {{- $isKind = true }} 47 | {{- end }} 48 | {{- end }} 49 | {{- if not $isKind }} 50 | {{ template "type" . }} 51 | {{- end }} 52 | {{- end }} 53 | {{- end }} 54 | 55 | {{- end -}} 56 | -------------------------------------------------------------------------------- /site/crd-ref-docs/templates/gv_list.tpl: -------------------------------------------------------------------------------- 1 | {{- define "gvList" -}} 2 | {{- $groupVersions := . -}} 3 | 4 | 5 | --- 6 | id: api_references 7 | title: API Reference 8 | toc_min_heading_level: 2 9 | toc_max_heading_level: 4 10 | --- 11 | 12 | {{ range $groupVersions }} 13 | {{ template "gvDetails" . 
}} 14 | {{ end }} 15 | 16 | {{- end -}} 17 | -------------------------------------------------------------------------------- /site/crd-ref-docs/templates/type.tpl: -------------------------------------------------------------------------------- 1 | {{- define "type" -}} 2 | {{- $type := . -}} 3 | {{- if markdownShouldRenderType $type -}} 4 | 5 | #### {{ $type.Name }} 6 | 7 | {{ if $type.IsAlias }}**Underlying type:** {{ markdownRenderTypeLink $type.UnderlyingType }}{{ end }} 8 | {{ if $type.References }} 9 | **Appears in:** 10 | {{- range $type.SortedReferences }} 11 | - {{ markdownRenderTypeLink . }} 12 | {{- end }} 13 | {{- end }} 14 | 15 | {{ $type.Doc }} 16 | 17 | {{ if $type.Members -}} 18 | 19 | ##### Fields 20 | 21 | {{ if $type.GVK -}} 22 | 28 | 29 | 35 | {{- end }} 36 | 37 | {{ range $type.Members -}} 38 | {{- if not .Markers.notImplementedHide -}} 39 | 48 | {{- end }} 49 | {{- end }} 50 | {{- end }} 51 | 52 | {{ if $type.EnumValues -}} 53 | ##### Possible Values 54 | 55 | {{ range $type.EnumValues -}} 56 | 62 | {{- end }} 63 | {{- end }} 64 | 65 | {{- end }} 66 | {{- end -}} 67 | -------------------------------------------------------------------------------- /site/crd-ref-docs/templates/type_members.tpl: -------------------------------------------------------------------------------- 1 | {{- define "type_members" -}} 2 | {{- $field := . -}} 3 | {{- if eq $field.Name "metadata" -}} 4 | Refer to Kubernetes API documentation for fields of `metadata`. 5 | {{- else -}} 6 | {{ markdownRenderFieldDoc $field.Doc | replace "\"" "`" }} 7 | {{- end -}} 8 | {{- end -}} 9 | -------------------------------------------------------------------------------- /site/docs/capabilities/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: capabilities 3 | title: Capabilities 4 | sidebar_position: 3 5 | --- 6 | 7 | # Envoy AI Gateway Capabilities 8 | 9 | Welcome to the Envoy AI Gateway capabilities documentation! 
This section provides detailed information about the various features and capabilities that Envoy AI Gateway offers to help you manage and optimize your AI/LLM traffic. 10 | 11 | -------------------------------------------------------------------------------- /site/docs/cli/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: cli 3 | title: "Envoy AI Gateway CLI" 4 | sidebar_position: 4 5 | --- 6 | 7 | # Envoy AI Gateway CLI (aigw) 8 | 9 | The Envoy AI Gateway CLI, `aigw`, is a command-line interface that provides a set of useful tools for using the gateway. 10 | 11 | :::warning 12 | The CLI is experimental and currently under active development. 13 | ::: 14 | 15 | Currently, you can do the following with the `aigw` CLI: 16 | 17 | - **Run**: Run the Envoy AI Gateway locally as a standalone proxy with a given configuration file without any dependencies such as docker or Kubernetes. 18 | - **Translate**: Translate a given Envoy AI Gateway configuration file to an Envoy Gateway configuration file. 19 | -------------------------------------------------------------------------------- /site/docs/cli/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: aigwinstall 3 | title: Installation 4 | sidebar_position: 1 5 | --- 6 | 7 | 8 | To install the `aigw` CLI, run the following command (This may take a while): 9 | 10 | ```shell 11 | go install github.com/envoyproxy/ai-gateway/cmd/aigw@main 12 | ``` 13 | 14 | :::tip 15 | `go install` command installs a binary in the `$(go env GOPATH)/bin` directory. 16 | Make sure that the `$(go env GOPATH)/bin` directory is in your `PATH` environment variable. 
17 | 18 | For example, you can add the following line to your shell profile (e.g., `~/.bashrc`, `~/.zshrc`, etc.): 19 | ```sh 20 | export PATH=$PATH:$(go env GOPATH)/bin 21 | ``` 22 | ::: 23 | 24 | Now, you can check if the installation was successful by running the following command: 25 | 26 | ```sh 27 | aigw --help 28 | ``` 29 | 30 | This will display the help message for the `aigw` CLI like this: 31 | 32 | ``` 33 | Usage: aigw <command> [flags] 34 | 35 | Envoy AI Gateway CLI 36 | 37 | Flags: 38 | -h, --help Show context-sensitive help. 39 | 40 | Commands: 41 | version [flags] 42 | Show version. 43 | 44 | translate <path> ... [flags] 45 | Translate yaml files containing AI Gateway resources to Envoy Gateway and Kubernetes resources. The translated resources are written to stdout. 46 | 47 | run [<path>] [flags] 48 | Run the AI Gateway locally for given configuration. 49 | 50 | Run "aigw <command> --help" for more information on a command. 51 | ``` 52 | 53 | ## What's next? 54 | 55 | The following sections provide more information about each of the CLI commands: 56 | 57 | - [aigw run](./run.md): Run the AI Gateway locally for a given configuration. 58 | - [aigw translate](./translate.md): Translate AI Gateway resources to Envoy Gateway and Kubernetes resources. 59 | 60 | -------------------------------------------------------------------------------- /site/docs/cli/translate.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: aigtranslate 3 | title: aigw translate 4 | sidebar_position: 3 5 | --- 6 | 7 | # `aigw translate` 8 | 9 | ## Overview 10 | 11 | This command translates the AI Gateway resources defined in a YAML file to Envoy Gateway and Kubernetes resources. 12 | This can be useful when: 13 | * You want to understand how the AI Gateway resources are translated to Envoy Gateway and Kubernetes resources. 14 | * Deploying the AI Gateway resources to a Kubernetes cluster without running the Envoy AI Gateway. 
15 | * Note that not all functionality is available without the Envoy AI Gateway control plane. For example, OIDC credential rotation does not work without the control plane. 16 | 17 | You can check the help message via `aigw translate --help`: 18 | 19 | ``` 20 | Usage: aigw translate <path> ... [flags] 21 | 22 | Translate yaml files containing AI Gateway resources to Envoy Gateway and Kubernetes resources. The translated resources are written to stdout. 23 | 24 | Arguments: 25 | <path> ... Paths to yaml files to translate. 26 | 27 | Flags: 28 | -h, --help Show context-sensitive help. 29 | 30 | --debug Enable debug logging emitted to stderr. 31 | ``` 32 | 33 | ## Usage 34 | 35 | To translate the AI Gateway resources defined in a YAML file, say `config.yaml`, to Envoy Gateway and Kubernetes resources, run the following command: 36 | 37 | ```shell 38 | aigw translate config.yaml 39 | ``` 40 | 41 | This will output the translated resources to the standard output. You can redirect the output to a file if needed: 42 | 43 | ```shell 44 | aigw translate config.yaml > translated.yaml 45 | ``` 46 | -------------------------------------------------------------------------------- /site/docs/concepts/architecture/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: architecture 3 | title: Architecture 4 | sidebar_position: 2 5 | --- 6 | 7 | # Architecture 8 | 9 | This section provides a detailed look at the architectural components of Envoy AI Gateway. Understanding the architecture will help you better deploy, configure, and maintain your gateway installation. 10 | 11 | ## Overview 12 | 13 | Envoy AI Gateway follows a modern cloud-native architecture with distinct control and data planes. This separation of concerns allows for better scalability, maintainability, and flexibility in deployment options. 14 | 15 | Envoy AI Gateway integrates with Envoy Gateway for the control plane and Envoy Proxy for the data plane. 
16 | 17 | ## Key Concepts 18 | 19 | ### Control Plane 20 | A control plane is a component that manages the configuration of the data plane. We utilize Envoy Gateway as a central control plane, and Envoy AI Gateway works in conjunction with it to manage the data plane configuration. 21 | 22 | ### Data Plane 23 | The data plane is the component that sits in the request path and processes the requests. In the context of Envoy AI Gateway, the data plane consists of Envoy Proxy and the AI Gateway external processor that processes the AI requests. 24 | 25 | ### Token Rate Limiting 26 | The major AI model endpoints return usage metrics called "tokens" per HTTP request. These tokens represent the computational resources consumed by the request. One of the major features of Envoy AI Gateway is rate limiting based on token usage instead of standard "requests per second" style rate limiting. 27 | 28 | We call such rate limiting "Token Rate Limiting" in our context, and the metrics that represent the token usage are called "Token Usage" or "Used Tokens". 29 | 30 | ## In This Section 31 | 32 | 1. [System Architecture Overview](./system-architecture.md) 33 | - High-level architecture overview 34 | - Control and data plane separation 35 | - Component interactions 36 | 37 | 2. [Control Plane](./control-plane.md) 38 | - AI Gateway Controller 39 | - Envoy Gateway Controller 40 | - Configuration management 41 | - Resource orchestration 42 | 43 | 3. 
[Data Plane](./data-plane.md) 44 | - External Processor functionality 45 | - Request processing flow 46 | - Provider integration 47 | 48 | ## What's Next 49 | 50 | After understanding the architecture: 51 | - Check out our [Getting Started](../../getting-started/index.md) guide for hands-on experience 52 | -------------------------------------------------------------------------------- /site/docs/concepts/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: concepts 3 | title: Concepts 4 | sidebar_position: 3 5 | --- 6 | 7 | # Concepts 8 | 9 | Welcome to the Concepts section of Envoy AI Gateway documentation. This section will help you understand the core components and how they work together to manage AI traffic effectively. 10 | 11 | ## Architecture 12 | 13 | - [System Architecture Overview](./architecture/system-architecture.md) - High-level overview of control and data planes 14 | - [Control Plane](./architecture/control-plane.md) - How configuration and management works 15 | - [Data Plane](./architecture/data-plane.md) - Request processing and traffic flow 16 | 17 | ## Resources 18 | 19 | - [Resources Overview](./resources.md) - Understanding the AI Gateway resources and their relationships 20 | -------------------------------------------------------------------------------- /site/docs/concepts/resources.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: resources 3 | title: Resources 4 | sidebar_position: 2 5 | --- 6 | 7 | # Resources 8 | 9 | The Envoy AI Gateway uses several custom resources to manage AI traffic. 
Here's an overview of the key resources and how they relate to each other: 10 | 11 | ## Resource Reference 12 | 13 | | Resource | Purpose | API Reference | 14 | |----------|---------|---------------| 15 | | AIGatewayRoute | Defines unified API and routing rules for AI traffic | [AIGatewayRoute](../api/api.mdx#aigatewayroute) | 16 | | AIServiceBackend | Represents individual AI service backends | [AIServiceBackend](../api/api.mdx#aiservicebackend) | 17 | | BackendSecurityPolicy | Configures authentication for backend access | [BackendSecurityPolicy](../api/api.mdx#backendsecuritypolicy) | 18 | 19 | ## Core Resources 20 | 21 | ### AIGatewayRoute 22 | 23 | A resource that defines a unified AI API for a Gateway, allowing clients to interact with multiple AI backends using a single schema. 24 | - Specifies the input API schema for client requests 25 | - Contains routing rules to direct traffic to appropriate backends 26 | - Manages request/response transformations between different API schemas 27 | - Can track LLM request costs (like token usage) 28 | 29 | ### AIServiceBackend 30 | 31 | Represents a single AI service backend that handles traffic with a specific API schema. 32 | 33 | - Defines the output API schema the backend expects 34 | - References a Kubernetes Service or Envoy Gateway Backend 35 | - Can reference a BackendSecurityPolicy for authentication 36 | 37 | ### BackendSecurityPolicy 38 | 39 | Configures authentication and authorization rules for backend access. 40 | 41 | - API Key authentication 42 | - AWS credentials authentication 43 | 44 | ## Resource Relationships 45 | 46 | ```mermaid 47 | graph TD 48 | A[AIGatewayRoute] -->|references| B[AIServiceBackend] 49 | B -->|references| C[K8s Service/Backend] 50 | B -->|references| D[BackendSecurityPolicy] 51 | D -->|contains| E[API Key/AWS Credentials] 52 | ``` 53 | 54 | The AIGatewayRoute acts as the entry point, defining how client requests are processed and routed to one or more AIServiceBackends. 
Each AIServiceBackend can reference a BackendSecurityPolicy, which provides the necessary credentials for accessing the underlying AI service. 55 | -------------------------------------------------------------------------------- /site/docs/getting-started/connect-providers/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: connect-providers 3 | title: Connect Providers 4 | sidebar_position: 5 5 | --- 6 | 7 | # Connect Providers 8 | 9 | After setting up the basic AI Gateway with the mock backend, you can configure it to work with real AI model providers. This section will guide you through connecting different AI providers to your gateway. 10 | 11 | ## Available Providers 12 | 13 | Currently, Envoy AI Gateway supports the following providers: 14 | 15 | - [OpenAI](./openai.md) - Connect to OpenAI's GPT models 16 | - [AWS Bedrock](./aws-bedrock.md) - Access AWS Bedrock's suite of foundation models 17 | - [Azure OpenAI](./azure-openai.md) - Access Azure OpenAI's suite of foundation models 18 | 19 | ## Before You Begin 20 | 21 | Before configuring any provider: 22 | 23 | 1. Complete the [Basic Usage](../basic-usage.md) guide 24 | 2. Remove the basic configuration with the mock backend 25 | 26 | ```shell 27 | kubectl delete -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/basic.yaml 28 | 29 | kubectl wait pods --timeout=15s \ 30 | -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ 31 | -n envoy-gateway-system \ 32 | --for=delete 33 | ``` 34 | 35 | 3. 
Download configuration template 36 | 37 | ```shell 38 | curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/basic.yaml 39 | ``` 40 | 41 | ## Security Best Practices 42 | 43 | When configuring AI providers, keep these security considerations in mind: 44 | 45 | - Store credentials securely using Kubernetes secrets 46 | - Never commit API keys or credentials to version control 47 | - Regularly rotate your credentials 48 | - Use the principle of least privilege when setting up access 49 | - Monitor usage and set up appropriate rate limits 50 | 51 | ## Next Steps 52 | 53 | Choose your provider to get started: 54 | - [Connect OpenAI](./openai.md) 55 | - [Connect AWS Bedrock](./aws-bedrock.md) 56 | - [Connect Azure OpenAI](./azure-openai.md) 57 | -------------------------------------------------------------------------------- /site/docs/getting-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: getting-started 3 | title: Getting Started 4 | sidebar_position: 2 5 | --- 6 | 7 | # Getting Started with Envoy AI Gateway 8 | 9 | Welcome to the Envoy AI Gateway getting started guide! 10 | 11 | This guide will walk you through setting up and using Envoy AI Gateway, a tool for managing GenAI traffic using Envoy. 12 | 13 | ## Guide Structure 14 | 15 | This getting started guide is organized into several sections: 16 | 17 | 1. [Prerequisites](./prerequisites.md) 18 | - Setting up your Kubernetes cluster 19 | - Installing required tools 20 | - Setting up Envoy Gateway 21 | 22 | 2. [Installation](./installation.md) 23 | - Installing Envoy AI Gateway 24 | - Configuring the gateway 25 | - Verifying the installation 26 | 27 | 3. [Basic Usage](./basic-usage.md) 28 | - Deploying a basic configuration 29 | - Making your first request 30 | - Understanding the response format 31 | 32 | 4. 
[Connect Providers](./connect-providers) 33 | - Setting up OpenAI integration 34 | - Configuring AWS Bedrock 35 | - Managing credentials securely 36 | 37 | ## Quick Start 38 | 39 | If you're familiar with Kubernetes and want to get started quickly, run these commands to install Envoy Gateway, Envoy AI Gateway, and deploy a basic configuration: 40 | 41 | ```shell 42 | helm upgrade -i eg oci://docker.io/envoyproxy/gateway-helm \ 43 | --version v0.0.0-latest \ 44 | --namespace envoy-gateway-system \ 45 | --create-namespace 46 | 47 | helm upgrade -i aieg oci://docker.io/envoyproxy/ai-gateway-helm \ 48 | --version v0.0.0-latest \ 49 | --namespace envoy-ai-gateway-system \ 50 | --create-namespace 51 | 52 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/examples/basic/basic.yaml 53 | 54 | kubectl wait --timeout=2m -n envoy-gateway-system deployment/envoy-gateway --for=condition=Available 55 | kubectl wait --timeout=2m -n envoy-ai-gateway-system deployment/ai-gateway-controller --for=condition=Available 56 | ``` 57 | 58 | ### Make a request 59 | 60 | Check out Making a Request in the [Basic Usage Guide](./basic-usage.md) 61 | 62 | :::tip 63 | 64 | For detailed instructions and explanations, start with the [Prerequisites](./prerequisites.md) section. 65 | 66 | ::: 67 | 68 | ## Need Help? 
69 | 70 | If you run into any issues: 71 | - Join our [Community Slack](https://envoyproxy.slack.com/archives/C07Q4N24VAA) 72 | - File an issue on [GitHub](https://github.com/envoyproxy/ai-gateway/issues) 73 | -------------------------------------------------------------------------------- /site/docs/getting-started/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: installation 3 | title: Installation 4 | sidebar_position: 3 5 | --- 6 | 7 | import Tabs from '@theme/Tabs'; 8 | import TabItem from '@theme/TabItem'; 9 | 10 | This guide will walk you through installing Envoy AI Gateway and its required components. 11 | 12 | ## Installing Envoy AI Gateway 13 | 14 | The easiest way to install Envoy AI Gateway is using the Helm chart. First, install the AI Gateway Helm chart and wait for the deployment to be ready: 15 | 16 | ```shell 17 | helm upgrade -i aieg-crd oci://docker.io/envoyproxy/ai-gateway-crds-helm \ 18 | --version v0.0.0-latest \ 19 | --namespace envoy-ai-gateway-system \ 20 | --create-namespace 21 | 22 | helm upgrade -i aieg oci://docker.io/envoyproxy/ai-gateway-helm \ 23 | --version v0.0.0-latest \ 24 | --namespace envoy-ai-gateway-system \ 25 | --create-namespace 26 | 27 | kubectl wait --timeout=2m -n envoy-ai-gateway-system deployment/ai-gateway-controller --for=condition=Available 28 | ``` 29 | 30 | ## Configuring Envoy Gateway 31 | 32 | After installing Envoy AI Gateway, apply the AI Gateway-specific configuration to Envoy Gateway, restart the deployment, and wait for it to be ready: 33 | 34 | ```shell 35 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-config/redis.yaml 36 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-config/config.yaml 37 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/main/manifests/envoy-gateway-config/rbac.yaml 38 | 39 | kubectl 
rollout restart -n envoy-gateway-system deployment/envoy-gateway 40 | 41 | kubectl wait --timeout=2m -n envoy-gateway-system deployment/envoy-gateway --for=condition=Available 42 | ``` 43 | 44 | Note that the redis configuration is only used for the rate limiting feature. If you don't need rate limiting, you can skip the redis configuration, 45 | but you need to remove the relevant configuration in the `config.yaml` file as well. 46 | 47 | :::tip Verify Installation 48 | 49 | Check the status of the pods. All pods should be in the `Running` state with `Ready` status. 50 | 51 | Check AI Gateway pods: 52 | ```shell 53 | kubectl get pods -n envoy-ai-gateway-system 54 | ``` 55 | 56 | Check Envoy Gateway pods: 57 | ```shell 58 | kubectl get pods -n envoy-gateway-system 59 | ``` 60 | 61 | ::: 62 | 63 | ## Next Steps 64 | 65 | After completing the installation: 66 | - Continue to [Basic Usage](./basic-usage.md) to learn how to make your first request 67 | - Or jump to [Connect Providers](./connect-providers) to set up OpenAI and AWS Bedrock integration 68 | -------------------------------------------------------------------------------- /site/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "my-website", 3 | "version": "0.0.0", 4 | "private": true, 5 | "scripts": { 6 | "docusaurus": "docusaurus", 7 | "start": "docusaurus start", 8 | "build": "docusaurus build", 9 | "swizzle": "docusaurus swizzle", 10 | "deploy": "docusaurus deploy", 11 | "clear": "docusaurus clear", 12 | "serve": "docusaurus serve", 13 | "write-translations": "docusaurus write-translations", 14 | "write-heading-ids": "docusaurus write-heading-ids", 15 | "typecheck": "tsc" 16 | }, 17 | "dependencies": { 18 | "@docusaurus/core": "^3.7.0", 19 | "@docusaurus/preset-classic": "^3.7.0", 20 | "@docusaurus/theme-classic": "^3.8.0", 21 | "@docusaurus/theme-mermaid": "^3.7.0", 22 | "@mdx-js/react": "^3.0.1", 23 | "clsx": "^2.1.0", 24 | 
"prism-react-renderer": "^2.3.1", 25 | "react": "^19.0.0", 26 | "react-dom": "^19.0.0" 27 | }, 28 | "devDependencies": { 29 | "@docusaurus/module-type-aliases": "^3.8.0", 30 | "@docusaurus/tsconfig": "^3.7.0", 31 | "@docusaurus/types": "^3.8.0", 32 | "typescript": "~5.8.3" 33 | }, 34 | "browserslist": { 35 | "production": [ 36 | ">0.5%", 37 | "not dead", 38 | "not op_mini all" 39 | ], 40 | "development": [ 41 | "last 3 chrome version", 42 | "last 3 firefox version", 43 | "last 5 safari version" 44 | ] 45 | }, 46 | "engines": { 47 | "node": ">=18.0" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /site/sidebars.ts: -------------------------------------------------------------------------------- 1 | import type {SidebarsConfig} from '@docusaurus/plugin-content-docs'; 2 | 3 | // This runs in Node.js - Don't use client-side code here (browser APIs, JSX...) 4 | 5 | /** 6 | * Creating a sidebar enables you to: 7 | - create an ordered group of docs 8 | - render a sidebar for each doc of that group 9 | - provide next/previous navigation 10 | 11 | The sidebars can be generated from the filesystem, or explicitly defined here. 12 | 13 | Create as many sidebars as you want. 
14 | */ 15 | const sidebars: SidebarsConfig = { 16 | // By default, Docusaurus generates a sidebar from the docs folder structure 17 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}], 18 | 19 | // But you can create a sidebar manually 20 | /* 21 | tutorialSidebar: [ 22 | 'intro', 23 | 'hello', 24 | { 25 | type: 'category', 26 | label: 'Tutorial', 27 | items: ['tutorial-basics/create-a-document'], 28 | }, 29 | ], 30 | */ 31 | }; 32 | 33 | export default sidebars; 34 | -------------------------------------------------------------------------------- /site/src/components/ApiField.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | 4 | interface ApiFieldProps { 5 | name: string; 6 | type: string; 7 | required: string; 8 | description?: string; 9 | enumValues?: string[]; 10 | defaultValue?: string; 11 | } 12 | 13 | // Helper function to convert code content to HTML while preserving existing HTML 14 | const processDescription = (description: string): string => { 15 | // First handle triple backtick code blocks with optional language 16 | let processed = description.replace(/```(\w+)?\s*([\s\S]*?)```/g, (match, lang, codeContent) => { 17 | const language = lang || ''; 18 | const languageClass = language ? ` language-${language}` : ''; 19 | return `
${codeContent.trim()}
`; 20 | }); 21 | 22 | // Then handle single backtick inline code 23 | processed = processed.replace(/`([^`]+)`/g, '$1'); 24 | 25 | return processed; 26 | }; 27 | 28 | const ApiField: React.FC = ({ 29 | name, 30 | type, 31 | required, 32 | description, 33 | enumValues, 34 | defaultValue 35 | }) => { 36 | const isEnum = type === 'enum'; 37 | 38 | return ( 39 |
40 | {isEnum ? ( 41 | <> 42 |
43 | {name} 44 |
45 |
46 | {description && ( 47 |
48 | )} 49 |
50 | 51 | ) : ( 52 | <> 53 |
54 | {name} 55 | 59 | {required === "true" ? 'required' : 'optional'} 60 | 61 |
62 |
$1' 66 | ) 67 | }}/> 68 |
69 |
70 |
71 | {defaultValue && ( 72 |
73 | {defaultValue} 74 |
75 | )} 76 | {description && ( 77 |
78 | )} 79 | {enumValues && ( 80 |
81 | Enum:{' '} 82 | {enumValues.map(v => {v})} 83 |
84 | )} 85 |
86 | 87 | )} 88 |
89 | ); 90 | }; 91 | 92 | export default ApiField; 93 | -------------------------------------------------------------------------------- /site/src/components/HomepageFeatures/index.tsx: -------------------------------------------------------------------------------- 1 | import clsx from 'clsx'; 2 | import Heading from '@theme/Heading'; 3 | import styles from './styles.module.css'; 4 | 5 | type FeatureItem = { 6 | title: string; 7 | image: string; 8 | description: React.ReactElement; 9 | }; 10 | 11 | const FeatureList: FeatureItem[] = [ 12 | { 13 | title: 'Built together. Built in the open.', 14 | image: require('@site/static/img/1.png').default, 15 | description: ( 16 | <> 17 | Envoy AI Gateway is the result of the community coming together to address GenAI traffic handling needs using Envoy. 18 | 19 | ), 20 | }, 21 | { 22 | title: 'v0.1 Release now available', 23 | image: require('@site/static/img/3.png').default, 24 | description: ( 25 | <> 26 | The v0.1 Release of Envoy AI Gateway is now available. See the blog post and the release notes for more information. 27 | 28 | ), 29 | }, 30 | { 31 | title: 'Get involved in the community', 32 | image: require('@site/static/img/2.png').default, 33 | description: ( 34 | <> 35 | Join our community on Slack, join the conversation on GitHub, and attend our Thursday community meetings. See links in footer. 36 | 37 | ), 38 | }, 39 | ]; 40 | 41 | function Feature({title, image, description}: FeatureItem) { 42 | return ( 43 |
44 |
45 | {title} 46 |
47 |
48 | {title} 49 |

{description}

50 |
51 |
52 | ); 53 | } 54 | 55 | export default function HomepageFeatures(): React.ReactElement { 56 | return ( 57 |
58 |
59 |
60 | {FeatureList.map((props, idx) => ( 61 | 62 | ))} 63 |
64 |
65 |
66 | ); 67 | } 68 | -------------------------------------------------------------------------------- /site/src/components/HomepageFeatures/styles.module.css: -------------------------------------------------------------------------------- 1 | .features { 2 | display: flex; 3 | align-items: center; 4 | padding: 3rem 0; 5 | width: 100%; 6 | } 7 | 8 | .featureSvg { 9 | max-height: 120px; 10 | max-width: 120px; 11 | margin-bottom: 1.5rem; 12 | } 13 | -------------------------------------------------------------------------------- /site/src/pages/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import clsx from 'clsx'; 3 | import Link from '@docusaurus/Link'; 4 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext'; 5 | import Layout from '@theme/Layout'; 6 | import HomepageFeatures from '@site/src/components/HomepageFeatures'; 7 | import Heading from '@theme/Heading'; 8 | 9 | function HomepageHeader() { 10 | const {siteConfig} = useDocusaurusContext(); 11 | return ( 12 |
13 |
14 | Envoy AI Gateway 15 |

{siteConfig.tagline}

16 |
17 | 20 | Get Started 21 | 22 | 25 | View on GitHub 26 | 27 |
28 |
29 |
30 | ); 31 | } 32 | 33 | export default function Home(): React.ReactElement { 34 | const {siteConfig} = useDocusaurusContext(); 35 | return ( 36 | 39 | 40 |
41 | 42 |
43 |
44 | ); 45 | } 46 | -------------------------------------------------------------------------------- /site/src/theme/MDXComponents.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import MDXComponents from '@theme-original/MDXComponents'; 3 | import ApiField from '@site/src/components/ApiField'; 4 | 5 | export default { 6 | ...MDXComponents, 7 | ApiField, 8 | }; 9 | -------------------------------------------------------------------------------- /site/static/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/.nojekyll -------------------------------------------------------------------------------- /site/static/diagrams/upstream-auth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/diagrams/upstream-auth.png -------------------------------------------------------------------------------- /site/static/img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/1.png -------------------------------------------------------------------------------- /site/static/img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/2.png -------------------------------------------------------------------------------- /site/static/img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/3.png 
-------------------------------------------------------------------------------- /site/static/img/blog/0.1-release-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/blog/0.1-release-image.png -------------------------------------------------------------------------------- /site/static/img/control_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/control_plane.png -------------------------------------------------------------------------------- /site/static/img/data_plane.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/data_plane.png -------------------------------------------------------------------------------- /site/static/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/favicon.ico -------------------------------------------------------------------------------- /site/static/img/logo-white.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/static/img/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /site/static/img/social-card-envoy-ai-gw.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/envoyproxy/ai-gateway/d17719cd338fb7f9b63b6b378ffa979370f78e19/site/static/img/social-card-envoy-ai-gw.png -------------------------------------------------------------------------------- /site/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | // This file is not used in compilation. It is here just for a nice editor experience. 3 | "extends": "@docusaurus/tsconfig", 4 | "compilerOptions": { 5 | "baseUrl": "." 6 | }, 7 | "exclude": [".docusaurus", "build"] 8 | } 9 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/capabilities/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: capabilities 3 | title: Capabilities 4 | sidebar_position: 3 5 | --- 6 | 7 | # Envoy AI Gateway Capabilities 8 | 9 | Welcome to the Envoy AI Gateway capabilities documentation! This section provides detailed information about the various features and capabilities that Envoy AI Gateway offers to help you manage and optimize your AI/LLM traffic. 10 | 11 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/concepts/architecture/control-plane.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: control-plane 3 | title: Control Plane Explained 4 | sidebar_position: 3 5 | --- 6 | 7 | # Control Plane Explained 8 | 9 | The control plane is responsible for configuring and managing the system. It consists of several key components working together to manage the AI Gateway configuration. 10 | 11 | ![Control Plane Architecture](/img/control_plane.png) 12 | 13 | ## How It Works 14 | 15 | The control plane operates through a chain of components that work together to manage the configuration: 16 | 17 | 1. The Envoy AI Gateway controller watches AI Gateway Custom Resources (CRs) 18 | 2. 
When changes are detected, it updates the Envoy Gateway configuration 19 | 3. Envoy Gateway then updates the Envoy Proxy configuration 20 | 4. The data plane (Envoy Proxy) processes AI traffic based on this configuration 21 | 22 | This architecture ensures a clear separation of concerns, where the AI Gateway controller focuses on AI-specific configuration while leveraging Envoy Gateway for general proxy management. 23 | 24 | ## Components 25 | 26 | ### AI Gateway Controller 27 | The AI Gateway Controller manages AI-specific components and configurations: 28 | 29 | #### ExtProc Management 30 | - Deploys and configures the External Processor (ExtProc) service 31 | - Creates and updates ExtProc ConfigMaps with processing rules 32 | - Configures ExtProc security policies and authentication 33 | - Manages ExtProc deployments and their lifecycle 34 | 35 | #### Resource Management 36 | - Watches AI Gateway Custom Resources (CRs) 37 | - Creates and manages `EnvoyExtensionPolicy` resources 38 | - Configures `HTTPRoute` resources for request routing 39 | - Manages backend security policies and authentication 40 | 41 | #### Integration with Envoy Gateway 42 | - Works alongside Envoy Gateway Controller (not directly configuring Envoy) 43 | - Creates resources that Envoy Gateway translates into Envoy configuration 44 | - Manages AI-specific extensions and filters 45 | - Enables token-based rate limiting through metadata 46 | 47 | ### Envoy Gateway Controller 48 | - Manages the core Envoy configuration through xDS 49 | - Handles service discovery and load balancing 50 | - Manages TLS certificates 51 | - Translates Gateway API resources into Envoy configuration 52 | 53 | ## Configuration Flow 54 | 55 | ```mermaid 56 | sequenceDiagram 57 | participant User 58 | participant K8s as Kubernetes API 59 | participant Controller as AI Gateway Controller 60 | participant EG as Envoy Gateway 61 | participant Envoy as Envoy Proxy 62 | 63 | User->>K8s: Apply AI Gateway CR 64 | 
K8s->>Controller: Notify of new/updated CR 65 | Controller->>K8s: Create/Update ExtProc Resources 66 | Controller->>K8s: Create EnvoyExtensionPolicy 67 | Controller->>K8s: Create HTTPRoute 68 | EG->>K8s: Watch Gateway Resources 69 | EG->>Envoy: Push xDS Configuration 70 | ``` 71 | 72 | The configuration flow shows how changes propagate through the system: 73 | 1. Users apply AI Gateway Custom Resources (CRs) 74 | 2. The AI Gateway Controller processes these CRs 75 | 3. It creates or updates necessary resources (ExtProc, EnvoyExtensionPolicy, HTTPRoute) 76 | 4. Envoy Gateway watches these resources 77 | 5. Finally, it pushes the configuration to Envoy Proxy via xDS 78 | 79 | 80 | ## Next Steps 81 | 82 | To learn more: 83 | - Understand the [Data Plane and Traffic Flow](./data-plane.md) 84 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/concepts/architecture/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: architecture 3 | title: Architecture 4 | sidebar_position: 2 5 | --- 6 | 7 | # Architecture 8 | 9 | This section provides a detailed look at the architectural components of Envoy AI Gateway. Understanding the architecture will help you better deploy, configure, and maintain your gateway installation. 10 | 11 | ## Overview 12 | 13 | Envoy AI Gateway follows a modern cloud-native architecture with distinct control and data planes. This separation of concerns allows for better scalability, maintainability, and flexibility in deployment options. 14 | 15 | Envoy AI Gateway integrates with Envoy Gateway for the control plane and Envoy Proxy for the data plane. 16 | 17 | ## Key Concepts 18 | 19 | ### Control Plane 20 | A control plane is a component that manages the configuration of the data plane. We utilize Envoy Gateway as a central control plane, and Envoy AI Gateway works in conjunction with it to manage the data plane configuration. 
21 | 22 | ### Data Plane 23 | The data plane is the component that sits in the request path and processes the requests. In the context of Envoy AI Gateway, the data plane consists of Envoy Proxy and the AI Gateway external processor that processes the AI requests. 24 | 25 | ### Token Rate Limiting 26 | The major AI model endpoints return usage metrics called "tokens" per HTTP request. These tokens represent the computational resources consumed by the request. One of the major features of Envoy AI Gateway is rate limiting based on token usage instead of standard "requests per second" style rate limiting. 27 | 28 | We call such rate limiting "Token Rate Limiting" in our context, and the metrics that represent the token usage are called "Token Usage" or "Used Tokens". 29 | 30 | ## In This Section 31 | 32 | 1. [System Architecture Overview](./system-architecture.md) 33 | - High-level architecture overview 34 | - Control and data plane separation 35 | - Component interactions 36 | 37 | 2. [Control Plane](./control-plane.md) 38 | - AI Gateway Controller 39 | - Envoy Gateway Controller 40 | - Configuration management 41 | - Resource orchestration 42 | 43 | 3. [Data Plane](./data-plane.md) 44 | - External Processor functionality 45 | - Request processing flow 46 | - Provider integration 47 | 48 | ## What's Next 49 | 50 | After understanding the architecture: 51 | - Check out our [Getting Started](../../getting-started/index.md) guide for hands-on experience 52 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/concepts/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: concepts 3 | title: Concepts 4 | sidebar_position: 3 5 | --- 6 | 7 | # Concepts 8 | 9 | Welcome to the Concepts section of Envoy AI Gateway documentation. This section will help you understand the core components and how they work together to manage AI traffic effectively. 
10 | 11 | ## Architecture 12 | 13 | - [System Architecture Overview](./architecture/system-architecture.md) - High-level overview of control and data planes 14 | - [Control Plane](./architecture/control-plane.md) - How configuration and management works 15 | - [Data Plane](./architecture/data-plane.md) - Request processing and traffic flow 16 | 17 | ## Resources 18 | 19 | - [Resources Overview](./resources.md) - Understanding the AI Gateway resources and their relationships 20 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/concepts/resources.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: resources 3 | title: Resources 4 | sidebar_position: 2 5 | --- 6 | 7 | # Resources 8 | 9 | The Envoy AI Gateway uses several custom resources to manage AI traffic. Here's an overview of the key resources and how they relate to each other: 10 | 11 | ## Resource Reference 12 | 13 | | Resource | Purpose | API Reference | 14 | |----------|---------|---------------| 15 | | AIGatewayRoute | Defines unified API and routing rules for AI traffic | [AIGatewayRoute](../api/api.mdx#aigatewayroute) | 16 | | AIServiceBackend | Represents individual AI service backends | [AIServiceBackend](../api/api.mdx#aiservicebackend) | 17 | | BackendSecurityPolicy | Configures authentication for backend access | [BackendSecurityPolicy](../api/api.mdx#backendsecuritypolicy) | 18 | 19 | ## Core Resources 20 | 21 | ### AIGatewayRoute 22 | 23 | A resource that defines a unified AI API for a Gateway, allowing clients to interact with multiple AI backends using a single schema. 
24 | - Specifies the input API schema for client requests 25 | - Contains routing rules to direct traffic to appropriate backends 26 | - Manages request/response transformations between different API schemas 27 | - Can track LLM request costs (like token usage) 28 | 29 | ### AIServiceBackend 30 | 31 | Represents a single AI service backend that handles traffic with a specific API schema. 32 | 33 | - Defines the output API schema the backend expects 34 | - References a Kubernetes Service or Envoy Gateway Backend 35 | - Can reference a BackendSecurityPolicy for authentication 36 | 37 | ### BackendSecurityPolicy 38 | 39 | Configures authentication and authorization rules for backend access. 40 | 41 | - API Key authentication 42 | - AWS credentials authentication 43 | 44 | ## Resource Relationships 45 | 46 | ```mermaid 47 | graph TD 48 | A[AIGatewayRoute] -->|references| B[AIServiceBackend] 49 | B -->|references| C[K8s Service/Backend] 50 | B -->|references| D[BackendSecurityPolicy] 51 | D -->|contains| E[API Key/AWS Credentials] 52 | ``` 53 | 54 | The AIGatewayRoute acts as the entry point, defining how client requests are processed and routed to one or more AIServiceBackends. Each AIServiceBackend can reference a BackendSecurityPolicy, which provides the necessary credentials for accessing the underlying AI service. 55 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/getting-started/connect-providers/aws-bedrock.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: aws-bedrock 3 | title: Connect AWS Bedrock 4 | sidebar_position: 3 5 | --- 6 | 7 | # Connect AWS Bedrock 8 | 9 | This guide will help you configure Envoy AI Gateway to work with AWS Bedrock's foundation models. 
10 | 11 | ## Prerequisites 12 | 13 | Before you begin, you'll need: 14 | - AWS credentials with access to Bedrock 15 | - Basic setup completed from the [Basic Usage](../basic-usage.md) guide 16 | - Basic configuration removed as described in the [Connect Providers](./index.md) overview 17 | 18 | ## AWS Credentials Setup 19 | 20 | Ensure you have: 21 | 1. An AWS account with Bedrock access enabled 22 | 2. AWS credentials with permissions to: 23 | - `bedrock:InvokeModel` 24 | - `bedrock:ListFoundationModels` 25 | 3. Your AWS access key ID and secret access key 26 | 4. Enabled model access to "Llama 3.2 1B Instruct" in the `us-east-1` region 27 | - If you want to use a different AWS region, you must replace all instances of the string 28 | `us-east-1` with the desired region in `basic.yaml`. 29 | 30 | :::tip AWS Best Practices 31 | Consider using AWS IAM roles and limited-scope credentials for production environments. 32 | ::: 33 | 34 | ## Configuration Steps 35 | 36 | :::info Ready to proceed? 37 | Ensure you have followed the steps in [Connect Providers](../connect-providers/). 38 | ::: 39 | 40 | ### 1. Configure AWS Credentials 41 | 42 | Edit the `basic.yaml` file to replace these placeholder values: 43 | - `AWS_ACCESS_KEY_ID`: Your AWS access key ID 44 | - `AWS_SECRET_ACCESS_KEY`: Your AWS secret access key 45 | 46 | :::caution Security Note 47 | Make sure to keep your AWS credentials secure and never commit them to version control. 48 | The credentials will be stored in Kubernetes secrets. 49 | ::: 50 | 51 | ### 2. Apply Configuration 52 | 53 | Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, 54 | then the secret credential update will be picked up automatically in a few seconds.
55 | 56 | ```shell 57 | kubectl apply -f basic.yaml 58 | 59 | kubectl wait pods --timeout=2m \ 60 | -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ 61 | -n envoy-gateway-system \ 62 | --for=condition=Ready 63 | ``` 64 | 65 | ### 3. Test the Configuration 66 | 67 | You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. 68 | See the [Basic Usage](../basic-usage.md) page for instructions. 69 | 70 | ```shell 71 | curl -H "Content-Type: application/json" \ 72 | -d '{ 73 | "model": "us.meta.llama3-2-1b-instruct-v1:0", 74 | "messages": [ 75 | { 76 | "role": "user", 77 | "content": "Hi." 78 | } 79 | ] 80 | }' \ 81 | $GATEWAY_URL/v1/chat/completions 82 | ``` 83 | 84 | ## Troubleshooting 85 | 86 | If you encounter issues: 87 | 88 | 1. Verify your AWS credentials are correct and active 89 | 2. Check pod status: 90 | ```shell 91 | kubectl get pods -n envoy-gateway-system 92 | ``` 93 | 3. View controller logs: 94 | ```shell 95 | kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller 96 | ``` 97 | 4. Common errors: 98 | - 401/403: Invalid credentials or insufficient permissions 99 | - 404: Model not found or not available in region 100 | - 429: Rate limit exceeded 101 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/getting-started/connect-providers/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: connect-providers 3 | title: Connect Providers 4 | sidebar_position: 5 5 | --- 6 | 7 | # Connect Providers 8 | 9 | After setting up the basic AI Gateway with the mock backend, you can configure it to work with real AI model providers. This section will guide you through connecting different AI providers to your gateway.
10 | 11 | ## Available Providers 12 | 13 | Currently, Envoy AI Gateway supports the following providers: 14 | 15 | - [OpenAI](./openai.md) - Connect to OpenAI's GPT models 16 | - [AWS Bedrock](./aws-bedrock.md) - Access AWS Bedrock's suite of foundation models 17 | 18 | ## Before You Begin 19 | 20 | Before configuring any provider: 21 | 22 | 1. Complete the [Basic Usage](../basic-usage.md) guide 23 | 2. Remove the basic configuration with the mock backend 24 | 25 | ```shell 26 | kubectl delete -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/examples/basic/basic.yaml 27 | 28 | kubectl wait pods --timeout=15s \ 29 | -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ 30 | -n envoy-gateway-system \ 31 | --for=delete 32 | ``` 33 | 34 | 3. Download configuration template 35 | 36 | ```shell 37 | curl -O https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/examples/basic/basic.yaml 38 | ``` 39 | 40 | ## Security Best Practices 41 | 42 | When configuring AI providers, keep these security considerations in mind: 43 | 44 | - Store credentials securely using Kubernetes secrets 45 | - Never commit API keys or credentials to version control 46 | - Regularly rotate your credentials 47 | - Use the principle of least privilege when setting up access 48 | - Monitor usage and set up appropriate rate limits 49 | 50 | ## Next Steps 51 | 52 | Choose your provider to get started: 53 | - [Connect OpenAI](./openai.md) 54 | - [Connect AWS Bedrock](./aws-bedrock.md) 55 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/getting-started/connect-providers/openai.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: openai 3 | title: Connect OpenAI 4 | sidebar_position: 2 5 | --- 6 | 7 | # Connect OpenAI 8 | 9 | This guide will help you configure Envoy AI Gateway to work with OpenAI's models. 
10 | 11 | ## Prerequisites 12 | 13 | Before you begin, you'll need: 14 | 15 | - An OpenAI API key from [OpenAI's platform](https://platform.openai.com) 16 | - Basic setup completed from the [Basic Usage](../basic-usage.md) guide 17 | - Basic configuration removed as described in the [Connect Providers](./index.md) overview 18 | 19 | ## Configuration Steps 20 | 21 | :::info Ready to proceed? 22 | Ensure you have followed the steps in [Connect Providers](../connect-providers/). 23 | ::: 24 | 25 | ### 1. Configure OpenAI Credentials 26 | 27 | Edit the `basic.yaml` file to replace the OpenAI placeholder value: 28 | 29 | - Find the section containing `OPENAI_API_KEY` 30 | - Replace it with your actual OpenAI API key 31 | 32 | :::caution Security Note 33 | Make sure to keep your API key secure and never commit it to version control. 34 | The key will be stored in a Kubernetes secret. 35 | ::: 36 | 37 | ### 2. Apply Configuration 38 | 39 | Apply the updated configuration and wait for the Gateway pod to be ready. If you already have a Gateway running, 40 | then the secret credential update will be picked up automatically in a few seconds. 41 | 42 | ```shell 43 | kubectl apply -f basic.yaml 44 | 45 | kubectl wait pods --timeout=2m \ 46 | -l gateway.envoyproxy.io/owning-gateway-name=envoy-ai-gateway-basic \ 47 | -n envoy-gateway-system \ 48 | --for=condition=Ready 49 | ``` 50 | 51 | ### 3. Test the Configuration 52 | 53 | You should have set `$GATEWAY_URL` as part of the basic setup before connecting to providers. 54 | See the [Basic Usage](../basic-usage.md) page for instructions. 55 | 56 | ```shell 57 | curl -H "Content-Type: application/json" \ 58 | -d '{ 59 | "model": "gpt-4o-mini", 60 | "messages": [ 61 | { 62 | "role": "user", 63 | "content": "Hi." 64 | } 65 | ] 66 | }' \ 67 | $GATEWAY_URL/v1/chat/completions 68 | ``` 69 | 70 | ## Troubleshooting 71 | 72 | If you encounter issues: 73 | 74 | 1. Verify your API key is correct and active 75 | 76 | 2.
Check pod status: 77 | 78 | ```shell 79 | kubectl get pods -n envoy-gateway-system 80 | ``` 81 | 82 | 3. View controller logs: 83 | 84 | ```shell 85 | kubectl logs -n envoy-ai-gateway-system deployment/ai-gateway-controller 86 | ``` 87 | 88 | 4. View external processor logs: 89 | 90 | ```shell 91 | kubectl logs services/ai-eg-route-extproc-envoy-ai-gateway-basic 92 | ``` 93 | 94 | 5. Common errors: 95 | - 401: Invalid API key 96 | - 429: Rate limit exceeded 97 | - 503: OpenAI service unavailable 98 | 99 | ## Next Steps 100 | 101 | After configuring OpenAI: 102 | 103 | - [Connect AWS Bedrock](./aws-bedrock.md) to add another provider 104 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/getting-started/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: getting-started 3 | title: Getting Started 4 | sidebar_position: 2 5 | --- 6 | 7 | # Getting Started with Envoy AI Gateway 8 | 9 | Welcome to the Envoy AI Gateway getting started guide! 10 | 11 | This guide will walk you through setting up and using Envoy AI Gateway, a tool for managing GenAI traffic using Envoy. 12 | 13 | ## Guide Structure 14 | 15 | This getting started guide is organized into several sections: 16 | 17 | 1. [Prerequisites](./prerequisites.md) 18 | - Setting up your Kubernetes cluster 19 | - Installing required tools 20 | - Setting up Envoy Gateway 21 | 22 | 2. [Installation](./installation.md) 23 | - Installing Envoy AI Gateway 24 | - Configuring the gateway 25 | - Verifying the installation 26 | 27 | 3. [Basic Usage](./basic-usage.md) 28 | - Deploying a basic configuration 29 | - Making your first request 30 | - Understanding the response format 31 | 32 | 4. 
[Connect Providers](./connect-providers) 33 | - Setting up OpenAI integration 34 | - Configuring AWS Bedrock 35 | - Managing credentials securely 36 | 37 | ## Quick Start 38 | 39 | If you're familiar with Kubernetes and want to get started quickly, run these commands to install Envoy Gateway, Envoy AI Gateway, and deploy a basic configuration: 40 | 41 | ```shell 42 | helm upgrade -i eg oci://docker.io/envoyproxy/gateway-helm \ 43 | --version v1.3.1 \ 44 | --namespace envoy-gateway-system \ 45 | --create-namespace 46 | 47 | helm upgrade -i aieg oci://docker.io/envoyproxy/ai-gateway-helm \ 48 | --version v0.1.5 \ 49 | --namespace envoy-ai-gateway-system \ 50 | --create-namespace 51 | 52 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/examples/basic/basic.yaml 53 | 54 | kubectl wait --timeout=2m -n envoy-gateway-system deployment/envoy-gateway --for=condition=Available 55 | kubectl wait --timeout=2m -n envoy-ai-gateway-system deployment/ai-gateway-controller --for=condition=Available 56 | ``` 57 | 58 | ### Make a request 59 | 60 | Check out Making a Request in the [Basic Usage Guide](./basic-usage.md) 61 | 62 | :::tip 63 | 64 | For detailed instructions and explanations, start with the [Prerequisites](./prerequisites.md) section. 65 | 66 | ::: 67 | 68 | ## Need Help? 
69 | 70 | If you run into any issues: 71 | - Join our [Community Slack](https://envoyproxy.slack.com/archives/C07Q4N24VAA) 72 | - File an issue on [GitHub](https://github.com/envoyproxy/ai-gateway/issues) 73 | -------------------------------------------------------------------------------- /site/versioned_docs/version-0.1/getting-started/installation.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: installation 3 | title: Installation 4 | sidebar_position: 3 5 | --- 6 | 7 | import Tabs from '@theme/Tabs'; 8 | import TabItem from '@theme/TabItem'; 9 | 10 | This guide will walk you through installing Envoy AI Gateway and its required components. 11 | 12 | ## Installing Envoy AI Gateway 13 | 14 | The easiest way to install Envoy AI Gateway is using the Helm chart. First, install the AI Gateway Helm chart and wait for the deployment to be ready: 15 | 16 | ```shell 17 | helm upgrade -i aieg oci://docker.io/envoyproxy/ai-gateway-helm \ 18 | --version v0.1.5 \ 19 | --namespace envoy-ai-gateway-system \ 20 | --create-namespace 21 | 22 | kubectl wait --timeout=2m -n envoy-ai-gateway-system deployment/ai-gateway-controller --for=condition=Available 23 | ``` 24 | 25 | ## Configuring Envoy Gateway 26 | 27 | After installing Envoy AI Gateway, apply the AI Gateway-specific configuration to Envoy Gateway, restart the deployment, and wait for it to be ready: 28 | 29 | ```shell 30 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/manifests/envoy-gateway-config/redis.yaml 31 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/manifests/envoy-gateway-config/config.yaml 32 | kubectl apply -f https://raw.githubusercontent.com/envoyproxy/ai-gateway/refs/tags/v0.1.5/manifests/envoy-gateway-config/rbac.yaml 33 | 34 | kubectl rollout restart -n envoy-gateway-system deployment/envoy-gateway 35 | 36 | kubectl wait --timeout=2m -n envoy-gateway-system 
deployment/envoy-gateway --for=condition=Available 37 | ``` 38 | 39 | Note that the Redis configuration is only used for the rate limiting feature. If you don't need rate limiting, you can skip the Redis configuration, 40 | but you need to remove the relevant configuration in the `config.yaml` file as well. 41 | 42 | :::tip Verify Installation 43 | 44 | Check the status of the pods. All pods should be in the `Running` state with `Ready` status. 45 | 46 | Check AI Gateway pods: 47 | ```shell 48 | kubectl get pods -n envoy-ai-gateway-system 49 | ``` 50 | 51 | Check Envoy Gateway pods: 52 | ```shell 53 | kubectl get pods -n envoy-gateway-system 54 | ``` 55 | 56 | ::: 57 | 58 | ## Next Steps 59 | 60 | After completing the installation: 61 | - Continue to [Basic Usage](./basic-usage.md) to learn how to make your first request 62 | - Or jump to [Connect Providers](./connect-providers) to set up OpenAI and AWS Bedrock integration 63 | -------------------------------------------------------------------------------- /site/versioned_sidebars/version-0.1-sidebars.json: -------------------------------------------------------------------------------- 1 | { 2 | "tutorialSidebar": [ 3 | { 4 | "type": "autogenerated", 5 | "dirName": "." 6 | } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /site/versions.json: -------------------------------------------------------------------------------- 1 | [ 2 | "0.1" 3 | ] 4 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | This directory contains various integration tests. Each directory 2 | corresponds to a `make test-<directory name>` target.
3 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/basic.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: apple 10 | namespace: default 11 | spec: 12 | schema: 13 | name: OpenAI 14 | targetRefs: 15 | - name: some-gateway 16 | kind: Gateway 17 | group: gateway.networking.k8s.io 18 | rules: 19 | - matches: 20 | - headers: 21 | - type: Exact 22 | name: x-ai-eg-model 23 | value: llama3-70b 24 | backendRefs: 25 | - name: kserve 26 | weight: 20 27 | - name: aws-bedrock 28 | weight: 40 29 | - name: azure-openai 30 | weight: 40 31 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/llmcosts.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: llmcosts 10 | namespace: default 11 | spec: 12 | schema: 13 | name: OpenAI 14 | targetRefs: 15 | - name: some-gateway 16 | kind: Gateway 17 | group: gateway.networking.k8s.io 18 | rules: 19 | - matches: 20 | - headers: 21 | - type: Exact 22 | name: x-ai-eg-model 23 | value: llama3-70b 24 | backendRefs: 25 | - name: kserve 26 | weight: 20 27 | - name: aws-bedrock 28 | weight: 40 29 | - name: azure-openai 30 | weight: 40 31 | llmRequestCosts: 32 | - metadataKey: llm_input_token 33 | type: InputToken 34 | - metadataKey: llm_output_token 35 | type: OutputToken 36 | - metadataKey: llm_total_token 37 | type: TotalToken 38 | - metadataKey: some_cel_cost 39 | type: CEL 40 | cel: "llm_input_token + llm_output_token + llm_total_token" 41 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/no_target_refs.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: apple 10 | namespace: default 11 | spec: 12 | targetRefs: [] 13 | schema: 14 | name: OpenAI 15 | rules: 16 | - matches: 17 | - headers: 18 | - type: Exact 19 | name: x-ai-eg-model 20 | value: llama3-70b 21 | backendRefs: 22 | - name: kserve 23 | weight: 20 24 | - name: aws-bedrock 25 | weight: 40 26 | - name: azure-openai 27 | weight: 40 28 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/non_openai_schema.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: apple 10 | namespace: default 11 | spec: 12 | schema: 13 | # Schema name must be OpenAI schema at the moment, so this is invalid. 14 | name: AWSBedrock 15 | targetRefs: 16 | - name: some-gateway 17 | kind: Gateway 18 | group: gateway.networking.k8s.io 19 | rules: 20 | - matches: 21 | - headers: 22 | - type: Exact 23 | name: x-ai-eg-model 24 | value: llama3-70b 25 | backendRefs: 26 | - name: kserve 27 | weight: 20 28 | - name: aws-bedrock 29 | weight: 80 30 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/unknown_schema.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: apple 10 | namespace: default 11 | spec: 12 | schema: 13 | # Schema name must be OpenAI schema at the moment, so this is invalid. 14 | name: SomeRandomVendor 15 | targetRefs: 16 | - name: some-gateway 17 | kind: Gateway 18 | group: gateway.networking.k8s.io 19 | rules: 20 | - matches: 21 | - headers: 22 | - type: Exact 23 | name: x-ai-eg-model 24 | value: llama3-70b 25 | backendRefs: 26 | - name: kserve 27 | weight: 20 28 | - name: aws-bedrock 29 | weight: 80 30 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aigatewayroutes/unsupported_match.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIGatewayRoute 8 | metadata: 9 | name: apple 10 | namespace: default 11 | spec: 12 | schema: 13 | name: OpenAI 14 | targetRefs: 15 | - name: some-gateway 16 | kind: Gateway 17 | group: gateway.networking.k8s.io 18 | rules: 19 | - matches: 20 | - headers: 21 | - type: RegularExpression 22 | name: x-ai-eg-model 23 | value: llama3-70b 24 | backendRefs: 25 | - name: kserve 26 | weight: 20 27 | - name: aws-bedrock 28 | weight: 80 29 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aiservicebackends/basic-eg-backend-aws.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIServiceBackend 8 | metadata: 9 | name: eg-backend 10 | namespace: default 11 | spec: 12 | schema: 13 | name: AWSBedrock 14 | backendRef: 15 | name: eg-backend 16 | kind: Backend 17 | group: gateway.envoyproxy.io 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aiservicebackends/basic-eg-backend-azure.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIServiceBackend 8 | metadata: 9 | name: eg-backend 10 | namespace: default 11 | spec: 12 | schema: 13 | name: AzureOpenAI 14 | backendRef: 15 | name: eg-backend 16 | kind: Backend 17 | group: gateway.envoyproxy.io 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aiservicebackends/basic.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIServiceBackend 8 | metadata: 9 | name: dog-backend 10 | namespace: default 11 | spec: 12 | schema: 13 | name: AWSBedrock 14 | backendRef: 15 | name: dog-service 16 | kind: Service 17 | port: 80 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/aiservicebackends/unknown_schema.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: AIServiceBackend 8 | metadata: 9 | name: cat-backend 10 | namespace: default 11 | spec: 12 | schema: 13 | # Name must be one of the known schemas, so this is invalid. 14 | name: SomeRandomVendor 15 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/aws_credential_file.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: AWSCredentials 13 | awsCredentials: 14 | region: us-east-1 15 | credentialsFile: 16 | secretRef: 17 | name: placeholder 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/aws_oidc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: AWSCredentials 13 | awsCredentials: 14 | region: us-east-1 15 | oidcExchangeToken: 16 | awsRoleArn: placeholder 17 | oidc: 18 | provider: 19 | issuer: placeholder 20 | clientID: placeholder 21 | clientSecret: 22 | name: placeholder 23 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_credentials_missing_client_id.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | tenantID: dummy_azure_tenant_id 15 | clientSecretRef: 16 | name: dummy_azure_secret_ref_name 17 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_credentials_missing_tenant_id.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | clientID: dummy_azure_client_id 15 | clientSecretRef: 16 | name: dummy_azure_secret_ref_name 17 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_missing_auth.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: azure-missing-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | clientID: dummy_azure_client_id 15 | tenantID: dummy_azure_tenant_id 16 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_multiple_auth.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: azure-multiple-auth-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | clientID: dummy_azure_client_id 15 | tenantID: dummy_azure_tenant_id 16 | clientSecretRef: 17 | name: dummy_azure_secret_ref_name 18 | oidcExchangeToken: 19 | oidc: 20 | provider: 21 | issuer: placeholder 22 | clientID: placeholder 23 | clientSecret: 24 | name: placeholder 25 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_oidc.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: bird-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | clientID: dummy_azure_client_id 15 | tenantID: dummy_azure_tenant_id 16 | oidcExchangeToken: 17 | oidc: 18 | provider: 19 | issuer: placeholder 20 | clientID: placeholder 21 | clientSecret: 22 | name: placeholder 23 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/azure_valid_credentials.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: AzureCredentials 13 | azureCredentials: 14 | clientID: dummy_azure_client_id 15 | tenantID: dummy_azure_tenant_id 16 | clientSecretRef: 17 | name: dummy_azure_secret_ref_name 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/basic.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: APIKey 13 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/missing_type.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | awsCredentials: 13 | region: us-east-1 14 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/multiple_security_policies.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: APIKey 13 | apiKey: 14 | secretRef: 15 | name: placeholder 16 | awsCredentials: 17 | region: us-east-1 18 | -------------------------------------------------------------------------------- /tests/crdcel/testdata/backendsecuritypolicies/unknown_provider.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 
5 | 6 | apiVersion: aigateway.envoyproxy.io/v1alpha1 7 | kind: BackendSecurityPolicy 8 | metadata: 9 | name: dog-provider-policy 10 | namespace: default 11 | spec: 12 | type: UnknownType 13 | -------------------------------------------------------------------------------- /tests/e2e/init/testupstream/manifest.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: apps/v1 7 | kind: Deployment 8 | metadata: 9 | name: testupstream 10 | namespace: default 11 | spec: 12 | replicas: 1 13 | selector: 14 | matchLabels: 15 | app: testupstream 16 | template: 17 | metadata: 18 | labels: 19 | app: testupstream 20 | spec: 21 | containers: 22 | - name: testupstream 23 | image: docker.io/envoyproxy/ai-gateway-testupstream:latest 24 | imagePullPolicy: IfNotPresent 25 | ports: 26 | - containerPort: 8080 27 | env: 28 | - name: TESTUPSTREAM_ID 29 | value: primary 30 | readinessProbe: 31 | httpGet: 32 | path: /health 33 | port: 8080 34 | initialDelaySeconds: 1 35 | periodSeconds: 1 36 | --- 37 | apiVersion: v1 38 | kind: Service 39 | metadata: 40 | name: testupstream 41 | namespace: default 42 | spec: 43 | selector: 44 | app: testupstream 45 | ports: 46 | - protocol: TCP 47 | port: 80 48 | targetPort: 8080 49 | type: ClusterIP 50 | 51 | --- 52 | apiVersion: apps/v1 53 | kind: Deployment 54 | metadata: 55 | name: testupstream-canary 56 | namespace: default 57 | spec: 58 | replicas: 1 59 | selector: 60 | matchLabels: 61 | app: testupstream-canary 62 | template: 63 | metadata: 64 | labels: 65 | app: testupstream-canary 66 | spec: 67 | containers: 68 | - name: testupstream-canary 69 | image: docker.io/envoyproxy/ai-gateway-testupstream:latest 70 | imagePullPolicy: IfNotPresent 71 | env: 72 | - name: TESTUPSTREAM_ID 73 | value: canary 74 | ports: 75 | - 
containerPort: 8080 76 | readinessProbe: 77 | httpGet: 78 | path: /health 79 | port: 8080 80 | initialDelaySeconds: 1 81 | periodSeconds: 1 82 | --- 83 | apiVersion: v1 84 | kind: Service 85 | metadata: 86 | name: testupstream-canary 87 | namespace: default 88 | spec: 89 | selector: 90 | app: testupstream-canary 91 | ports: 92 | - protocol: TCP 93 | port: 80 94 | targetPort: 8080 95 | type: ClusterIP 96 | -------------------------------------------------------------------------------- /tests/e2e/testdata/translation_testupstream.yaml: -------------------------------------------------------------------------------- 1 | # Copyright Envoy AI Gateway Authors 2 | # SPDX-License-Identifier: Apache-2.0 3 | # The full text of the Apache license is available in the LICENSE file at 4 | # the root of the repo. 5 | 6 | apiVersion: gateway.networking.k8s.io/v1 7 | kind: GatewayClass 8 | metadata: 9 | name: translation-testupstream 10 | spec: 11 | controllerName: gateway.envoyproxy.io/gatewayclass-controller 12 | --- 13 | apiVersion: gateway.networking.k8s.io/v1 14 | kind: Gateway 15 | metadata: 16 | name: translation-testupstream 17 | namespace: default 18 | spec: 19 | gatewayClassName: translation-testupstream 20 | listeners: 21 | - name: http 22 | protocol: HTTP 23 | port: 80 24 | --- 25 | apiVersion: aigateway.envoyproxy.io/v1alpha1 26 | kind: AIGatewayRoute 27 | metadata: 28 | name: translation-testupstream 29 | namespace: default 30 | spec: 31 | schema: 32 | name: OpenAI 33 | targetRefs: 34 | - name: translation-testupstream 35 | kind: Gateway 36 | group: gateway.networking.k8s.io 37 | rules: 38 | - matches: 39 | - headers: 40 | - type: Exact 41 | name: x-ai-eg-model 42 | value: some-cool-model 43 | backendRefs: 44 | - name: translation-testupstream-cool-model-backend 45 | weight: 100 46 | - matches: 47 | - headers: 48 | - type: Exact 49 | name: x-ai-eg-model 50 | value: another-cool-model 51 | backendRefs: 52 | - name: translation-testupstream-another-cool-model-backend 
53 | weight: 100 54 | --- 55 | apiVersion: aigateway.envoyproxy.io/v1alpha1 56 | kind: AIServiceBackend 57 | metadata: 58 | name: translation-testupstream-cool-model-backend 59 | namespace: default 60 | spec: 61 | schema: 62 | name: OpenAI 63 | backendRef: 64 | name: testupstream 65 | kind: Backend 66 | group: gateway.envoyproxy.io 67 | --- 68 | apiVersion: aigateway.envoyproxy.io/v1alpha1 69 | kind: AIServiceBackend 70 | metadata: 71 | name: translation-testupstream-another-cool-model-backend 72 | namespace: default 73 | spec: 74 | schema: 75 | name: AWSBedrock 76 | backendRef: 77 | name: testupstream-canary 78 | kind: Backend 79 | group: gateway.envoyproxy.io 80 | --- 81 | apiVersion: gateway.envoyproxy.io/v1alpha1 82 | kind: Backend 83 | metadata: 84 | name: testupstream 85 | namespace: default 86 | spec: 87 | endpoints: 88 | - fqdn: 89 | hostname: testupstream.default.svc.cluster.local 90 | port: 80 91 | --- 92 | apiVersion: gateway.envoyproxy.io/v1alpha1 93 | kind: Backend 94 | metadata: 95 | name: testupstream-canary 96 | namespace: default 97 | spec: 98 | endpoints: 99 | - fqdn: 100 | hostname: testupstream-canary.default.svc.cluster.local 101 | port: 80 102 | -------------------------------------------------------------------------------- /tests/extproc/testdata/server.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDazCCAlOgAwIBAgIUOSrQ5kL8uq0Uy4aVdSERQLFCttwwDQYJKoZIhvcNAQEL 3 | BQAwRTELMAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM 4 | GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0yNTA1MDIwMjQwNDZaFw0yNjA1 5 | MDIwMjQwNDZaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEw 6 | HwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwggEiMA0GCSqGSIb3DQEB 7 | AQUAA4IBDwAwggEKAoIBAQCZM1iByTyeiCHWP7RTeeXILPSx5TYpIch7q24m7k/M 8 | nGvqAlfig+81b4kI3H9Ga21cFThrGur/OG6HCMhdMKqC2tLWK6WFR4c/pc51r5ep 9 | Uh46Ul1zdW3uA2Tc7moYv//h42Nvm9bcfd8dD7RDhaL2scLW+zSvIjtp8FgcoBnN 10 | 
/h9LypsCJvuRjHcAm7fZfEmkfOppSJ8zVN7OIWpGXtpcF1qcgWhl0LhrkQ3U6lWS 11 | sX5oItTxsFsUvXDiiZRWp+ZYrS/Vgtb9m3KiSzF8YE08cxRmYUrb0BeYe8uakpUV 12 | SHAcuFdf8gG0w02tUZC8G5gW/4b2/8lPPfal10ORIM1VAgMBAAGjUzBRMB0GA1Ud 13 | DgQWBBSQvPvh83+cmXvrwuynobWcyxHQTTAfBgNVHSMEGDAWgBSQvPvh83+cmXvr 14 | wuynobWcyxHQTTAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBg 15 | 6AB3FlY2iagYKpeR6Ww59/VLxE8gyxPORbNvhkMLUZPlrMBUr+KcYF9giMsgGZo3 16 | sr75vY/eohKW9ye18jIchS4yIyLypuN0AVAEd98P7dHGkFpQkxja5VYy8PpfZfA+ 17 | gvXEEstFFqN4Ys3w5wIf8TIuGkuEgMu2gk6slfsT40jUZr4bXgNKWbl6KNbDoFTA 18 | f9wb6RNa66VmBmJFxJmsEwBH5ttmw5m8gBvpz31f9WcnFKaRe6T2BLy8cPwDowfw 19 | +X3QsH+6bJQ4fUXVNM2BGCcl4WPBC4YKcEc+qCx4xWH7wYxiVQlTgC9UUdeczTzM 20 | yJ2xUoXPM+W2MIVHj5Dd 21 | -----END CERTIFICATE----- 22 | -------------------------------------------------------------------------------- /tests/extproc/testdata/server.key: -------------------------------------------------------------------------------- 1 | -----BEGIN PRIVATE KEY----- 2 | MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCZM1iByTyeiCHW 3 | P7RTeeXILPSx5TYpIch7q24m7k/MnGvqAlfig+81b4kI3H9Ga21cFThrGur/OG6H 4 | CMhdMKqC2tLWK6WFR4c/pc51r5epUh46Ul1zdW3uA2Tc7moYv//h42Nvm9bcfd8d 5 | D7RDhaL2scLW+zSvIjtp8FgcoBnN/h9LypsCJvuRjHcAm7fZfEmkfOppSJ8zVN7O 6 | IWpGXtpcF1qcgWhl0LhrkQ3U6lWSsX5oItTxsFsUvXDiiZRWp+ZYrS/Vgtb9m3Ki 7 | SzF8YE08cxRmYUrb0BeYe8uakpUVSHAcuFdf8gG0w02tUZC8G5gW/4b2/8lPPfal 8 | 10ORIM1VAgMBAAECggEAExuSWG2u/97eyYgch6TWggcJZf6+qJasGLCjBnwGu/Bu 9 | jTtJltj5xuJoZxCJV1EizK9g4Ar1cGurbXAs/WKKgdOlJE1BUcRAHDIj9A24jsfN 10 | OVz4huQIl/0YYW3jEb8wvQ3NYyg+vjV81HUW+Kz8TGSTdpjSU12jp2zwsHFPhp+Q 11 | AdvfO5gWJwpjn4tWZs36RpIF35s97/OdELNSUBCXcOedVLl7O2M+LOzXGC9k3FmN 12 | eLbRScHT7glqWq58W2uhFcH6XPHL3nJBwDfF0Au8R6tJ8qEXQhczv4Vw9qVqiKl/ 13 | sKnLET4mw/7lqvQNcLdZkWRMKUuYuWtv4iiQVcFM6QKBgQDKjzJWjFR7v9fufTEN 14 | CqAKmag9kAlGNj4jN0Iuit26KLbdgMN29azcGGSVLCW+UEwhYXsCSgDANg7fDc7e 15 | qjCXP3plQkOybAVreeD8p1V2rYDCxGQXnQe9OYqKfr2fQ8+V1FX8HT+2OdyKK/Bh 16 | 
bVKz/xT3Qt3KyeUGFfNc3DPA3wKBgQDBnnhJvRAEc0sX5JmlqtTyS0aJWD3bzR9Y 17 | TIJPW3PcsW7zW4tVUV7A8m3tUi/7TLfc3G3B+bTQTdJS0IGdP3ocobcUBS30ZIuv 18 | w3U6ydgncTUKOnqquIkoHOKU+k8RECvcKCRaigCnfXrph/cggE5s7vSY7JOuUBmy 19 | kECG3HY0SwKBgQC+HOQ8Av2QICDEf8+3088d49XZ4gQyV9q+JrEBCejNPkVTY/UR 20 | A8g30tUkhQjvtGGfuuQ+48IMpTAT/du5dLaiXju8KhNrtKLpmrylpVA7UODW7Iic 21 | +Q84dHcpQYcrofOFRoNlWvZfS7NII7E7X+YNdk3xPFr6Z7+ClLuS3kHctQKBgARh 22 | nKE2iQVAwqNAxgVMr1LoRylWeNffdWUfHhfDnwSQc01IdgexNq6xDbHAGVvbaZGy 23 | 3zzzdJb+G9NJ6pTvWckKE8V9NIQCPnL0vlNdO+nwkt2vYxU35cqllK8AVwew0P1D 24 | X0c5NjiUC1L7g6kVnjCy25AWUmUwZNR4lS04EeivAoGAR+kG9I3kcm915yUDSFx7 25 | slkThh9ZR+y4rXaN8CMFyr/GXvtYjQOAZwkqch9JjZqhzEYeY8i8QqUexbAE2v3B 26 | MHCPAQC4o2WFQSHpWODUm00hGpibdMkJYE6V6YkoWuSV/pQx1v4N0F424Ng6aDuF 27 | gYzU7fbV8gfbrm9OhrkM7H8= 28 | -----END PRIVATE KEY----- 29 | -------------------------------------------------------------------------------- /tests/internal/testupstreamlib/testupstream.go: -------------------------------------------------------------------------------- 1 | // Copyright Envoy AI Gateway Authors 2 | // SPDX-License-Identifier: Apache-2.0 3 | // The full text of the Apache license is available in the LICENSE file at 4 | // the root of the repo. 5 | 6 | package testupstreamlib 7 | 8 | const ( 9 | // ResponseTypeKey is the key for the response type in the request. 10 | // This can be either empty, "sse", or "aws-event-stream". 11 | // * If this is "sse", the response body is expected to be a Server-Sent Event stream. 12 | // Each line in x-response-body is treated as a separate [data] payload. 13 | // * If this is "aws-event-stream", the response body is expected to be an AWS Event Stream. 14 | // Each line in x-response-body is treated as a separate event payload. 15 | // * If this is empty, the response body is expected to be a regular JSON response. 16 | ResponseTypeKey = "x-response-type" 17 | // ExpectedHeadersKey is the key for the expected headers in the request. 
18 | // The value is a base64 encoded string of comma separated key-value pairs. 19 | // E.g. "key1:value1,key2:value2". 20 | ExpectedHeadersKey = "x-expected-headers" 21 | // ExpectedPathHeaderKey is the key for the expected path in the request. 22 | // The value is base64 encoded. 23 | ExpectedPathHeaderKey = "x-expected-path" 24 | // ExpectedRequestBodyHeaderKey is the key for the expected request body in the request. 25 | // The value is base64 encoded. 26 | ExpectedRequestBodyHeaderKey = "x-expected-request-body" 27 | // ResponseStatusKey is the key for the response status in the response; the default is 200 if not set. 28 | ResponseStatusKey = "x-response-status" 29 | // ResponseHeadersKey is the key for the response headers in the response. 30 | // The value is a base64 encoded string of comma separated key-value pairs. 31 | // E.g. "key1:value1,key2:value2". 32 | ResponseHeadersKey = "x-response-headers" 33 | // ResponseBodyHeaderKey is the key for the response body in the response. 34 | // The value is base64 encoded. 35 | ResponseBodyHeaderKey = "x-response-body" 36 | // NonExpectedRequestHeadersKey is the key for the non-expected request headers. 37 | // The value is a base64 encoded string of comma separated header keys expected to be absent. 38 | NonExpectedRequestHeadersKey = "x-non-expected-request-headers" 39 | // ExpectedTestUpstreamIDKey is the key for the expected testupstream-id in the request, 40 | // and the value will be compared with the TESTUPSTREAM_ID environment variable. 41 | // If the values do not match, the request will be rejected, meaning that the request 42 | // was routed to the wrong upstream. 43 | ExpectedTestUpstreamIDKey = "x-expected-testupstream-id" 44 | // ExpectedHostKey is the key for the expected host in the request. 45 | ExpectedHostKey = "x-expected-host" 46 | ) 47 | --------------------------------------------------------------------------------