├── .clang-format ├── .dockerignore ├── .gitignore ├── .gitlab-ci.yml ├── .golangci.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── PROJECT ├── README.md ├── api ├── admin │ └── v1alpha1 │ │ ├── config_dump.pb.go │ │ └── config_dump.proto ├── buf.gen.yaml ├── buf.lock ├── buf.yaml ├── clean.sh ├── clusters │ └── v1alpha1 │ │ ├── cluster.pb.go │ │ └── cluster.proto ├── filters │ └── v1alpha1 │ │ ├── api_key_auth.pb.go │ │ ├── api_key_auth.proto │ │ ├── rate_limit.pb.go │ │ └── rate_limit.proto ├── google │ └── protobuf │ │ ├── any.proto │ │ ├── descriptor.proto │ │ ├── duration.proto │ │ ├── empty.proto │ │ ├── struct.proto │ │ ├── timestamp.proto │ │ └── wrappers.proto ├── listeners │ └── v1alpha1 │ │ ├── chat_listener.pb.go │ │ ├── chat_listener.proto │ │ ├── common.pb.go │ │ ├── common.proto │ │ ├── image_listener.pb.go │ │ └── image_listener.proto ├── route │ └── v1alpha1 │ │ ├── route.pb.go │ │ └── route.proto ├── service │ └── v1alpha1 │ │ ├── apikey_auth.pb.go │ │ ├── apikey_auth.proto │ │ ├── apikey_auth_grpc.pb.go │ │ ├── usage_stats.pb.go │ │ ├── usage_stats.proto │ │ └── usage_stats_grpc.pb.go └── v1alpha1 │ ├── common_types.go │ ├── groupversion_info.go │ ├── imagegenerationbackend_types.go │ ├── llmbackend_types.go │ ├── modelroute_types.go │ └── zz_generated.deepcopy.go ├── changes ├── v0.1 │ ├── CHANGELOG-v0.1.0-rc4.md │ ├── CHANGELOG-v0.1.0-rc5.md │ ├── CHANGELOG-v0.1.0.md │ ├── CHANGELOG-v0.1.1.md │ └── CHANGELOG-v0.1.2.md ├── v0.2 │ └── CHANGELOG-v0.2.0-rc0.md └── v0.3 │ ├── CHANGELOG-v0.3.0-rc0.md │ └── CHANGELOG-v0.3.0-rc1.md ├── cmd ├── admin │ └── admin.go ├── gateway │ ├── devClusters.go │ └── proxy.go ├── main.go └── server │ └── server.go ├── config ├── config.go ├── config.yaml ├── crd │ ├── bases │ │ ├── llm.knoway.dev_imagegenerationbackends.yaml │ │ ├── llm.knoway.dev_llmbackends.yaml │ │ └── llm.knoway.dev_modelroutes.yaml │ ├── kustomization.yaml │ └── kustomizeconfig.yaml ├── rbac │ ├── imagegenerationbackend_editor_role.yaml │ ├── 
imagegenerationbackend_viewer_role.yaml │ ├── llmbackend_editor_role.yaml │ ├── llmbackend_viewer_role.yaml │ ├── modelroute_admin_role.yaml │ ├── modelroute_editor_role.yaml │ ├── modelroute_viewer_role.yaml │ └── role.yaml └── samples │ ├── kustomization.yaml │ ├── llm_v1alpha1_imagegenerationbackend.yaml │ ├── llm_v1alpha1_llmbackend.yaml │ └── llm_v1alpha1_modelroute.yaml ├── cspell.config.yaml ├── go.mod ├── go.sum ├── hack └── boilerplate.go.txt ├── internal └── controller │ ├── backends.go │ ├── common.go │ ├── common_test.go │ ├── controller_test.go │ ├── enums.go │ ├── imagegenerationbackend_controller.go │ ├── imagegenerationbackend_controller_test.go │ ├── llmbackend_controller.go │ ├── llmbackend_controller_test.go │ ├── modelroute_controller.go │ ├── route.go │ └── status.go ├── license-lint.yml ├── manifests └── knoway │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── _commons.tpl │ ├── _helpers.tpl │ ├── clusterrole.yaml │ ├── clusterrolebinding.yaml │ ├── configmap.yaml │ ├── deployment.yaml │ ├── hpa.yaml │ ├── llm.knoway.dev_imagegenerationbackends.yaml │ ├── llm.knoway.dev_llmbackends.yaml │ ├── llm.knoway.dev_modelroutes.yaml │ ├── service.yaml │ └── serviceaccount.yaml │ └── values.yaml ├── pkg ├── bootkit │ ├── bootkit.go │ ├── bootkit_test.go │ ├── lifecycle.go │ ├── lifecycle_test.go │ ├── options.go │ └── options_test.go ├── clusters │ ├── cluster │ │ └── cluster.go │ ├── filters │ │ ├── config.go │ │ └── openai │ │ │ ├── request.go │ │ │ └── response.go │ ├── interface.go │ └── manager │ │ └── cluster.go ├── constants │ ├── config.go │ └── listener.go ├── filters │ ├── auth │ │ ├── auth.go │ │ └── auth_test.go │ ├── config.go │ ├── ratelimit │ │ ├── local.go │ │ ├── rate_limit.go │ │ ├── rate_limit_test.go │ │ └── redis.go │ └── usage │ │ └── usage.go ├── listener │ ├── common.go │ ├── handler.go │ ├── listener.go │ ├── manager │ │ ├── chat │ │ │ ├── chat_completions.go │ │ │ ├── completions.go │ │ │ ├── listener.go │ │ │ └── 
models.go │ │ └── image │ │ │ ├── image_generations.go │ │ │ └── listener.go │ └── middlewares.go ├── metadata │ └── metadata.go ├── object │ ├── completion.go │ ├── error.go │ ├── errors.go │ └── images.go ├── observation │ └── attributes.go ├── protoutils │ └── proto.go ├── redis │ └── client.go ├── registry │ └── config │ │ ├── registry.go │ │ └── registry_test.go ├── route │ ├── loadbalance │ │ ├── load_balance.go │ │ └── load_balance_test.go │ ├── manager │ │ └── manager.go │ ├── route.go │ └── route │ │ └── route.go ├── types │ ├── openai │ │ ├── chat_completions_request.go │ │ ├── chat_completions_request_test.go │ │ ├── chat_completions_response.go │ │ ├── chat_completions_response_test.go │ │ ├── chat_completions_stream.go │ │ ├── chat_completions_stream_test.go │ │ ├── common.go │ │ ├── common_test.go │ │ ├── error_test.go │ │ ├── errors.go │ │ ├── event.go │ │ ├── http.go │ │ ├── image_generations_request.go │ │ ├── image_generations_request_test.go │ │ ├── image_generations_response.go │ │ ├── image_generations_response_test.go │ │ ├── jsonpatch.go │ │ ├── jsonpatch_test.go │ │ ├── testdata │ │ │ ├── GoogleSampleWebpImage.webp │ │ │ ├── SampleGIFImage_135kbmb.gif │ │ │ ├── SampleJPGImage_100kbmb.jpg │ │ │ └── SamplePNGImage_100kbmb.png │ │ └── usage.go │ └── sse │ │ └── event.go └── utils │ ├── crd_common_hash.go │ ├── http.go │ ├── json.go │ ├── json_test.go │ ├── lo.go │ ├── string.go │ └── string_test.go ├── samples └── api-key-server │ ├── config.yaml │ └── main.go └── scripts ├── build-or-download-binaries.sh ├── code-freeze.sh ├── copy-crds.sh ├── gen-change-logs.sh ├── gen-check.sh ├── pr-status-manage.sh ├── release-version.sh ├── run-make-gen.sh ├── trivy.sh ├── unit-test.sh ├── util.sh ├── verify-license.sh └── verify-staticcheck.sh /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: Google 4 | --- 5 | Language: Proto 6 | BasedOnStyle: Google 7 | 
IndentWidth: 4 8 | AlignConsecutiveAssignments: true 9 | CommentPragmas: XValidation 10 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file 2 | # Ignore build and test binaries. 3 | bin/ 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | bin/* 8 | Dockerfile.cross 9 | dist/* 10 | out/* 11 | 12 | # Test binary, built with `go test -c` 13 | *.test 14 | 15 | # Output of the go coverage tool, specifically when used with LiteIDE 16 | *.out 17 | 18 | # Go workspace file 19 | go.work 20 | 21 | # Kubernetes Generated files - skip generated files, except for vendored files 22 | !vendor/**/zz_generated.* 23 | 24 | # editor and IDE paraphernalia 25 | .idea 26 | .vscode 27 | *.swp 28 | *.swo 29 | *~ 30 | config/local.yaml 31 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | linters: 2 | enable-all: true 3 | disable: 4 | - depguard 5 | - exportloopref 6 | - execinquery 7 | - gomnd 8 | - funlen 9 | - containedctx 10 | - exhaustruct 11 | - testpackage 12 | - varnamelen 13 | - maintidx 14 | - err113 15 | - nlreturn 16 | - wrapcheck 17 | - tagliatelle 18 | - paralleltest 19 | - lll 20 | - contextcheck 21 | - gochecknoglobals 22 | - tagalign 23 | - nilnil 24 | - godot 25 | - godox 26 | - gci 27 | - gocognit 28 | - gocyclo 29 | - cyclop 30 | - ireturn 31 | - gofumpt 32 | - gochecknoinits 33 | 34 | linters-settings: 35 | wsl: 36 | allow-assign-and-call: false 37 | strict-append: false 38 | allow-trailing-comment: true 39 | 
allow-cuddle-declarations: true 40 | allow-separated-leading-comment: true 41 | revive: 42 | rules: 43 | - name: blank-imports 44 | disabled: true 45 | nestif: 46 | # Minimal complexity of if statements to report. 47 | # Default: 5 48 | min-complexity: 9 49 | dupl: 50 | # Tokens count to trigger issue. 51 | # Default: 150 52 | threshold: 600 53 | mnd: 54 | ignored-functions: 55 | - "context.WithTimeout" 56 | - "strconv.ParseComplex" 57 | ignored-files: 58 | - "examples/.*" 59 | gocritic: 60 | disabled-checks: 61 | - ifElseChain 62 | gosec: 63 | excludes: 64 | - G115 65 | 66 | issues: 67 | exclude: 68 | - "if statements should only be cuddled with assignments" # from wsl 69 | - "if statements should only be cuddled with assignments used in the if statement itself" # from wsl 70 | - "assignments should only be cuddled with other assignments" # from wsl. false positive case: var a bool\nb := true 71 | exclude-rules: 72 | - path: _test\.go 73 | linters: 74 | - perfsprint 75 | exclude-dirs: 76 | - apis 77 | - api 78 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM docker.m.daocloud.io/alpine:3.15 AS artifacts 2 | 3 | ARG APP 4 | ARG TARGETOS 5 | ARG TARGETARCH 6 | 7 | COPY out/$TARGETOS/$TARGETARCH/$APP /files/app/${APP} 8 | 9 | FROM docker.m.daocloud.io/alpine:3.15 10 | 11 | WORKDIR /app 12 | 13 | ARG APP 14 | ARG TARGETOS 15 | ARG TARGETARCH 16 | 17 | ENV APP ${APP} 18 | 19 | ARG VERSION 20 | ENV VERSION ${VERSION} 21 | 22 | COPY --from=artifacts /files / 23 | 24 | CMD /app/${APP} 25 | -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | # Code generated by tool. DO NOT EDIT. 2 | # This file is used to track the info used to scaffold your project 3 | # and allow the plugins properly work. 
4 | # More info: https://book.kubebuilder.io/reference/project-config.html 5 | domain: knoway.dev 6 | layout: 7 | - go.kubebuilder.io/v4 8 | projectName: knoway 9 | repo: knoway.dev 10 | resources: 11 | - api: 12 | crdVersion: v1 13 | namespaced: true 14 | domain: knoway.dev 15 | group: llm 16 | kind: LLMBackend 17 | path: knoway.dev/api/v1alpha1 18 | version: v1alpha1 19 | - api: 20 | crdVersion: v1 21 | namespaced: true 22 | controller: true 23 | domain: knoway.dev 24 | group: llm 25 | kind: ImageGenerationBackend 26 | path: knoway.dev/api/v1alpha1 27 | version: v1alpha1 28 | - api: 29 | crdVersion: v1 30 | namespaced: true 31 | controller: true 32 | domain: knoway.dev 33 | group: llm 34 | kind: ModelRoute 35 | path: knoway.dev/api/v1alpha1 36 | version: v1alpha1 37 | version: "3" 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # knoway 2 | 3 | > An Envoy inspired, ultimate LLM-first gateway for LLM serving and downstream application developers and enterprises 4 | 5 | ## Description 6 | 7 | A lightweight, easy-to-use dedicated gateway with a variety of LLM-specific optimizations and features. You can think of it as Nginx, but for LLMs, and upcoming supported models (such as Stable Diffusion, etc.). 8 | 9 | ## Features 10 | 11 | - 💬 **LLM-first**: Designed for LLMs, with optimizations and features that are specific to LLMs. 12 | - 🕸️ **Envoy Inspired**: Inspired by Envoy, with a similar architecture and features; if you are already familiar with Envoy, you will find it easy to use and understand this codebase. 13 | - 💻 **Single command deployment**: Deploy the gateway with a single command, just like `nginx -c nginx.conf`. 14 | - 🚢 **Kubernetes Native**: CRDs, control plane side implementations are batteries included, `helm install knoway` and you are ready to go. 
15 | 16 | Some of the LLMs specific optimizations and features include: 17 | 18 | - 👷 **Serverless boot loader**: Able to boot up the upstream Pod of serving services on-demand, make LLM serving more cost-effective. 19 | - ✅ **Fault tolerance**: Fault tolerance for LLMs, with the ability to retry, circuit breaking, etc. when dealing with external providers. 20 | - 🚥 **Rate limiting**: Rate limiting based on tokens, prompts, etc., to protect the LLMs serving services from being abused. 21 | - 📚 **Semantic Cache**: Cache based on the semantics of the prompts and tokens, CDN of the LLMs. 22 | - 📖 **Semantic Route**: Route based on the difficulties, semantic meaning of prompts, etc., to make the LLMs serving services more efficient with right models. 23 | - 🔍 **OpenTelemetry**: OpenTelemetry support, with the ability to trace the calls to LLMs, and the gateway itself. 24 | 25 | ## Getting Started 26 | 27 | ### Prerequisites 28 | 29 | - `go` version v1.22.0+ 30 | - `docker` version 17.03+. 31 | - `kubectl` version v1.11.3+. 32 | - Access to a Kubernetes v1.11.3+ cluster. 33 | 34 | ### To Deploy on the cluster 35 | 36 | ```console 37 | $ export VERSION=v0.0.0-xxxx 38 | $ helm repo add knoway https://release-ci.daocloud.io/chartrepo/knoway 39 | $ helm repo update 40 | $ helm upgrade --install --create-namespace -n knoway-system knoway knoway/knoway --version=${VERSION} \ 41 | --set global.imageRegistry=release-ci.daocloud.io \ 42 | --set config.auth_server_url="" \ 43 | --set config.stats_server_url="" 44 | ``` 45 | 46 | ### To Uninstall 47 | 48 | ```console 49 | helm uninstall knoway -n knoway-system 50 | ``` 51 | 52 | ## Project Distribution 53 | 54 | Following are the steps to build the installer and distribute this project to users. 55 | 56 | 1. 
Build the installer for the image built and published in the registry: 57 | 58 | ```sh 59 | make build-installer IMG=<some-registry>/knoway:tag 60 | ``` 61 | 62 | NOTE: The makefile target mentioned above generates an 'install.yaml' 63 | file in the dist directory. This file contains all the resources built 64 | with Kustomize, which are necessary to install this project without 65 | its dependencies. 66 | 67 | 2. Using the installer 68 | 69 | Users can just run `kubectl apply -f <URL for YAML BUNDLE>` to install the project, i.e.: 70 | 71 | ```sh 72 | kubectl apply -f https://raw.githubusercontent.com/<org>/knoway/<tag or branch>/dist/install.yaml 73 | ``` 74 | 75 | ## Contributing 76 | 77 | > [!NOTE] 78 | > Run `make help` for more information on all potential `make` targets 79 | 80 | More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html) 81 | 82 | ## License 83 | 84 | Copyright 2024. 85 | 86 | Licensed under the Apache License, Version 2.0 (the "License"); 87 | you may not use this file except in compliance with the License. 88 | You may obtain a copy of the License at 89 | 90 | http://www.apache.org/licenses/LICENSE-2.0 91 | 92 | Unless required by applicable law or agreed to in writing, software 93 | distributed under the License is distributed on an "AS IS" BASIS, 94 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 95 | See the License for the specific language governing permissions and 96 | limitations under the License. 
97 | 98 | -------------------------------------------------------------------------------- /api/admin/v1alpha1/config_dump.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.admin.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "google/protobuf/duration.proto"; 7 | 8 | option go_package = "knoway.dev/api/admin/v1alpha1"; 9 | 10 | message ConfigDump { 11 | repeated google.protobuf.Any listeners = 1; 12 | repeated google.protobuf.Any clusters = 2; 13 | repeated google.protobuf.Any routes = 3; 14 | } 15 | -------------------------------------------------------------------------------- /api/buf.gen.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | plugins: 3 | - name: go 4 | out: . 5 | opt: paths=source_relative 6 | # opt: use_proto_names=true 7 | - name: go-grpc 8 | out: . 9 | opt: paths=source_relative 10 | # - name: golang-deepcopy 11 | # out: . 12 | # opt: paths=source_relative 13 | -------------------------------------------------------------------------------- /api/buf.lock: -------------------------------------------------------------------------------- 1 | # Generated by buf. DO NOT EDIT. 
2 | version: v1 3 | -------------------------------------------------------------------------------- /api/buf.yaml: -------------------------------------------------------------------------------- 1 | version: v1 2 | build: 3 | excludes: 4 | - common-protos 5 | lint: 6 | allow_comment_ignores: true 7 | use: 8 | - BASIC 9 | except: 10 | - FIELD_LOWER_SNAKE_CASE 11 | - PACKAGE_DIRECTORY_MATCH 12 | -------------------------------------------------------------------------------- /api/clean.sh: -------------------------------------------------------------------------------- 1 | set -eu 2 | 3 | PATTERNS=".validate.go _deepcopy.gen.go .gen.json gr.gen.go .pb.go _json.gen.go .pb.gw.go .swagger.json .deepcopy.go" 4 | 5 | for p in $PATTERNS; do 6 | rm -f ./**/**/*"${p}" 7 | rm -f ./**/*"${p}" 8 | done 9 | -------------------------------------------------------------------------------- /api/clusters/v1alpha1/cluster.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.clusters.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "google/protobuf/struct.proto"; 7 | 8 | option go_package = "knoway.dev/api/clusters/v1alpha1"; 9 | 10 | message ClusterFilter { 11 | string name = 1; 12 | google.protobuf.Any config = 2; 13 | } 14 | 15 | enum LoadBalancePolicy { 16 | LOAD_BALANCE_POLICY_UNSPECIFIED = 0; 17 | ROUND_ROBIN = 1; 18 | LEAST_CONNECTION = 2; 19 | IP_HASH = 3; 20 | 21 | // CUSTOM means the load balance policy is defined by the filters. 
22 | CUSTOM = 15; 23 | } 24 | 25 | message TLSConfig { 26 | // todo 27 | } 28 | 29 | message Upstream { 30 | string url = 1; 31 | message Header { 32 | string key = 1; 33 | string value = 2; 34 | } 35 | repeated Header headers = 3; 36 | int32 timeout = 4; 37 | map<string, google.protobuf.Value> defaultParams = 5; 38 | map<string, google.protobuf.Value> overrideParams = 6; 39 | repeated string removeParamKeys = 7; 40 | } 41 | 42 | enum ClusterType { 43 | CLUSTER_TYPE_UNSPECIFIED = 0; 44 | LLM = 1; 45 | IMAGE_GENERATION = 2; 46 | } 47 | 48 | enum ClusterProvider { 49 | CLUSTER_PROVIDER_UNSPECIFIED = 0; 50 | OPEN_AI = 1; 51 | VLLM = 2; 52 | OLLAMA = 3; 53 | } 54 | 55 | message ClusterMeteringPolicy { 56 | enum SizeFrom { 57 | SIZE_FROM_UNSPECIFIED = 0; 58 | // For image generation, the size of the generated image is determined 59 | // by the input parameters. 60 | // 61 | // For example, even if the output image is 1024x1024, as long as the 62 | // input parameter specified 256x256, the size of the generated image 63 | // will be counted as 256x256. 64 | SIZE_FROM_INPUT = 1; 65 | // For image generation, the size of the generated image is determined 66 | // by the output image. This is done by parsing through the actual 67 | // generated image file header by using Golang's std library to 68 | // determine the size of the image. 69 | // 70 | // For example, no matter what the input specified, if the output image 71 | // is 1024x1024, the size of the generated image will be counted as 72 | // 1024x1024. 73 | SIZE_FROM_OUTPUT = 2; 74 | // For image generation, the size of the generated image is determined 75 | // by the greatest size of the input parameters and output image 76 | // resolution. 77 | // 78 | // For example, if the input parameter specified 256x256 and the output 79 | // image is 1024x1024, the size of the generated image will be counted 80 | // as 1024x1024. 
On the other hand, if the input parameter specified 81 | // 1024x1024 and the output image is 256x256, the size of the generated 82 | // image will be counted as 1024x1024. 83 | SIZE_FROM_GREATEST = 3; 84 | } 85 | 86 | optional SizeFrom sizeFrom = 1; 87 | } 88 | 89 | message Cluster { 90 | string name = 1; 91 | LoadBalancePolicy loadBalancePolicy = 2; 92 | Upstream upstream = 3; 93 | TLSConfig tlsConfig = 4; 94 | repeated ClusterFilter filters = 5; 95 | ClusterProvider provider = 6; 96 | int64 created = 7; 97 | ClusterType type = 8; 98 | ClusterMeteringPolicy meteringPolicy = 9; 99 | } 100 | -------------------------------------------------------------------------------- /api/filters/v1alpha1/api_key_auth.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.filters.v1alpha1; 4 | 5 | import "google/protobuf/duration.proto"; 6 | 7 | option go_package = "knoway.dev/api/filters/v1alpha1"; 8 | 9 | message APIKeyAuthConfig { 10 | message AuthServer { 11 | string url = 1; 12 | google.protobuf.Duration timeout = 2; // Default is 3s 13 | } 14 | AuthServer auth_server = 3; 15 | } 16 | 17 | message UsageStatsConfig { 18 | message StatsServer { 19 | string url = 1; 20 | google.protobuf.Duration timeout = 2; // Default is 3s 21 | } 22 | StatsServer stats_server = 3; 23 | } 24 | 25 | message OpenAIRequestHandlerConfig {} 26 | message OpenAIResponseHandlerConfig {} 27 | -------------------------------------------------------------------------------- /api/filters/v1alpha1/rate_limit.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.filters.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "google/protobuf/duration.proto"; 7 | 8 | option go_package = "knoway.dev/api/filters/v1alpha1"; 9 | 10 | message StringMatch { 11 | oneof match { 12 | string exact = 1; 13 | string prefix = 2; 14 | } 15 | } 16 | 17 | 
enum RateLimitBaseOn { 18 | RATE_LIMIT_BASE_ON_UNSPECIFIED = 0; 19 | USER_ID = 1; 20 | API_KEY = 2; 21 | } 22 | 23 | message RateLimitPolicy { 24 | StringMatch match = 1; 25 | int32 limit = 2; 26 | RateLimitBaseOn based_on = 3; 27 | google.protobuf.Duration duration = 4; 28 | } 29 | 30 | // RateLimitConfig defines rate limiting configuration 31 | message RateLimitConfig { 32 | repeated RateLimitPolicy policies = 1; 33 | RateLimitMode model = 2; 34 | string server_prefix = 3; 35 | 36 | RedisServer redis_server = 4; 37 | } 38 | 39 | enum RateLimitMode { 40 | RATE_LIMIT_MODEL_UNSPECIFIED = 0; 41 | LOCAL = 1; 42 | REDIS = 2; 43 | } 44 | 45 | message RedisServer { 46 | string url = 1; 47 | } 48 | -------------------------------------------------------------------------------- /api/google/protobuf/empty.proto: -------------------------------------------------------------------------------- 1 | // Protocol Buffers - Google's data interchange format 2 | // Copyright 2008 Google Inc. All rights reserved. 3 | // https://developers.google.com/protocol-buffers/ 4 | // 5 | // Redistribution and use in source and binary forms, with or without 6 | // modification, are permitted provided that the following conditions are 7 | // met: 8 | // 9 | // * Redistributions of source code must retain the above copyright 10 | // notice, this list of conditions and the following disclaimer. 11 | // * Redistributions in binary form must reproduce the above 12 | // copyright notice, this list of conditions and the following disclaimer 13 | // in the documentation and/or other materials provided with the 14 | // distribution. 15 | // * Neither the name of Google Inc. nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 
18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | syntax = "proto3"; 32 | 33 | package google.protobuf; 34 | 35 | option csharp_namespace = "Google.Protobuf.WellKnownTypes"; 36 | option go_package = "google.golang.org/protobuf/types/known/emptypb"; 37 | option java_package = "com.google.protobuf"; 38 | option java_outer_classname = "EmptyProto"; 39 | option java_multiple_files = true; 40 | option objc_class_prefix = "GPB"; 41 | option cc_enable_arenas = true; 42 | 43 | // A generic empty message that you can re-use to avoid defining duplicated 44 | // empty messages in your APIs. A typical example is to use it as the request 45 | // or the response type of an API method. For instance: 46 | // 47 | // service Foo { 48 | // rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty); 49 | // } 50 | // 51 | // The JSON representation for `Empty` is empty JSON object `{}`. 
52 | message Empty {} 53 | -------------------------------------------------------------------------------- /api/google/protobuf/struct.proto: -------------------------------------------------------------------------------- 1 | // Protocol Buffers - Google's data interchange format 2 | // Copyright 2008 Google Inc. All rights reserved. 3 | // https://developers.google.com/protocol-buffers/ 4 | // 5 | // Redistribution and use in source and binary forms, with or without 6 | // modification, are permitted provided that the following conditions are 7 | // met: 8 | // 9 | // * Redistributions of source code must retain the above copyright 10 | // notice, this list of conditions and the following disclaimer. 11 | // * Redistributions in binary form must reproduce the above 12 | // copyright notice, this list of conditions and the following disclaimer 13 | // in the documentation and/or other materials provided with the 14 | // distribution. 15 | // * Neither the name of Google Inc. nor the names of its 16 | // contributors may be used to endorse or promote products derived from 17 | // this software without specific prior written permission. 18 | // 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | 31 | syntax = "proto3"; 32 | 33 | package google.protobuf; 34 | 35 | option csharp_namespace = "Google.Protobuf.WellKnownTypes"; 36 | option cc_enable_arenas = true; 37 | option go_package = "github.com/golang/protobuf/ptypes/struct;structpb"; 38 | option java_package = "com.google.protobuf"; 39 | option java_outer_classname = "StructProto"; 40 | option java_multiple_files = true; 41 | option objc_class_prefix = "GPB"; 42 | 43 | // `Struct` represents a structured data value, consisting of fields 44 | // which map to dynamically typed values. In some languages, `Struct` 45 | // might be supported by a native representation. For example, in 46 | // scripting languages like JS a struct is represented as an 47 | // object. The details of that representation are described together 48 | // with the proto support for the language. 49 | // 50 | // The JSON representation for `Struct` is JSON object. 51 | message Struct { 52 | // Unordered map of dynamically typed values. 53 | map<string, Value> fields = 1; 54 | } 55 | 56 | // `Value` represents a dynamically typed value which can be either 57 | // null, a number, a string, a boolean, a recursive struct value, or a 58 | // list of values. A producer of value is expected to set one of that 59 | // variants, absence of any variant indicates an error. 60 | // 61 | // The JSON representation for `Value` is JSON value. 62 | message Value { 63 | // The kind of value. 
64 | oneof kind { 65 | // Represents a null value. 66 | NullValue null_value = 1; 67 | // Represents a double value. 68 | double number_value = 2; 69 | // Represents a string value. 70 | string string_value = 3; 71 | // Represents a boolean value. 72 | bool bool_value = 4; 73 | // Represents a structured value. 74 | Struct struct_value = 5; 75 | // Represents a repeated `Value`. 76 | ListValue list_value = 6; 77 | } 78 | } 79 | 80 | // `NullValue` is a singleton enumeration to represent the null value for the 81 | // `Value` type union. 82 | // 83 | // The JSON representation for `NullValue` is JSON `null`. 84 | enum NullValue { 85 | // Null value. 86 | NULL_VALUE = 0; 87 | } 88 | 89 | // `ListValue` is a wrapper around a repeated field of values. 90 | // 91 | // The JSON representation for `ListValue` is JSON array. 92 | message ListValue { 93 | // Repeated field of dynamically typed values. 94 | repeated Value values = 1; 95 | } 96 | -------------------------------------------------------------------------------- /api/listeners/v1alpha1/chat_listener.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.listeners.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "listeners/v1alpha1/common.proto"; 7 | 8 | option go_package = "knoway.dev/api/listeners/v1alpha1"; 9 | 10 | message ChatCompletionListener { 11 | string name = 1; 12 | repeated ListenerFilter filters = 2; 13 | Log access_log = 3; 14 | } 15 | -------------------------------------------------------------------------------- /api/listeners/v1alpha1/common.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.listeners.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | 7 | option go_package = "knoway.dev/api/listeners/v1alpha1"; 8 | 9 | message ListenerFilter { 10 | string name = 1; 11 | google.protobuf.Any config = 2; 12 | } 13 | 
14 | message Log { 15 | bool enable = 1; 16 | } 17 | -------------------------------------------------------------------------------- /api/listeners/v1alpha1/image_listener.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.listeners.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "listeners/v1alpha1/common.proto"; 7 | 8 | option go_package = "knoway.dev/api/listeners/v1alpha1"; 9 | 10 | message ImageListener { 11 | string name = 1; 12 | repeated ListenerFilter filters = 2; 13 | Log access_log = 3; 14 | } 15 | -------------------------------------------------------------------------------- /api/route/v1alpha1/route.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.route.v1alpha1; 4 | 5 | import "google/protobuf/any.proto"; 6 | import "google/protobuf/duration.proto"; 7 | 8 | option go_package = "knoway.dev/api/route/v1alpha1"; 9 | 10 | message RouteFilter { 11 | string name = 1; 12 | google.protobuf.Any config = 2; 13 | } 14 | 15 | message StringMatch { 16 | oneof match { 17 | string exact = 1; 18 | string prefix = 2; 19 | } 20 | } 21 | 22 | message Match { 23 | StringMatch model = 1; 24 | StringMatch message = 2; 25 | } 26 | 27 | message RouteDestination { 28 | string namespace = 1; 29 | string backend = 2; 30 | optional int32 weight = 3; 31 | string cluster = 4; 32 | } 33 | 34 | message RouteTarget { 35 | RouteDestination destination = 1; 36 | } 37 | 38 | // See also: 39 | // Supported load balancers — envoy 1.34.0-dev-e3a97f documentation 40 | // https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/load_balancers#arch-overview-load-balancing-types 41 | enum LoadBalancePolicy { 42 | LOAD_BALANCE_POLICY_UNSPECIFIED = 0; 43 | LOAD_BALANCE_POLICY_ROUND_ROBIN = 1; 44 | LOAD_BALANCE_POLICY_LEAST_REQUEST = 2; 45 | } 46 | 47 | message RouteFallback { 48 | 
// Only valid when the previous attempt failed, default: 0s 49 | // (immediately) 50 | optional google.protobuf.Duration pre_delay = 2; 51 | // Only valid when the ongoing attempt failed, default: 0s 52 | // (immediately) 53 | optional google.protobuf.Duration post_delay = 3; 54 | // default: 3 55 | optional uint64 max_retries = 1; 56 | } 57 | 58 | message Route { 59 | string name = 1; 60 | repeated Match matches = 2; 61 | repeated RouteFilter filters = 3; 62 | LoadBalancePolicy load_balance_policy = 4; 63 | repeated RouteTarget targets = 5; 64 | optional RouteFallback fallback = 6; 65 | } 66 | -------------------------------------------------------------------------------- /api/service/v1alpha1/apikey_auth.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option go_package = "knoway.dev/api/service/v1alpha1"; 4 | 5 | package knoway.service.v1alpha1; 6 | 7 | message APIKeyAuthRequest { 8 | string api_key = 1; 9 | } 10 | 11 | message APIKeyAuthResponse { 12 | // is_valid required: If it is true, it means that the apikey is valid. 13 | bool is_valid = 1; 14 | // allow_models optional: If it is empty, it means that the apikey can 15 | // access all models. If it is not empty, it means that the apikey can only 16 | // access the specified models. 17 | // The matching rules for each value follow the rules of glob. 18 | repeated string allow_models = 2; 19 | // api_key_id optional: It will be used in statistics to avoid leakage of 20 | // sensitive information. 21 | string api_key_id = 3; 22 | // user_id optional: the apikey's owner, will be used in route matching. 23 | string user_id = 4; 24 | // deny_models optional: similar to allow_models, but the opposite. 25 | // If it is not empty, it means that the apikey 26 | // cannot access the specified models. 27 | // The matching rules for each value follow the rules of glob. 28 | // it has higher priority than allow_models. 
29 | repeated string deny_models = 5; 30 | } 31 | 32 | service AuthService { 33 | rpc APIKeyAuth(APIKeyAuthRequest) returns (APIKeyAuthResponse) {}; 34 | } 35 | -------------------------------------------------------------------------------- /api/service/v1alpha1/apikey_auth_grpc.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 2 | // versions: 3 | // - protoc-gen-go-grpc v1.2.0 4 | // - protoc (unknown) 5 | // source: service/v1alpha1/apikey_auth.proto 6 | 7 | package v1alpha1 8 | 9 | import ( 10 | context "context" 11 | 12 | grpc "google.golang.org/grpc" 13 | codes "google.golang.org/grpc/codes" 14 | status "google.golang.org/grpc/status" 15 | ) 16 | 17 | // This is a compile-time assertion to ensure that this generated file 18 | // is compatible with the grpc package it is being compiled against. 19 | // Requires gRPC-Go v1.32.0 or later. 20 | const _ = grpc.SupportPackageIsVersion7 21 | 22 | // AuthServiceClient is the client API for AuthService service. 23 | // 24 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. 25 | type AuthServiceClient interface { 26 | APIKeyAuth(ctx context.Context, in *APIKeyAuthRequest, opts ...grpc.CallOption) (*APIKeyAuthResponse, error) 27 | } 28 | 29 | type authServiceClient struct { 30 | cc grpc.ClientConnInterface 31 | } 32 | 33 | func NewAuthServiceClient(cc grpc.ClientConnInterface) AuthServiceClient { 34 | return &authServiceClient{cc} 35 | } 36 | 37 | func (c *authServiceClient) APIKeyAuth(ctx context.Context, in *APIKeyAuthRequest, opts ...grpc.CallOption) (*APIKeyAuthResponse, error) { 38 | out := new(APIKeyAuthResponse) 39 | err := c.cc.Invoke(ctx, "/knoway.service.v1alpha1.AuthService/APIKeyAuth", in, out, opts...) 
40 | if err != nil { 41 | return nil, err 42 | } 43 | return out, nil 44 | } 45 | 46 | // AuthServiceServer is the server API for AuthService service. 47 | // All implementations must embed UnimplementedAuthServiceServer 48 | // for forward compatibility 49 | type AuthServiceServer interface { 50 | APIKeyAuth(context.Context, *APIKeyAuthRequest) (*APIKeyAuthResponse, error) 51 | mustEmbedUnimplementedAuthServiceServer() 52 | } 53 | 54 | // UnimplementedAuthServiceServer must be embedded to have forward compatible implementations. 55 | type UnimplementedAuthServiceServer struct { 56 | } 57 | 58 | func (UnimplementedAuthServiceServer) APIKeyAuth(context.Context, *APIKeyAuthRequest) (*APIKeyAuthResponse, error) { 59 | return nil, status.Errorf(codes.Unimplemented, "method APIKeyAuth not implemented") 60 | } 61 | func (UnimplementedAuthServiceServer) mustEmbedUnimplementedAuthServiceServer() {} 62 | 63 | // UnsafeAuthServiceServer may be embedded to opt out of forward compatibility for this service. 64 | // Use of this interface is not recommended, as added methods to AuthServiceServer will 65 | // result in compilation errors. 
66 | type UnsafeAuthServiceServer interface { 67 | mustEmbedUnimplementedAuthServiceServer() 68 | } 69 | 70 | func RegisterAuthServiceServer(s grpc.ServiceRegistrar, srv AuthServiceServer) { 71 | s.RegisterService(&AuthService_ServiceDesc, srv) 72 | } 73 | 74 | func _AuthService_APIKeyAuth_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { 75 | in := new(APIKeyAuthRequest) 76 | if err := dec(in); err != nil { 77 | return nil, err 78 | } 79 | if interceptor == nil { 80 | return srv.(AuthServiceServer).APIKeyAuth(ctx, in) 81 | } 82 | info := &grpc.UnaryServerInfo{ 83 | Server: srv, 84 | FullMethod: "/knoway.service.v1alpha1.AuthService/APIKeyAuth", 85 | } 86 | handler := func(ctx context.Context, req interface{}) (interface{}, error) { 87 | return srv.(AuthServiceServer).APIKeyAuth(ctx, req.(*APIKeyAuthRequest)) 88 | } 89 | return interceptor(ctx, in, info, handler) 90 | } 91 | 92 | // AuthService_ServiceDesc is the grpc.ServiceDesc for AuthService service. 
93 | // It's only intended for direct use with grpc.RegisterService, 94 | // and not to be introspected or modified (even as a copy) 95 | var AuthService_ServiceDesc = grpc.ServiceDesc{ 96 | ServiceName: "knoway.service.v1alpha1.AuthService", 97 | HandlerType: (*AuthServiceServer)(nil), 98 | Methods: []grpc.MethodDesc{ 99 | { 100 | MethodName: "APIKeyAuth", 101 | Handler: _AuthService_APIKeyAuth_Handler, 102 | }, 103 | }, 104 | Streams: []grpc.StreamDesc{}, 105 | Metadata: "service/v1alpha1/apikey_auth.proto", 106 | } 107 | -------------------------------------------------------------------------------- /api/service/v1alpha1/usage_stats.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package knoway.service.v1alpha1; 4 | 5 | option go_package = "knoway.dev/api/service/v1alpha1"; 6 | 7 | message UsageReportRequest { 8 | string api_key_id = 1; 9 | // user_model_name The name of the model that the user is using, such as 10 | // "kebe/mnist". 11 | string user_model_name = 2; 12 | // upstream_model_name The name of the model that the gateway sends the 13 | // request to, such as "kebe-mnist". 14 | string upstream_model_name = 3; 15 | 16 | message UsageImage { 17 | uint64 width = 1; 18 | uint64 height = 2; 19 | uint64 numbers = 3; 20 | string quality = 4; 21 | string style = 5; 22 | } 23 | 24 | message Usage { 25 | uint64 input_tokens = 1; 26 | uint64 output_tokens = 2; 27 | UsageImage input_images = 3; 28 | UsageImage output_images = 4; 29 | } 30 | Usage usage = 4; 31 | 32 | enum Mode { 33 | MODE_UNSPECIFIED = 0; 34 | // The MODE_PER_REQUEST mode means that each time a request is received, 35 | // the usage of the request will be included. 36 | // If the server fails to process, statistical data may be lost. 37 | MODE_PER_REQUEST = 1; 38 | } 39 | Mode mode = 5; 40 | } 41 | 42 | message UsageReportResponse { 43 | // accepted required: If it is true, it means that the report is successful. 
44 | bool accepted = 1; 45 | } 46 | 47 | service UsageStatsService { 48 | rpc UsageReport(UsageReportRequest) returns (UsageReportResponse) {} 49 | } 50 | -------------------------------------------------------------------------------- /api/v1alpha1/common_types.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Header struct { 4 | Key string `json:"key,omitempty"` 5 | Value string `json:"value,omitempty"` 6 | } 7 | 8 | // HeaderFromSource represents the source of a set of ConfigMaps or Secrets 9 | type HeaderFromSource struct { 10 | // An optional identifier to prepend to each key in the ref. 11 | Prefix string `json:"prefix,omitempty"` 12 | // Type of the source (ConfigMap or Secret) 13 | RefType ValueFromType `json:"refType,omitempty"` 14 | // Name of the source 15 | RefName string `json:"refName,omitempty"` 16 | } 17 | 18 | // ValueFromType defines the type of source for headers. 19 | // +kubebuilder:validation:Enum=ConfigMap;Secret 20 | type ValueFromType string 21 | 22 | const ( 23 | // ConfigMap indicates that the header source is a ConfigMap. 24 | ConfigMap ValueFromType = "ConfigMap" 25 | // Secret indicates that the header source is a Secret. 26 | Secret ValueFromType = "Secret" 27 | ) 28 | 29 | // StatusEnum defines the possible statuses for the LLMBackend, ImageGenerationBackend, and other types. 
30 | type StatusEnum string 31 | 32 | const ( 33 | Unknown StatusEnum = "Unknown" 34 | Healthy StatusEnum = "Healthy" 35 | Failed StatusEnum = "Failed" 36 | ) 37 | 38 | type Provider string 39 | 40 | const ( 41 | ProviderOpenAI Provider = "OpenAI" 42 | ProviderVLLM Provider = "vLLM" 43 | ProviderOllama Provider = "Ollama" 44 | ) 45 | 46 | type BackendType string 47 | 48 | const ( 49 | BackendTypeLLM BackendType = "LLM" 50 | BackendTypeImageGeneration BackendType = "ImageGeneration" 51 | ) 52 | -------------------------------------------------------------------------------- /api/v1alpha1/groupversion_info.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | // Package v1alpha1 contains API Schema definitions for the llm v1alpha1 API group 18 | // +kubebuilder:object:generate=true 19 | // +groupName=llm.knoway.dev 20 | package v1alpha1 21 | 22 | import ( 23 | "k8s.io/apimachinery/pkg/runtime/schema" 24 | "sigs.k8s.io/controller-runtime/pkg/scheme" 25 | ) 26 | 27 | var ( 28 | // GroupVersion is group version used to register these objects 29 | GroupVersion = schema.GroupVersion{Group: "llm.knoway.dev", Version: "v1alpha1"} 30 | 31 | // SchemeBuilder is used to add go types to the GroupVersionKind scheme 32 | SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} 33 | 34 | // AddToScheme adds the types in this group-version to the given scheme. 35 | AddToScheme = SchemeBuilder.AddToScheme 36 | ) 37 | -------------------------------------------------------------------------------- /changes/v0.1/CHANGELOG-v0.1.0-rc4.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.1.0-rc4 Change logs 3 | 4 | ## Change since v0.1.0-rc3 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - fix: properly handle status codes for ApiKeyAuth service call(!55) by @nekoayaka.zhang 11 | - Fix codefreeze version(!56) by @nicole.li 12 | 13 | 14 | #### Feature 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /changes/v0.1/CHANGELOG-v0.1.0-rc5.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.1.0-rc5 Change logs 3 | 4 | ## Change since v0.1.0-rc4 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - fix: rare to reproduce panic when dealing with type asserting though generic helpers and slice type(!57) by @nekoayaka.zhang 11 | - fix: data race when calling filters(!58) by @nekoayaka.zhang 12 | - release 0.1 merge main(!59) by @nicole.li 13 | 14 | 15 | #### Feature 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- 
/changes/v0.1/CHANGELOG-v0.1.0.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.1.0 Change logs 3 | 4 | ## Change since v0.1.0-rc3 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - fix: properly handle status codes for ApiKeyAuth service call(!55) by @nekoayaka.zhang 11 | - Fix codefreeze version(!56) by @nicole.li 12 | - fix: rare to reproduce panic when dealing with type asserting though generic helpers and slice type(!57) by @nekoayaka.zhang 13 | - fix: data race when calling filters(!58) by @nekoayaka.zhang 14 | - release 0.1 merge main(!59) by @nicole.li 15 | - Auto cherry-pick !61 to release-0.1(!63) by @ndx-robot 16 | 17 | 18 | #### Feature 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /changes/v0.1/CHANGELOG-v0.1.1.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.1.1 Change logs 3 | 4 | ## Change since v0.1.0 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - Auto cherry-pick !67 to release-0.1(!68) by @ndx-robot 11 | 12 | 13 | #### Feature 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /changes/v0.1/CHANGELOG-v0.1.2.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.1.2 Change logs 3 | 4 | ## Change since v0.1.1 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | 11 | 12 | #### Feature 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /changes/v0.2/CHANGELOG-v0.2.0-rc0.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.2.0-rc0 Change logs 3 | 4 | ## Change since v0.1.2 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - feat: dedicated denied error for denied(!61) by @nekoayaka.zhang 11 | - fix: vllm error handling(!65) by @nekoayaka.zhang 12 | - > fix stream missing 
content-type(!67) by @kebe.liu 13 | - Revert "feat: support metering usages for images generations"(!70) by @nekoayaka.zhang 14 | - update ci image(!72) by @nicole.li 15 | - feat: add rate limits(!74) by @nicole.li 16 | - fix status equal(!76) by @nicole.li 17 | - feat: add lb for model route(!77) by @xiaowu.zhu 18 | - >fix base cluster not found & image chat not found(!79) by @nicole.li 19 | - + support config listener via config file(!80) by @kebe.liu 20 | - + add config_dump endpoint for debug(!81) by @kebe.liu 21 | - feat: supported ratelimit redis(!82) by @nicole.li 22 | - fix(fallback): not handling default LB policy & not handling invalid content-type with errored status code(!84) by @nekoayaka.zhang 23 | - > fix duplicated requests send due to fallback(!87) by @kebe.liu 24 | 25 | 26 | #### Feature 27 | 28 | - feat: FromString util support ptr(!62) by @nekoayaka.zhang 29 | - feat: image listener(!69) by @nekoayaka.zhang 30 | - feat: size config(!71) by @nekoayaka.zhang 31 | - feat: added ModelRoute CRD(!73) by @nekoayaka.zhang 32 | - chore(route): align route.proto fields with new LB and retry ModelRoute CRD design(!75) by @nekoayaka.zhang 33 | - feat(controller): model route(!78) by @nekoayaka.zhang 34 | - feat: fallback(!83) by @nekoayaka.zhang 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /changes/v0.3/CHANGELOG-v0.3.0-rc0.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.3.0-rc0 Change logs 3 | 4 | ## Change since v0.2.0-rc0 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - + remove llm request params(!85) by @nicole.li 11 | - refactor: use manager -> entity pattern(!88) by @nekoayaka.zhang 12 | - > rename ratelimit to filters(!89) by @kebe.liu 13 | - > fix 500 when ratelimited(!90) by @kebe.liu 14 | 15 | 16 | #### Feature 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- 
/changes/v0.3/CHANGELOG-v0.3.0-rc1.md: -------------------------------------------------------------------------------- 1 | 2 | # v0.3.0-rc1 Change logs 3 | 4 | ## Change since v0.3.0-rc0 5 | 6 | ### Changes by Kind 7 | 8 | #### Bug 9 | 10 | - Auto cherry-pick !91 to release-0.3(!92) by @ndx-robot 11 | 12 | 13 | #### Feature 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /cmd/admin/admin.go: -------------------------------------------------------------------------------- 1 | package admin 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | "net" 7 | "net/http" 8 | "time" 9 | 10 | "knoway.dev/pkg/bootkit" 11 | 12 | "google.golang.org/protobuf/encoding/protojson" 13 | 14 | "knoway.dev/api/admin/v1alpha1" 15 | 16 | "github.com/gorilla/mux" 17 | "github.com/samber/lo" 18 | "google.golang.org/protobuf/proto" 19 | "google.golang.org/protobuf/types/known/anypb" 20 | 21 | clustermanager "knoway.dev/pkg/clusters/manager" 22 | "knoway.dev/pkg/listener" 23 | routemanager "knoway.dev/pkg/route/manager" 24 | ) 25 | 26 | type debugListener struct { 27 | staticListeners []*anypb.Any 28 | } 29 | 30 | func NewAdminListener(staticListeners []*anypb.Any) (listener.Listener, error) { 31 | return &debugListener{staticListeners: staticListeners}, nil 32 | } 33 | 34 | func (d *debugListener) Drain(ctx context.Context) error { 35 | return nil 36 | } 37 | 38 | func (d *debugListener) HasDrained() bool { 39 | return false 40 | } 41 | 42 | func sliceToAny[T proto.Message](s []T) []*anypb.Any { 43 | anySlice := make([]*anypb.Any, 0, len(s)) 44 | 45 | for _, v := range s { 46 | a, err := anypb.New(v) 47 | if err != nil { 48 | slog.Error("failed to convert to any", "err", err) 49 | continue 50 | } 51 | 52 | anySlice = append(anySlice, a) 53 | } 54 | 55 | return anySlice 56 | } 57 | 58 | func (d *debugListener) configDump(writer http.ResponseWriter, request *http.Request) { 59 | clusters := clustermanager.DebugDumpAllClusters() 60 | routes 
:= routemanager.DebugDumpAllRoutes() 61 | listeners := d.staticListeners 62 | cd := &v1alpha1.ConfigDump{ 63 | Clusters: sliceToAny(clusters), 64 | Routes: sliceToAny(routes), 65 | Listeners: listeners, 66 | } 67 | bs := lo.Must1(protojson.MarshalOptions{ 68 | Multiline: true, 69 | Indent: " ", 70 | AllowPartial: false, 71 | UseProtoNames: false, 72 | UseEnumNumbers: false, 73 | EmitUnpopulated: false, 74 | EmitDefaultValues: false, 75 | Resolver: nil, 76 | }.Marshal(cd)) 77 | _, _ = writer.Write(bs) 78 | } 79 | 80 | func (d *debugListener) RegisterRoutes(mux *mux.Router) error { 81 | mux.HandleFunc("/config_dump", d.configDump) 82 | return nil 83 | } 84 | 85 | func NewAdminServer(_ context.Context, staticListeners []*anypb.Any, addr string, lifecycle bootkit.LifeCycle) error { 86 | m := listener.NewMux() 87 | m.Register(NewAdminListener(staticListeners)) 88 | 89 | server, err := m.BuildServer(&http.Server{Addr: addr, ReadTimeout: time.Minute}) 90 | if err != nil { 91 | return err 92 | } 93 | 94 | ln, err := net.Listen("tcp", addr) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | lifecycle.Append(bootkit.LifeCycleHook{ 100 | OnStart: func(ctx context.Context) error { 101 | slog.Info("Starting admin server ...", "addr", ln.Addr().String()) 102 | 103 | if err := server.Serve(ln); err != nil && err != http.ErrServerClosed { 104 | return err 105 | } 106 | return nil 107 | }, 108 | OnStop: func(ctx context.Context) error { 109 | slog.Info("Stopping admin server ...") 110 | 111 | if err := server.Shutdown(ctx); err != nil { 112 | return err 113 | } 114 | 115 | slog.Info("Admin server stopped gracefully.") 116 | return nil 117 | }, 118 | }) 119 | 120 | return nil 121 | } 122 | -------------------------------------------------------------------------------- /cmd/gateway/devClusters.go: -------------------------------------------------------------------------------- 1 | package gateway 2 | 3 | import ( 4 | "github.com/samber/lo" 5 | 
"google.golang.org/protobuf/types/known/anypb" 6 | 7 | clusters "knoway.dev/api/clusters/v1alpha1" 8 | filters "knoway.dev/api/filters/v1alpha1" 9 | "knoway.dev/pkg/bootkit" 10 | clustermanager "knoway.dev/pkg/clusters/manager" 11 | routemanager "knoway.dev/pkg/route/manager" 12 | ) 13 | 14 | var StaticClustersConfig = map[string]*clusters.Cluster{ 15 | "openai/gpt-3.5-turbo": { 16 | Type: clusters.ClusterType_LLM, 17 | Name: "openai/gpt-3.5-turbo", 18 | Provider: clusters.ClusterProvider_OPEN_AI, 19 | LoadBalancePolicy: clusters.LoadBalancePolicy_ROUND_ROBIN, 20 | Upstream: &clusters.Upstream{ 21 | Url: "https://openrouter.ai/api/v1/chat/completions", 22 | Headers: []*clusters.Upstream_Header{ 23 | { 24 | Key: "Authorization", 25 | Value: "Bearer sk-or-v1-", 26 | }, 27 | }, 28 | }, 29 | TlsConfig: nil, 30 | Filters: []*clusters.ClusterFilter{ 31 | { 32 | Name: "openai-request-handler", 33 | Config: func() *anypb.Any { 34 | return lo.Must(anypb.New(&filters.OpenAIRequestHandlerConfig{})) 35 | }(), 36 | }, 37 | { 38 | Name: "openai-response-handler", 39 | Config: func() *anypb.Any { 40 | return lo.Must(anypb.New(&filters.OpenAIResponseHandlerConfig{})) 41 | }(), 42 | }, 43 | }, 44 | }, 45 | } 46 | 47 | func StaticRegisterClusters(clusterDetails map[string]*clusters.Cluster, lifecycle bootkit.LifeCycle) error { 48 | for _, c := range clusterDetails { 49 | if err := clustermanager.UpsertAndRegisterCluster(c, lifecycle); err != nil { 50 | return err 51 | } 52 | if err := routemanager.RegisterBaseRouteWithConfig(routemanager.InitDirectModelRoute(c.GetName()), lifecycle); err != nil { 53 | return err 54 | } 55 | } 56 | 57 | return nil 58 | } 59 | -------------------------------------------------------------------------------- /cmd/gateway/proxy.go: -------------------------------------------------------------------------------- 1 | package gateway 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "fmt" 7 | "log/slog" 8 | "net" 9 | "net/http" 10 | "time" 11 | 12 | 
"google.golang.org/protobuf/types/known/anypb" 13 | 14 | "google.golang.org/protobuf/proto" 15 | 16 | "knoway.dev/api/listeners/v1alpha1" 17 | "knoway.dev/pkg/bootkit" 18 | "knoway.dev/pkg/listener" 19 | "knoway.dev/pkg/listener/manager/chat" 20 | "knoway.dev/pkg/listener/manager/image" 21 | ) 22 | 23 | func StartGateway(_ context.Context, lifecycle bootkit.LifeCycle, listenerAddr string, cfg []*anypb.Any) error { 24 | if listenerAddr == "" { 25 | listenerAddr = ":8080" 26 | } 27 | if len(cfg) == 0 { 28 | return errors.New("no listener found") 29 | } 30 | mux := listener.NewMux() 31 | 32 | for _, c := range cfg { 33 | obj, err := anypb.UnmarshalNew(c, proto.UnmarshalOptions{}) 34 | if err != nil { 35 | return err 36 | } 37 | 38 | switch obj.(type) { 39 | case *v1alpha1.ChatCompletionListener: 40 | mux.Register(chat.NewOpenAIChatListenerConfigs(obj, lifecycle)) 41 | case *v1alpha1.ImageListener: 42 | mux.Register(image.NewOpenAIImageListenerConfigs(obj, lifecycle)) 43 | default: 44 | return fmt.Errorf("%s is not a valid listener", c.GetTypeUrl()) 45 | } 46 | } 47 | 48 | server, err := mux.BuildServer(&http.Server{Addr: listenerAddr, ReadTimeout: time.Minute}) 49 | if err != nil { 50 | return err 51 | } 52 | 53 | ln, err := net.Listen("tcp", listenerAddr) 54 | if err != nil { 55 | return err 56 | } 57 | 58 | lifecycle.Append(bootkit.LifeCycleHook{ 59 | OnStart: func(ctx context.Context) error { 60 | slog.Info("Starting gateway ...", "addr", ln.Addr().String()) 61 | 62 | if err := server.Serve(ln); err != nil && err != http.ErrServerClosed { 63 | return err 64 | } 65 | return nil 66 | }, 67 | OnStop: func(ctx context.Context) error { 68 | slog.Info("Stopping gateway ...") 69 | 70 | if err := server.Shutdown(ctx); err != nil { 71 | return err 72 | } 73 | 74 | slog.Info("Gateway stopped gracefully.") 75 | return nil 76 | }, 77 | }) 78 | 79 | return nil 80 | } 81 | -------------------------------------------------------------------------------- /cmd/main.go: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package main 18 | 19 | import ( 20 | "context" 21 | "flag" 22 | "log/slog" 23 | "os" 24 | "time" 25 | 26 | "buf.build/go/protoyaml" 27 | "github.com/samber/lo" 28 | "google.golang.org/protobuf/types/known/anypb" 29 | "sigs.k8s.io/yaml" 30 | 31 | "knoway.dev/cmd/admin" 32 | 33 | utilruntime "k8s.io/apimachinery/pkg/util/runtime" 34 | clientgoscheme "k8s.io/client-go/kubernetes/scheme" 35 | 36 | knowaydevv1alpha1 "knoway.dev/api/v1alpha1" 37 | 38 | "knoway.dev/cmd/gateway" 39 | "knoway.dev/cmd/server" 40 | "knoway.dev/config" 41 | "knoway.dev/pkg/bootkit" 42 | 43 | // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) 44 | // to ensure that exec-entrypoint and run can make use of them. 
45 | _ "k8s.io/client-go/plugin/pkg/client/auth" 46 | // +kubebuilder:scaffold:imports 47 | ) 48 | 49 | func init() { 50 | utilruntime.Must(clientgoscheme.AddToScheme(clientgoscheme.Scheme)) 51 | 52 | utilruntime.Must(knowaydevv1alpha1.AddToScheme(clientgoscheme.Scheme)) 53 | // +kubebuilder:scaffold:scheme 54 | } 55 | 56 | func main() { 57 | var metricsAddr string 58 | var probeAddr string 59 | var listenerAddr string 60 | var adminAddr string 61 | var configPath string 62 | 63 | flag.StringVar(&listenerAddr, "gateway-listener-address", ":8080", "The address the gateway listener binds to.") 64 | flag.StringVar(&adminAddr, "admin-listener-address", "127.0.0.1:9080", "The address the admin listener binds to.") 65 | flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") 66 | flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metrics endpoint binds to. "+ 67 | "Leave it as 0 to disable the metrics server; when enabling it, pick a port not used by the gateway (:8080) or probe (:8081) listeners.") 68 | flag.StringVar(&configPath, "config", "config/config.yaml", "Path to the configuration file") 69 | flag.Parse() 70 | 71 | cfg, err := config.LoadConfig(configPath) 72 | if err != nil { 73 | slog.Error("Failed to load configuration", "error", err) 74 | os.Exit(1) // exit non-zero so the failed start is visible to the caller/supervisor 75 | } 76 | 77 | app := bootkit.New(bootkit.StartTimeout(time.Second * 10)) //nolint:mnd 78 | 79 | logLevel := slog.LevelInfo 80 | if cfg.Debug { 81 | logLevel = slog.LevelDebug 82 | } 83 | 84 | slog.SetDefault(slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ 85 | Level: logLevel, 86 | }))) 87 | 88 | // development static server 89 | devStaticServer := false 90 | 91 | if devStaticServer { 92 | app.Add(func(_ context.Context, lifeCycle bootkit.LifeCycle) error { 93 | return gateway.StaticRegisterClusters(gateway.StaticClustersConfig, lifeCycle) 94 | }) 95 | } else { 96 | // Start the server and handle errors gracefully 97 | app.Add(func(ctx context.Context, lifeCycle 
bootkit.LifeCycle) error { 98 | return server.StartController(ctx, lifeCycle, 99 | metricsAddr, 100 | probeAddr, 101 | cfg.Controller) 102 | }) 103 | } 104 | 105 | staticListeners := toAnySlice(cfg.StaticListeners) 106 | 107 | app.Add(func(ctx context.Context, lifeCycle bootkit.LifeCycle) error { 108 | return gateway.StartGateway(ctx, lifeCycle, 109 | listenerAddr, 110 | staticListeners) 111 | }) 112 | app.Add(func(ctx context.Context, lifeCycle bootkit.LifeCycle) error { 113 | return admin.NewAdminServer(ctx, staticListeners, adminAddr, lifeCycle) 114 | }) 115 | 116 | app.Start() 117 | } 118 | 119 | func toAnySlice(cfg []map[string]interface{}) []*anypb.Any { 120 | anys := make([]*anypb.Any, 0, len(cfg)) 121 | 122 | for _, c := range cfg { 123 | bs := lo.Must1(yaml.Marshal(c)) 124 | n := new(anypb.Any) 125 | lo.Must0(protoyaml.Unmarshal(bs, n)) 126 | anys = append(anys, n) 127 | } 128 | 129 | return anys 130 | } 131 | -------------------------------------------------------------------------------- /config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "gopkg.in/yaml.v3" 8 | ) 9 | 10 | type ControllerConfig struct { 11 | EnableLeaderElection bool `yaml:"enable_leader_election" json:"enable_leader_election"` 12 | SecureMetrics bool `yaml:"secure_metrics" json:"secure_metrics"` 13 | EnableHTTP2 bool `yaml:"enable_http2" json:"enable_http_2"` 14 | } 15 | 16 | type Config struct { 17 | Debug bool `yaml:"debug" json:"debug"` 18 | Controller ControllerConfig `yaml:"controller" json:"controller"` 19 | // KubeConfig is the path to the kubeconfig file, used for local development, if empty, in-cluster config will be used. 
20 | KubeConfig string `yaml:"kubeConfig" json:"kubeConfig"` 21 | 22 | StaticListeners []map[string]interface{} `yaml:"staticListeners" json:"staticListeners"` 23 | } 24 | 25 | // LoadConfig loads the configuration from the specified YAML file 26 | func LoadConfig(path string) (*Config, error) { 27 | file, err := os.Open(path) 28 | if err != nil { 29 | return nil, fmt.Errorf("failed to open config file: %w", err) 30 | } 31 | defer file.Close() 32 | 33 | var cfg Config 34 | 35 | decoder := yaml.NewDecoder(file) 36 | if err := decoder.Decode(&cfg); err != nil { 37 | return nil, fmt.Errorf("failed to decode config file: %w", err) 38 | } 39 | 40 | return &cfg, nil 41 | } 42 | -------------------------------------------------------------------------------- /config/config.yaml: -------------------------------------------------------------------------------- 1 | debug: true 2 | controller: 3 | secure_metrics: false 4 | enable_http2: false 5 | kubeConfig: "" 6 | staticListeners: 7 | - "@type": type.googleapis.com/knoway.listeners.v1alpha1.ChatCompletionListener 8 | name: openai-chat 9 | filters: 10 | - name: api-key-auth 11 | config: 12 | "@type": type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig 13 | authServer: 14 | url: localhost:8083 15 | timeout: 3s 16 | - config: 17 | "@type": type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig 18 | statsServer: 19 | url: localhost:8083 20 | timeout: 3s 21 | # - config: 22 | # "@type": type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig 23 | # policies: 24 | # - basedOn: USER_ID 25 | # duration: 30s 26 | 27 | accessLog: 28 | enable: true 29 | - "@type": type.googleapis.com/knoway.listeners.v1alpha1.ImageListener 30 | name: openai-image 31 | filters: 32 | - name: api-key-auth 33 | config: 34 | "@type": type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig 35 | authServer: 36 | url: localhost:8083 37 | timeout: 3s 38 | - config: 39 | "@type": 
type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig 40 | statsServer: 41 | url: localhost:8083 42 | timeout: 3s 43 | accessLog: 44 | enable: true 45 | -------------------------------------------------------------------------------- /config/crd/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # This kustomization.yaml is not intended to be run by itself, 2 | # since it depends on service name and namespace that are out of this kustomize package. 3 | # It should be run by config/default 4 | resources: 5 | - bases/llm.knoway.dev_llmbackends.yaml 6 | - bases/llm.knoway.dev_imagegenerationbackends.yaml 7 | - bases/llm.knoway.dev_modelroutes.yaml 8 | # +kubebuilder:scaffold:crdkustomizeresource 9 | 10 | patches: 11 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix. 12 | # patches here are for enabling the conversion webhook for each CRD 13 | # +kubebuilder:scaffold:crdkustomizewebhookpatch 14 | 15 | # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix. 16 | # patches here are for enabling the CA injection for each CRD 17 | #- path: patches/cainjection_in_llmbackends.yaml 18 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch 19 | 20 | # [WEBHOOK] To enable webhook, uncomment the following section 21 | # the following config is for teaching kustomize how to do kustomization for CRDs. 
22 | 23 | #configurations: 24 | #- kustomizeconfig.yaml 25 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | version: v1 8 | group: apiextensions.k8s.io 9 | path: spec/conversion/webhook/clientConfig/service/name 10 | 11 | namespace: 12 | - kind: CustomResourceDefinition 13 | version: v1 14 | group: apiextensions.k8s.io 15 | path: spec/conversion/webhook/clientConfig/service/namespace 16 | create: false 17 | 18 | varReference: 19 | - path: metadata/annotations 20 | -------------------------------------------------------------------------------- /config/rbac/imagegenerationbackend_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # This rule is not used by the project knoway itself. 2 | # It is provided to allow the cluster admin to help manage permissions for users. 3 | # 4 | # Grants permissions to create, update, and delete resources within the llm.knoway.dev. 5 | # This role is intended for users who need to manage these resources 6 | # but should not control RBAC or manage permissions for others. 
7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | labels: 12 | app.kubernetes.io/name: knoway 13 | app.kubernetes.io/managed-by: kustomize 14 | name: imagegenerationbackend-editor-role 15 | rules: 16 | - apiGroups: 17 | - llm.knoway.dev 18 | resources: 19 | - imagegenerationbackends 20 | verbs: 21 | - create 22 | - delete 23 | - get 24 | - list 25 | - patch 26 | - update 27 | - watch 28 | - apiGroups: 29 | - llm.knoway.dev 30 | resources: 31 | - imagegenerationbackends/status 32 | verbs: 33 | - get 34 | -------------------------------------------------------------------------------- /config/rbac/imagegenerationbackend_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # This rule is not used by the project knoway itself. 2 | # It is provided to allow the cluster admin to help manage permissions for users. 3 | # 4 | # Grants read-only access to llm.knoway.dev resources. 5 | # This role is intended for users who need visibility into these resources 6 | # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | labels: 12 | app.kubernetes.io/name: knoway 13 | app.kubernetes.io/managed-by: kustomize 14 | name: imagegenerationbackend-viewer-role 15 | rules: 16 | - apiGroups: 17 | - llm.knoway.dev 18 | resources: 19 | - imagegenerationbackends 20 | verbs: 21 | - get 22 | - list 23 | - watch 24 | - apiGroups: 25 | - llm.knoway.dev 26 | resources: 27 | - imagegenerationbackends/status 28 | verbs: 29 | - get 30 | -------------------------------------------------------------------------------- /config/rbac/llmbackend_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit llmbackends. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: knoway 7 | app.kubernetes.io/managed-by: kustomize 8 | name: llmbackend-editor-role 9 | rules: 10 | - apiGroups: 11 | - llm.knoway.dev 12 | resources: 13 | - llmbackends 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - llm.knoway.dev 24 | resources: 25 | - llmbackends/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/llmbackend_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view llmbackends. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: knoway 7 | app.kubernetes.io/managed-by: kustomize 8 | name: llmbackend-viewer-role 9 | rules: 10 | - apiGroups: 11 | - llm.knoway.dev 12 | resources: 13 | - llmbackends 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - llm.knoway.dev 20 | resources: 21 | - llmbackends/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/modelroute_admin_role.yaml: -------------------------------------------------------------------------------- 1 | # This rule is not used by the project knoway itself. 2 | # It is provided to allow the cluster admin to help manage permissions for users. 3 | # 4 | # Grants full permissions ('*') over llm.knoway.dev. 5 | # This role is intended for users authorized to modify roles and bindings within the cluster, 6 | # enabling them to delegate specific permissions to other users or groups as needed. 
7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | labels: 12 | app.kubernetes.io/name: knoway 13 | app.kubernetes.io/managed-by: kustomize 14 | name: modelroute-admin-role 15 | rules: 16 | - apiGroups: 17 | - llm.knoway.dev 18 | resources: 19 | - modelroutes 20 | verbs: 21 | - '*' 22 | - apiGroups: 23 | - llm.knoway.dev 24 | resources: 25 | - modelroutes/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/modelroute_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # This rule is not used by the project knoway itself. 2 | # It is provided to allow the cluster admin to help manage permissions for users. 3 | # 4 | # Grants permissions to create, update, and delete resources within the llm.knoway.dev. 5 | # This role is intended for users who need to manage these resources 6 | # but should not control RBAC or manage permissions for others. 7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | labels: 12 | app.kubernetes.io/name: knoway 13 | app.kubernetes.io/managed-by: kustomize 14 | name: modelroute-editor-role 15 | rules: 16 | - apiGroups: 17 | - llm.knoway.dev 18 | resources: 19 | - modelroutes 20 | verbs: 21 | - create 22 | - delete 23 | - get 24 | - list 25 | - patch 26 | - update 27 | - watch 28 | - apiGroups: 29 | - llm.knoway.dev 30 | resources: 31 | - modelroutes/status 32 | verbs: 33 | - get 34 | -------------------------------------------------------------------------------- /config/rbac/modelroute_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # This rule is not used by the project knoway itself. 2 | # It is provided to allow the cluster admin to help manage permissions for users. 3 | # 4 | # Grants read-only access to llm.knoway.dev resources. 
5 | # This role is intended for users who need visibility into these resources 6 | # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing. 7 | 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: ClusterRole 10 | metadata: 11 | labels: 12 | app.kubernetes.io/name: knoway 13 | app.kubernetes.io/managed-by: kustomize 14 | name: modelroute-viewer-role 15 | rules: 16 | - apiGroups: 17 | - llm.knoway.dev 18 | resources: 19 | - modelroutes 20 | verbs: 21 | - get 22 | - list 23 | - watch 24 | - apiGroups: 25 | - llm.knoway.dev 26 | resources: 27 | - modelroutes/status 28 | verbs: 29 | - get 30 | -------------------------------------------------------------------------------- /config/rbac/role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: manager-role 6 | rules: 7 | - apiGroups: 8 | - llm.knoway.dev 9 | resources: 10 | - imagegenerationbackends 11 | - llmbackends 12 | - modelroutes 13 | verbs: 14 | - create 15 | - delete 16 | - get 17 | - list 18 | - patch 19 | - update 20 | - watch 21 | - apiGroups: 22 | - llm.knoway.dev 23 | resources: 24 | - imagegenerationbackends/finalizers 25 | - llmbackends/finalizers 26 | - modelroutes/finalizers 27 | verbs: 28 | - update 29 | - apiGroups: 30 | - llm.knoway.dev 31 | resources: 32 | - imagegenerationbackends/status 33 | - llmbackends/status 34 | - modelroutes/status 35 | verbs: 36 | - get 37 | - patch 38 | - update 39 | -------------------------------------------------------------------------------- /config/samples/kustomization.yaml: -------------------------------------------------------------------------------- 1 | ## Append samples of your project ## 2 | resources: 3 | - llm_v1alpha1_llmbackend.yaml 4 | - llm_v1alpha1_imagegenerationbackend.yaml 5 | - llm_v1alpha1_modelroute.yaml 6 | # +kubebuilder:scaffold:manifestskustomizesamples 7 | 
-------------------------------------------------------------------------------- /config/samples/llm_v1alpha1_imagegenerationbackend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: llm.knoway.dev/v1alpha1 2 | kind: ImageGenerationBackend 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: knoway 6 | app.kubernetes.io/managed-by: kustomize 7 | name: imagegenerationbackend-sample 8 | spec: 9 | provider: OpenAI 10 | name: sd-3 11 | upstream: 12 | baseUrl: "https://api.openai.com/v1" 13 | headers: 14 | - key: "Authorization" 15 | value: "Bearer sk-or-v1-xxxxxxxxxx" 16 | timeout: 300 # ms 17 | defaultParams: 18 | openai: 19 | 'n': '1' 20 | quality: 'standard' 21 | overrideParams: 22 | openai: 23 | # upstream model 24 | model: "stabilityai/sd-3" 25 | filters: 26 | - custom: 27 | pluginName: "examplePlugin" 28 | pluginVersion: "1.0.0" 29 | -------------------------------------------------------------------------------- /config/samples/llm_v1alpha1_llmbackend.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: llm.knoway.dev/v1alpha1 2 | kind: LLMBackend 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: knoway 6 | app.kubernetes.io/managed-by: kustomize 7 | name: llmbackend-sample 8 | spec: 9 | # type: public # public | private | ... 
10 | provider: OpenAI 11 | modelName: gpt-3.5-turbo 12 | upstream: 13 | baseUrl: "https://openrouter.ai/api/v1" 14 | headers: 15 | - key: "Authorization" 16 | value: "Bearer sk-or-v1-xxxxxxxxxx" 17 | timeout: 300 # ms 18 | defaultParams: 19 | openai: 20 | temperature: "0.5" 21 | max_tokens: 100 22 | overrideParams: 23 | openai: 24 | # upstream model 25 | model: "openai/gpt-3.5-turbo" 26 | removeParamKeys: 27 | - negative_prompt 28 | filters: 29 | - custom: 30 | pluginName: "examplePlugin" 31 | pluginVersion: "1.0.0" 32 | # future: 33 | # maxToken: 242444 34 | # capability: 35 | # stream: true 36 | # image: true 37 | # base64: true 38 | #status: 39 | # conditions: 40 | # - config-validator 41 | # - endpoint-check 42 | # - secret-check 43 | -------------------------------------------------------------------------------- /config/samples/llm_v1alpha1_modelroute.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: llm.knoway.dev/v1alpha1 2 | kind: ModelRoute 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: knoway 6 | app.kubernetes.io/managed-by: kustomize 7 | name: modelroute-example 8 | spec: 9 | modelName: deepseek-r1 10 | rateLimit: 11 | rules: 12 | - match: 13 | value: 14 | prefix: sk-prj-123 15 | limit: 100 16 | basedOn: APIKey 17 | duration: 60 18 | - match: 19 | value: 20 | exact: u-4587 21 | limit: 100 22 | basedOn: APIKey 23 | duration: 60 24 | - limit: 1 25 | basedOn: UserID 26 | duration: 60 27 | route: 28 | loadBalancePolicy: "WeightedRoundRobin" 29 | targets: 30 | - destination: 31 | backend: deepseek-r1 32 | namespace: public 33 | weight: 1 34 | - destination: 35 | backend: deepseek-r1-4090 36 | namespace: public 37 | weight: 2 38 | fallback: 39 | preDelay: 5s 40 | postDelay: 5s 41 | maxRetries: 3 42 | -------------------------------------------------------------------------------- /cspell.config.yaml: -------------------------------------------------------------------------------- 1 | version: "0.2" 2
| ignorePaths: [] 3 | dictionaryDefinitions: [] 4 | dictionaries: [] 5 | words: 6 | - anypb 7 | - apierrors 8 | - apikey 9 | - apimachinery 10 | - bmatcuk 11 | - bodyclose 12 | - bootkit 13 | - bufconn 14 | - bufnet 15 | - Causef 16 | - clientcmd 17 | - clientgoscheme 18 | - clusterfilters 19 | - clustermanager 20 | - containedctx 21 | - contextcheck 22 | - copyloopvar 23 | - cyclop 24 | - dall 25 | - daocloud 26 | - depguard 27 | - doublestar 28 | - Drainable 29 | - dupl 30 | - durationcheck 31 | - durationpb 32 | - Equalf 33 | - err113 34 | - errcheck 35 | - errchkjson 36 | - errname 37 | - errorlint 38 | - evanphx 39 | - execinquery 40 | - exhaustive 41 | - exhaustruct 42 | - exportloopref 43 | - fatcontext 44 | - filtersv1alpha1 45 | - finalizer 46 | - finalizers 47 | - Finalzer 48 | - forcetypeassert 49 | - funlen 50 | - gci 51 | - genclient 52 | - ginkgolinter 53 | - gochecknoglobals 54 | - gochecknoinits 55 | - gocognit 56 | - goconst 57 | - gocritic 58 | - gocyclo 59 | - godot 60 | - godox 61 | - gofmt 62 | - gofumpt 63 | - goimports 64 | - gomnd 65 | - goopenai 66 | - gosec 67 | - gosimple 68 | - govet 69 | - healthz 70 | - imagegenerationbackend 71 | - imagegenerationbackends 72 | - ineffassign 73 | - ireturn 74 | - jsonpatch 75 | - kebe 76 | - knoway 77 | - knowaydevv1alpha1 78 | - kubebuilder 79 | - kubeconfig 80 | - listenerfilters 81 | - lll 82 | - llmbackend 83 | - llmbackends 84 | - llmv1alpha1 85 | - Loggable 86 | - maintidx 87 | - Marshallers 88 | - metav1 89 | - metricsserver 90 | - misspell 91 | - modelroute 92 | - modelroutes 93 | - multierror 94 | - nakedret 95 | - nekomeowww 96 | - nestif 97 | - nilerr 98 | - nilfunc 99 | - nilnil 100 | - nlreturn 101 | - nolint 102 | - nolintlint 103 | - nosniff 104 | - nosprintfhostport 105 | - Ollama 106 | - openrouter 107 | - paralleltest 108 | - perfsprint 109 | - prealloc 110 | - predeclared 111 | - Preflights 112 | - printcolumn 113 | - protogetter 114 | - protojson 115 | - protoutils 116 | - ptypes 
117 | - qwen 118 | - readyz 119 | - reassign 120 | - registrycluster 121 | - registryfilters 122 | - registryroute 123 | - revive 124 | - routemanager 125 | - routev1alpha1 126 | - rroute 127 | - rueidis 128 | - samber 129 | - sashabaranov 130 | - servicev1alpha1 131 | - stabilityai 132 | - staticcheck 133 | - Statusable 134 | - Statuser 135 | - stoewer 136 | - strcase 137 | - stretchr 138 | - structpb 139 | - stylecheck 140 | - subresource 141 | - tagalign 142 | - tagliatelle 143 | - testpackage 144 | - tolerations 145 | - ttlcache 146 | - typecheck 147 | - unconvert 148 | - unmarshalled 149 | - Unmarshaller 150 | - Unmarshallers 151 | - unparam 152 | - unused 153 | - usestdlibvars 154 | - utilruntime 155 | - varnamelen 156 | - vllm 157 | - webp 158 | - whitespace 159 | - wrapcheck 160 | - wsl 161 | ignoreWords: [] 162 | import: [] 163 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module knoway.dev 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.23.2 6 | 7 | require ( 8 | buf.build/go/protoyaml v0.3.1 9 | github.com/bmatcuk/doublestar/v4 v4.7.1 10 | github.com/evanphx/json-patch/v5 v5.9.0 11 | github.com/golang/protobuf v1.5.4 12 | github.com/gorilla/mux v1.8.1 13 | github.com/hashicorp/go-multierror v1.1.1 14 | github.com/nekomeowww/fo v1.4.0 15 | github.com/nekomeowww/xo v1.12.0 16 | github.com/redis/rueidis v1.0.55 17 | github.com/samber/lo v1.49.1 18 | github.com/samber/mo v1.13.0 19 | github.com/sashabaranov/go-openai v1.35.7 20 | github.com/stoewer/go-strcase v1.3.0 21 | github.com/stretchr/testify v1.10.0 22 | go.opentelemetry.io/otel v1.29.0 23 | golang.org/x/image v0.24.0 24 | google.golang.org/grpc v1.65.0 25 | google.golang.org/protobuf v1.36.0 26 | gopkg.in/yaml.v3 v3.0.1 27 | k8s.io/api v0.30.0 28 | k8s.io/apimachinery v0.30.0 29 | k8s.io/client-go v0.30.0 30 | sigs.k8s.io/controller-runtime v0.18.2 31 | sigs.k8s.io/yaml v1.3.0 32 
| ) 33 | 34 | require ( 35 | buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.0-20241127180247-a33202765966.1 // indirect 36 | cel.dev/expr v0.18.0 // indirect 37 | github.com/antlr4-go/antlr/v4 v4.13.0 // indirect 38 | github.com/beorn7/perks v1.0.1 // indirect 39 | github.com/bufbuild/protovalidate-go v0.8.0 // indirect 40 | github.com/cespare/xxhash/v2 v2.3.0 // indirect 41 | github.com/davecgh/go-spew v1.1.1 // indirect 42 | github.com/emicklei/go-restful/v3 v3.11.0 // indirect 43 | github.com/evanphx/json-patch v4.12.0+incompatible // indirect 44 | github.com/fsnotify/fsnotify v1.7.0 // indirect 45 | github.com/go-logr/logr v1.4.2 // indirect 46 | github.com/go-logr/zapr v1.3.0 // indirect 47 | github.com/go-openapi/jsonpointer v0.19.6 // indirect 48 | github.com/go-openapi/jsonreference v0.20.2 // indirect 49 | github.com/go-openapi/swag v0.22.3 // indirect 50 | github.com/gogo/protobuf v1.3.2 // indirect 51 | github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect 52 | github.com/google/cel-go v0.22.1 // indirect 53 | github.com/google/gnostic-models v0.6.8 // indirect 54 | github.com/google/go-cmp v0.7.0 // indirect 55 | github.com/google/gofuzz v1.2.0 // indirect 56 | github.com/google/uuid v1.6.0 // indirect 57 | github.com/hashicorp/errwrap v1.0.0 // indirect 58 | github.com/imdario/mergo v0.3.6 // indirect 59 | github.com/josharian/intern v1.0.0 // indirect 60 | github.com/json-iterator/go v1.1.12 // indirect 61 | github.com/mailru/easyjson v0.7.7 // indirect 62 | github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect 63 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 64 | github.com/modern-go/reflect2 v1.0.2 // indirect 65 | github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect 66 | github.com/pkg/errors v0.9.1 // indirect 67 | github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect 68 | github.com/prometheus/client_golang 
v1.16.0 // indirect 69 | github.com/prometheus/client_model v0.4.0 // indirect 70 | github.com/prometheus/common v0.44.0 // indirect 71 | github.com/prometheus/procfs v0.12.0 // indirect 72 | github.com/shopspring/decimal v1.4.0 // indirect 73 | github.com/spf13/pflag v1.0.5 // indirect 74 | go.uber.org/multierr v1.11.0 // indirect 75 | go.uber.org/zap v1.27.0 // indirect 76 | golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect 77 | golang.org/x/net v0.35.0 // indirect 78 | golang.org/x/oauth2 v0.20.0 // indirect 79 | golang.org/x/sys v0.30.0 // indirect 80 | golang.org/x/term v0.29.0 // indirect 81 | golang.org/x/text v0.22.0 // indirect 82 | golang.org/x/time v0.3.0 // indirect 83 | gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect 84 | google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect 85 | google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect 86 | gopkg.in/inf.v0 v0.9.1 // indirect 87 | gopkg.in/yaml.v2 v2.4.0 // indirect 88 | k8s.io/apiextensions-apiserver v0.30.0 // indirect 89 | k8s.io/klog/v2 v2.120.1 // indirect 90 | k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect 91 | k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect 92 | sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect 93 | sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect 94 | ) 95 | -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ -------------------------------------------------------------------------------- /internal/controller/backends.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "context" 5 | 6 | apierrors "k8s.io/apimachinery/pkg/api/errors" 7 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 8 | "k8s.io/apimachinery/pkg/types" 9 | "sigs.k8s.io/controller-runtime/pkg/client" 10 | 11 | knowaydevv1alpha1 "knoway.dev/api/v1alpha1" 12 | ) 13 | 14 | type Backend interface { 15 | GetType() knowaydevv1alpha1.BackendType 16 | GetObjectObjectMeta() metav1.ObjectMeta 17 | GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] 18 | GetModelName() string 19 | } 20 | 21 | var _ Backend = (*LLMBackend)(nil) 22 | 23 | type LLMBackend struct { 24 | *knowaydevv1alpha1.LLMBackend 25 | } 26 | 27 | func (b *LLMBackend) GetType() knowaydevv1alpha1.BackendType { 28 | return knowaydevv1alpha1.BackendTypeLLM 29 | } 30 | 31 | func (b *LLMBackend) GetObjectObjectMeta() metav1.ObjectMeta { 32 | return b.LLMBackend.ObjectMeta 33 | } 34 | 35 | func (b *LLMBackend) GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] { 36 | return &LLMBackendStatus{LLMBackendStatus: &b.Status} 37 | } 38 | 39 | func (b *LLMBackend) GetModelName() string { 40 | return modelNameOrNamespacedName(b.LLMBackend) 41 | } 42 | 43 | func BackendFromLLMBackend(llmBackend *knowaydevv1alpha1.LLMBackend) Backend { 44 | return &LLMBackend{ 45 | LLMBackend: llmBackend, 46 | } 47 | } 48 | 49 | type LLMBackendStatus struct { 50 | 
*knowaydevv1alpha1.LLMBackendStatus 51 | } 52 | 53 | func (s *LLMBackendStatus) GetStatus() knowaydevv1alpha1.StatusEnum { 54 | return s.Status 55 | } 56 | 57 | func (s *LLMBackendStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) { 58 | s.Status = status 59 | } 60 | 61 | func (s *LLMBackendStatus) GetConditions() []metav1.Condition { 62 | return s.Conditions 63 | } 64 | 65 | func (s *LLMBackendStatus) SetConditions(conditions []metav1.Condition) { 66 | s.Conditions = conditions 67 | } 68 | 69 | var _ Backend = (*ImageGenerationBackend)(nil) 70 | 71 | type ImageGenerationBackend struct { 72 | *knowaydevv1alpha1.ImageGenerationBackend 73 | } 74 | 75 | func (b *ImageGenerationBackend) GetType() knowaydevv1alpha1.BackendType { 76 | return knowaydevv1alpha1.BackendTypeImageGeneration 77 | } 78 | 79 | func (b *ImageGenerationBackend) GetObjectObjectMeta() metav1.ObjectMeta { 80 | return b.ImageGenerationBackend.ObjectMeta 81 | } 82 | 83 | func (b *ImageGenerationBackend) GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] { 84 | return &ImageGenerationBackendStatus{ImageGenerationBackendStatus: &b.Status} 85 | } 86 | 87 | func (b *ImageGenerationBackend) GetModelName() string { 88 | return modelNameOrNamespacedName(b.ImageGenerationBackend) 89 | } 90 | 91 | func BackendFromImageGenerationBackend(imageGenerationBackend *knowaydevv1alpha1.ImageGenerationBackend) Backend { 92 | return &ImageGenerationBackend{ 93 | ImageGenerationBackend: imageGenerationBackend, 94 | } 95 | } 96 | 97 | type ImageGenerationBackendStatus struct { 98 | *knowaydevv1alpha1.ImageGenerationBackendStatus 99 | } 100 | 101 | func (s *ImageGenerationBackendStatus) GetStatus() knowaydevv1alpha1.StatusEnum { 102 | return s.Status 103 | } 104 | 105 | func (s *ImageGenerationBackendStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) { 106 | s.Status = status 107 | } 108 | 109 | func (s *ImageGenerationBackendStatus) GetConditions() []metav1.Condition { 110 | return s.Conditions 111 | } 112 | 113 | 
func (s *ImageGenerationBackendStatus) SetConditions(conditions []metav1.Condition) { 114 | s.Conditions = conditions 115 | } 116 | 117 | func getBackendFromNamespacedName(ctx context.Context, kubeClient client.Client, namespacedName types.NamespacedName) (Backend, error) { 118 | var llmBackend knowaydevv1alpha1.LLMBackend 119 | 120 | err := kubeClient.Get(ctx, namespacedName, &llmBackend) 121 | if err != nil && !apierrors.IsNotFound(err) { 122 | return nil, err 123 | } 124 | if err == nil { 125 | return BackendFromLLMBackend(&llmBackend), nil 126 | } 127 | 128 | var imageGenerationBackend knowaydevv1alpha1.ImageGenerationBackend 129 | 130 | err = kubeClient.Get(ctx, namespacedName, &imageGenerationBackend) 131 | if err != nil && !apierrors.IsNotFound(err) { 132 | return nil, err 133 | } 134 | if err == nil { 135 | return BackendFromImageGenerationBackend(&imageGenerationBackend), nil 136 | } 137 | 138 | return nil, nil 139 | } 140 | -------------------------------------------------------------------------------- /internal/controller/common_test.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/samber/lo" 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | "google.golang.org/protobuf/types/known/structpb" 10 | 11 | "knoway.dev/api/v1alpha1" 12 | ) 13 | 14 | func TestProcessStruct_OpenAIChatParams(t *testing.T) { 15 | tests := []struct { 16 | name string 17 | input *v1alpha1.ModelParams 18 | expected map[string]*structpb.Value 19 | expectError bool 20 | }{ 21 | { 22 | name: "Valid ChatRequest with new params", 23 | input: &v1alpha1.ModelParams{ 24 | OpenAI: &v1alpha1.OpenAIParam{ 25 | CommonParams: v1alpha1.CommonParams{ 26 | Model: "gpt-3.5-turbo", 27 | Temperature: lo.ToPtr("0.7"), 28 | }, 29 | MaxTokens: lo.ToPtr(100), 30 | MaxCompletionTokens: lo.ToPtr(200), 31 | TopP: lo.ToPtr("0.3"), 32 | Stream: lo.ToPtr(true), 33 | 
StreamOptions: &v1alpha1.StreamOptions{ 34 | IncludeUsage: lo.ToPtr(true), 35 | }, 36 | }, 37 | }, 38 | expected: map[string]*structpb.Value{ 39 | "model": structpb.NewStringValue("gpt-3.5-turbo"), 40 | "temperature": structpb.NewNumberValue(0.7), 41 | "max_tokens": structpb.NewNumberValue(100), 42 | "max_completion_tokens": structpb.NewNumberValue(200), 43 | "top_p": structpb.NewNumberValue(0.3), 44 | "stream": structpb.NewBoolValue(true), 45 | "stream_options": structpb.NewStructValue(&structpb.Struct{ 46 | Fields: map[string]*structpb.Value{ 47 | "include_usage": structpb.NewBoolValue(true), 48 | }, 49 | }), 50 | }, 51 | expectError: false, 52 | }, 53 | } 54 | 55 | for _, tt := range tests { 56 | t.Run(tt.name, func(t *testing.T) { 57 | params := make(map[string]*structpb.Value) 58 | 59 | // Call the function under test 60 | err := parseModelParams(tt.input, params) 61 | 62 | if tt.expectError { 63 | assert.Error(t, err) 64 | } else { 65 | require.NoError(t, err) 66 | // Validate the result 67 | assert.Equal(t, tt.expected, params) 68 | } 69 | }) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /internal/controller/controller_test.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | "flag" 5 | "testing" 6 | 7 | ctrl "sigs.k8s.io/controller-runtime" 8 | "sigs.k8s.io/controller-runtime/pkg/log/zap" 9 | ) 10 | 11 | func TestMain(m *testing.M) { 12 | copts := zap.Options{ 13 | Development: true, 14 | } 15 | 16 | copts.BindFlags(flag.CommandLine) 17 | ctrl.SetLogger(zap.New(zap.UseFlagOptions(&copts))) 18 | 19 | m.Run() 20 | } 21 | -------------------------------------------------------------------------------- /internal/controller/imagegenerationbackend_controller_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package controller 18 | 19 | import ( 20 | "context" 21 | "testing" 22 | 23 | "github.com/samber/lo" 24 | "github.com/stretchr/testify/require" 25 | "sigs.k8s.io/controller-runtime/pkg/client" 26 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 27 | 28 | "knoway.dev/api/v1alpha1" 29 | 30 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 | ) 32 | 33 | func TestImageGenerationBackendReconciler_Reconcile(t *testing.T) { 34 | tests := []struct { 35 | name string 36 | setupClient func(client.Client) client.Client 37 | request reconcile.Request 38 | expectError bool 39 | validate func(*testing.T, client.Client) 40 | }{ 41 | { 42 | name: "Valid resource reconciled", 43 | setupClient: func(cl client.Client) client.Client { 44 | resource := &v1alpha1.ImageGenerationBackend{ 45 | ObjectMeta: metav1.ObjectMeta{ 46 | Name: "test-model", 47 | Namespace: "default", 48 | }, 49 | Spec: v1alpha1.ImageGenerationBackendSpec{ 50 | ModelName: lo.ToPtr("test-model"), 51 | Upstream: v1alpha1.ImageGenerationBackendUpstream{ 52 | BaseURL: "xx/v1", 53 | }, 54 | Filters: nil, 55 | }, 56 | Status: v1alpha1.ImageGenerationBackendStatus{}, 57 | } 58 | err := cl.Create(context.Background(), resource) 59 | if err != nil { 60 | t.Fatalf("failed to create resource: %v", err) 61 | } 62 | return cl 63 | }, 64 | request: reconcile.Request{ 65 | NamespacedName: client.ObjectKey{ 66 | Namespace: "default", 67 | Name: 
"test-model", 68 | }, 69 | }, 70 | expectError: false, 71 | validate: func(t *testing.T, cl client.Client) { 72 | t.Helper() 73 | resource := &v1alpha1.ImageGenerationBackend{} 74 | err := cl.Get(context.Background(), client.ObjectKey{ 75 | Namespace: "default", 76 | Name: "test-model", 77 | }, resource) 78 | require.NoError(t, err) 79 | }, 80 | }, 81 | } 82 | 83 | for _, tt := range tests { 84 | t.Run(tt.name, func(t *testing.T) { 85 | fakeClient := tt.setupClient(NewFakeClientWithStatus()) 86 | reconciler := &ImageGenerationBackendReconciler{ 87 | Client: fakeClient, 88 | } 89 | 90 | _, err := reconciler.Reconcile(context.TODO(), tt.request) 91 | if tt.expectError { 92 | require.Error(t, err) 93 | } else { 94 | require.NoError(t, err) 95 | } 96 | 97 | if tt.validate != nil { 98 | tt.validate(t, fakeClient) 99 | } 100 | }) 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /internal/controller/llmbackend_controller_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package controller 18 | 19 | import ( 20 | "context" 21 | "testing" 22 | 23 | "github.com/samber/lo" 24 | "github.com/stretchr/testify/require" 25 | 26 | "k8s.io/apimachinery/pkg/runtime" 27 | "sigs.k8s.io/controller-runtime/pkg/client/fake" 28 | 29 | "knoway.dev/api/v1alpha1" 30 | 31 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 | "sigs.k8s.io/controller-runtime/pkg/client" 33 | "sigs.k8s.io/controller-runtime/pkg/reconcile" 34 | ) 35 | 36 | func TestLLMBackendReconciler_Reconcile(t *testing.T) { 37 | tests := []struct { 38 | name string 39 | setupClient func(client.Client) client.Client 40 | request reconcile.Request 41 | expectError bool 42 | validate func(*testing.T, client.Client) 43 | }{ 44 | { 45 | name: "Valid resource reconciled", 46 | setupClient: func(cl client.Client) client.Client { 47 | resource := &v1alpha1.LLMBackend{ 48 | ObjectMeta: metav1.ObjectMeta{ 49 | Name: "test-model", 50 | Namespace: "default", 51 | }, 52 | Spec: v1alpha1.LLMBackendSpec{ 53 | ModelName: lo.ToPtr("test-model"), 54 | Upstream: v1alpha1.BackendUpstream{ 55 | BaseURL: "xx/v1", 56 | }, 57 | Filters: nil, 58 | }, 59 | Status: v1alpha1.LLMBackendStatus{}, 60 | } 61 | err := cl.Create(context.Background(), resource) 62 | if err != nil { 63 | t.Fatalf("failed to create resource: %v", err) 64 | } 65 | return cl 66 | }, 67 | request: reconcile.Request{ 68 | NamespacedName: client.ObjectKey{ 69 | Namespace: "default", 70 | Name: "test-model", 71 | }, 72 | }, 73 | expectError: false, 74 | validate: func(t *testing.T, cl client.Client) { 75 | t.Helper() 76 | resource := &v1alpha1.LLMBackend{} 77 | err := cl.Get(context.Background(), client.ObjectKey{ 78 | Namespace: "default", 79 | Name: "test-model", 80 | }, resource) 81 | require.NoError(t, err) 82 | }, 83 | }, 84 | } 85 | 86 | for _, tt := range tests { 87 | t.Run(tt.name, func(t *testing.T) { 88 | fakeClient := tt.setupClient(NewFakeClientWithStatus()) 89 | reconciler := &LLMBackendReconciler{ 90 | Client: 
fakeClient, 91 | } 92 | 93 | _, err := reconciler.Reconcile(context.TODO(), tt.request) 94 | if tt.expectError { 95 | require.Error(t, err) 96 | } else { 97 | require.NoError(t, err) 98 | } 99 | 100 | if tt.validate != nil { 101 | tt.validate(t, fakeClient) 102 | } 103 | }) 104 | } 105 | } 106 | 107 | func NewFakeClientWithStatus() client.Client { 108 | return &FakeClientWithStatus{ 109 | Client: fake.NewClientBuilder().WithScheme(createTestScheme()).Build(), 110 | } 111 | } 112 | 113 | type FakeClientWithStatus struct { 114 | client.Client 115 | } 116 | 117 | func (f *FakeClientWithStatus) Status() client.StatusWriter { 118 | return &FakeStatusWriter{Client: f.Client} 119 | } 120 | 121 | type FakeStatusWriter struct { 122 | client.Client 123 | } 124 | 125 | func (f *FakeStatusWriter) Create(ctx context.Context, obj client.Object, subResource client.Object, opts ...client.SubResourceCreateOption) error { 126 | panic("implement me") 127 | } 128 | 129 | func (f *FakeStatusWriter) Update(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { 130 | return f.Client.Update(ctx, obj) 131 | } 132 | 133 | func (f *FakeStatusWriter) Patch(ctx context.Context, obj client.Object, patch client.Patch, opts ...client.SubResourcePatchOption) error { 134 | panic("implement me") 135 | } 136 | 137 | func createTestScheme() *runtime.Scheme { 138 | scheme := runtime.NewScheme() 139 | _ = v1alpha1.AddToScheme(scheme) 140 | 141 | return scheme 142 | } 143 | -------------------------------------------------------------------------------- /internal/controller/route.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | 6 | knowaydevv1alpha1 "knoway.dev/api/v1alpha1" 7 | ) 8 | 9 | type ModelRouteStatus struct { 10 | *knowaydevv1alpha1.ModelRouteStatus 11 | } 12 | 13 | func (s *ModelRouteStatus) GetStatus() knowaydevv1alpha1.StatusEnum { 
14 | return s.Status 15 | } 16 | 17 | func (s *ModelRouteStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) { 18 | s.Status = status 19 | } 20 | 21 | func (s *ModelRouteStatus) GetConditions() []metav1.Condition { 22 | return s.Conditions 23 | } 24 | 25 | func (s *ModelRouteStatus) SetConditions(conditions []metav1.Condition) { 26 | s.Conditions = conditions 27 | } 28 | 29 | func (s *ModelRouteStatus) GetTargetsStatus() []knowaydevv1alpha1.ModelRouteStatusTarget { 30 | return s.Targets 31 | } 32 | 33 | func (s *ModelRouteStatus) SetTargetsStatus(targets []knowaydevv1alpha1.ModelRouteStatusTarget) { 34 | s.Targets = targets 35 | } 36 | -------------------------------------------------------------------------------- /internal/controller/status.go: -------------------------------------------------------------------------------- 1 | package controller 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | 6 | llmv1alpha1 "knoway.dev/api/v1alpha1" 7 | ) 8 | 9 | type Statusable[S any] interface { 10 | GetStatus() S 11 | SetStatus(status S) 12 | GetConditions() []metav1.Condition 13 | SetConditions(conditions []metav1.Condition) 14 | } 15 | 16 | type RouteStatusable[S any] interface { 17 | Statusable[S] 18 | 19 | GetTargetsStatus() []llmv1alpha1.ModelRouteStatusTarget 20 | SetTargetsStatus(targets []llmv1alpha1.ModelRouteStatusTarget) 21 | } 22 | -------------------------------------------------------------------------------- /license-lint.yml: -------------------------------------------------------------------------------- 1 | unrestricted_licenses: 2 | - Apache-2.0 3 | - CC-BY-3.0 4 | - ISC 5 | - AFL-2.1 6 | - AFL-3.0 7 | - Artistic-1.0 8 | - Artistic-2.0 9 | - Apache-1.1 10 | - BSD-1-Clause 11 | - BSD-2-Clause 12 | - BSD-3-Clause 13 | - 0BSD 14 | - FTL 15 | - LPL-1.02 16 | - MS-PL 17 | - MIT 18 | - NCSA 19 | - OpenSSL 20 | - PHP-3.0 21 | - TCP-wrappers 22 | - W3C 23 | - Xnet 24 | - Zlib 25 | 26 | reciprocal_licenses: 27 | - CC0-1.0 28 | - APSL-2.0 
29 | - CDDL-1.0 30 | - CDDL-1.1 31 | - CPL-1.0 32 | - EPL-1.0 33 | - IPL-1.0 34 | - MPL-1.0 35 | - MPL-1.1 36 | - MPL-2.0 37 | - MPL-2.0-no-copyleft-exception 38 | - Ruby 39 | 40 | restricted_licenses: 41 | - GPL-1.0-only 42 | - GPL-1.0-or-later 43 | - GPL-2.0-only 44 | - GPL-2.0-or-later 45 | - GPL-3.0-only 46 | - GPL-3.0-or-later 47 | - LGPL-2.0-only 48 | - LGPL-2.0-or-later 49 | - LGPL-2.1-only 50 | - LGPL-2.1-or-later 51 | - LGPL-3.0-only 52 | - LGPL-3.0-or-later 53 | - NPL-1.0 54 | - NPL-1.1 55 | - OSL-1.0 56 | - OSL-1.1 57 | - OSL-2.0 58 | - OSL-2.1 59 | - OSL-3.0 60 | - QPL-1.0 61 | - Sleepycat 62 | 63 | allowlisted_modules: 64 | # BSD: https://github.com/gogo/protobuf/blob/master/LICENSE 65 | - github.com/gogo/protobuf 66 | 67 | # MIT: https://github.com/kubernetes-sigs/yaml/blob/master/LICENSE 68 | - sigs.k8s.io/yaml 69 | -------------------------------------------------------------------------------- /manifests/knoway/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /manifests/knoway/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: knoway 3 | description: A Helm chart to deploy Knoway and its related components into Kubernetes 4 | 5 | # A chart can be either an 'application' or a 'library' chart. 
6 | # 7 | # Application charts are a collection of templates that can be packaged into versioned archives 8 | # to be deployed. 9 | # 10 | # Library charts provide useful utilities or functions for the chart developer. They're included as 11 | # a dependency of application charts to inject those utilities and functions into the rendering 12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed. 13 | type: application 14 | 15 | # This is the chart version. This version number should be incremented each time you make changes 16 | # to the chart and its templates, including the app version. 17 | # Versions are expected to follow Semantic Versioning (https://semver.org/) 18 | version: 0.1.0 19 | 20 | # This is the version number of the application being deployed. This version number should be 21 | # incremented each time you make changes to the application. Versions are not expected to 22 | # follow Semantic Versioning. They should reflect the version the application is using. 23 | # It is recommended to use it with quotes. 24 | appVersion: "1.16.0" 25 | -------------------------------------------------------------------------------- /manifests/knoway/templates/_commons.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Common Template 3 | */}} 4 | 5 | {{/* 6 | Merge imagePullSecrets: common.images.pullSecrets 7 | */}} 8 | {{- define "common.images.pullSecrets" -}} 9 | {{- $pullSecrets := list }} 10 | 11 | {{- if .Values.global -}} 12 | {{- range .Values.global.imagePullSecrets -}} 13 | {{- $pullSecrets = append $pullSecrets . -}} 14 | {{- end -}} 15 | {{- end -}} 16 | {{- range .Values.imagePullSecrets -}} 17 | {{- $pullSecrets = append $pullSecrets . -}} 18 | {{- end -}} 19 | 20 | {{- if (not (empty $pullSecrets)) }} 21 | imagePullSecrets: 22 | {{- range $pullSecrets }} 23 | - name: {{ . 
}} 24 | {{- end }} 25 | {{- end }} 26 | {{- end -}} 27 | 28 | {{/* 29 | Merge Resource: common.images.resources 30 | */}} 31 | {{- define "common.images.resources" -}} 32 | 33 | {{- if .Values.resources }} 34 | {{ toYaml .Values.resources }} 35 | {{- else if .Values.global }} 36 | {{- if .Values.global.resources }} 37 | {{ toYaml .Values.global.resources }} 38 | {{- end }} 39 | {{- end }} 40 | 41 | {{- end -}} 42 | 43 | {{/* 44 | Return the proper image name 45 | Usage: {{ include "common.images.image" ( dict "imageRoot" .imageRootPath "global" .globalPath "defaultTag" .tagPath) }} 46 | */}} 47 | {{- define "common.images.image" -}} 48 | {{- $registryName := .imageRoot.registry -}} 49 | {{- $repositoryName := .imageRoot.repository -}} 50 | {{- $tag := .defaultTag -}} 51 | {{- if .global }} 52 | {{- if .global.imageRegistry }} 53 | {{- $registryName = .global.imageRegistry -}} 54 | {{- end -}} 55 | {{- end -}} 56 | {{- if .imageRoot.registry }} 57 | {{- $registryName = .imageRoot.registry -}} 58 | {{- end -}} 59 | {{- if .imageRoot.tag }} 60 | {{- $tag = .imageRoot.tag -}} 61 | {{- end -}} 62 | {{- if $registryName }} 63 | {{- printf "%s/%s:%s" $registryName $repositoryName $tag -}} 64 | {{- else -}} 65 | {{- printf "%s:%s" $repositoryName $tag -}} 66 | {{- end -}} 67 | {{- end -}} 68 | 69 | {{- define "replicas" -}} 70 | {{- if .Values.replicas }}{{.Values.replicas}}{{else}}{{ if .Values.global.high_available }}2{{else}}1{{end}}{{end -}} 71 | {{- end -}} 72 | 73 | {{- define "hpa.min_replicas" -}} 74 | {{- if .Values.global.high_available }}2{{else}}1{{end}} 75 | {{- end -}} 76 | -------------------------------------------------------------------------------- /manifests/knoway/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Return the proper image name 3 | */}} 4 | 5 | {{- define "knoway.gateway.image" -}} 6 | {{ include "common.images.image" (dict "imageRoot" .Values.gateway.image "global" 
.Values.global "defaultTag" .Chart.Version) }} 7 | {{- end -}} 8 | -------------------------------------------------------------------------------- /manifests/knoway/templates/clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }} 8 | rules: 9 | - apiGroups: 10 | - "llm.knoway.dev" 11 | resources: 12 | - "*" 13 | verbs: 14 | - "*" 15 | - apiGroups: 16 | - "" 17 | resources: 18 | - secrets 19 | - configmaps 20 | - services 21 | - pods 22 | - namespaces 23 | verbs: 24 | - get 25 | - list 26 | - watch 27 | - create 28 | - update 29 | - patch 30 | -------------------------------------------------------------------------------- /manifests/knoway/templates/clusterrolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }} 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: {{ .Values.fullNameOverride | default .Release.Name }} 12 | subjects: 13 | - kind: ServiceAccount 14 | name: {{ .Values.fullNameOverride | default .Release.Name }} 15 | namespace: {{ .Release.Namespace }} 16 | -------------------------------------------------------------------------------- /manifests/knoway/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }} 5 | namespace: {{ .Release.Namespace 
}} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }} 8 | data: 9 | config.yaml: |- 10 | debug: {{.Values.debug }} 11 | staticListeners: 12 | - '@type': type.googleapis.com/knoway.listeners.v1alpha1.ChatCompletionListener 13 | name: openai-chat 14 | filters: 15 | - name: api-key-auth 16 | config: 17 | '@type': type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig 18 | authServer: 19 | url: {{ .Values.config.auth_server.url }} 20 | timeout: {{ .Values.config.auth_server.timeout }} 21 | - config: 22 | '@type': type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig 23 | statsServer: 24 | url: {{ .Values.config.stats_server.url }} 25 | timeout: {{ .Values.config.stats_server.timeout }} 26 | {{- if .Values.config.rate_limit.enable }} 27 | - config: 28 | '@type': type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig 29 | policies: {{- toYaml .Values.config.rate_limit.policies | nindent 16 }} 30 | {{- end }} 31 | accessLog: {{- toYaml .Values.config.log.access_log | nindent 10 }} 32 | - '@type': type.googleapis.com/knoway.listeners.v1alpha1.ImageListener 33 | name: openai-image 34 | filters: 35 | - name: api-key-auth 36 | config: 37 | '@type': type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig 38 | authServer: 39 | url: {{ .Values.config.auth_server.url }} 40 | timeout: {{ .Values.config.auth_server.timeout }} 41 | - config: 42 | '@type': type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig 43 | statsServer: 44 | url: {{ .Values.config.stats_server.url }} 45 | timeout: {{ .Values.config.stats_server.timeout }} 46 | {{- if .Values.config.rate_limit.enable }} 47 | - config: 48 | '@type': type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig 49 | policies: {{- toYaml .Values.config.rate_limit.policies | nindent 16 }} 50 | {{- end }} 51 | accessLog: {{- toYaml .Values.config.log.access_log | nindent 10 }} 52 | -------------------------------------------------------------------------------- 
/manifests/knoway/templates/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 8 | spec: 9 | replicas: {{ include "replicas" . }} 10 | selector: 11 | matchLabels: 12 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 13 | strategy: 14 | rollingUpdate: 15 | maxSurge: 50% 16 | maxUnavailable: 50% 17 | type: RollingUpdate 18 | template: 19 | metadata: 20 | labels: 21 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 22 | spec: 23 | {{- include "common.images.pullSecrets" . | indent 6 }} 24 | {{- if and (.Values.global.nodeSelector) (not .Values.gateway.nodeSelector) }} 25 | nodeSelector: 26 | {{ toYaml .Values.global.nodeSelector | indent 8 }} 27 | {{- end }} 28 | {{- if .Values.gateway.nodeSelector }} 29 | nodeSelector: 30 | {{ toYaml .Values.gateway.nodeSelector | indent 8 }} 31 | {{- end }} 32 | {{- if and (.Values.global.affinity) (not .Values.gateway.affinity) }} 33 | affinity: 34 | {{ toYaml .Values.global.affinity | indent 8 }} 35 | {{- end }} 36 | {{- if .Values.gateway.affinity }} 37 | affinity: 38 | {{ toYaml .Values.gateway.affinity | indent 8 }} 39 | {{- end }} 40 | {{- if and (.Values.global.tolerations) (not .Values.gateway.tolerations) }} 41 | tolerations: 42 | {{ toYaml .Values.global.tolerations | indent 8 }} 43 | {{- end }} 44 | {{- if .Values.gateway.tolerations }} 45 | tolerations: 46 | {{ toYaml .Values.gateway.tolerations | indent 8 }} 47 | {{- end }} 48 | dnsPolicy: ClusterFirst 49 | containers: 50 | - name: proxy 51 | image: {{ template "knoway.gateway.image" . }} 52 | imagePullPolicy: {{ .Values.global.imagePullPolicy }} 53 | {{- if include "common.images.resources" . 
}} 54 | resources: 55 | {{- include "common.images.resources" . | indent 12 }} 56 | {{- end }} 57 | ports: 58 | - containerPort: 8080 59 | name: http 60 | volumeMounts: 61 | - readOnly: true 62 | mountPath: /app/config 63 | name: config 64 | readinessProbe: 65 | httpGet: 66 | path: /readyz 67 | port: 8081 68 | initialDelaySeconds: 10 69 | periodSeconds: 15 70 | serviceAccountName: {{ .Values.fullNameOverride | default .Release.Name }} 71 | volumes: 72 | - name: config 73 | configMap: 74 | name: {{ .Values.fullNameOverride | default .Release.Name }} 75 | -------------------------------------------------------------------------------- /manifests/knoway/templates/hpa.yaml: -------------------------------------------------------------------------------- 1 | {{ if .Values.global.use_hpa }} 2 | {{ if .Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" -}} 3 | apiVersion: autoscaling/v2 4 | {{ else }} 5 | apiVersion: autoscaling/v1 6 | {{ end }} 7 | kind: HorizontalPodAutoscaler 8 | metadata: 9 | labels: 10 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 11 | name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 12 | namespace: {{ .Release.Namespace }} 13 | spec: 14 | maxReplicas: 2 15 | {{ if .Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" -}} 16 | metrics: 17 | - resource: 18 | name: cpu 19 | target: 20 | averageUtilization: 80 21 | type: Utilization 22 | type: Resource 23 | {{ else }} 24 | targetCPUUtilizationPercentage: 80 25 | {{ end }} 26 | minReplicas: {{ include "hpa.min_replicas" . 
}} 27 | scaleTargetRef: 28 | apiVersion: apps/v1 29 | kind: Deployment 30 | name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 31 | {{ end }} 32 | -------------------------------------------------------------------------------- /manifests/knoway/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 8 | spec: 9 | type: ClusterIP 10 | ports: 11 | - port: 8080 12 | protocol: TCP 13 | name: http 14 | selector: 15 | app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway 16 | -------------------------------------------------------------------------------- /manifests/knoway/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: {{ .Values.fullNameOverride | default .Release.Name }} 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | app: {{ .Values.fullNameOverride | default .Release.Name }} 8 | -------------------------------------------------------------------------------- /manifests/knoway/values.yaml: -------------------------------------------------------------------------------- 1 | global: 2 | imageRegistry: release.daocloud.io 3 | # If you do want to specify resources, uncomment the following 4 | # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
5 | # requests: 6 | # cpu: 300m 7 | # memory: 200Mi 8 | # limits: 9 | # cpu: 900m 10 | # memory: 200Mi 11 | resources: {} 12 | imagePullPolicy: IfNotPresent 13 | # -- Global Docker registry secret names as an array 14 | imagePullSecrets: [] 15 | high_available: false 16 | # Global node selector 17 | # If set, this will apply to all knoway components 18 | # Individual components can be set to a different node selector 19 | nodeSelector: {} 20 | # Global tolerations 21 | # If set, this will apply to all knoway components 22 | # Individual components can be set to a different tolerations 23 | tolerations: [] 24 | # Global affinity 25 | # If set, this will apply to all knoway components 26 | # Individual components can be set to a different affinity 27 | affinity: {} 28 | 29 | fullNameOverride: "" 30 | 31 | debug: false 32 | config: 33 | auth_server: 34 | url: '' 35 | timeout: 3s 36 | stats_server: 37 | url: '' 38 | timeout: 3s 39 | log: 40 | access_log: 41 | enable: true 42 | rate_limit: 43 | enable: false 44 | policies: [] 45 | 46 | gateway: 47 | image: 48 | registry: '' 49 | repository: knoway/knoway-gateway 50 | tag: 51 | nodeSelector: {} 52 | tolerations: [] 53 | affinity: {} 54 | -------------------------------------------------------------------------------- /pkg/bootkit/lifecycle.go: -------------------------------------------------------------------------------- 1 | package bootkit 2 | 3 | import ( 4 | "context" 5 | "sync" 6 | ) 7 | 8 | var _ lifeCycler = LifeCycleHook{} 9 | 10 | type lifeCycler interface { 11 | Start(ctx context.Context) error 12 | Stop(ctx context.Context) error 13 | } 14 | 15 | type LifeCycleHook struct { 16 | OnStart func(ctx context.Context) error 17 | OnStop func(ctx context.Context) error 18 | } 19 | 20 | func (l LifeCycleHook) Start(ctx context.Context) error { 21 | if l.OnStart == nil { 22 | return nil 23 | } 24 | 25 | return l.OnStart(ctx) 26 | } 27 | 28 | func (l LifeCycleHook) Stop(ctx context.Context) error { 29 | if l.OnStop == 
nil { 30 | return nil 31 | } 32 | 33 | return l.OnStop(ctx) 34 | } 35 | 36 | type LifeCycle interface { 37 | Append(hook LifeCycleHook) 38 | } 39 | 40 | type lifeCycle struct { 41 | hooks []lifeCycler 42 | 43 | mutex sync.Mutex 44 | } 45 | 46 | func (l *lifeCycle) GetHooks() []lifeCycler { 47 | l.mutex.Lock() 48 | defer l.mutex.Unlock() 49 | 50 | return append([]lifeCycler{}, l.hooks...) // return a copy so callers cannot modify the internal slice 51 | } 52 | 53 | func (l *lifeCycle) Append(hook LifeCycleHook) { 54 | l.mutex.Lock() 55 | defer l.mutex.Unlock() 56 | 57 | l.hooks = append(l.hooks, hook) 58 | } 59 | 60 | type EmptyLifeCycle struct{} 61 | 62 | func (*EmptyLifeCycle) Append(LifeCycleHook) {} 63 | 64 | func newLifeCycle() *lifeCycle { 65 | return &lifeCycle{ 66 | hooks: make([]lifeCycler, 0), 67 | } 68 | } 69 | 70 | func NewEmptyLifeCycle() LifeCycle { 71 | return &EmptyLifeCycle{} 72 | } 73 | -------------------------------------------------------------------------------- /pkg/bootkit/lifecycle_test.go: -------------------------------------------------------------------------------- 1 | package bootkit 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestLifeCycleHook_Start(t *testing.T) { 12 | t.Parallel() 13 | 14 | l := LifeCycleHook{} 15 | 16 | require.NotPanics(t, func() { 17 | err := l.Start(context.Background()) 18 | assert.NoError(t, err) 19 | }) 20 | 21 | l = LifeCycleHook{ 22 | OnStart: func(ctx context.Context) error { 23 | return nil 24 | }, 25 | } 26 | 27 | require.NotPanics(t, func() { 28 | err := l.Start(context.Background()) 29 | assert.NoError(t, err) 30 | }) 31 | } 32 | 33 | func TestLifeCycleHook_Stop(t *testing.T) { 34 | t.Parallel() 35 | 36 | l := LifeCycleHook{} 37 | 38 | require.NotPanics(t, func() { 39 | err := l.Stop(context.Background()) 40 | assert.NoError(t, err) 41 | }) 42 | 43 | l = LifeCycleHook{ 44 | OnStop: func(ctx context.Context) error { 45 | return nil 46 | }, 
47 | } 48 | 49 | require.NotPanics(t, func() { 50 | err := l.Stop(context.Background()) 51 | assert.NoError(t, err) 52 | }) 53 | } 54 | 55 | func TestLifeCycle_Append(t *testing.T) { 56 | t.Parallel() 57 | 58 | l := newLifeCycle() 59 | 60 | l.Append(LifeCycleHook{}) 61 | l.Append(LifeCycleHook{}) 62 | 63 | assert.Len(t, l.hooks, 2) 64 | } 65 | -------------------------------------------------------------------------------- /pkg/bootkit/options.go: -------------------------------------------------------------------------------- 1 | package bootkit 2 | 3 | import "time" 4 | 5 | type bootkitOptions struct { 6 | startTimeout time.Duration 7 | stopTimeout time.Duration 8 | } 9 | 10 | type bootkitApplyOptions struct { 11 | bootkit *bootkitOptions 12 | } 13 | 14 | type Option interface { 15 | apply(options *bootkitApplyOptions) 16 | } 17 | 18 | type startTimeoutOption time.Duration 19 | 20 | func (t startTimeoutOption) apply(m *bootkitApplyOptions) { 21 | m.bootkit.startTimeout = time.Duration(t) 22 | } 23 | 24 | func StartTimeout(duration time.Duration) Option { 25 | return startTimeoutOption(duration) 26 | } 27 | 28 | type stopTimeoutOption time.Duration 29 | 30 | func (t stopTimeoutOption) apply(m *bootkitApplyOptions) { 31 | m.bootkit.stopTimeout = time.Duration(t) 32 | } 33 | 34 | func StopTimeout(duration time.Duration) Option { 35 | return stopTimeoutOption(duration) 36 | } 37 | -------------------------------------------------------------------------------- /pkg/bootkit/options_test.go: -------------------------------------------------------------------------------- 1 | package bootkit 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestStartTimeout(t *testing.T) { 11 | t.Parallel() 12 | 13 | option := StartTimeout(time.Second * 100) 14 | applyOptions := &bootkitApplyOptions{&bootkitOptions{}} 15 | option.apply(applyOptions) 16 | 17 | assert.Equal(t, time.Second*100, applyOptions.bootkit.startTimeout) 18 | 
} 19 | 20 | func TestStopTimeout(t *testing.T) { 21 | t.Parallel() 22 | 23 | option := StopTimeout(time.Second * 100) 24 | applyOptions := &bootkitApplyOptions{&bootkitOptions{}} 25 | option.apply(applyOptions) 26 | 27 | assert.Equal(t, time.Second*100, applyOptions.bootkit.stopTimeout) 28 | } 29 | -------------------------------------------------------------------------------- /pkg/clusters/filters/openai/request.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "net/url" 11 | "strings" 12 | 13 | "github.com/samber/lo" 14 | "google.golang.org/protobuf/types/known/anypb" 15 | 16 | v1alpha1clusters "knoway.dev/api/clusters/v1alpha1" 17 | "knoway.dev/api/filters/v1alpha1" 18 | "knoway.dev/pkg/bootkit" 19 | clusterfilters "knoway.dev/pkg/clusters/filters" 20 | "knoway.dev/pkg/object" 21 | "knoway.dev/pkg/protoutils" 22 | ) 23 | 24 | func NewRequestHandlerWithConfig(cfg *anypb.Any, _ bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) { 25 | c, err := protoutils.FromAny(cfg, &v1alpha1.OpenAIRequestHandlerConfig{}) 26 | if err != nil { 27 | return nil, fmt.Errorf("invalid config type %q: %w", cfg.GetTypeUrl(), err) // report the Any type URL and keep the cause for errors.Is/As 28 | } 29 | 30 | return &requestHandler{ 31 | cfg: c, 32 | }, nil 33 | } 34 | 35 | var _ clusterfilters.ClusterFilterRequestModifier = (*requestHandler)(nil) 36 | var _ clusterfilters.ClusterFilterUpstreamRequestMarshaller = (*requestHandler)(nil) 37 | 38 | type requestHandler struct { 39 | clusterfilters.IsClusterFilter 40 | 41 | cfg *v1alpha1.OpenAIRequestHandlerConfig 42 | } 43 | 44 | func (f *requestHandler) RequestModifier(ctx context.Context, cluster *v1alpha1clusters.Cluster, request object.LLMRequest) (object.LLMRequest, error) { 45 | err := request.SetModel(cluster.GetName()) 46 | if err != nil { 47 | return request, err 48 | } 49 | 50 | err = request.SetDefaultParams(cluster.GetUpstream().GetDefaultParams()) 51 | 
if err != nil { 52 | return request, err 53 | } 54 | 55 | err = request.SetOverrideParams(cluster.GetUpstream().GetOverrideParams()) 56 | if err != nil { 57 | return request, err 58 | } 59 | 60 | err = request.RemoveParamKeys(cluster.GetUpstream().GetRemoveParamKeys()) 61 | if err != nil { 62 | return request, err 63 | } 64 | 65 | return request, nil 66 | } 67 | 68 | func (f *requestHandler) MarshalUpstreamRequest(ctx context.Context, cluster *v1alpha1clusters.Cluster, llmRequest object.LLMRequest, request *http.Request) (*http.Request, error) { 69 | upstreamURL := cluster.GetUpstream().GetUrl() 70 | upstreamURL = strings.TrimSuffix(upstreamURL, "/") 71 | 72 | switch llmRequest.GetRequestType() { 73 | case object.RequestTypeChatCompletions: 74 | upstreamURL += "/chat/completions" 75 | case object.RequestTypeCompletions: 76 | upstreamURL += "/completions" 77 | case object.RequestTypeImageGenerations: 78 | upstreamURL += "/images/generations" 79 | default: 80 | return nil, fmt.Errorf("unknown request type: %s", string(llmRequest.GetRequestType())) 81 | } 82 | 83 | parsedUpstreamURL, err := url.Parse(upstreamURL) 84 | if err != nil { 85 | return nil, err 86 | } 87 | 88 | jsonBody, err := json.Marshal(llmRequest) 89 | if err != nil { 90 | return nil, err 91 | } 92 | 93 | if request == nil { 94 | request, err = http.NewRequestWithContext(ctx, http.MethodPost, upstreamURL, bytes.NewReader(jsonBody)) 95 | if err != nil { 96 | return nil, err 97 | } 98 | } else { 99 | request.URL = parsedUpstreamURL 100 | request.Method = http.MethodPost 101 | request.Body, request.ContentLength = io.NopCloser(bytes.NewReader(jsonBody)), int64(len(jsonBody)) // keep Content-Length in sync with the re-marshalled body 102 | } 103 | 104 | request.Header.Set("Content-Type", "application/json") 105 | // Apply headers 106 | if llmRequest.IsStream() { // streaming request: ask the upstream for server-sent events 107 | request.Header.Set("Accept", "text/event-stream") 108 | request.Header.Set("Cache-Control", "no-cache") 109 | request.Header.Set("Connection", "keep-alive") 110 | } 111 | 112 | // Apply user-defined headers 113 | 
lo.ForEach(cluster.GetUpstream().GetHeaders(), func(h *v1alpha1clusters.Upstream_Header, _ int) { 114 | request.Header.Set(h.GetKey(), h.GetValue()) 115 | }) 116 | 117 | return request, nil 118 | } 119 | -------------------------------------------------------------------------------- /pkg/clusters/filters/openai/response.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "context" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "strings" 11 | 12 | v1alpha12 "knoway.dev/api/clusters/v1alpha1" 13 | 14 | "google.golang.org/protobuf/types/known/anypb" 15 | 16 | "knoway.dev/api/filters/v1alpha1" 17 | "knoway.dev/pkg/bootkit" 18 | clusterfilters "knoway.dev/pkg/clusters/filters" 19 | "knoway.dev/pkg/object" 20 | "knoway.dev/pkg/protoutils" 21 | "knoway.dev/pkg/types/openai" 22 | ) 23 | 24 | func NewResponseHandlerWithConfig(cfg *anypb.Any, _ bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) { 25 | c, err := protoutils.FromAny(cfg, &v1alpha1.OpenAIResponseHandlerConfig{}) 26 | if err != nil { 27 | return nil, fmt.Errorf("invalid config type %q: %w", cfg.GetTypeUrl(), err) // report the Any type URL and keep the cause for errors.Is/As 28 | } 29 | 30 | return &responseHandler{ 31 | cfg: c, 32 | }, nil 33 | } 34 | 35 | var _ clusterfilters.ClusterFilterResponseUnmarshaller = (*responseHandler)(nil) 36 | var _ clusterfilters.ClusterFilterResponseModifier = (*responseHandler)(nil) 37 | 38 | type responseHandler struct { 39 | cfg *v1alpha1.OpenAIResponseHandlerConfig 40 | clusterfilters.ClusterFilter 41 | } 42 | 43 | func (f *responseHandler) UnmarshalResponseBody(ctx context.Context, cluster *v1alpha12.Cluster, req object.LLMRequest, rawResponse *http.Response, reader *bufio.Reader, pre object.LLMResponse) (object.LLMResponse, error) { 44 | contentType := rawResponse.Header.Get("Content-Type") 45 | 46 | switch req.GetRequestType() { 47 | case 48 | object.RequestTypeChatCompletions, 49 | object.RequestTypeCompletions: 50 | switch { 51 | case 
strings.HasPrefix(contentType, "application/json"): 52 | return openai.NewChatCompletionResponse(req, rawResponse, reader) 53 | case strings.HasPrefix(contentType, "text/event-stream"): 54 | return openai.NewChatCompletionStreamResponse(req, rawResponse, reader) 55 | default: 56 | break 57 | } 58 | case 59 | object.RequestTypeImageGenerations: 60 | switch { 61 | case strings.HasPrefix(contentType, "application/json"): 62 | return openai.NewImageGenerationsResponse(ctx, req, rawResponse, reader, 63 | openai.NewImageGenerationsResponseWithUsage(cluster.GetMeteringPolicy()), 64 | ) 65 | default: 66 | break 67 | } 68 | default: 69 | return nil, fmt.Errorf("unsupported request type %s", req.GetRequestType()) 70 | } 71 | 72 | if rawResponse.StatusCode >= http.StatusBadRequest { 73 | tryReadBody := new(bytes.Buffer) 74 | 75 | _, err := tryReadBody.ReadFrom(rawResponse.Body) 76 | if err != nil { 77 | return nil, fmt.Errorf("failed to read body: %w", err) 78 | } 79 | 80 | rawResponse.Body.Close() 81 | rawResponse.Body = io.NopCloser(bytes.NewBuffer(tryReadBody.Bytes())) 82 | 83 | return nil, fmt.Errorf("upstream returned status code %d with body %s", rawResponse.StatusCode, tryReadBody.String()) 84 | } 85 | 86 | return nil, fmt.Errorf("unsupported content type %s", contentType) 87 | } 88 | 89 | func (f *responseHandler) ResponseModifier(ctx context.Context, cluster *v1alpha12.Cluster, request object.LLMRequest, response object.LLMResponse) (object.LLMResponse, error) { 90 | err := response.SetModel(cluster.GetName()) 91 | if err != nil { 92 | return response, err 93 | } 94 | 95 | return response, nil 96 | } 97 | -------------------------------------------------------------------------------- /pkg/clusters/interface.go: -------------------------------------------------------------------------------- 1 | package clusters 2 | 3 | import ( 4 | "context" 5 | 6 | "knoway.dev/api/clusters/v1alpha1" 7 | "knoway.dev/pkg/object" 8 | ) 9 | 10 | type Cluster interface { 11 | 
GetClusterType() v1alpha1.ClusterType 12 | GetClusterConfig() *v1alpha1.Cluster 13 | DoUpstreamRequest(ctx context.Context, req object.LLMRequest) (object.LLMResponse, error) 14 | } 15 | -------------------------------------------------------------------------------- /pkg/clusters/manager/cluster.go: -------------------------------------------------------------------------------- 1 | package manager 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | "sync" 7 | 8 | "github.com/samber/lo" 9 | "github.com/samber/mo" 10 | 11 | "knoway.dev/api/clusters/v1alpha1" 12 | "knoway.dev/pkg/bootkit" 13 | clusters2 "knoway.dev/pkg/clusters" 14 | cluster "knoway.dev/pkg/clusters/cluster" 15 | "knoway.dev/pkg/metadata" 16 | "knoway.dev/pkg/object" 17 | ) 18 | 19 | var clusterRegister *Register 20 | 21 | func HandleRequest(ctx context.Context, clusterName string, request object.LLMRequest) (object.LLMResponse, error) { 22 | foundCluster, ok := clusterRegister.FindClusterByName(clusterName) 23 | if !ok { 24 | return nil, object.NewErrorModelNotFoundOrNotAccessible(request.GetModel()) 25 | } 26 | 27 | rMeta := metadata.RequestMetadataFromCtx(ctx) 28 | rMeta.SelectedCluster = mo.Some(foundCluster) 29 | 30 | resp, err := foundCluster.DoUpstreamRequest(ctx, request) 31 | if err != nil { 32 | // Cluster will ensure that error will always be LLMError 33 | return resp, err 34 | } 35 | if resp.GetError() != nil { 36 | return resp, resp.GetError() 37 | } 38 | 39 | return resp, err 40 | } 41 | 42 | func RemoveCluster(cluster *v1alpha1.Cluster) { 43 | clusterRegister.DeleteCluster(cluster.GetName()) 44 | } 45 | 46 | func UpsertAndRegisterCluster(cluster *v1alpha1.Cluster, lifecycle bootkit.LifeCycle) error { 47 | return clusterRegister.UpsertAndRegisterCluster(cluster, lifecycle) 48 | } 49 | 50 | func ListModels() []*v1alpha1.Cluster { 51 | if clusterRegister == nil { 52 | return nil 53 | } 54 | 55 | return clusterRegister.ListModels() 56 | } 57 | 58 | func init() { //nolint:gochecknoinits 59 | 
if clusterRegister == nil { 60 | InitClusterRegister() 61 | } 62 | } 63 | 64 | type Register struct { 65 | clusters map[string]clusters2.Cluster 66 | clustersDetails map[string]*v1alpha1.Cluster 67 | clustersLock sync.RWMutex 68 | } 69 | 70 | type RegisterOptions struct { 71 | DevConfig bool 72 | } 73 | 74 | func NewClusterRegister() *Register { 75 | r := &Register{ 76 | clusters: make(map[string]clusters2.Cluster), 77 | clustersDetails: make(map[string]*v1alpha1.Cluster), 78 | clustersLock: sync.RWMutex{}, 79 | } 80 | 81 | return r 82 | } 83 | 84 | func InitClusterRegister() { 85 | c := NewClusterRegister() 86 | clusterRegister = c 87 | } 88 | 89 | func (cr *Register) DeleteCluster(name string) { 90 | cr.clustersLock.Lock() 91 | defer cr.clustersLock.Unlock() 92 | 93 | delete(cr.clusters, name) 94 | delete(cr.clustersDetails, name) 95 | slog.Info("remove cluster", "name", name) 96 | } 97 | 98 | func (cr *Register) FindClusterByName(name string) (clusters2.Cluster, bool) { 99 | cr.clustersLock.RLock() 100 | defer cr.clustersLock.RUnlock() 101 | 102 | c, ok := cr.clusters[name] 103 | 104 | return c, ok 105 | } 106 | 107 | func (cr *Register) UpsertAndRegisterCluster(c *v1alpha1.Cluster, lifecycle bootkit.LifeCycle) error { 108 | cr.clustersLock.Lock() 109 | defer cr.clustersLock.Unlock() 110 | 111 | name := c.GetName() 112 | 113 | newCluster, err := cluster.NewWithConfigs(c, lifecycle) 114 | if err != nil { 115 | return err 116 | } 117 | 118 | cr.clustersDetails[c.GetName()] = c 119 | cr.clusters[name] = newCluster 120 | 121 | slog.Info("register cluster", "name", name) 122 | 123 | return nil 124 | } 125 | 126 | func (cr *Register) ListModels() []*v1alpha1.Cluster { 127 | cr.clustersLock.RLock() 128 | defer cr.clustersLock.RUnlock() 129 | 130 | clusters := make([]*v1alpha1.Cluster, 0, len(cr.clusters)) 131 | for _, cluster := range cr.clustersDetails { 132 | clusters = append(clusters, cluster) 133 | } 134 | 135 | return clusters 136 | } 137 | 138 | func (cr 
*Register) dumpAllClusters() []*v1alpha1.Cluster { 139 | cr.clustersLock.RLock() 140 | defer cr.clustersLock.RUnlock() 141 | 142 | return lo.Values(clusterRegister.clustersDetails) 143 | } 144 | 145 | func DebugDumpAllClusters() []*v1alpha1.Cluster { 146 | return clusterRegister.dumpAllClusters() 147 | } 148 | -------------------------------------------------------------------------------- /pkg/constants/config.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | const TestClusterConfigPath = "dist/load_config.json" 4 | const DefaultClusterConfigPath = "config/default_config.json" 5 | -------------------------------------------------------------------------------- /pkg/constants/listener.go: -------------------------------------------------------------------------------- 1 | package constants 2 | 3 | import "time" 4 | 5 | const ( 6 | DefaultDrainWaitTime = 30 * time.Second 7 | ) 8 | -------------------------------------------------------------------------------- /pkg/filters/config.go: -------------------------------------------------------------------------------- 1 | package filters 2 | 3 | import ( 4 | "context" 5 | "net/http" 6 | 7 | "knoway.dev/pkg/object" 8 | "knoway.dev/pkg/utils" 9 | ) 10 | 11 | const ( 12 | ListenerFilterResultTypeSucceeded = iota 13 | ListenerFilterResultTypeFailed 14 | ListenerFilterResultTypeSkipped 15 | ) 16 | 17 | type RequestFilterResult struct { 18 | // Type Succeeded, Failed, or Skipped 19 | Type int 20 | Error error 21 | } 22 | 23 | func (r RequestFilterResult) IsFailed() bool { 24 | return r.Type == ListenerFilterResultTypeFailed 25 | } 26 | 27 | func (r RequestFilterResult) IsSkipped() bool { 28 | return r.Type == ListenerFilterResultTypeSkipped 29 | } 30 | 31 | func (r RequestFilterResult) IsSSucceeded() bool { 32 | return r.Type == ListenerFilterResultTypeSucceeded 33 | } 34 | 35 | func NewOK() RequestFilterResult { 36 | return RequestFilterResult{Type: 
// RequestFilters is a list of request filters. Each accessor below narrows the
// list to one hook interface by delegating to utils.TypeAssertFrom.
// NOTE(review): TypeAssertFrom is defined in pkg/utils and not visible here;
// presumably it keeps only the elements that satisfy the target interface,
// preserving order — confirm.
type RequestFilters []RequestFilter

// OnRequestPreFilters returns the filters viewed as OnRequestPreFilter.
func (r RequestFilters) OnRequestPreFilters() []OnRequestPreFilter {
	return utils.TypeAssertFrom[RequestFilter, OnRequestPreFilter](r)
}

// OnCompletionRequestFilters returns the filters viewed as OnCompletionRequestFilter.
func (r RequestFilters) OnCompletionRequestFilters() []OnCompletionRequestFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionRequestFilter](r)
}

// OnImageGenerationsRequestFilters returns the filters viewed as OnImageGenerationsRequestFilter.
func (r RequestFilters) OnImageGenerationsRequestFilters() []OnImageGenerationsRequestFilter {
	return utils.TypeAssertFrom[RequestFilter, OnImageGenerationsRequestFilter](r)
}

// OnCompletionResponseFilters returns the filters viewed as OnCompletionResponseFilter.
func (r RequestFilters) OnCompletionResponseFilters() []OnCompletionResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionResponseFilter](r)
}

// OnCompletionStreamResponseFilters returns the filters viewed as OnCompletionStreamResponseFilter.
func (r RequestFilters) OnCompletionStreamResponseFilters() []OnCompletionStreamResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionStreamResponseFilter](r)
}

// OnImageGenerationsResponseFilters returns the filters viewed as OnImageGenerationsResponseFilter.
func (r RequestFilters) OnImageGenerationsResponseFilters() []OnImageGenerationsResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnImageGenerationsResponseFilter](r)
}

// OnResponsePostFilters returns the filters viewed as OnResponsePostFilter.
func (r RequestFilters) OnResponsePostFilters() []OnResponsePostFilter {
	return utils.TypeAssertFrom[RequestFilter, OnResponsePostFilter](r)
}
window_ms = tonumber(ARGV[2]) 29 | local now = tonumber(ARGV[3]) 30 | local precision = tonumber(ARGV[4]) 31 | 32 | -- Calculate bucket parameters 33 | local capacity = limit * precision 34 | local fill_rate = capacity / window_ms -- tokens per millisecond 35 | 36 | -- Get or create bucket atomically 37 | local bucket = redis.call('HGETALL', key) 38 | local state = {} 39 | if #bucket == 0 then 40 | state = init_bucket(capacity, now) 41 | else 42 | -- Convert array to hash 43 | for i = 1, #bucket, 2 do 44 | state[bucket[i]] = tonumber(bucket[i + 1]) 45 | end 46 | 47 | -- Handle rate limit changes 48 | if state.limit ~= capacity then 49 | state = init_bucket(capacity, now) 50 | end 51 | end 52 | 53 | -- Calculate available tokens 54 | local elapsed_ms = now - state.last_update 55 | local new_tokens = math.min(capacity, state.tokens + (elapsed_ms * fill_rate)) 56 | 57 | -- Attempt to consume token 58 | local allowed = 0 59 | if new_tokens >= precision then 60 | new_tokens = new_tokens - precision 61 | allowed = 1 62 | end 63 | 64 | -- Update bucket state 65 | local ttl = math.max(300000, math.ceil(window_ms * 2)) -- Set TTL to max(5min, 2x window) for safety 66 | redis.call('HMSET', key, 67 | 'tokens', new_tokens, 68 | 'last_update', now, 69 | 'limit', capacity 70 | ) 71 | redis.call('PEXPIRE', key, ttl) 72 | 73 | return allowed 74 | ` 75 | 76 | func (rl *RateLimiter) checkBucketRedis(key string, window time.Duration, limit int) (bool, error) { 77 | now := time.Now().UnixMilli() // 使用毫秒精度 78 | windowMs := window.Milliseconds() 79 | 80 | cmd := rl.redisClient.B().Eval().Script(redisRateLimitScript). 81 | Numkeys(1). 82 | Key(key). 83 | Arg( 84 | strconv.Itoa(limit), 85 | strconv.FormatInt(windowMs, 10), 86 | strconv.FormatInt(now, 10), 87 | strconv.Itoa(precision), 88 | ). 
89 | Build() 90 | 91 | result := rl.redisClient.Do(context.Background(), cmd) 92 | if err := result.NonRedisError(); err != nil { 93 | slog.ErrorContext(context.Background(), "redis error", append(rl.logCommonAttrs(), slog.Any("error", err))...) 94 | return false, err 95 | } 96 | 97 | allowed, err := result.AsInt64() 98 | if err != nil { 99 | slog.ErrorContext(context.Background(), "failed to parse redis result", append(rl.logCommonAttrs(), slog.Any("error", err))...) 100 | return false, err 101 | } 102 | 103 | return allowed != 0, nil 104 | } 105 | -------------------------------------------------------------------------------- /pkg/listener/handler.go: -------------------------------------------------------------------------------- 1 | package listener 2 | 3 | import ( 4 | "net/http" 5 | ) 6 | 7 | type HandlerFunc func(writer http.ResponseWriter, request *http.Request) (any, error) 8 | 9 | type Middleware func(HandlerFunc) HandlerFunc 10 | 11 | func WithMiddlewares(middlewares ...Middleware) Middleware { 12 | return func(next HandlerFunc) HandlerFunc { 13 | for i := len(middlewares) - 1; i >= 0; i-- { 14 | next = middlewares[i](next) 15 | } 16 | 17 | return next 18 | } 19 | } 20 | 21 | func HTTPHandlerFunc(fn HandlerFunc) http.HandlerFunc { 22 | return func(writer http.ResponseWriter, request *http.Request) { 23 | _, _ = fn(writer, request) 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /pkg/listener/listener.go: -------------------------------------------------------------------------------- 1 | package listener 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "net/http" 7 | "sync" 8 | 9 | "github.com/gorilla/mux" 10 | ) 11 | 12 | type Drainable interface { 13 | Drain(ctx context.Context) error 14 | HasDrained() bool 15 | } 16 | 17 | type Listener interface { 18 | Drainable 19 | 20 | RegisterRoutes(mux *mux.Router) error 21 | } 22 | 23 | type Mux struct { 24 | *mux.Router 25 | 26 | errors []error 27 | mutex 
sync.Mutex 28 | } 29 | 30 | func NewMux() *Mux { 31 | return &Mux{ 32 | Router: mux.NewRouter(), 33 | errors: make([]error, 0), 34 | } 35 | } 36 | 37 | func (r *Mux) Error() error { 38 | r.mutex.Lock() 39 | defer r.mutex.Unlock() 40 | 41 | return errors.Join(r.errors...) 42 | } 43 | 44 | func (r *Mux) Register(listener Listener, err error) *Mux { 45 | if err != nil { 46 | r.mutex.Lock() 47 | r.errors = append(r.errors, err) 48 | r.mutex.Unlock() 49 | 50 | return r 51 | } 52 | 53 | err = listener.RegisterRoutes(r.Router) 54 | if err != nil { 55 | r.mutex.Lock() 56 | r.errors = append(r.errors, err) 57 | r.mutex.Unlock() 58 | } 59 | 60 | return r 61 | } 62 | 63 | func (r *Mux) BuildServer(server *http.Server) (*http.Server, error) { 64 | if len(r.errors) > 0 { 65 | return nil, r.Error() 66 | } 67 | 68 | server.Handler = r.Router 69 | 70 | return server, nil 71 | } 72 | -------------------------------------------------------------------------------- /pkg/listener/manager/chat/chat_completions.go: -------------------------------------------------------------------------------- 1 | package chat 2 | 3 | import ( 4 | "net/http" 5 | 6 | "knoway.dev/pkg/metadata" 7 | "knoway.dev/pkg/object" 8 | "knoway.dev/pkg/types/openai" 9 | ) 10 | 11 | func (l *OpenAIChatListener) unmarshalChatCompletionsRequestToLLMRequest(request *http.Request) (object.LLMRequest, error) { 12 | llmRequest, err := openai.NewChatCompletionRequest(request) 13 | if err != nil { 14 | return nil, err 15 | } 16 | 17 | if llmRequest.GetModel() == "" { 18 | return nil, openai.NewErrorMissingModel() 19 | } 20 | 21 | rMeta := metadata.RequestMetadataFromCtx(request.Context()) 22 | rMeta.RequestModel = llmRequest.GetModel() 23 | 24 | return llmRequest, nil 25 | } 26 | -------------------------------------------------------------------------------- /pkg/listener/manager/chat/completions.go: -------------------------------------------------------------------------------- 1 | package chat 2 | 3 | import ( 4 | 
"net/http" 5 | 6 | "knoway.dev/pkg/metadata" 7 | "knoway.dev/pkg/object" 8 | "knoway.dev/pkg/types/openai" 9 | ) 10 | 11 | func (l *OpenAIChatListener) unmarshalCompletionsRequestToLLMRequest(request *http.Request) (object.LLMRequest, error) { 12 | llmRequest, err := openai.NewCompletionsRequest(request) 13 | if err != nil { 14 | return nil, err 15 | } 16 | 17 | if llmRequest.GetModel() == "" { 18 | return nil, openai.NewErrorMissingModel() 19 | } 20 | 21 | rMeta := metadata.RequestMetadataFromCtx(request.Context()) 22 | rMeta.RequestModel = llmRequest.GetModel() 23 | 24 | return llmRequest, nil 25 | } 26 | -------------------------------------------------------------------------------- /pkg/listener/manager/chat/listener.go: -------------------------------------------------------------------------------- 1 | package chat 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | 8 | "github.com/gorilla/mux" 9 | "github.com/samber/lo/mutable" 10 | "google.golang.org/protobuf/proto" 11 | 12 | "knoway.dev/api/listeners/v1alpha1" 13 | "knoway.dev/pkg/bootkit" 14 | "knoway.dev/pkg/constants" 15 | "knoway.dev/pkg/filters" 16 | "knoway.dev/pkg/listener" 17 | "knoway.dev/pkg/registry/config" 18 | "knoway.dev/pkg/types/openai" 19 | "knoway.dev/pkg/utils" 20 | ) 21 | 22 | var _ listener.Listener = (*OpenAIChatListener)(nil) 23 | var _ listener.Drainable = (*OpenAIChatListener)(nil) 24 | 25 | type OpenAIChatListener struct { 26 | cfg *v1alpha1.ChatCompletionListener 27 | filters filters.RequestFilters 28 | reversedFilters filters.RequestFilters 29 | cancellable *listener.CancellableRequestMap 30 | 31 | mutex sync.RWMutex 32 | drained bool 33 | } 34 | 35 | func NewOpenAIChatListenerConfigs(cfg proto.Message, lifecycle bootkit.LifeCycle) (listener.Listener, error) { 36 | c, ok := cfg.(*v1alpha1.ChatCompletionListener) 37 | if !ok { 38 | return nil, fmt.Errorf("invalid config type %T", cfg) 39 | } 40 | 41 | l := &OpenAIChatListener{ 42 | cfg: c, 43 | cancellable: 
// RegisterRoutes mounts the chat listener's HTTP endpoints on mux:
// /v1/chat/completions, /v1/completions and /v1/models. It always returns nil.
func (l *OpenAIChatListener) RegisterRoutes(mux *mux.Router) error {
	// listener.WithMiddlewares wraps from the last entry to the first, so the
	// first middleware listed here is the outermost one around the handler.
	// The order is significant; do not reorder without checking each middleware.
	middlewares := listener.WithMiddlewares(
		listener.WithCancellable(l.cancellable),
		listener.WithInitMetadata(),
		listener.WithAccessLog(l.cfg.GetAccessLog().GetEnable()),
		listener.WithRequestTimer(),
		listener.WithOptions(),
		listener.WithResponseHandler(openai.ResponseHandler()),
		listener.WithRecoverWithError(),
		listener.WithRejectAfterDrainedWithError(l),
	)

	// Both completion endpoints share the common handler and differ only in
	// how the HTTP request is unmarshalled into an LLM request.
	mux.HandleFunc("/v1/chat/completions", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalChatCompletionsRequestToLLMRequest))))
	mux.HandleFunc("/v1/completions", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalCompletionsRequestToLLMRequest))))
	mux.HandleFunc("/v1/models", listener.HTTPHandlerFunc(middlewares(l.listModels)))

	return nil
}
-------------------------------------------------------------------------------- 1 | package chat 2 | 3 | import ( 4 | "net/http" 5 | "sort" 6 | "strings" 7 | 8 | "github.com/samber/lo" 9 | goopenai "github.com/sashabaranov/go-openai" 10 | 11 | v1alpha4 "knoway.dev/api/clusters/v1alpha1" 12 | clustermanager "knoway.dev/pkg/clusters/manager" 13 | "knoway.dev/pkg/filters/auth" 14 | "knoway.dev/pkg/metadata" 15 | ) 16 | 17 | func ClustersToOpenAIModels(clusters []*v1alpha4.Cluster) []goopenai.Model { 18 | res := make([]goopenai.Model, 0) 19 | for _, c := range clusters { 20 | res = append(res, ClusterToOpenAIModel(c)) 21 | } 22 | 23 | return res 24 | } 25 | 26 | func ClusterToOpenAIModel(c *v1alpha4.Cluster) goopenai.Model { 27 | // from https://platform.openai.com/docs/api-reference/models/object 28 | return goopenai.Model{ 29 | CreatedAt: c.GetCreated(), 30 | ID: c.GetName(), 31 | // The object type, which is always "model". 32 | Object: "model", 33 | OwnedBy: c.GetProvider().String(), 34 | // todo 35 | Permission: nil, 36 | Root: "", 37 | Parent: "", 38 | } 39 | } 40 | 41 | func (l *OpenAIChatListener) listModels(writer http.ResponseWriter, request *http.Request) (any, error) { 42 | for _, f := range l.filters.OnRequestPreFilters() { 43 | fResult := f.OnRequestPre(request.Context(), request) 44 | if fResult.IsFailed() { 45 | return nil, fResult.Error 46 | } 47 | } 48 | 49 | clusters := clustermanager.ListModels() 50 | 51 | // auth filters 52 | rMeta := metadata.RequestMetadataFromCtx(request.Context()) 53 | 54 | if rMeta.EnabledAuthFilter { 55 | if rMeta.AuthInfo != nil { 56 | clusters = lo.Filter(clusters, func(item *v1alpha4.Cluster, index int) bool { 57 | return auth.CanAccessModel(item.GetName(), rMeta.AuthInfo.GetAllowModels(), rMeta.AuthInfo.GetDenyModels()) 58 | }) 59 | } 60 | } 61 | 62 | sort.Slice(clusters, func(i, j int) bool { 63 | return strings.Compare(clusters[i].GetName(), clusters[j].GetName()) < 0 64 | }) 65 | 66 | ms := 
ClustersToOpenAIModels(clusters) 67 | body := goopenai.ModelsList{ 68 | Models: ms, 69 | } 70 | 71 | return body, nil 72 | } 73 | -------------------------------------------------------------------------------- /pkg/listener/manager/image/image_generations.go: -------------------------------------------------------------------------------- 1 | package image 2 | 3 | import ( 4 | "net/http" 5 | 6 | "knoway.dev/pkg/metadata" 7 | "knoway.dev/pkg/object" 8 | "knoway.dev/pkg/types/openai" 9 | ) 10 | 11 | func (l *OpenAIImageListener) unmarshalImageGenerationsRequestToImageGenerationRequest(request *http.Request) (object.LLMRequest, error) { 12 | llmRequest, err := openai.NewImageGenerationsRequest(request) 13 | if err != nil { 14 | return nil, err 15 | } 16 | 17 | if llmRequest.GetModel() == "" { 18 | return nil, openai.NewErrorMissingModel() 19 | } 20 | 21 | rMeta := metadata.RequestMetadataFromCtx(request.Context()) 22 | rMeta.RequestModel = llmRequest.GetModel() 23 | 24 | return llmRequest, nil 25 | } 26 | -------------------------------------------------------------------------------- /pkg/listener/manager/image/listener.go: -------------------------------------------------------------------------------- 1 | package image 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | 8 | "github.com/gorilla/mux" 9 | "github.com/samber/lo/mutable" 10 | "google.golang.org/protobuf/proto" 11 | 12 | "knoway.dev/api/listeners/v1alpha1" 13 | "knoway.dev/pkg/bootkit" 14 | "knoway.dev/pkg/constants" 15 | "knoway.dev/pkg/filters" 16 | "knoway.dev/pkg/listener" 17 | "knoway.dev/pkg/registry/config" 18 | "knoway.dev/pkg/types/openai" 19 | "knoway.dev/pkg/utils" 20 | ) 21 | 22 | var _ listener.Listener = (*OpenAIImageListener)(nil) 23 | var _ listener.Drainable = (*OpenAIImageListener)(nil) 24 | 25 | type OpenAIImageListener struct { 26 | cfg *v1alpha1.ImageListener 27 | filters filters.RequestFilters 28 | reversedFilters filters.RequestFilters 29 | cancellable 
*listener.CancellableRequestMap 30 | 31 | mutex sync.RWMutex 32 | drained bool 33 | } 34 | 35 | func NewOpenAIImageListenerConfigs(cfg proto.Message, lifecycle bootkit.LifeCycle) (listener.Listener, error) { 36 | c, ok := cfg.(*v1alpha1.ImageListener) 37 | if !ok { 38 | return nil, fmt.Errorf("invalid config type %T", cfg) 39 | } 40 | 41 | l := &OpenAIImageListener{ 42 | cfg: c, 43 | cancellable: listener.NewCancellableRequestMap(), 44 | } 45 | 46 | lifecycle.Append(bootkit.LifeCycleHook{ 47 | OnStop: l.Drain, 48 | }) 49 | 50 | for _, fc := range c.GetFilters() { 51 | f, err := config.NewRequestFilterWithConfig(fc.GetName(), fc.GetConfig(), lifecycle) 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | l.filters = append(l.filters, f) 57 | } 58 | 59 | l.reversedFilters = utils.Clone(l.filters) 60 | mutable.Reverse(l.reversedFilters) 61 | 62 | return l, nil 63 | } 64 | 65 | func (l *OpenAIImageListener) RegisterRoutes(mux *mux.Router) error { 66 | middlewares := listener.WithMiddlewares( 67 | listener.WithCancellable(l.cancellable), 68 | listener.WithInitMetadata(), 69 | listener.WithAccessLog(l.cfg.GetAccessLog().GetEnable()), 70 | listener.WithRequestTimer(), 71 | listener.WithOptions(), 72 | listener.WithResponseHandler(openai.ResponseHandler()), 73 | listener.WithRecoverWithError(), 74 | listener.WithRejectAfterDrainedWithError(l), 75 | ) 76 | 77 | mux.HandleFunc("/v1/images/generations", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalImageGenerationsRequestToImageGenerationRequest)))) 78 | 79 | return nil 80 | } 81 | 82 | func (l *OpenAIImageListener) HasDrained() bool { 83 | l.mutex.RLock() 84 | defer l.mutex.RUnlock() 85 | 86 | return l.drained 87 | } 88 | 89 | func (l *OpenAIImageListener) Drain(ctx context.Context) error { 90 | l.mutex.Lock() 91 | l.drained = true 92 | l.mutex.Unlock() 93 | 94 | l.cancellable.CancelAllAfterWithContext(ctx, constants.DefaultDrainWaitTime) 95 | 96 | return 
// RequestMetadata collects per-request bookkeeping that is shared, by pointer,
// between the components handling a single request.
type RequestMetadata struct {
	// RequestModel is the model name requested by the user. It is used to
	// route to the correct cluster and corresponding model, much like
	// server_name in nginx or a vHost in Apache.
	RequestModel string
	RequestAt    time.Time
	// ResponseModel is the model name that the user expects to receive.
	// In many scenarios this is the same as RequestModel, except for
	// auto-routed models, where RequestModel could be `auto` and the
	// actual cluster / model name is selected based on the request
	// payload or inference difficulty.
	ResponseModel string
	RespondAt     time.Time

	// Egress related metadata
	StatusCode   int
	ErrorMessage string

	// Auth related metadata
	EnabledAuthFilter bool                                // Set in AuthFilter
	AuthInfo          *servicev1alpha1.APIKeyAuthResponse // Set in AuthFilter

	// SelectedCluster is the cluster that the request is routed to
	SelectedCluster mo.Option[clusters.Cluster]

	// Upstream related metadata
	UpstreamProvider             v1alpha1.ClusterProvider // Set in Cluster Manager
	UpstreamResponseStatusCode   int                      // Set in Cluster Manager
	UpstreamResponseHeader       mo.Option[http.Header]   // Set in Cluster Manager
	UpstreamResponseErrorMessage string                   // Set in Cluster Manager
	// UpstreamRequestModel is the model name that the gateway sends to the
	// upstream provider, generally matching how the Cluster overrides the
	// `model` parameter in the request payload.
	UpstreamRequestModel string    // Set in Cluster Manager
	UpstreamRequestAt    time.Time // Set in Cluster Manager
	// UpstreamResponseModel is the model name that the upstream provider
	// responds with. As explained for ResponseModel, when
	// UpstreamRequestModel is set to `auto`, the actual model name will
	// differ from UpstreamRequestModel, since load-balancing or generic
	// model routing is done by the upstream provider.
	UpstreamResponseModel string    // Set in Cluster Manager
	UpstreamRespondAt     time.Time // Set in Cluster Manager
	// UpstreamFirstValidChunkAt is set in the Listener because, when a
	// stream of data is read and handled, the response has already been
	// made and processed by the Cluster and has left the scope of the
	// Cluster Manager. Marshalling and writing to the connection is done by
	// the Listener, which is therefore the only actor that knows when the
	// first valid chunk of data is received.
	UpstreamFirstValidChunkAt time.Time // Set in Listener

	// Overall usage consumption
	LLMUpstreamTokensUsage mo.Option[object.LLMTokensUsage]
	LLMUpstreamImagesUsage mo.Option[object.LLMImagesUsage]

	MatchRoute route.Route
}

// RequestMetadataFromCtx retrieves the RequestMetadata stored in ctx.
// It returns nil when ctx holds no metadata under the package's key, so
// callers must be prepared for a nil result.
// Note: the returned pointer gives direct access to the underlying
// RequestMetadata; modifications are visible to everything sharing the
// request context.
func RequestMetadataFromCtx(ctx context.Context) *RequestMetadata {
	props, pok := ctx.Value(metadataKey{}).(*metadata)
	if !pok {
		return nil
	}

	return props.request
}

// InitMetadataContext returns a context derived from request's context that
// carries a fresh, empty RequestMetadata.
func InitMetadataContext(request *http.Request) context.Context {
	return context.WithValue(request.Context(), metadataKey{}, &metadata{
		request: &RequestMetadata{},
	})
}

// metadataKey is the unexported context key under which metadata is stored.
type metadataKey struct{}

// metadata is the value stored in the context; it holds the RequestMetadata pointer.
type metadata struct {
	request *RequestMetadata
}
RequestType 31 | GetRawRequest() *http.Request 32 | } 33 | 34 | type LLMResponse interface { 35 | json.Marshaler 36 | 37 | IsStream() bool 38 | GetRequestID() string 39 | GetUsage() LLMUsage 40 | GetError() LLMError 41 | 42 | GetModel() string 43 | SetModel(modelName string) error 44 | } 45 | 46 | func IsLLMResponse(r any) bool { 47 | _, ok := r.(LLMResponse) 48 | return ok 49 | } 50 | 51 | type LLMStreamResponse interface { 52 | LLMResponse 53 | 54 | IsEOF() bool 55 | NextChunk() (LLMChunkResponse, error) 56 | WaitUntilEOF() <-chan LLMStreamResponse 57 | OnChunk(cb func(ctx context.Context, stream LLMStreamResponse, chunk LLMChunkResponse)) 58 | } 59 | 60 | func IsLLMStreamResponse(r any) bool { 61 | _, ok := r.(LLMStreamResponse) 62 | if ok { 63 | return true 64 | } 65 | 66 | llmResp, ok := r.(LLMStreamResponse) 67 | 68 | return ok && llmResp.IsStream() 69 | } 70 | 71 | type LLMChunkResponse interface { 72 | json.Marshaler 73 | 74 | IsFirst() bool 75 | IsEmpty() bool 76 | IsDone() bool 77 | IsUsage() bool 78 | GetResponse() LLMStreamResponse 79 | 80 | GetModel() string 81 | SetModel(modelName string) error 82 | GetUsage() LLMUsage 83 | 84 | ToServerSentEvent() (*sse.Event, error) 85 | } 86 | 87 | type LLMUsage interface { 88 | isLLMUsage() 89 | } 90 | 91 | type LLMTokensUsage interface { 92 | LLMUsage 93 | 94 | GetTotalTokens() uint64 95 | GetCompletionTokens() uint64 96 | GetPromptTokens() uint64 97 | } 98 | 99 | func AsLLMTokensUsage(u LLMUsage) (LLMTokensUsage, bool) { 100 | t, ok := u.(LLMTokensUsage) 101 | return t, ok 102 | } 103 | 104 | var _ LLMUsage = (*IsLLMUsage)(nil) 105 | 106 | type IsLLMUsage struct{} 107 | 108 | func (IsLLMUsage) isLLMUsage() {} 109 | -------------------------------------------------------------------------------- /pkg/object/error.go: -------------------------------------------------------------------------------- 1 | package object 2 | 3 | import "encoding/json" 4 | 5 | type LLMError interface { 6 | json.Marshaler 7 | 
json.Unmarshaler 8 | error 9 | 10 | GetCode() string 11 | GetMessage() string 12 | GetStatus() int 13 | } 14 | 15 | func IsLLMError(err error) bool { 16 | // Assert with interface, cannot use errors.As 17 | _, ok := err.(LLMError) //nolint:errorlint 18 | return ok 19 | } 20 | 21 | func AsLLMError(err error) LLMError { 22 | if IsLLMError(err) { 23 | llmError, _ := err.(LLMError) //nolint:errorlint 24 | return llmError 25 | } 26 | 27 | return nil 28 | } 29 | -------------------------------------------------------------------------------- /pkg/object/images.go: -------------------------------------------------------------------------------- 1 | package object 2 | 3 | type ImageGenerationsUsageImage interface { 4 | GetWidth() uint64 5 | GetHeight() uint64 6 | GetStyle() string 7 | GetQuality() string 8 | } 9 | 10 | type LLMImagesUsage interface { 11 | LLMUsage 12 | 13 | GetOutputImages() []ImageGenerationsUsageImage 14 | } 15 | 16 | func AsLLMImagesUsage(u LLMUsage) (LLMImagesUsage, bool) { 17 | t, ok := u.(LLMImagesUsage) 18 | return t, ok 19 | } 20 | -------------------------------------------------------------------------------- /pkg/observation/attributes.go: -------------------------------------------------------------------------------- 1 | package observation 2 | 3 | import ( 4 | "strings" 5 | 6 | "go.opentelemetry.io/otel/attribute" 7 | ) 8 | 9 | // AttributeKey is a generic key that can be used for both metrics labels and tracing attributes. 10 | type AttributeKey string 11 | 12 | // AsAttribute converts the AttributeKey to an OpenTelemetry attribute.Key. 13 | func (a AttributeKey) AsAttribute() attribute.Key { 14 | return attribute.Key(a) 15 | } 16 | 17 | // AsLabelKey converts the AttributeKey to an OpenTelemetry label key. 
18 | func (a AttributeKey) AsLabelKey() string { 19 | return formatLabel(string(a)) 20 | } 21 | 22 | func formatLabel(label string) string { 23 | return strings.ReplaceAll(label, ".", "_") 24 | } 25 | 26 | var ( 27 | LLMRequestType = AttributeKey("llm.request.type") 28 | LLMRequestStream = AttributeKey("llm.request.stream") 29 | LLMRequestModel = AttributeKey("llm.request.model") 30 | LLMRequestHeaders = AttributeKey("llm.request.headers") 31 | 32 | LLMResponseModel = AttributeKey("llm.response.model") 33 | LLMResponseCode = AttributeKey("llm.response.code") 34 | LLMResponseErrorMessage = AttributeKey("llm.response.error_message") 35 | LLMResponseHeaders = AttributeKey("llm.response.headers") 36 | LLMResponseDuration = AttributeKey("llm.response.duration") 37 | 38 | LLMTokenType = AttributeKey("llm.usage.token_type") 39 | LLMUsageTotalTokens = AttributeKey("llm.usage.total_tokens") 40 | LLMUsageCompletionTokens = AttributeKey("llm.usage.completion_tokens") 41 | LLMUsagePromptTokens = AttributeKey("llm.usage.prompt_tokens") 42 | 43 | KnowayAuthInfoAPIKey = AttributeKey("knoway.auth.apikey") 44 | KnowayAuthInfoUser = AttributeKey("knoway.auth.user") 45 | ) 46 | 47 | type LLMTokenTypeEnum string 48 | 49 | const ( 50 | PromptTokenType LLMTokenTypeEnum = "prompt" 51 | CompletionTokenType LLMTokenTypeEnum = "completion" 52 | ) 53 | -------------------------------------------------------------------------------- /pkg/protoutils/proto.go: -------------------------------------------------------------------------------- 1 | package protoutils 2 | 3 | import ( 4 | "reflect" 5 | 6 | "google.golang.org/protobuf/proto" 7 | "google.golang.org/protobuf/types/known/anypb" 8 | ) 9 | 10 | func TypeURLOrDie(obj proto.Message) string { 11 | a, err := anypb.New(obj) 12 | if err != nil { 13 | panic(err) 14 | } 15 | 16 | return a.GetTypeUrl() 17 | } 18 | 19 | func FromAny[T proto.Message](a *anypb.Any, prototype T) (T, error) { 20 | newObj, _ := 
reflect.New(reflect.TypeOf(prototype).Elem()).Interface().(T) 21 | if err := a.UnmarshalTo(newObj); err != nil { 22 | return newObj, err 23 | } 24 | 25 | return newObj, nil 26 | } 27 | -------------------------------------------------------------------------------- /pkg/redis/client.go: -------------------------------------------------------------------------------- 1 | package redis 2 | 3 | import ( 4 | "github.com/redis/rueidis" 5 | ) 6 | 7 | // NewRedisClient creates a rueidis client from a Redis connection URL. 8 | // A malformed URL is reported through the returned error instead of a panic. 9 | func NewRedisClient(url string) (rueidis.Client, error) { 10 | opts, err := rueidis.ParseURL(url) 11 | if err != nil { 12 | return nil, err 13 | } 14 | 15 | redisClient, err := rueidis.NewClient(opts) 16 | if err != nil { 17 | return nil, err 18 | } 19 | 20 | return redisClient, nil 21 | } 22 | -------------------------------------------------------------------------------- /pkg/registry/config/registry.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "fmt" 5 | 6 | "google.golang.org/protobuf/types/known/anypb" 7 | 8 | filtersv1alpha1 "knoway.dev/api/filters/v1alpha1" 9 | "knoway.dev/pkg/bootkit" 10 | clusterfilters "knoway.dev/pkg/clusters/filters" 11 | "knoway.dev/pkg/clusters/filters/openai" 12 | "knoway.dev/pkg/filters" 13 | "knoway.dev/pkg/filters/auth" 14 | "knoway.dev/pkg/filters/ratelimit" 15 | "knoway.dev/pkg/filters/usage" 16 | "knoway.dev/pkg/protoutils" 17 | ) 18 | 19 | var ( 20 | requestFilters = map[string]func(cfg *anypb.Any, lifecycle bootkit.LifeCycle) (filters.RequestFilter, error){} 21 | 22 | clustersFilters = map[string]func(cfg *anypb.Any, lifecycle bootkit.LifeCycle) (clusterfilters.ClusterFilter, error){} 23 | ) 24 | 25 | func ClusterDefaultFilters(lifecycle bootkit.LifeCycle) []clusterfilters.ClusterFilter { 26 | res := make([]clusterfilters.ClusterFilter, 0) 27 | 28 | pb, _ := anypb.New(&filtersv1alpha1.OpenAIRequestHandlerConfig{}) 29 | reqMar, _ := NewClusterFilterWithConfig("global", pb, lifecycle) 30 | res = append(res, reqMar) 31 | 32 | responsePb, _ :=
anypb.New(&filtersv1alpha1.OpenAIResponseHandlerConfig{}) 33 | respMar, _ := NewClusterFilterWithConfig("global", responsePb, lifecycle) 34 | res = append(res, respMar) 35 | 36 | return res 37 | } 38 | 39 | func init() { 40 | requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.APIKeyAuthConfig{})] = auth.NewWithConfig 41 | requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.RateLimitConfig{})] = ratelimit.NewWithConfig 42 | requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.UsageStatsConfig{})] = usage.NewWithConfig 43 | 44 | // internal base Filters 45 | clustersFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.OpenAIRequestHandlerConfig{})] = openai.NewRequestHandlerWithConfig 46 | clustersFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.OpenAIResponseHandlerConfig{})] = openai.NewResponseHandlerWithConfig 47 | } 48 | 49 | func NewRequestFilterWithConfig(name string, cfg *anypb.Any, lifecycle bootkit.LifeCycle) (filters.RequestFilter, error) { 50 | if f, ok := requestFilters[cfg.GetTypeUrl()]; ok { 51 | return f(cfg, lifecycle) 52 | } 53 | 54 | return nil, fmt.Errorf("unknown listener filter %q, %s", name, cfg.GetTypeUrl()) 55 | } 56 | 57 | func NewClusterFilterWithConfig(name string, cfg *anypb.Any, lifecycle bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) { 58 | if f, ok := clustersFilters[cfg.GetTypeUrl()]; ok { 59 | return f(cfg, lifecycle) 60 | } 61 | 62 | return nil, fmt.Errorf("unknown cluster filter %q, %s", name, cfg.GetTypeUrl()) 63 | } 64 | 65 | // NewRequestFiltersKeys returns the keys of the requestFilters map 66 | func NewRequestFiltersKeys() []string { 67 | keys := make([]string, 0, len(requestFilters)) 68 | for k := range requestFilters { 69 | keys = append(keys, k) 70 | } 71 | 72 | return keys 73 | } 74 | 75 | // NewClustersFiltersKeys returns the keys of the clustersFilters map 76 | func NewClustersFiltersKeys() []string { 77 | keys := make([]string, 0, len(clustersFilters)) 78 | for k := range clustersFilters { 79 | keys = 
append(keys, k) 80 | } 81 | 82 | return keys 83 | } 84 | -------------------------------------------------------------------------------- /pkg/registry/config/registry_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestNewRequestFiltersKeys(t *testing.T) { 11 | checkKeys := func(expectedKeys []string, actualKeys []string) { 12 | require.NotEmpty(t, actualKeys) 13 | 14 | for _, expectedKey := range expectedKeys { 15 | found := false 16 | 17 | for _, key := range actualKeys { 18 | if key == expectedKey { 19 | found = true 20 | break 21 | } 22 | } 23 | 24 | assert.True(t, found) 25 | } 26 | } 27 | 28 | expectedRequestFiltersKeys := []string{ 29 | "type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig", 30 | "type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig", 31 | } 32 | keys := NewRequestFiltersKeys() 33 | checkKeys(expectedRequestFiltersKeys, keys) 34 | 35 | expectedClustersFiltersKeys := []string{ 36 | "type.googleapis.com/knoway.filters.v1alpha1.OpenAIRequestHandlerConfig", 37 | "type.googleapis.com/knoway.filters.v1alpha1.OpenAIResponseHandlerConfig", 38 | } 39 | cKeys := NewClustersFiltersKeys() 40 | checkKeys(expectedClustersFiltersKeys, cKeys) 41 | } 42 | -------------------------------------------------------------------------------- /pkg/route/manager/manager.go: -------------------------------------------------------------------------------- 1 | package manager 2 | 3 | import ( 4 | "context" 5 | "log/slog" 6 | "sync" 7 | 8 | "knoway.dev/pkg/bootkit" 9 | "knoway.dev/pkg/metadata" 10 | "knoway.dev/pkg/object" 11 | 12 | "knoway.dev/api/route/v1alpha1" 13 | "knoway.dev/pkg/route" 14 | rroute "knoway.dev/pkg/route/route" 15 | 16 | "github.com/samber/lo" 17 | ) 18 | 19 | var ( 20 | matchRouteRegistry = make(map[string]route.Route) 21 | routeRegistry 
= make(map[string]route.Route) 22 | 23 | routes = make([]route.Route, 0) 24 | routeLock sync.RWMutex 25 | ) 26 | 27 | func InitDirectModelRoute(modelName string) *v1alpha1.Route { 28 | return &v1alpha1.Route{ 29 | Name: modelName, 30 | Matches: []*v1alpha1.Match{ 31 | { 32 | Model: &v1alpha1.StringMatch{ 33 | Match: &v1alpha1.StringMatch_Exact{ 34 | Exact: modelName, 35 | }, 36 | }, 37 | }, 38 | }, 39 | Targets: []*v1alpha1.RouteTarget{ 40 | { 41 | Destination: &v1alpha1.RouteDestination{ 42 | Cluster: modelName, 43 | }, 44 | }, 45 | }, 46 | Filters: nil, // todo future 47 | } 48 | } 49 | 50 | func RegisterMatchRouteWithConfig(cfg *v1alpha1.Route, lifecycle bootkit.LifeCycle) error { 51 | routeLock.Lock() 52 | defer routeLock.Unlock() 53 | 54 | r, err := rroute.NewWithConfig(cfg, lifecycle) 55 | if err != nil { 56 | return err 57 | } 58 | 59 | matchRouteRegistry[cfg.GetName()] = r 60 | routes = mergeRoutes() 61 | 62 | slog.Info("register match route", "name", cfg.GetName()) 63 | 64 | return nil 65 | } 66 | 67 | func RemoveMatchRoute(rName string) { 68 | routeLock.Lock() 69 | defer routeLock.Unlock() 70 | 71 | delete(matchRouteRegistry, rName) 72 | routes = mergeRoutes() 73 | 74 | slog.Info("remove match route", "name", rName) 75 | } 76 | 77 | func RegisterBaseRouteWithConfig(cfg *v1alpha1.Route, lifecycle bootkit.LifeCycle) error { 78 | routeLock.Lock() 79 | defer routeLock.Unlock() 80 | 81 | r, err := rroute.NewWithConfig(cfg, lifecycle) 82 | if err != nil { 83 | return err 84 | } 85 | 86 | routeRegistry[cfg.GetName()] = r 87 | 88 | if _, exists := matchRouteRegistry[cfg.GetName()]; exists { 89 | slog.Info("route exists in matchRouteRegistry, skipping base route registration", "name", cfg.GetName()) 90 | return nil 91 | } 92 | 93 | routes = mergeRoutes() 94 | 95 | slog.Info("register base route", "name", cfg.GetName()) 96 | 97 | return nil 98 | } 99 | 100 | func RemoveBaseRoute(rName string) { 101 | routeLock.Lock() 102 | defer routeLock.Unlock() 103 | 104 | 
delete(routeRegistry, rName) 105 | routes = mergeRoutes() 106 | 107 | slog.Info("remove base route", "name", rName) 108 | } 109 | 110 | // mergeRoutes rebuilds the effective route list from both registries; on a name clash 111 | // the entry in matchRouteRegistry takes precedence over the one in routeRegistry. 112 | func mergeRoutes() []route.Route { 113 | uniqueRoutes := make(map[string]route.Route) 114 | 115 | for k, v := range matchRouteRegistry { 116 | uniqueRoutes[k] = v 117 | } 118 | 119 | for k, v := range routeRegistry { 120 | if _, exists := uniqueRoutes[k]; !exists { 121 | uniqueRoutes[k] = v 122 | } 123 | } 124 | 125 | return lo.Values(uniqueRoutes) 126 | } 127 | 128 | // MatchRoute returns the first entry of routes whose Match accepts the request, 129 | // or nil when none does. 130 | func MatchRoute(ctx context.Context, request object.LLMRequest) route.Route { 131 | routeLock.RLock() 132 | defer routeLock.RUnlock() 133 | 134 | for _, r := range routes { 135 | if r.Match(ctx, request) { 136 | return r 137 | } 138 | } 139 | 140 | return nil 141 | } 142 | 143 | // HandleRequest dispatches llmRequest to the route selected by MatchRoute and records that 144 | // route in the request metadata; when no route matches it returns the error built by 145 | // object.NewErrorModelNotFoundOrNotAccessible. 146 | func HandleRequest(ctx context.Context, llmRequest object.LLMRequest) (object.LLMResponse, error) { 147 | route := MatchRoute(ctx, llmRequest) 148 | if route == nil { 149 | return nil, object.NewErrorModelNotFoundOrNotAccessible(llmRequest.GetModel()) 150 | } 151 | 152 | // RequestMetadataFromCtx returns nil when ctx carries no request metadata; 153 | // skip the bookkeeping in that case instead of dereferencing a nil pointer. 154 | if rMeta := metadata.RequestMetadataFromCtx(ctx); rMeta != nil { 155 | rMeta.MatchRoute = route 156 | } 157 | 158 | return route.HandleRequest(ctx, llmRequest) 159 | } 160 | 161 | // DebugDumpAllRoutes returns the route config of every entry currently in routes. 162 | func DebugDumpAllRoutes() []*v1alpha1.Route { 163 | // Read-only access: the shared lock is sufficient and does not block MatchRoute. 164 | routeLock.RLock() 165 | defer routeLock.RUnlock() 166 | 167 | return lo.Map(routes, func(r route.Route, _ int) *v1alpha1.Route { 168 | return r.GetRouteConfig() 169 | }) 170 | } 171 | -------------------------------------------------------------------------------- /pkg/route/route.go: -------------------------------------------------------------------------------- 1 | package route 2 | 3 | import ( 4 | "context" 5 | 6 | routev1alpha1 "knoway.dev/api/route/v1alpha1" 7 | "knoway.dev/pkg/object" 8 | ) 9 | 10 | type Route interface { 11 | // Match returns true if the route matches the request 12 | Match(ctx context.Context, request object.LLMRequest) bool 13 | // HandleRequest handles the request 14 | HandleRequest(ctx
context.Context, request object.LLMRequest) (object.LLMResponse, error) 15 | 16 | // GetRouteConfig returns the route config 17 | GetRouteConfig() *routev1alpha1.Route 18 | } 19 | -------------------------------------------------------------------------------- /pkg/types/openai/chat_completions_request_test.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "encoding/json" 7 | "net/http" 8 | "testing" 9 | 10 | "github.com/samber/lo" 11 | "github.com/stretchr/testify/assert" 12 | "github.com/stretchr/testify/require" 13 | "google.golang.org/protobuf/types/known/structpb" 14 | ) 15 | 16 | func TestSetModel(t *testing.T) { 17 | httpRequest, err := http.NewRequestWithContext(context.Background(), http.MethodPost, "http://example.com", bytes.NewBufferString(` 18 | { 19 | "model": "some", 20 | "messages": [ 21 | { 22 | "role": "user", 23 | "content": "hi" 24 | } 25 | ] 26 | } 27 | `)) 28 | require.NoError(t, err) 29 | 30 | request, err := NewChatCompletionRequest(httpRequest) 31 | require.NoError(t, err) 32 | 33 | newModel := lo.RandomString(10, lo.LettersCharset) 34 | 35 | err = request.SetModel(newModel) 36 | require.NoError(t, err) 37 | assert.Equal(t, newModel, request.GetModel()) 38 | 39 | // Verify the body buffer has been updated 40 | var body map[string]any 41 | 42 | err = json.Unmarshal(lo.Must(json.Marshal(request)), &body) 43 | require.NoError(t, err) 44 | assert.Equal(t, newModel, body["model"]) 45 | 46 | messages := []map[string]any{ 47 | { 48 | "role": "user", 49 | "content": "hi", 50 | }, 51 | } 52 | 53 | newMessages, ok := body["messages"].([]interface{}) 54 | require.True(t, ok) 55 | assert.Equal(t, len(messages), len(newMessages)) 56 | 57 | for i, msg := range messages { 58 | newMessageMap, ok := newMessages[i].(map[string]interface{}) 59 | require.True(t, ok) 60 | 61 | assert.Equal(t, msg["role"], newMessageMap["role"]) 62 | assert.Equal(t, 
msg["content"], newMessageMap["content"]) 63 | } 64 | } 65 | 66 | func TestSetDefaultParams(t *testing.T) { 67 | body := []byte(`{ 68 | "model": "gpt-4", 69 | "stream": false 70 | }`) 71 | 72 | req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body)) 73 | require.NoError(t, err) 74 | 75 | chatReq, err := NewChatCompletionRequest(req) 76 | require.NoError(t, err) 77 | 78 | params := map[string]*structpb.Value{ 79 | "model": structpb.NewStringValue("openai/gpt-4"), 80 | "stream": structpb.NewBoolValue(true), 81 | "temperature": structpb.NewNumberValue(0.7), 82 | "max_tokens": structpb.NewNumberValue(100), 83 | } 84 | 85 | err = chatReq.SetDefaultParams(params) 86 | require.NoError(t, err) 87 | 88 | assert.Equal(t, false, chatReq.bodyParsed["stream"]) 89 | assert.Equal(t, "gpt-4", chatReq.bodyParsed["model"]) 90 | assert.InDelta(t, 0.7, chatReq.bodyParsed["temperature"], 0.0001) 91 | assert.InDelta(t, 100.0, chatReq.bodyParsed["max_tokens"], 0.0001) 92 | } 93 | 94 | func TestSetOverrideParams(t *testing.T) { 95 | body := []byte(`{ 96 | "model": "gpt-4", 97 | "stream": false, 98 | "temperature": 0.5, 99 | "max_tokens": 200 100 | }`) 101 | 102 | req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body)) 103 | require.NoError(t, err) 104 | 105 | chatReq, err := NewChatCompletionRequest(req) 106 | require.NoError(t, err) 107 | 108 | params := map[string]*structpb.Value{ 109 | "model": structpb.NewStringValue("openai/gpt-4"), 110 | "stream": structpb.NewBoolValue(true), 111 | "temperature": structpb.NewNumberValue(0.7), 112 | "max_tokens": structpb.NewNumberValue(100), 113 | "stream_options": structpb.NewStructValue(&structpb.Struct{ 114 | Fields: map[string]*structpb.Value{ 115 | "include_usage": structpb.NewBoolValue(true), 116 | }, 117 | }), 118 | } 119 | 120 | err = chatReq.SetOverrideParams(params) 121 | require.NoError(t, err) 122 | 123 | assert.Equal(t, "openai/gpt-4", 
chatReq.bodyParsed["model"]) 124 | assert.InDelta(t, 0.7, chatReq.bodyParsed["temperature"], 0.0001) 125 | assert.InDelta(t, 100.0, chatReq.bodyParsed["max_tokens"], 0.0001) 126 | 127 | assert.Equal(t, true, chatReq.bodyParsed["stream"]) 128 | assert.Equal(t, map[string]any{ 129 | "include_usage": true, 130 | }, chatReq.bodyParsed["stream_options"]) 131 | } 132 | -------------------------------------------------------------------------------- /pkg/types/openai/chat_completions_response.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/json" 7 | "fmt" 8 | "net/http" 9 | 10 | "knoway.dev/pkg/object" 11 | "knoway.dev/pkg/utils" 12 | ) 13 | 14 | var _ object.LLMResponse = (*ChatCompletionsResponse)(nil) 15 | 16 | type ChatCompletionsResponse struct { 17 | Status int `json:"status"` 18 | Model string `json:"model"` 19 | Usage *ChatCompletionsUsage `json:"usage,omitempty"` 20 | Error *ErrorResponse `json:"error,omitempty"` 21 | Stream bool `json:"stream"` 22 | 23 | request object.LLMRequest 24 | responseBody json.RawMessage 25 | bodyParsed map[string]any 26 | outgoingResponse *http.Response 27 | } 28 | 29 | func NewChatCompletionResponse(request object.LLMRequest, response *http.Response, reader *bufio.Reader) (*ChatCompletionsResponse, error) { 30 | resp := new(ChatCompletionsResponse) 31 | 32 | buffer := new(bytes.Buffer) 33 | 34 | _, err := buffer.ReadFrom(reader) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | err = resp.processBytes(buffer.Bytes(), response) 40 | if err != nil { 41 | return nil, fmt.Errorf("failed to unmarshal response: %w", err) 42 | } 43 | 44 | resp.request = request 45 | resp.outgoingResponse = response 46 | 47 | return resp, nil 48 | } 49 | 50 | func (r *ChatCompletionsResponse) processBytes(bs []byte, response *http.Response) error { 51 | if r == nil { 52 | return nil 53 | } 54 | 55 | r.responseBody = bs 56 | r.Status = 
response.StatusCode 57 | 58 | var body map[string]any 59 | 60 | err := json.Unmarshal(bs, &body) 61 | if err != nil { 62 | return fmt.Errorf("failed to unmarshal response body: %w", err) 63 | } 64 | 65 | r.bodyParsed = body 66 | 67 | r.Model = utils.GetByJSONPath[string](body, "{ .model }") 68 | usageMap := utils.GetByJSONPath[map[string]any](body, "{ .usage }") 69 | 70 | r.Usage, err = utils.FromMap[ChatCompletionsUsage](usageMap) 71 | if err != nil { 72 | return fmt.Errorf("failed to unmarshal usage: %w", err) 73 | } 74 | 75 | errorResponse, err := unmarshalErrorResponseFromParsedBody(body, response, bs) 76 | if err != nil { 77 | return err 78 | } 79 | if errorResponse != nil { 80 | r.Error = errorResponse 81 | } 82 | 83 | return nil 84 | } 85 | 86 | func (r *ChatCompletionsResponse) MarshalJSON() ([]byte, error) { 87 | return r.responseBody, nil 88 | } 89 | 90 | func (r *ChatCompletionsResponse) IsStream() bool { 91 | return false 92 | } 93 | 94 | func (r *ChatCompletionsResponse) GetRequestID() string { 95 | // TODO: implement 96 | return "" 97 | } 98 | 99 | func (r *ChatCompletionsResponse) GetModel() string { 100 | return r.Model 101 | } 102 | 103 | func (r *ChatCompletionsResponse) SetModel(model string) error { 104 | if r.Error == nil { 105 | var err error 106 | 107 | r.responseBody, r.bodyParsed, err = modifyBytesBodyAndParsed(r.responseBody, NewReplace("/model", model)) 108 | if err != nil { 109 | return err 110 | } 111 | } 112 | 113 | r.Model = model 114 | 115 | return nil 116 | } 117 | 118 | func (r *ChatCompletionsResponse) GetUsage() object.LLMUsage { 119 | return r.Usage 120 | } 121 | 122 | func (r *ChatCompletionsResponse) GetError() object.LLMError { 123 | if r.Error != nil { 124 | return r.Error 125 | } 126 | 127 | return nil 128 | } 129 | -------------------------------------------------------------------------------- /pkg/types/openai/common_test.go: -------------------------------------------------------------------------------- 1 | package 
openai 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/samber/lo" 7 | "github.com/stretchr/testify/assert" 8 | "github.com/stretchr/testify/require" 9 | ) 10 | 11 | func TestParseImageGenerationsSizeString(t *testing.T) { 12 | size, err := parseImageGenerationsSizeString(nil) 13 | require.NoError(t, err) 14 | require.Nil(t, size) 15 | 16 | size, err = parseImageGenerationsSizeString(lo.ToPtr("")) 17 | require.Error(t, err) 18 | require.Nil(t, size) 19 | require.EqualError(t, err, "empty size string") 20 | 21 | size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024")) 22 | require.NoError(t, err) 23 | require.NotNil(t, size) 24 | assert.Equal(t, uint64(1024), size.Width) 25 | assert.Equal(t, uint64(1024), size.Height) 26 | 27 | size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x")) 28 | require.Error(t, err) 29 | require.Nil(t, size) 30 | require.EqualError(t, err, "invalid height `` in \"size\" value `1024x`") 31 | 32 | size, err = parseImageGenerationsSizeString(lo.ToPtr("x1024")) 33 | require.Error(t, err) 34 | require.Nil(t, size) 35 | require.EqualError(t, err, "invalid width `` in \"size\" value `x1024`") 36 | 37 | size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024x1024")) 38 | require.Error(t, err) 39 | require.Nil(t, size) 40 | require.EqualError(t, err, "invalid `1024x1024x1024` in \"size\" value: too many parts") 41 | 42 | size, err = parseImageGenerationsSizeString(lo.ToPtr("1024")) 43 | require.Error(t, err) 44 | require.Nil(t, size) 45 | require.EqualError(t, err, "invalid `1024` in \"size\" value") 46 | 47 | size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024x")) 48 | require.Error(t, err) 49 | require.Nil(t, size) 50 | require.EqualError(t, err, "invalid `1024x1024x` in \"size\" value: too many parts") 51 | 52 | size, err = parseImageGenerationsSizeString(lo.ToPtr("testx1024")) 53 | require.Error(t, err) 54 | require.Nil(t, size) 55 | require.EqualError(t, err, "invalid width `test` in \"size\" value 
`testx1024`") 56 | } 57 | -------------------------------------------------------------------------------- /pkg/types/openai/event.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | type Event string 4 | 5 | const ( 6 | EventError Event = "error" 7 | ) 8 | 9 | type ErrorEvent struct { 10 | Event Event `json:"event"` 11 | Error Error `json:"error"` 12 | } 13 | -------------------------------------------------------------------------------- /pkg/types/openai/http.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "errors" 5 | "log/slog" 6 | "net/http" 7 | 8 | "knoway.dev/pkg/metadata" 9 | "knoway.dev/pkg/utils" 10 | ) 11 | 12 | var ( 13 | SkipStreamResponse = errors.New("skip writing stream response") //nolint:errname,stylecheck 14 | ) 15 | 16 | // ResponseHandler returns the callback that writes a handler result to the client: 17 | // JSON with status 200 for a non-nil response, nothing for SkipStreamResponse, and the 18 | // converted OpenAI error otherwise. The outcome is recorded in the request metadata. 19 | func ResponseHandler() func(resp any, err error, writer http.ResponseWriter, request *http.Request) { 20 | return func(resp any, err error, writer http.ResponseWriter, request *http.Request) { 21 | rMeta := metadata.RequestMetadataFromCtx(request.Context()) 22 | if rMeta == nil { 23 | // No metadata on the context: record into a throwaway value so the response 24 | // is still written instead of panicking on a nil pointer. 25 | rMeta = &metadata.RequestMetadata{} 26 | } 27 | 28 | if err == nil { 29 | if resp != nil { 30 | rMeta.StatusCode = http.StatusOK 31 | 32 | utils.WriteJSONForHTTP(http.StatusOK, resp, writer) 33 | } 34 | 35 | return 36 | } 37 | 38 | if errors.Is(err, SkipStreamResponse) { 39 | // NOTICE: special case where the response is already handled by the stream 40 | // handler as we assume the stream handler will handle the response as 41 | // status code 200 OK.
42 | rMeta.StatusCode = http.StatusOK 43 | 44 | return 45 | } 46 | 47 | openAIError := NewErrorFromLLMError(err) 48 | if openAIError.FromUpstream { 49 | slog.Error("upstream returned an error", 50 | "status", openAIError.Status, 51 | "code", openAIError.ErrorBody.Code, 52 | "message", openAIError.ErrorBody.Message, 53 | "type", openAIError.ErrorBody.Type, 54 | ) 55 | } else if openAIError.Status >= http.StatusInternalServerError { 56 | slog.Error("failed to handle request", "error", openAIError, "cause", openAIError.Cause, "source_error", err.Error()) 57 | } 58 | 59 | rMeta.StatusCode = openAIError.Status 60 | rMeta.ErrorMessage = openAIError.Error() 61 | 62 | utils.WriteJSONForHTTP(openAIError.Status, openAIError, writer) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /pkg/types/openai/image_generations_request_test.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "net/http" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | "google.golang.org/protobuf/types/known/structpb" 12 | ) 13 | 14 | func TestImageGenerationsSetDefaultParams(t *testing.T) { 15 | body := []byte(`{ 16 | "model": "public/sd-3", 17 | "n": 3, 18 | "size": "1024x1792" 19 | }`) 20 | 21 | req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body)) 22 | require.NoError(t, err) 23 | 24 | chatReq, err := NewImageGenerationsRequest(req) 25 | require.NoError(t, err) 26 | 27 | params := map[string]*structpb.Value{ 28 | "model": structpb.NewStringValue("openai/dall-e-3"), 29 | "n": structpb.NewNumberValue(1), 30 | "style": structpb.NewStringValue("natural"), 31 | "quality": structpb.NewStringValue("hd"), 32 | "size": structpb.NewStringValue("1792x1024"), 33 | } 34 | 35 | err = chatReq.SetDefaultParams(params) 36 | require.NoError(t, err) 37 | 38 |
assert.Equal(t, "public/sd-3", chatReq.bodyParsed["model"]) 39 | assert.InDelta(t, 3.0, chatReq.bodyParsed["n"], 0.0001) 40 | assert.Equal(t, "natural", chatReq.bodyParsed["style"]) 41 | assert.Equal(t, "hd", chatReq.bodyParsed["quality"]) 42 | assert.Equal(t, "1024x1792", chatReq.bodyParsed["size"]) 43 | } 44 | 45 | func TestImageGenerationsSetOverrideParams(t *testing.T) { 46 | body := []byte(`{ 47 | "model": "public/sd-3", 48 | "n": 3, 49 | "style": "natural" 50 | }`) 51 | 52 | req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body)) 53 | require.NoError(t, err) 54 | 55 | chatReq, err := NewImageGenerationsRequest(req) 56 | require.NoError(t, err) 57 | 58 | params := map[string]*structpb.Value{ 59 | "model": structpb.NewStringValue("openai/dall-e-3"), 60 | "n": structpb.NewNumberValue(1), 61 | "size": structpb.NewStringValue("1792x1024"), 62 | } 63 | 64 | err = chatReq.SetOverrideParams(params) 65 | require.NoError(t, err) 66 | 67 | assert.Equal(t, "openai/dall-e-3", chatReq.bodyParsed["model"]) 68 | assert.InDelta(t, 1.0, chatReq.bodyParsed["n"], 0.0001) 69 | assert.Equal(t, "natural", chatReq.bodyParsed["style"]) 70 | } 71 | -------------------------------------------------------------------------------- /pkg/types/openai/jsonpatch.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "encoding/json" 5 | 6 | "github.com/samber/lo" 7 | ) 8 | 9 | type JSONPatchOperation string 10 | 11 | const ( 12 | JSONPatchOperationAdd JSONPatchOperation = "add" 13 | JSONPatchOperationRemove JSONPatchOperation = "remove" 14 | JSONPatchOperationReplace JSONPatchOperation = "replace" 15 | ) 16 | 17 | type JSONPatchOperationObject struct { 18 | Operation JSONPatchOperation `json:"op"` 19 | Path string `json:"path"` 20 | Value any `json:"value,omitempty"` 21 | } 22 | 23 | func NewPatches(operations ...*JSONPatchOperationObject) []byte { 24 | return 
lo.Must(json.Marshal(operations)) 25 | } 26 | 27 | func NewReplace(path string, to any) *JSONPatchOperationObject { 28 | return &JSONPatchOperationObject{ 29 | Operation: JSONPatchOperationReplace, 30 | Path: path, 31 | Value: to, 32 | } 33 | } 34 | 35 | func NewAdd(path string, value any) *JSONPatchOperationObject { 36 | return &JSONPatchOperationObject{ 37 | Operation: JSONPatchOperationAdd, 38 | Path: path, 39 | Value: value, 40 | } 41 | } 42 | 43 | func NewRemove(path string) *JSONPatchOperationObject { 44 | return &JSONPatchOperationObject{ 45 | Operation: JSONPatchOperationRemove, 46 | Path: path, 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /pkg/types/openai/jsonpatch_test.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | "encoding/json" 5 | "testing" 6 | 7 | jsonpatch "github.com/evanphx/json-patch/v5" 8 | "github.com/samber/lo" 9 | "github.com/stretchr/testify/require" 10 | ) 11 | 12 | func TestJSONPatchReplace(t *testing.T) { 13 | patch, err := jsonpatch.DecodePatch(NewPatches( 14 | NewReplace("/model", "gpt-3.5-turbo"), 15 | )) 16 | require.NoError(t, err) 17 | 18 | patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{ 19 | "model": "gpt-3.5", 20 | }))) 21 | require.NoError(t, err) 22 | 23 | require.JSONEq(t, `{"model":"gpt-3.5-turbo"}`, string(patched)) 24 | } 25 | 26 | func TestJSONPatchAdd(t *testing.T) { 27 | patch, err := jsonpatch.DecodePatch(NewPatches( 28 | NewAdd("/stream_options", map[string]any{ 29 | "include_usage": true, 30 | }), 31 | )) 32 | require.NoError(t, err) 33 | 34 | patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{ 35 | "model": "gpt-3.5", 36 | }))) 37 | require.NoError(t, err) 38 | 39 | require.JSONEq(t, `{"model":"gpt-3.5","stream_options":{"include_usage":true}}`, string(patched)) 40 | } 41 | 42 | func TestJSONPatchRemove(t *testing.T) { 43 | patch, err := 
jsonpatch.DecodePatch(NewPatches( 44 | NewRemove("/model"), 45 | )) 46 | require.NoError(t, err) 47 | 48 | patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{ 49 | "model": "gpt-3.5", 50 | }))) 51 | require.NoError(t, err) 52 | 53 | require.JSONEq(t, `{}`, string(patched)) 54 | } 55 | -------------------------------------------------------------------------------- /pkg/types/openai/testdata/GoogleSampleWebpImage.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/GoogleSampleWebpImage.webp -------------------------------------------------------------------------------- /pkg/types/openai/testdata/SampleGIFImage_135kbmb.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SampleGIFImage_135kbmb.gif -------------------------------------------------------------------------------- /pkg/types/openai/testdata/SampleJPGImage_100kbmb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SampleJPGImage_100kbmb.jpg -------------------------------------------------------------------------------- /pkg/types/openai/testdata/SamplePNGImage_100kbmb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SamplePNGImage_100kbmb.png -------------------------------------------------------------------------------- /pkg/types/openai/usage.go: -------------------------------------------------------------------------------- 1 | package openai 2 | 3 | import ( 4 | 
// CompletionTokensDetails is the per-category breakdown of completion tokens.
// It is serialized under the "completion_tokens_details" key of
// ChatCompletionsUsage.
type CompletionTokensDetails struct {
	// When using Predicted Outputs, the number of tokens in the prediction
	// that appeared in the completion.
	AcceptedPredictionTokens uint64 `json:"accepted_prediction_tokens"`
	// Audio input tokens generated by the model.
	AudioTokens uint64 `json:"audio_tokens"`
	// Tokens generated by the model for reasoning.
	ReasoningTokens uint64 `json:"reasoning_tokens"`
	// When using Predicted Outputs, the number of tokens in the prediction
	// that did not appear in the completion. However, like reasoning tokens,
	// these tokens are still counted in the total completion tokens for
	// purposes of billing, output, and context window limits.
	RejectedPredictionTokens uint64 `json:"rejected_prediction_tokens"`
}

// PromptTokensDetails is the per-category breakdown of prompt tokens.
// It is serialized under the "prompt_tokens_details" key of
// ChatCompletionsUsage.
type PromptTokensDetails struct {
	// Audio input tokens present in the prompt.
	// NOTE(review): wording follows the OpenAI usage object reference; the
	// previous comment described these as generated by the model.
	AudioTokens uint64 `json:"audio_tokens"`
	// Cached tokens present in the prompt.
	// NOTE(review): wording follows the OpenAI usage object reference.
	CachedTokens uint64 `json:"cached_tokens"`
}

// Compile-time checks that *ChatCompletionsUsage satisfies the usage interfaces.
var _ object.LLMUsage = (*ChatCompletionsUsage)(nil)
var _ object.LLMTokensUsage = (*ChatCompletionsUsage)(nil)

// ChatCompletionsUsage carries the token accounting of a chat completions
// response. All fields are omitted from JSON when they hold their zero value.
type ChatCompletionsUsage struct {
	object.IsLLMUsage

	TotalTokens             uint64                   `json:"total_tokens,omitempty"`              // Total number of tokens used in the request (prompt + completion).
	CompletionTokens        uint64                   `json:"completion_tokens,omitempty"`         // Number of tokens in the generated completion.
	PromptTokens            uint64                   `json:"prompt_tokens,omitempty"`             // Number of tokens in the prompt.
	CompletionTokensDetails *CompletionTokensDetails `json:"completion_tokens_details,omitempty"` // Breakdown of tokens used in a completion.
	PromptTokensDetails     *PromptTokensDetails     `json:"prompt_tokens_details,omitempty"`     // Breakdown of tokens used in the prompt.
}

// GetTotalTokens returns the TotalTokens field.
func (u *ChatCompletionsUsage) GetTotalTokens() uint64 {
	return u.TotalTokens
}

// GetCompletionTokens returns the CompletionTokens field.
func (u *ChatCompletionsUsage) GetCompletionTokens() uint64 {
	return u.CompletionTokens
}

// GetPromptTokens returns the PromptTokens field.
func (u *ChatCompletionsUsage) GetPromptTokens() uint64 {
	return u.PromptTokens
}

// GetOutputImages always returns an empty, non-nil slice: chat completions
// usage carries no image information.
func (u *ChatCompletionsUsage) GetOutputImages() []object.ImageGenerationsUsageImage {
	return make([]object.ImageGenerationsUsageImage, 0)
}

// Compile-time check that *ImageGenerationsUsageImage satisfies the image usage interface.
var _ object.ImageGenerationsUsageImage = (*ImageGenerationsUsageImage)(nil)

// ImageGenerationsUsageImage describes one generated image. All fields are
// omitted from JSON when they hold their zero value.
type ImageGenerationsUsageImage struct {
	Width   uint64 `json:"width,omitempty"`   // Width of the generated image.
	Height  uint64 `json:"height,omitempty"`  // Height of the generated image.
	Style   string `json:"style,omitempty"`   // Style of the generated image.
	Quality string `json:"quality,omitempty"` // Quality of the generated image.
}

// GetWidth returns the Width field.
func (i *ImageGenerationsUsageImage) GetWidth() uint64 {
	return i.Width
}

// GetHeight returns the Height field.
func (i *ImageGenerationsUsageImage) GetHeight() uint64 {
	return i.Height
}

// GetStyle returns the Style field.
func (i *ImageGenerationsUsageImage) GetStyle() string {
	return i.Style
}

// GetQuality returns the Quality field.
func (i *ImageGenerationsUsageImage) GetQuality() string {
	return i.Quality
}

// Compile-time checks that *ImageGenerationsUsage satisfies the usage interfaces.
var _ object.LLMUsage = (*ImageGenerationsUsage)(nil)
var _ object.LLMImagesUsage = (*ImageGenerationsUsage)(nil)

// ImageGenerationsUsage carries the usage of an image generations response.
// It reports no token counts; every token getter returns 0.
type ImageGenerationsUsage struct {
	object.IsLLMUsage

	Images []*ImageGenerationsUsageImage `json:"images,omitempty"` // Usage information for each generated image.
}

// GetTotalTokens always returns 0.
func (u *ImageGenerationsUsage) GetTotalTokens() uint64 {
	return 0
}

// GetCompletionTokens always returns 0.
func (u *ImageGenerationsUsage) GetCompletionTokens() uint64 {
	return 0
}

// GetPromptTokens always returns 0.
func (u *ImageGenerationsUsage) GetPromptTokens() uint64 {
	return 0
}

// GetOutputImages exposes u.Images as a slice of the
// object.ImageGenerationsUsageImage interface, converted through
// utils.TypeAssertFrom.
func (u *ImageGenerationsUsage) GetOutputImages() []object.ImageGenerationsUsageImage {
	return utils.TypeAssertFrom[*ImageGenerationsUsageImage, object.ImageGenerationsUsageImage](u.Images)
}
If a non-integer value is specified, the field is ignored. 28 | Retry []byte 29 | // Comment line can be used to prevent connections from timing out; a server can send a comment 30 | // periodically to keep the connection alive. 31 | Comment []byte 32 | } 33 | 34 | // MarshalTo marshals Event to given Writer. 35 | func (ev *Event) MarshalTo(w io.Writer) error { 36 | // Marshalling part is taken from: https://github.com/r3labs/sse/blob/c6d5381ee3ca63828b321c16baa008fd6c0b4564/http.go#L16 37 | if len(ev.Data) == 0 && len(ev.Comment) == 0 { 38 | return nil 39 | } 40 | 41 | defer utils.SafeFlush(w) 42 | 43 | if len(ev.Data) > 0 { //nolint:nestif 44 | if len(ev.ID) > 0 { 45 | if _, err := fmt.Fprintf(w, "id: %s\n", ev.ID); err != nil { 46 | return err 47 | } 48 | } 49 | 50 | sd := bytes.Split(ev.Data, []byte("\n")) 51 | for i := range sd { 52 | if _, err := fmt.Fprintf(w, "data: %s\n", sd[i]); err != nil { 53 | return err 54 | } 55 | } 56 | 57 | if len(ev.Event) > 0 { 58 | if _, err := fmt.Fprintf(w, "event: %s\n", ev.Event); err != nil { 59 | return err 60 | } 61 | } 62 | 63 | if len(ev.Retry) > 0 { 64 | if _, err := fmt.Fprintf(w, "retry: %s\n", ev.Retry); err != nil { 65 | return err 66 | } 67 | } 68 | } 69 | 70 | if len(ev.Comment) > 0 { 71 | if _, err := fmt.Fprintf(w, ": %s\n", ev.Comment); err != nil { 72 | return err 73 | } 74 | } 75 | 76 | if _, err := fmt.Fprint(w, "\n"); err != nil { 77 | return err 78 | } 79 | 80 | return nil 81 | } 82 | -------------------------------------------------------------------------------- /pkg/utils/crd_common_hash.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "strings" 7 | ) 8 | 9 | func CalcKeysHash(keys []string) string { 10 | if len(keys) == 0 { 11 | return "" 12 | } 13 | 14 | h := sha256.New() 15 | h.Write([]byte(strings.Join(keys, "/"))) 16 | bs := h.Sum(nil) 17 | 18 | return hex.EncodeToString(bs)[:8] 19 | } 
20 | -------------------------------------------------------------------------------- /pkg/utils/http.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | ) 7 | 8 | func SafeFlush(writer any) { 9 | f, ok := writer.(http.Flusher) 10 | if ok && f != nil { 11 | f.Flush() 12 | } 13 | } 14 | 15 | func WriteJSONForHTTP(status int, resp any, writer http.ResponseWriter) { 16 | bs, _ := json.Marshal(resp) //nolint:errchkjson 17 | 18 | writer.Header().Set("Content-Type", "application/json; charset=utf-8") 19 | writer.WriteHeader(status) 20 | 21 | SafeFlush(writer) 22 | 23 | _, _ = writer.Write(bs) 24 | } 25 | 26 | func WriteEventStreamHeadersForHTTP(writer http.ResponseWriter) { 27 | writer.Header().Set("Content-Type", "text/event-stream; charset=utf-8") 28 | writer.Header().Set("Cache-Control", "no-cache") 29 | writer.Header().Set("Connection", "keep-alive") 30 | writer.Header().Set("Transfer-Encoding", "chunked") 31 | writer.WriteHeader(http.StatusOK) 32 | 33 | SafeFlush(writer) 34 | } 35 | -------------------------------------------------------------------------------- /pkg/utils/json.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io" 7 | 8 | "k8s.io/client-go/util/jsonpath" 9 | ) 10 | 11 | func GetByJSONPathWithoutConvert(input any, template string) (string, error) { 12 | j := jsonpath.New("document") 13 | j.AllowMissingKeys(true) 14 | 15 | err := j.Parse(template) 16 | if err != nil { 17 | return "", err 18 | } 19 | 20 | buffer := new(bytes.Buffer) 21 | 22 | err = j.Execute(buffer, input) 23 | if err != nil { 24 | return "", err 25 | } 26 | 27 | return buffer.String(), nil 28 | } 29 | 30 | func GetByJSONPath[T any](input any, template string) T { 31 | var empty T 32 | 33 | result, err := GetByJSONPathWithoutConvert(input, template) 34 | if err != nil { 35 | 
return empty 36 | } 37 | 38 | return FromStringOrEmpty[T](result) 39 | } 40 | 41 | func ReadAsJSONWithClose(readCloser io.ReadCloser) (*bytes.Buffer, map[string]any, error) { 42 | defer func() { 43 | _ = readCloser.Close() 44 | }() 45 | 46 | buffer, jsonMap, err := ReadAsJSON(readCloser) 47 | if err != nil { 48 | return buffer, jsonMap, err 49 | } 50 | 51 | return buffer, jsonMap, nil 52 | } 53 | 54 | func ReadAsJSON(reader io.Reader) (*bytes.Buffer, map[string]any, error) { 55 | buffer := new(bytes.Buffer) 56 | jsonMap := make(map[string]any) 57 | 58 | _, err := io.Copy(buffer, reader) 59 | if err != nil { 60 | return buffer, jsonMap, err 61 | } 62 | 63 | err = json.Unmarshal(buffer.Bytes(), &jsonMap) 64 | if err != nil { 65 | return buffer, jsonMap, err 66 | } 67 | 68 | return buffer, jsonMap, nil 69 | } 70 | 71 | func FromMap[T any, MK comparable, MV any](m map[MK]MV) (*T, error) { 72 | if m == nil { 73 | return nil, nil 74 | } 75 | 76 | if len(m) == 0 { 77 | return nil, nil 78 | } 79 | 80 | var initial T 81 | 82 | bs, err := json.Marshal(m) 83 | if err != nil { 84 | return nil, err 85 | } 86 | 87 | err = json.Unmarshal(bs, &initial) 88 | if err != nil { 89 | return nil, err 90 | } 91 | 92 | return &initial, nil 93 | } 94 | -------------------------------------------------------------------------------- /pkg/utils/json_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestJSONPathExecute(t *testing.T) { 10 | t.Parallel() 11 | 12 | t.Run("string", func(t *testing.T) { 13 | t.Parallel() 14 | 15 | type testCase struct { 16 | name string 17 | payload map[string]any 18 | template string 19 | expected any 20 | } 21 | 22 | testCases := []testCase{ 23 | { 24 | name: "model", 25 | payload: map[string]any{ 26 | "model": "gpt-4o", 27 | }, 28 | template: "{ .model }", 29 | expected: "gpt-4o", 30 | }, 31 | { 32 | name: 
"message role", 33 | payload: map[string]any{ 34 | "model": "gpt-4o", 35 | "messages": []any{ 36 | map[string]any{ 37 | "role": "user", 38 | "content": "Hello", 39 | }, 40 | }, 41 | }, 42 | template: "{ .messages[0].role }", 43 | expected: "user", 44 | }, 45 | { 46 | name: "message content", 47 | payload: map[string]any{ 48 | "model": "gpt-4o", 49 | "messages": []any{ 50 | map[string]any{ 51 | "role": "user", 52 | "content": "Hello", 53 | }, 54 | }, 55 | }, 56 | template: "{ .messages[0].content }", 57 | expected: "Hello", 58 | }, 59 | } 60 | 61 | for _, tc := range testCases { 62 | t.Run(tc.name, func(t *testing.T) { 63 | t.Parallel() 64 | assert.Equal(t, tc.expected, GetByJSONPath[string](tc.payload, tc.template)) 65 | }) 66 | } 67 | }) 68 | 69 | t.Run("number", func(t *testing.T) { 70 | t.Parallel() 71 | 72 | payload := map[string]any{ 73 | "code": 401, 74 | } 75 | 76 | assert.Equal(t, 401, GetByJSONPath[int](payload, "{ .code }")) 77 | }) 78 | 79 | t.Run("null", func(t *testing.T) { 80 | t.Parallel() 81 | 82 | t.Run("unknown nil", func(t *testing.T) { 83 | t.Parallel() 84 | 85 | payload := map[string]any{ 86 | "code": nil, 87 | } 88 | 89 | assert.Equal(t, "", GetByJSONPath[string](payload, "{ .code }")) 90 | }) 91 | 92 | t.Run("nil string", func(t *testing.T) { 93 | t.Parallel() 94 | 95 | type payload struct { 96 | Code *string `json:"code"` 97 | } 98 | 99 | p := payload{ 100 | Code: nil, 101 | } 102 | 103 | assert.Equal(t, "", GetByJSONPath[string](p, "{ .code }")) 104 | }) 105 | }) 106 | } 107 | -------------------------------------------------------------------------------- /pkg/utils/lo.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import "github.com/samber/lo" 4 | 5 | func FilterNonNil[T any](item T, _ int) bool { 6 | return !lo.IsNil(item) 7 | } 8 | 9 | func MapTypeAssert[F any, T any](item F, _ int) T { 10 | val, _ := any(item).(T) 11 | return val 12 | } 13 | 14 | func TypeAssertFrom[F any, 
T any](items []F) []T { 15 | filters := lo.Map(items, MapTypeAssert[F, T]) 16 | return lo.Filter(filters, FilterNonNil) 17 | } 18 | 19 | func Clone[T any, Slice ~[]T](collection Slice) Slice { 20 | return lo.Map(collection, func(item T, _ int) T { 21 | return item 22 | }) 23 | } 24 | -------------------------------------------------------------------------------- /samples/api-key-server/config.yaml: -------------------------------------------------------------------------------- 1 | api_keys: 2 | - api_key: "valid-api-key-1" 3 | is_valid: true 4 | allow_models: 5 | - "kebe/*" 6 | - "llama2-70b" 7 | - "openai/*" 8 | api_key_id: "1" 9 | user_id: "user-1" 10 | - api_key: "valid-api-key-2" 11 | is_valid: true 12 | allow_models: 13 | - "*" 14 | api_key_id: "2" 15 | user_id: "user-2" 16 | - api_key: "invalid-api-key" 17 | api_key_id: "3" 18 | is_valid: false 19 | allow_models: [] 20 | user_id: "xxx" -------------------------------------------------------------------------------- /samples/api-key-server/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "log" 7 | "log/slog" 8 | "net" 9 | "os" 10 | 11 | "google.golang.org/grpc" 12 | "gopkg.in/yaml.v3" 13 | 14 | "knoway.dev/api/service/v1alpha1" 15 | ) 16 | 17 | // APIKeyAuthResponse 结构体定义与之前相同 18 | type APIKeyAuthResponse struct { 19 | IsValid bool `yaml:"is_valid"` 20 | AllowModels []string `yaml:"allow_models"` 21 | APIKeyID string `yaml:"api_key_id"` 22 | UserID string `yaml:"user_id"` 23 | } 24 | 25 | type APIKeyAuthServer struct { 26 | v1alpha1.UnimplementedAuthServiceServer 27 | ValidAPIKeys map[string]*APIKeyAuthResponse // 存储从 YAML 加载的 API Key 信息 28 | } 29 | 30 | // 从 YAML 文件加载 API Keys 31 | func loadAPIKeysFromYAML(filePath string) (map[string]*APIKeyAuthResponse, error) { 32 | var apiKeyConfig struct { 33 | APIKeys []struct { 34 | APIKey string `yaml:"api_key"` 35 | IsValid bool `yaml:"is_valid"` 36 | AllowModels 
[]string `yaml:"allow_models"` 37 | APIKeyID string `yaml:"api_key_id"` 38 | UserID string `yaml:"user_id"` 39 | } `yaml:"api_keys"` 40 | } 41 | 42 | // 读取文件内容 43 | data, err := os.ReadFile(filePath) 44 | if err != nil { 45 | return nil, fmt.Errorf("failed to read YAML file: %w", err) 46 | } 47 | 48 | // 解析 YAML 数据 49 | if err := yaml.Unmarshal(data, &apiKeyConfig); err != nil { 50 | return nil, fmt.Errorf("failed to unmarshal YAML data: %w", err) 51 | } 52 | 53 | // 将解析的数据转换为 map 结构 54 | validAPIKeys := make(map[string]*APIKeyAuthResponse) 55 | for _, apiKey := range apiKeyConfig.APIKeys { 56 | validAPIKeys[apiKey.APIKey] = &APIKeyAuthResponse{ 57 | IsValid: apiKey.IsValid, 58 | AllowModels: apiKey.AllowModels, 59 | APIKeyID: apiKey.APIKeyID, 60 | UserID: apiKey.UserID, 61 | } 62 | } 63 | 64 | return validAPIKeys, nil 65 | } 66 | 67 | // APIKeyAuth 处理 API Key 验证请求 68 | func (s *APIKeyAuthServer) APIKeyAuth(ctx context.Context, req *v1alpha1.APIKeyAuthRequest) (*v1alpha1.APIKeyAuthResponse, error) { 69 | // 从 YAML 加载的 API Keys 70 | if res, exists := s.ValidAPIKeys[req.GetApiKey()]; exists { 71 | // 返回相应的认证结果 72 | return &v1alpha1.APIKeyAuthResponse{ 73 | IsValid: res.IsValid, 74 | AllowModels: res.AllowModels, 75 | ApiKeyId: res.APIKeyID, 76 | UserId: res.UserID, 77 | }, nil 78 | } 79 | 80 | // 如果无效的 API Key 返回错误响应 81 | return &v1alpha1.APIKeyAuthResponse{ 82 | IsValid: false, 83 | AllowModels: []string{}, 84 | }, nil 85 | } 86 | 87 | func main() { 88 | // 从 YAML 文件加载 API Key 配置 89 | validAPIKeys, err := loadAPIKeysFromYAML("samples/api-key-server/config.yaml") 90 | if err != nil { 91 | log.Fatalf("Error loading API keys from YAML: %v", err) 92 | } 93 | 94 | // 创建 gRPC 服务器实例 95 | server := grpc.NewServer() 96 | 97 | // 创建 APIKeyAuthServer 实例并注册 API Keys 98 | authServer := &APIKeyAuthServer{ 99 | ValidAPIKeys: validAPIKeys, 100 | } 101 | 102 | // 注册 AuthService 服务 103 | v1alpha1.RegisterAuthServiceServer(server, authServer) 104 | 105 | // 监听指定端口 106 | listener, err 
#!/usr/bin/env bash
#
# Code freeze: cut a release-<MINOR_VERSION> branch from the current branch,
# then bump MINOR_VERSION in the Makefile on the originating branch.
#
# Environment:
#   GITLAB_HOST        GitLab base URL (default: https://gitlab.daocloud.cn)
#   GITLAB_CI_TOKEN    token used for glab authentication and for pushing
#   CI_BUILD_REF_NAME  branch the freeze is started from
#   NEXT_VERSION       optional next minor version, "<major>.<minor>" with an
#                      optional leading "v"; when unset it is derived from the
#                      Makefile by incrementing the minor number

set -ex

CUR_DIR=$(
  cd -- "$(dirname "$0")" >/dev/null 2>&1
  pwd -P
)

export GITLAB_HOST=${GITLAB_HOST:-https://gitlab.daocloud.cn}

PURE_HOST=${GITLAB_HOST//https:\/\//}

glab auth login -t ${GITLAB_CI_TOKEN} -h ${PURE_HOST}
glab auth status

minor_version=$(grep "MINOR_VERSION ?=" ${CUR_DIR}/../Makefile | sed -r 's/MINOR_VERSION \?= (.*)/\1/g' | xargs)

if [ -n "${NEXT_VERSION}" ]; then
  # An explicit NEXT_VERSION is validated and then used as given. POSIX
  # bracket expressions are used because `\d` is not portable in `grep -E`.
  if ! echo "${NEXT_VERSION}" | grep -qE '^v?[0-9]+\.[0-9]+$'; then
    echo "Invalid NEXT_VERSION: ${NEXT_VERSION}, expected <major>.<minor> (optional leading v)"
    exit 1
  fi
  # MINOR_VERSION in the Makefile carries no "v" prefix (the derived value
  # below has none either), so drop it when the caller supplied one.
  NEXT_VERSION=${NEXT_VERSION#v}
else
  # No override: bump the minor number of the current MINOR_VERSION.
  major_number=$(echo ${minor_version} | awk -F. '{print $1}')
  minor_number=$(echo ${minor_version} | awk -F. '{print $2}')
  NEXT_VERSION=${major_number}.$((${minor_number} + 1))
fi

if [ -n "${GITLAB_CI_TOKEN}" ]; then
  git remote set-url origin https://gitlab-ci-token:${GITLAB_CI_TOKEN}@gitlab.daocloud.cn/ndx/ai/knoway.git
fi

# Refuse to freeze twice for the same minor version.
if git ls-remote --exit-code origin release-${minor_version} &>/dev/null; then
  echo "release-${minor_version} branch already exists"
  exit 1
fi

if ! git config user.name; then
  git config user.name "Auto Release Bot"
  git config user.email "knoway-auto-release@daocloud.io"
fi

git checkout -b release-${minor_version}

# change version
# The edit is intentionally left uncommitted here: the release branch is
# pushed with the old MINOR_VERSION, and the modified Makefile is carried back
# to ${CI_BUILD_REF_NAME} by the checkout further down.
if [ "$(uname)" = "Darwin" ]; then
  sed -i "" "s/MINOR_VERSION ?=.*/MINOR_VERSION ?= ${NEXT_VERSION}/g" ${CUR_DIR}/../Makefile
else
  sed -i "s/MINOR_VERSION ?=.*/MINOR_VERSION ?= ${NEXT_VERSION}/g" ${CUR_DIR}/../Makefile
fi

# push release branch
git push origin release-${minor_version}

# create label
glab label create --color="#ed9121" -n cherry-pick-release-${minor_version}

git checkout ${CI_BUILD_REF_NAME}

git add ${CUR_DIR}/../Makefile
git commit -m "Code freeze and bump MINOR_VERSION to ${NEXT_VERSION}"

# push origin branch
git push origin ${CI_BUILD_REF_NAME}
#!/usr/bin/env bash
#
# Generate a Markdown changelog from the merge requests merged between
# PRE_VERSION and CI_BUILD_REF_NAME.
#
# Usage: gen-change-logs.sh [OUTFILE]
#
# Environment:
#   GITLAB_HOST        GitLab base URL (default: https://gitlab.daocloud.cn)
#   GITLAB_CI_TOKEN    token used for glab authentication
#   PRE_VERSION        previous release tag; lower bound of the log (required)
#   CI_BUILD_REF_NAME  ref whose history is scanned
#   CUR_VERSION        version being released (default: v0.0.0)

set -ex

CUR_DIR=$(
  cd -- "$(dirname "$0")" >/dev/null 2>&1
  pwd -P
)

# Fail fast: without PRE_VERSION the git log range below is invalid and the
# script would otherwise write an empty changelog.
: "${PRE_VERSION:?PRE_VERSION must be set to the previous release tag}"

export GITLAB_HOST=${GITLAB_HOST:-https://gitlab.daocloud.cn}
PURE_HOST=${GITLAB_HOST//https:\/\//}

glab auth login -t ${GITLAB_CI_TOKEN} -h ${PURE_HOST}
glab auth status

CUR_VERSION=${CUR_VERSION:-v0.0.0}

OUTFILE=${1:-${CUR_DIR}/../changes/CHANGELOG-${CUR_VERSION}.md}

mkdir -p "$(dirname "${OUTFILE}")"

# Print the IDs of all merge requests referenced as "(!<id>)" at the end of a
# commit line between PRE_VERSION and CI_BUILD_REF_NAME, one per line.
getallmrs() {
  git fetch origin --tags ${PRE_VERSION} &>/dev/null
  # `sort -u` de-duplicates after sorting; `uniq` on unsorted input only
  # drops adjacent repeats, so an MR could be listed more than once.
  git log ${CI_BUILD_REF_NAME} ^${PRE_VERSION} | grep -E '\(![^\)]+\)$' | sed -r 's/.*\(\!(.*)\)$/\1/g' | sort -u
}

features=""
bugs=""

# Every MR labelled kind/feature goes under "Feature"; everything else is
# listed under "Bug".
for mr in $(getallmrs); do
  cont=$(glab mr view ${mr})
  title=$(echo "${cont}" | grep -E '^title:' | sed 's/title:\t//g')
  author=$(echo "${cont}" | grep -E '^author:' | sed 's/author:\t//g')
  labels=$(echo "${cont}" | grep -E '^labels:' | sed 's/labels:\t//g')
  if echo "${labels}" | grep -E 'kind/feature' &>/dev/null; then
    echo "mr ${mr} is a feature"
    features+="- ${title}(!${mr}) by @${author}
"
  else
    echo "mr ${mr} is not a feature"
    bugs+="- ${title}(!${mr}) by @${author}
"
  fi
done

echo "
# ${CUR_VERSION} Change logs

## Change since ${PRE_VERSION}

### Changes by Kind

#### Bug

${bugs}

#### Feature

${features}

" >"${OUTFILE}"
>>/tmp/cherry-pick.log 30 | git config user.name "Auto Cherry-pick Bot" 31 | git config user.email "cherry-pick-bot@daocloud.io" 32 | for commit in ${commits}; do 33 | if ! git cherry-pick ${commit} --allow-empty &>>/tmp/cherry-pick.log; then 34 | echo "cherry-pick ${commit} failed" 35 | echo '```' >>/tmp/cherry-pick.log 36 | cat /tmp/cherry-pick.log 37 | # check if already exists failed issue 38 | if glab issue list --in title --search "[manually cherry-pick required] !${CI_MERGE_REQUEST_IID}" | grep "!${CI_MERGE_REQUEST_IID}"; then 39 | echo "Issue already exists, skip" 40 | return 41 | fi 42 | author=$(glab mr view ${CI_MERGE_REQUEST_IID} | awk '/^author:/ {print $2}') 43 | # create an issue if cherry-pick failed. 44 | glab issue create \ 45 | --title "[manually cherry-pick required] !${CI_MERGE_REQUEST_IID} Auto cherry-pick to ${target_branch} error" \ 46 | --description "$(cat /tmp/cherry-pick.log)" \ 47 | --assignee ${author} 48 | return 49 | fi 50 | done 51 | title="Auto cherry-pick !${CI_MERGE_REQUEST_IID} to ${target_branch}" 52 | mr_state=$(glab mr view ${CI_MERGE_REQUEST_IID} | awk '/^state:/ {print $2}') 53 | if [[ "${mr_state}" != "merged" ]]; then 54 | # if mr is not merged, mark new mr as draft to avoid merge it by mistake 55 | title="Draft: ${title}" 56 | fi 57 | git remote set-url origin https://gitlab-ci-token:${GITLAB_CI_TOKEN}@${PURE_HOST}/${CI_PROJECT_PATH}.git 58 | git push origin ${branch} -f 59 | if ! glab mr list --source-branch=${branch} --target-branch=${target_branch} | grep "Auto cherry-pick"; then 60 | res=$(glab mr create --no-editor \ 61 | --remove-source-branch \ 62 | --source-branch ${branch} \ 63 | --target-branch ${target_branch} \ 64 | --title "${title}" \ 65 | --label auto-cherry-picked \ 66 | --description "Auto cherry-pick from !${CI_MERGE_REQUEST_IID}" &>/dev/stdout) 67 | glab mr note ${CI_MERGE_REQUEST_IID} -m "### Auto cherry-picked!
#!/usr/bin/env bash
#
# Cut a release from a release-<MINOR_VERSION> branch: generate the changelog,
# commit it, tag v<MINOR_VERSION>.<PATCH_VERSION>, push the branch and the tag,
# cherry-pick the changelog commit onto main, and create the GitLab release.
#
# Environment:
#   CI_BUILD_REF_NAME  release branch name, must look like release-<minor>
#   PRE_VERSION        previous release tag (required)
#   PATCH_VERSION      patch component of the version being released (required)
#   GITLAB_CI_TOKEN    token used for pushing and for the release API call

set -ex

CUR_DIR=$(
  cd -- "$(dirname "$0")" >/dev/null 2>&1
  pwd -P
)

MINOR_VERSION=$(echo ${CI_BUILD_REF_NAME} | sed -r 's/^release-(.*)$/\1/g')

# When the sed above did not strip a "release-" prefix the two values are
# equal, which means this is not a release branch. The right-hand side is
# quoted so it is compared literally instead of being treated as a glob.
if [[ ${CI_BUILD_REF_NAME} == "${MINOR_VERSION}" || ${MINOR_VERSION} == "" ]]; then
  echo "Invalid branch name: ${CI_BUILD_REF_NAME}, require running on release-.* branch"
  exit 1
fi

if [ -z "${PRE_VERSION}" ]; then
  echo you must specify PRE_VERSION var >&2
  exit 1
fi

if [ -z "${PATCH_VERSION}" ]; then
  # The variable checked here is PATCH_VERSION; name it in the message.
  echo you must specify PATCH_VERSION var >&2
  exit 1
fi

CUR_VERSION=v${MINOR_VERSION}.${PATCH_VERSION}

SHORT_VERSION=v${MINOR_VERSION}

echo "VERSION is ${CUR_VERSION}"
echo "SHORT_VERSION is ${SHORT_VERSION}"

if [ "${PRE_VERSION}" = "${CUR_VERSION}" ]; then
  echo PRE_VERSION should not be same as knoway in current version >&2
  exit 1
fi

git fetch

if ! git rev-list ${PRE_VERSION} >/dev/null; then
  echo "${PRE_VERSION} tag not exists" >&2
  exit 1
fi

if [ -n "${CI_BUILD_REF_NAME}" ]; then
  git checkout ${CI_BUILD_REF_NAME}
fi

# todo release notes
#cd ${CUR_DIR}/../tools/gen-release-notes
#mkdir -p ${CUR_DIR}/../changes/${SHORT_VERSION}
#go run . --oldRelease ${PRE_VERSION} --newRelease ${CUR_VERSION} --notes ${CUR_DIR}/../ --outDir ${CUR_DIR}/../changes/${SHORT_VERSION}

CUR_VERSION=${CUR_VERSION} bash ${CUR_DIR}/gen-change-logs.sh ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md

cd ${CUR_DIR}/..

if ! git config user.name; then
  git config user.name "Auto Release Bot"
  git config user.email "knoway-auto-release@daocloud.io"
fi

# we no need to sync api repo any more
# sh ${CUR_DIR}/sync-api-repo.sh ${CUR_VERSION}

cd ${CUR_DIR}/..

git add .

git commit -m "Release ${CUR_VERSION} and add release notes"

# The changelog doubles as the annotated tag message.
cat ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md | git tag -a ${CUR_VERSION} -F-

if [ -n "${GITLAB_CI_TOKEN}" ]; then
  git remote set-url origin https://gitlab-ci-token:${GITLAB_CI_TOKEN}@gitlab.daocloud.cn/ndx/ai/knoway.git
fi

# push to release branch
if [ -z "${CI_BUILD_REF_NAME}" ]; then
  git push origin $(git rev-parse --abbrev-ref HEAD)
else
  git push origin ${CI_BUILD_REF_NAME}
fi

COMMIT=$(git rev-parse HEAD)

# Push release notes to main branch also
git checkout main
git cherry-pick ${COMMIT}
git push origin main

# push tag
git push origin ${CUR_VERSION}

# Create the GitLab release; jq builds the JSON body so the changelog text is
# escaped correctly.
curl -s -v \
  -H "PRIVATE-TOKEN: ${GITLAB_CI_TOKEN}" \
  -H 'Content-Type: application/json' \
  'https://gitlab.daocloud.cn/api/v4/projects/ndx%2Fai%2Fknoway/releases' \
  -X POST \
  -d "$(echo '{}' | jq \
    --arg name "Release ${CUR_VERSION}" \
    --arg tag_name "${CUR_VERSION}" \
    --arg description "$(cat ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md)" \
    '.name = $name | .tag_name = $tag_name | .description = $description')"
# Commit the staged changes and push them to the CI branch.
git commit -m "Auto run gen code"

git push origin ${CI_BUILD_REF_NAME}
--------------------------------------------------------------------------------
/scripts/trivy.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Runs a trivy secret scan over the current directory, then a vulnerability
# scan over every image given on the command line.
#
# Usage: trivy.sh <severity> [image...]
#   $1    severity list handed to `trivy image --severity`, e.g. HIGH,CRITICAL
#   $2..  image references to scan, e.g.
#         release-ci.daocloud.io/mspider/mspider:v0.8.3-47-gd3ac6536
#         release-ci.daocloud.io/mspider/mspider-api-server:v0.8.3-47-gd3ac6536

set -o errexit
set -o nounset
set -o pipefail

# Unfixed vulnerabilities are ignored (--ignore-unfixed below).
# NOTE(review): an earlier comment here said CVE-2022-1996 is ignored until the
# next k8s.io/api release fixes it. No ignore rule for it is visible in this
# script, so presumably it lives in trivy's own configuration - confirm.

TRIVY_DB_REPOSITORY=${TRIVY_DB_REPOSITORY:-ghcr.io/aquasecurity/trivy-db}

trivy fs --scanners secret --secret-config ./.trivycert.yaml --exit-code 1 ./

# The first argument is the severity list, not an image: remember it and drop
# it from "$@" so the loop below only sees images. With no arguments at all
# nothing is shifted and the loop simply does not run.
severity="${1:-}"
if [[ $# -gt 0 ]]; then
    shift
fi

for image in "$@"; do
    trivy image --skip-dirs istio.io/istio --ignore-unfixed --db-repository="${TRIVY_DB_REPOSITORY}" --exit-code 1 --severity "${severity}" "${image}"
done
--------------------------------------------------------------------------------
/scripts/unit-test.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -o errexit
set -o nounset
set -o pipefail

set -x
# Package patterns to test; each one gets its own coverage profile.
PATH2TEST=(./pkg/... ./internal/...)
tmpDir=$(mktemp -d)
# Remove the scratch directory however the script exits; with errexit a failing
# test run would otherwise leave it behind.
trap 'rm -rf "${tmpDir}"' EXIT
mergeF="${tmpDir}/merge.out"
rm -f "${mergeF}"
for i in "${!PATH2TEST[@]}"; do
    cov_file="${tmpDir}/${i}.cover"
    GOMAXPROCS=8 go test --race --v -covermode=atomic -coverpkg="${PATH2TEST[i]}" -coverprofile="${cov_file}" "${PATH2TEST[i]}"
    # Collect the profile without its "mode:" header line; the header is
    # written once when the profiles are merged below.
    grep -v '^mode:' "${cov_file}" >>"${mergeF}" || echo no coverage found
done
# Merge them: the header of the first profile, then every collected data line.
header=$(head -n1 "${tmpDir}/0.cover")
echo "${header}" >coverage.out
cat "${mergeF}" >>coverage.out
go tool cover -func=coverage.out
rm -f coverage.out
--------------------------------------------------------------------------------
/scripts/util.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash

set -ex
set -u
set -o pipefail

# This script holds shared bash utility functions.

# This function installs a Go tool with the 'go install' command.
# Parameters:
#  - $1: package name, such as "sigs.k8s.io/controller-tools/cmd/controller-gen"
#  - $2: package version, such as "v0.4.1"
# Note:
#   The command is run from a temporary directory, i.e. outside the current module, so that resolving the
#   tool's dependencies does not update this repository's 'go.mod' and 'go.sum' files.
function util::install_tools() {
    local package="$1"
    local version="$2"
    # Declared local so that scripts sourcing this file do not inherit it.
    local temp_path

    temp_path=$(mktemp -d)
    pushd "${temp_path}" >/dev/null
    GO111MODULE=on go install "${package}"@"${version}"
    # GOPATH may hold several ':'-separated entries; only the first one's bin
    # directory is appended to PATH.
    GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
    export PATH=$PATH:$GOPATH/bin
    popd >/dev/null
    rm -rf "${temp_path}"
}
--------------------------------------------------------------------------------
/scripts/verify-license.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Runs license-lint with the license-lint.yml found under CONFIG_PATH.
# CONFIG_PATH is optional and defaults to the parent of this script's directory.

set -ex

CONFIG_PATH=${CONFIG_PATH:-$(dirname "${BASH_SOURCE[0]}")/..}

if license-lint -config "${CONFIG_PATH}/license-lint.yml"; then
    echo "✅ License lint succeeded"
else
    echo # print one empty line, separate from warning messages.
    echo '❌ Please review the above error messages.'
    exit 1
fi
--------------------------------------------------------------------------------
/scripts/verify-staticcheck.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# Runs golangci-lint from the repository root, installing the pinned version
# first when no golangci-lint is found on PATH.

set -o errexit
set -o nounset
set -o pipefail

REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
GOLANGCI_LINT_PKG="github.com/golangci/golangci-lint/cmd/golangci-lint"
GOLANGCI_LINT_VER="v1.62.2"

cd "${REPO_ROOT}"
source "scripts/util.sh"

# `command -v` only looks the binary up; without -v the linter itself would be
# executed just to find out whether it exists.
command -v golangci-lint &>/dev/null || util::install_tools "${GOLANGCI_LINT_PKG}" "${GOLANGCI_LINT_VER}"

golangci-lint --version

if golangci-lint run -v --timeout=5m; then
    echo '✅ Congratulations! All Go source files have passed staticcheck.'
else
    echo '❌ Staticcheck failed. Please review the warnings above.'
    echo '💡 Tip: If these warnings are unclear, you can file an issue for help.'
    exit 1
fi
--------------------------------------------------------------------------------