├── .clang-format
├── .dockerignore
├── .gitignore
├── .gitlab-ci.yml
├── .golangci.yml
├── Dockerfile
├── LICENSE
├── Makefile
├── PROJECT
├── README.md
├── api
    ├── admin
    │   └── v1alpha1
    │   │   ├── config_dump.pb.go
    │   │   └── config_dump.proto
    ├── buf.gen.yaml
    ├── buf.lock
    ├── buf.yaml
    ├── clean.sh
    ├── clusters
    │   └── v1alpha1
    │   │   ├── cluster.pb.go
    │   │   └── cluster.proto
    ├── filters
    │   └── v1alpha1
    │   │   ├── api_key_auth.pb.go
    │   │   ├── api_key_auth.proto
    │   │   ├── rate_limit.pb.go
    │   │   └── rate_limit.proto
    ├── google
    │   └── protobuf
    │   │   ├── any.proto
    │   │   ├── descriptor.proto
    │   │   ├── duration.proto
    │   │   ├── empty.proto
    │   │   ├── struct.proto
    │   │   ├── timestamp.proto
    │   │   └── wrappers.proto
    ├── listeners
    │   └── v1alpha1
    │   │   ├── chat_listener.pb.go
    │   │   ├── chat_listener.proto
    │   │   ├── common.pb.go
    │   │   ├── common.proto
    │   │   ├── image_listener.pb.go
    │   │   └── image_listener.proto
    ├── route
    │   └── v1alpha1
    │   │   ├── route.pb.go
    │   │   └── route.proto
    ├── service
    │   └── v1alpha1
    │   │   ├── apikey_auth.pb.go
    │   │   ├── apikey_auth.proto
    │   │   ├── apikey_auth_grpc.pb.go
    │   │   ├── usage_stats.pb.go
    │   │   ├── usage_stats.proto
    │   │   └── usage_stats_grpc.pb.go
    └── v1alpha1
    │   ├── common_types.go
    │   ├── groupversion_info.go
    │   ├── imagegenerationbackend_types.go
    │   ├── llmbackend_types.go
    │   ├── modelroute_types.go
    │   └── zz_generated.deepcopy.go
├── changes
    ├── v0.1
    │   ├── CHANGELOG-v0.1.0-rc4.md
    │   ├── CHANGELOG-v0.1.0-rc5.md
    │   ├── CHANGELOG-v0.1.0.md
    │   ├── CHANGELOG-v0.1.1.md
    │   └── CHANGELOG-v0.1.2.md
    ├── v0.2
    │   └── CHANGELOG-v0.2.0-rc0.md
    └── v0.3
    │   ├── CHANGELOG-v0.3.0-rc0.md
    │   └── CHANGELOG-v0.3.0-rc1.md
├── cmd
    ├── admin
    │   └── admin.go
    ├── gateway
    │   ├── devClusters.go
    │   └── proxy.go
    ├── main.go
    └── server
    │   └── server.go
├── config
    ├── config.go
    ├── config.yaml
    ├── crd
    │   ├── bases
    │   │   ├── llm.knoway.dev_imagegenerationbackends.yaml
    │   │   ├── llm.knoway.dev_llmbackends.yaml
    │   │   └── llm.knoway.dev_modelroutes.yaml
    │   ├── kustomization.yaml
    │   └── kustomizeconfig.yaml
    ├── rbac
    │   ├── imagegenerationbackend_editor_role.yaml
    │   ├── imagegenerationbackend_viewer_role.yaml
    │   ├── llmbackend_editor_role.yaml
    │   ├── llmbackend_viewer_role.yaml
    │   ├── modelroute_admin_role.yaml
    │   ├── modelroute_editor_role.yaml
    │   ├── modelroute_viewer_role.yaml
    │   └── role.yaml
    └── samples
    │   ├── kustomization.yaml
    │   ├── llm_v1alpha1_imagegenerationbackend.yaml
    │   ├── llm_v1alpha1_llmbackend.yaml
    │   └── llm_v1alpha1_modelroute.yaml
├── cspell.config.yaml
├── go.mod
├── go.sum
├── hack
    └── boilerplate.go.txt
├── internal
    └── controller
    │   ├── backends.go
    │   ├── common.go
    │   ├── common_test.go
    │   ├── controller_test.go
    │   ├── enums.go
    │   ├── imagegenerationbackend_controller.go
    │   ├── imagegenerationbackend_controller_test.go
    │   ├── llmbackend_controller.go
    │   ├── llmbackend_controller_test.go
    │   ├── modelroute_controller.go
    │   ├── route.go
    │   └── status.go
├── license-lint.yml
├── manifests
    └── knoway
    │   ├── .helmignore
    │   ├── Chart.yaml
    │   ├── templates
    │       ├── _commons.tpl
    │       ├── _helpers.tpl
    │       ├── clusterrole.yaml
    │       ├── clusterrolebinding.yaml
    │       ├── configmap.yaml
    │       ├── deployment.yaml
    │       ├── hpa.yaml
    │       ├── llm.knoway.dev_imagegenerationbackends.yaml
    │       ├── llm.knoway.dev_llmbackends.yaml
    │       ├── llm.knoway.dev_modelroutes.yaml
    │       ├── service.yaml
    │       └── serviceaccount.yaml
    │   └── values.yaml
├── pkg
    ├── bootkit
    │   ├── bootkit.go
    │   ├── bootkit_test.go
    │   ├── lifecycle.go
    │   ├── lifecycle_test.go
    │   ├── options.go
    │   └── options_test.go
    ├── clusters
    │   ├── cluster
    │   │   └── cluster.go
    │   ├── filters
    │   │   ├── config.go
    │   │   └── openai
    │   │   │   ├── request.go
    │   │   │   └── response.go
    │   ├── interface.go
    │   └── manager
    │   │   └── cluster.go
    ├── constants
    │   ├── config.go
    │   └── listener.go
    ├── filters
    │   ├── auth
    │   │   ├── auth.go
    │   │   └── auth_test.go
    │   ├── config.go
    │   ├── ratelimit
    │   │   ├── local.go
    │   │   ├── rate_limit.go
    │   │   ├── rate_limit_test.go
    │   │   └── redis.go
    │   └── usage
    │   │   └── usage.go
    ├── listener
    │   ├── common.go
    │   ├── handler.go
    │   ├── listener.go
    │   ├── manager
    │   │   ├── chat
    │   │   │   ├── chat_completions.go
    │   │   │   ├── completions.go
    │   │   │   ├── listener.go
    │   │   │   └── models.go
    │   │   └── image
    │   │   │   ├── image_generations.go
    │   │   │   └── listener.go
    │   └── middlewares.go
    ├── metadata
    │   └── metadata.go
    ├── object
    │   ├── completion.go
    │   ├── error.go
    │   ├── errors.go
    │   └── images.go
    ├── observation
    │   └── attributes.go
    ├── protoutils
    │   └── proto.go
    ├── redis
    │   └── client.go
    ├── registry
    │   └── config
    │   │   ├── registry.go
    │   │   └── registry_test.go
    ├── route
    │   ├── loadbalance
    │   │   ├── load_balance.go
    │   │   └── load_balance_test.go
    │   ├── manager
    │   │   └── manager.go
    │   ├── route.go
    │   └── route
    │   │   └── route.go
    ├── types
    │   ├── openai
    │   │   ├── chat_completions_request.go
    │   │   ├── chat_completions_request_test.go
    │   │   ├── chat_completions_response.go
    │   │   ├── chat_completions_response_test.go
    │   │   ├── chat_completions_stream.go
    │   │   ├── chat_completions_stream_test.go
    │   │   ├── common.go
    │   │   ├── common_test.go
    │   │   ├── error_test.go
    │   │   ├── errors.go
    │   │   ├── event.go
    │   │   ├── http.go
    │   │   ├── image_generations_request.go
    │   │   ├── image_generations_request_test.go
    │   │   ├── image_generations_response.go
    │   │   ├── image_generations_response_test.go
    │   │   ├── jsonpatch.go
    │   │   ├── jsonpatch_test.go
    │   │   ├── testdata
    │   │   │   ├── GoogleSampleWebpImage.webp
    │   │   │   ├── SampleGIFImage_135kbmb.gif
    │   │   │   ├── SampleJPGImage_100kbmb.jpg
    │   │   │   └── SamplePNGImage_100kbmb.png
    │   │   └── usage.go
    │   └── sse
    │   │   └── event.go
    └── utils
    │   ├── crd_common_hash.go
    │   ├── http.go
    │   ├── json.go
    │   ├── json_test.go
    │   ├── lo.go
    │   ├── string.go
    │   └── string_test.go
├── samples
    └── api-key-server
    │   ├── config.yaml
    │   └── main.go
└── scripts
    ├── build-or-download-binaries.sh
    ├── code-freeze.sh
    ├── copy-crds.sh
    ├── gen-change-logs.sh
    ├── gen-check.sh
    ├── pr-status-manage.sh
    ├── release-version.sh
    ├── run-make-gen.sh
    ├── trivy.sh
    ├── unit-test.sh
    ├── util.sh
    ├── verify-license.sh
    └── verify-staticcheck.sh


/.clang-format:
--------------------------------------------------------------------------------
 1 | ---
 2 | Language:     Cpp
 3 | BasedOnStyle: Google
 4 | ---
 5 | Language: Proto
 6 | BasedOnStyle: Google
 7 | IndentWidth:     4
 8 | AlignConsecutiveAssignments: true
 9 | CommentPragmas: XValidation
10 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file
2 | # Ignore build and test binaries.
3 | bin/
4 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Binaries for programs and plugins
 2 | *.exe
 3 | *.exe~
 4 | *.dll
 5 | *.so
 6 | *.dylib
 7 | bin/*
 8 | Dockerfile.cross
 9 | dist/*
10 | out/*
11 | 
12 | # Test binary, built with `go test -c`
13 | *.test
14 | 
15 | # Output of the go coverage tool, specifically when used with LiteIDE
16 | *.out
17 | 
18 | # Go workspace file
19 | go.work
20 | 
21 | # Kubernetes Generated files - skip generated files, except for vendored files
22 | !vendor/**/zz_generated.*
23 | 
24 | # editor and IDE paraphernalia
25 | .idea
26 | .vscode
27 | *.swp
28 | *.swo
29 | *~
30 | config/local.yaml
31 | 


--------------------------------------------------------------------------------
/.golangci.yml:
--------------------------------------------------------------------------------
 1 | linters:
 2 |   enable-all: true
 3 |   disable:
 4 |     - depguard
 5 |     - exportloopref
 6 |     - execinquery
 7 |     - gomnd
 8 |     - funlen
 9 |     - containedctx
10 |     - exhaustruct
11 |     - testpackage
12 |     - varnamelen
13 |     - maintidx
14 |     - err113
15 |     - nlreturn
16 |     - wrapcheck
17 |     - tagliatelle
18 |     - paralleltest
19 |     - lll
20 |     - contextcheck
21 |     - gochecknoglobals
22 |     - tagalign
23 |     - nilnil
24 |     - godot
25 |     - godox
26 |     - gci
27 |     - gocognit
28 |     - gocyclo
29 |     - cyclop
30 |     - ireturn
31 |     - gofumpt
32 |     - gochecknoinits
33 | 
34 | linters-settings:
35 |   wsl:
36 |     allow-assign-and-call: false
37 |     strict-append: false
38 |     allow-trailing-comment: true
39 |     allow-cuddle-declarations: true
40 |     allow-separated-leading-comment: true
41 |   revive:
42 |     rules:
43 |       - name: blank-imports
44 |         disabled: true
45 |   nestif:
46 |     # Minimal complexity of if statements to report.
47 |     # Default: 5
48 |     min-complexity: 9
49 |   dupl:
50 |     # Tokens count to trigger issue.
51 |     # Default: 150
52 |     threshold: 600
53 |   mnd:
54 |     ignored-functions:
55 |       - "context.WithTimeout"
56 |       - "strconv.ParseComplex"
57 |     ignored-files:
58 |       - "examples/.*"
59 |   gocritic:
60 |     disabled-checks:
61 |       - ifElseChain
62 |   gosec:
63 |     excludes:
64 |       - G115
65 | 
66 | issues:
67 |   exclude:
68 |     - "if statements should only be cuddled with assignments" # from wsl
69 |     - "if statements should only be cuddled with assignments used in the if statement itself" # from wsl
70 |     - "assignments should only be cuddled with other assignments" # from wsl. false positive case: var a bool\nb := true
71 |   exclude-rules:
72 |     - path: _test\.go
73 |       linters:
74 |         - perfsprint
75 |   exclude-dirs:
76 |     - apis
77 |     - api
78 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: collect the prebuilt binary under /files so the runtime stage can copy it in one step.
 2 | # NOTE(review): alpine 3.15 no longer receives security updates upstream; consider a newer tag if the mirror has one.
 3 | FROM docker.m.daocloud.io/alpine:3.15 AS artifacts
 4 | 
 5 | # APP is the binary name; TARGETOS/TARGETARCH select the out/<os>/<arch>/ build output directory.
 6 | ARG APP
 7 | ARG TARGETOS
 8 | ARG TARGETARCH
 9 | 
10 | COPY out/$TARGETOS/$TARGETARCH/$APP /files/app/${APP}
11 | 
12 | # Stage 2: runtime image.
13 | FROM docker.m.daocloud.io/alpine:3.15
14 | 
15 | WORKDIR /app
16 | 
17 | # Build args are scoped to a single stage, so they are declared again here.
18 | ARG APP
19 | ARG TARGETOS
20 | ARG TARGETARCH
21 | 
22 | # Persist APP in the container environment; CMD expands it at start-up.
23 | # `ENV key=value` replaces the legacy space-separated form, which Docker discourages.
24 | ENV APP=${APP}
25 | 
26 | ARG VERSION
27 | ENV VERSION=${VERSION}
28 | 
29 | COPY --from=artifacts /files /
30 | 
31 | # Shell form is needed to expand ${APP}; `exec` replaces the shell so the
32 | # application runs as PID 1 and receives SIGTERM on `docker stop` / pod termination.
33 | CMD exec /app/${APP}
34 | 


--------------------------------------------------------------------------------
/PROJECT:
--------------------------------------------------------------------------------
 1 | # Code generated by tool. DO NOT EDIT.
 2 | # This file is used to track the info used to scaffold your project
 3 | # and allow the plugins properly work.
 4 | # More info: https://book.kubebuilder.io/reference/project-config.html
 5 | domain: knoway.dev
 6 | layout:
 7 | - go.kubebuilder.io/v4
 8 | projectName: knoway
 9 | repo: knoway.dev
10 | resources:
11 | - api:
12 |     crdVersion: v1
13 |     namespaced: true
14 |   domain: knoway.dev
15 |   group: llm
16 |   kind: LLMBackend
17 |   path: knoway.dev/api/v1alpha1
18 |   version: v1alpha1
19 | - api:
20 |     crdVersion: v1
21 |     namespaced: true
22 |   controller: true
23 |   domain: knoway.dev
24 |   group: llm
25 |   kind: ImageGenerationBackend
26 |   path: knoway.dev/api/v1alpha1
27 |   version: v1alpha1
28 | - api:
29 |     crdVersion: v1
30 |     namespaced: true
31 |   controller: true
32 |   domain: knoway.dev
33 |   group: llm
34 |   kind: ModelRoute
35 |   path: knoway.dev/api/v1alpha1
36 |   version: v1alpha1
37 | version: "3"
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # knoway
 2 | 
 3 | > An Envoy-inspired, ultimate LLM-first gateway for LLM serving and downstream application developers and enterprises
 4 | 
 5 | ## Description
 6 | 
 7 | A lightweight, easy-to-use dedicated gateway with a variety of LLM-specific optimizations and features. You can think of it as Nginx, but for LLMs and for upcoming supported models (such as Stable Diffusion).
 8 | 
 9 | ## Features
10 | 
11 | - 💬 **LLM-first**: Designed for LLMs, with optimizations and features that are specific to LLMs.
12 | - 🕸️ **Envoy Inspired**: Inspired by Envoy, with a similar architecture and feature set. If you are already familiar with Envoy, you will find this codebase easy to use and understand.
13 | - 💻 **Single command deployment**: Deploy the gateway with a single command, just like `nginx -c nginx.conf`.
14 | - 🚢 **Kubernetes Native**: CRDs and control-plane implementations are included out of the box; run `helm install knoway` and you are ready to go.
15 | 
16 | Some of the LLM-specific optimizations and features include:
17 | 
18 | - 👷 **Serverless boot loader**: Able to boot up the upstream Pods of serving services on demand, making LLM serving more cost-effective.
19 | - ✅ **Fault tolerance**: Fault tolerance for LLMs, with retries, circuit breaking, etc. when dealing with external providers.
20 | - 🚥 **Rate limiting**: Rate limiting based on tokens, prompts, etc., to protect LLM serving services from being abused.
21 | - 📚 **Semantic Cache**: Caching based on the semantics of prompts and tokens — a CDN for LLMs.
22 | - 📖 **Semantic Route**: Routing based on the difficulty and semantic meaning of prompts, etc., to make LLM serving services more efficient by choosing the right models.
23 | - 🔍 **OpenTelemetry**: OpenTelemetry support, with the ability to trace the calls to LLMs, and the gateway itself.
24 | 
25 | ## Getting Started
26 | 
27 | ### Prerequisites
28 | 
29 | - `go` version v1.22.0+
30 | - `docker` version 17.03+.
31 | - `kubectl` version v1.11.3+.
32 | - Access to a Kubernetes v1.11.3+ cluster.
33 | 
34 | ### To Deploy on the cluster
35 | 
36 | ```console
37 | $ export VERSION=v0.0.0-xxxx
38 | $ helm repo add knoway https://release-ci.daocloud.io/chartrepo/knoway
39 | $ helm repo update
40 | $ helm upgrade --install --create-namespace -n knoway-system knoway knoway/knoway --version=${VERSION} \
41 |     --set global.imageRegistry=release-ci.daocloud.io \
42 |     --set config.auth_server_url="<upstream gRPC auth server url>" \
43 |     --set config.stats_server_url="<upstream gRPC stats server url>"
44 | ```
45 | 
46 | ### To Uninstall
47 | 
48 | ```console
49 | helm uninstall knoway -n knoway-system
50 | ```
51 | 
52 | ## Project Distribution
53 | 
54 | Following are the steps to build the installer and distribute this project to users.
55 | 
56 | 1. Build the installer for the image built and published in the registry:
57 | 
58 | ```sh
59 | make build-installer IMG=<some-registry>/knoway:tag
60 | ```
61 | 
62 | NOTE: The makefile target mentioned above generates an 'install.yaml'
63 | file in the dist directory. This file contains all the resources built
64 | with Kustomize, which are necessary to install this project without
65 | its dependencies.
66 | 
67 | 2. Using the installer
68 | 
69 | Users can just run `kubectl apply -f <URL for YAML BUNDLE>` to install the project, e.g.:
70 | 
71 | ```sh
72 | kubectl apply -f https://raw.githubusercontent.com/<org>/knoway/<tag or branch>/dist/install.yaml
73 | ```
74 | 
75 | ## Contributing
76 | 
77 | > [!NOTE]
78 | > Run `make help` for more information on all potential `make` targets
79 | 
80 | More information can be found via the [Kubebuilder Documentation](https://book.kubebuilder.io/introduction.html)
81 | 
82 | ## License
83 | 
84 | Copyright 2024.
85 | 
86 | Licensed under the Apache License, Version 2.0 (the "License");
87 | you may not use this file except in compliance with the License.
88 | You may obtain a copy of the License at
89 | 
90 |     http://www.apache.org/licenses/LICENSE-2.0
91 | 
92 | Unless required by applicable law or agreed to in writing, software
93 | distributed under the License is distributed on an "AS IS" BASIS,
94 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
95 | See the License for the specific language governing permissions and
96 | limitations under the License.
97 | 
98 | 


--------------------------------------------------------------------------------
/api/admin/v1alpha1/config_dump.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.admin.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | import "google/protobuf/duration.proto";
 7 | 
 8 | option go_package = "knoway.dev/api/admin/v1alpha1";
 9 | 
10 | message ConfigDump {  // Bundles three lists of configuration objects, each element packed as google.protobuf.Any.
11 |     repeated google.protobuf.Any listeners = 1;  // presumably listener configs from knoway.listeners.v1alpha1 — TODO confirm
12 |     repeated google.protobuf.Any clusters  = 2;  // presumably knoway.clusters.v1alpha1.Cluster messages — TODO confirm
13 |     repeated google.protobuf.Any routes    = 3;  // presumably route configs from the route API package — TODO confirm
14 | }
15 | 


--------------------------------------------------------------------------------
/api/buf.gen.yaml:
--------------------------------------------------------------------------------
 1 | version: v1
 2 | plugins:
 3 |   - name: go
 4 |     out: .
 5 |     opt: paths=source_relative
 6 | #    opt: use_proto_names=true
 7 |   - name: go-grpc
 8 |     out: .
 9 |     opt: paths=source_relative
10 | #  - name: golang-deepcopy
11 | #    out: .
12 | #    opt: paths=source_relative
13 | 


--------------------------------------------------------------------------------
/api/buf.lock:
--------------------------------------------------------------------------------
1 | # Generated by buf. DO NOT EDIT.
2 | version: v1
3 | 


--------------------------------------------------------------------------------
/api/buf.yaml:
--------------------------------------------------------------------------------
 1 | version: v1
 2 | build:
 3 |   excludes:
 4 |     - common-protos
 5 | lint:
 6 |   allow_comment_ignores: true
 7 |   use:
 8 |     - BASIC
 9 |   except:
10 |     - FIELD_LOWER_SNAKE_CASE
11 |     - PACKAGE_DIRECTORY_MATCH
12 | 


--------------------------------------------------------------------------------
/api/clean.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Delete generated files whose names end in one of PATTERNS; paths are relative to the current directory.
3 | set -eu
4 | PATTERNS=".validate.go _deepcopy.gen.go .gen.json gr.gen.go .pb.go _json.gen.go .pb.gw.go .swagger.json .deepcopy.go"
5 | for p in $PATTERNS; do  # intentionally unquoted so the list splits on whitespace
6 |     rm -f ./**/**/*"${p}"  # without globstar `**` acts like `*`: files two directories down
7 |     rm -f ./**/*"${p}"  # files one directory down; -f keeps an unmatched glob from failing under `set -e`
8 | done
9 | 


--------------------------------------------------------------------------------
/api/clusters/v1alpha1/cluster.proto:
--------------------------------------------------------------------------------
  1 | syntax = "proto3";
  2 | 
  3 | package knoway.clusters.v1alpha1;
  4 | 
  5 | import "google/protobuf/any.proto";
  6 | import "google/protobuf/struct.proto";
  7 | 
  8 | option go_package = "knoway.dev/api/clusters/v1alpha1";
  9 | 
 10 | message ClusterFilter {  // A named filter entry with a type-erased config payload; used by Cluster.filters.
 11 |     string name                = 1;  // Filter name; presumably selects the filter implementation — TODO confirm
 12 |     google.protobuf.Any config = 2;  // Filter-specific settings packed as google.protobuf.Any.
 13 | }
 14 | 
 15 | enum LoadBalancePolicy {  // Load-balancing policy choices; used by Cluster.loadBalancePolicy.
 16 |     LOAD_BALANCE_POLICY_UNSPECIFIED = 0;  // Zero value: no policy set.
 17 |     ROUND_ROBIN                     = 1;
 18 |     LEAST_CONNECTION                = 2;
 19 |     IP_HASH                         = 3;
 20 | 
 21 |     // CUSTOM means the load balance policy is defined by the filters.
 22 |     CUSTOM = 15;  // NOTE(review): values 4-14 are skipped, presumably left for future built-in policies — confirm
 23 | }
 24 | 
 25 | message TLSConfig {  // Placeholder message: no fields are defined yet (referenced by Cluster.tlsConfig).
 26 |     // todo
 27 | }
 28 | 
 29 | message Upstream {  // Upstream endpoint settings; used by Cluster.upstream.
 30 |     string url = 1;  // Upstream URL. NOTE(review): field number 2 is unused; add `reserved 2;` if a field was removed.
 31 |     message Header {  // One header expressed as a key/value pair.
 32 |         string key   = 1;
 33 |         string value = 2;
 34 |     }
 35 |     repeated Header headers                           = 3;  // presumably attached to upstream requests — TODO confirm
 36 |     int32 timeout                                     = 4;  // NOTE(review): unit is not stated here (seconds? milliseconds?) — confirm
 37 |     map<string, google.protobuf.Value> defaultParams  = 5;  // presumably applied when the request omits the key — TODO confirm
 38 |     map<string, google.protobuf.Value> overrideParams = 6;  // presumably replaces values supplied by the request — TODO confirm
 39 |     repeated string removeParamKeys                   = 7;  // presumably keys stripped from the request — TODO confirm
 40 | }
 41 | 
 42 | enum ClusterType {  // Cluster type choices; used by Cluster.type.
 43 |     CLUSTER_TYPE_UNSPECIFIED = 0;  // Zero value: type not set.
 44 |     LLM                      = 1;
 45 |     IMAGE_GENERATION         = 2;
 46 | }
 47 | 
 48 | enum ClusterProvider {  // Provider choices; used by Cluster.provider.
 49 |     CLUSTER_PROVIDER_UNSPECIFIED = 0;  // Zero value: provider not set.
 50 |     OPEN_AI                      = 1;
 51 |     VLLM                         = 2;
 52 |     OLLAMA                       = 3;
 53 | }
 54 | 
 55 | message ClusterMeteringPolicy {  // Metering options for a cluster; used by Cluster.meteringPolicy.
 56 |     enum SizeFrom {  // Which image size is used for accounting in image generation.
 57 |         SIZE_FROM_UNSPECIFIED = 0;  // Zero value: no source selected.
 58 |         // For image generation, the size of the generated image is determined
 59 |         // by the input parameters.
 60 |         //
 61 |         // For example, even if the output image is 1024x1024, as long as the
 62 |         // input parameter specified 256x256, the size of the generated image
 63 |         // will be accounted as 256x256.
 64 |         SIZE_FROM_INPUT = 1;
 65 |         // For image generation, the size of the generated image is determined
 66 |         // by the output image. This is done by parsing through the actual
 67 |         // generated image file header by using Golang's std library to
 68 |         // determine the size of the image.
 69 |         //
 70 |         // For example, no matter what the input specified, if the output image
 71 |         // is 1024x1024, the size of the generated image will be accounted as
 72 |         // 1024x1024.
 73 |         SIZE_FROM_OUTPUT = 2;
 74 |         // For image generation, the size of the generated image is determined
 75 |         // by the greatest size of the input parameters and output image
 76 |         // resolution.
 77 |         //
 78 |         // For example, if the input parameter specified 256x256 and the output
 79 |         // image is 1024x1024, the size of the generated image will be accounted
 80 |         // as 1024x1024. On the other hand, if the input parameter specified
 81 |         // 1024x1024 and the output image is 256x256, the size of the generated
 82 |         // image will be accounted as 1024x1024.
 83 |         SIZE_FROM_GREATEST = 3;
 84 |     }
 85 | 
 86 |     optional SizeFrom sizeFrom = 1;  // `optional` gives explicit presence: unset is distinguishable from SIZE_FROM_UNSPECIFIED.
 87 | }
 88 | 
 89 | message Cluster {  // Cluster definition: name, upstream, load-balancing policy, filters, provider, type and metering.
 90 |     string name                          = 1;
 91 |     LoadBalancePolicy loadBalancePolicy  = 2;
 92 |     Upstream upstream                    = 3;
 93 |     TLSConfig tlsConfig                  = 4;  // TLSConfig currently declares no fields.
 94 |     repeated ClusterFilter filters       = 5;
 95 |     ClusterProvider provider             = 6;
 96 |     int64 created                        = 7;  // NOTE(review): presumably a creation timestamp; unit/epoch is not stated here — confirm
 97 |     ClusterType type                     = 8;
 98 |     ClusterMeteringPolicy meteringPolicy = 9;  // Its SizeFrom values describe image-generation size accounting.
 99 | }
100 | 


--------------------------------------------------------------------------------
/api/filters/v1alpha1/api_key_auth.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.filters.v1alpha1;
 4 | 
 5 | import "google/protobuf/duration.proto";
 6 | 
 7 | option go_package = "knoway.dev/api/filters/v1alpha1";
 8 | 
 9 | message APIKeyAuthConfig {  // Configuration carrying the auth server connection settings.
10 |     message AuthServer {
11 |         string url                       = 1;  // Address of the auth server.
12 |         google.protobuf.Duration timeout = 2;  // Default is 3s
13 |     }
14 |     AuthServer auth_server = 3;  // NOTE(review): numbered 3 although 1 and 2 are unused in this message — confirm this is intentional
15 | }
16 | 
17 | message UsageStatsConfig {  // Configuration carrying the stats server connection settings.
18 |     message StatsServer {
19 |         string url                       = 1;  // Address of the stats server.
20 |         google.protobuf.Duration timeout = 2;  // Default is 3s
21 |     }
22 |     StatsServer stats_server = 3;  // NOTE(review): numbered 3 although 1 and 2 are unused in this message — confirm this is intentional
23 | }
24 | 
25 | message OpenAIRequestHandlerConfig {}  // Empty config message: no settings are defined.
26 | message OpenAIResponseHandlerConfig {}  // Empty config message: no settings are defined.
27 | 


--------------------------------------------------------------------------------
/api/filters/v1alpha1/rate_limit.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.filters.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | import "google/protobuf/duration.proto";
 7 | 
 8 | option go_package = "knoway.dev/api/filters/v1alpha1";
 9 | 
10 | message StringMatch {  // String matcher; used by RateLimitPolicy.match.
11 |     oneof match {  // At most one of the alternatives below can be set.
12 |         string exact  = 1;  // presumably requires the whole value to be equal — TODO confirm
13 |         string prefix = 2;  // presumably requires the value to start with this string — TODO confirm
14 |     }
15 | }
16 | 
17 | enum RateLimitBaseOn {  // Values for RateLimitPolicy.based_on.
18 |     RATE_LIMIT_BASE_ON_UNSPECIFIED = 0;  // Zero value: not set.
19 |     USER_ID                        = 1;
20 |     API_KEY                        = 2;
21 | }
22 | 
23 | message RateLimitPolicy {  // One rate-limit rule: a matcher, a limit, a keying dimension and a duration.
24 |     StringMatch match                 = 1;  // NOTE(review): what the matcher is compared against is not visible here — confirm
25 |     int32 limit                       = 2;  // presumably the maximum count allowed per `duration` — TODO confirm
26 |     RateLimitBaseOn based_on          = 3;  // Keying dimension: USER_ID or API_KEY.
27 |     google.protobuf.Duration duration = 4;  // presumably the window over which `limit` applies — TODO confirm
28 | }
29 | 
30 | // RateLimitConfig defines rate limiting configuration: policies, a mode (LOCAL or REDIS), a server prefix and a Redis server.
31 | message RateLimitConfig {
32 |     repeated RateLimitPolicy policies = 1;
33 |     RateLimitMode model               = 2;  // NOTE(review): typed RateLimitMode, so "model" looks like a typo for "mode"; renaming changes generated accessors and JSON keys.
34 |     string server_prefix              = 3;  // NOTE(review): purpose is not visible here — confirm
35 | 
36 |     RedisServer redis_server = 4;  // presumably only consulted when the mode is REDIS — TODO confirm
37 | }
38 | 
39 | enum RateLimitMode {  // Values for RateLimitConfig.model.
40 |     RATE_LIMIT_MODEL_UNSPECIFIED = 0;  // NOTE(review): "MODEL" looks like a typo for "MODE"; renaming would change the generated constant.
41 |     LOCAL                        = 1;
42 |     REDIS                        = 2;
43 | }
44 | 
45 | message RedisServer {  // Redis connection settings; used by RateLimitConfig.redis_server.
46 |     string url = 1;  // Redis server URL; accepted format is not stated here — TODO confirm
47 | }
48 | 


--------------------------------------------------------------------------------
/api/google/protobuf/empty.proto:
--------------------------------------------------------------------------------
 1 | // Protocol Buffers - Google's data interchange format
 2 | // Copyright 2008 Google Inc.  All rights reserved.
 3 | // https://developers.google.com/protocol-buffers/
 4 | //
 5 | // Redistribution and use in source and binary forms, with or without
 6 | // modification, are permitted provided that the following conditions are
 7 | // met:
 8 | //
 9 | //     * Redistributions of source code must retain the above copyright
10 | // notice, this list of conditions and the following disclaimer.
11 | //     * Redistributions in binary form must reproduce the above
12 | // copyright notice, this list of conditions and the following disclaimer
13 | // in the documentation and/or other materials provided with the
14 | // distribution.
15 | //     * Neither the name of Google Inc. nor the names of its
16 | // contributors may be used to endorse or promote products derived from
17 | // this software without specific prior written permission.
18 | //
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
31 | syntax = "proto3";
32 | 
33 | package google.protobuf;
34 | 
35 | option csharp_namespace     = "Google.Protobuf.WellKnownTypes";
36 | option go_package           = "google.golang.org/protobuf/types/known/emptypb";
37 | option java_package         = "com.google.protobuf";
38 | option java_outer_classname = "EmptyProto";
39 | option java_multiple_files  = true;
40 | option objc_class_prefix    = "GPB";
41 | option cc_enable_arenas     = true;
42 | 
43 | // A generic empty message that you can re-use to avoid defining duplicated
44 | // empty messages in your APIs. A typical example is to use it as the request
45 | // or the response type of an API method. For instance:
46 | //
47 | //     service Foo {
48 | //       rpc Bar(google.protobuf.Empty) returns (google.protobuf.Empty);
49 | //     }
50 | //
51 | // The JSON representation for `Empty` is empty JSON object `{}`.
52 | message Empty {}
53 | 


--------------------------------------------------------------------------------
/api/google/protobuf/struct.proto:
--------------------------------------------------------------------------------
 1 | // Protocol Buffers - Google's data interchange format
 2 | // Copyright 2008 Google Inc.  All rights reserved.
 3 | // https://developers.google.com/protocol-buffers/
 4 | //
 5 | // Redistribution and use in source and binary forms, with or without
 6 | // modification, are permitted provided that the following conditions are
 7 | // met:
 8 | //
 9 | //     * Redistributions of source code must retain the above copyright
10 | // notice, this list of conditions and the following disclaimer.
11 | //     * Redistributions in binary form must reproduce the above
12 | // copyright notice, this list of conditions and the following disclaimer
13 | // in the documentation and/or other materials provided with the
14 | // distribution.
15 | //     * Neither the name of Google Inc. nor the names of its
16 | // contributors may be used to endorse or promote products derived from
17 | // this software without specific prior written permission.
18 | //
19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
31 | syntax = "proto3";
32 | 
33 | package google.protobuf;
34 | 
35 | option csharp_namespace = "Google.Protobuf.WellKnownTypes";
36 | option cc_enable_arenas = true;
37 | option go_package       = "google.golang.org/protobuf/types/known/structpb";  // current upstream path; github.com/golang/protobuf/ptypes/struct is deprecated and only aliases these types
38 | option java_package     = "com.google.protobuf";
39 | option java_outer_classname = "StructProto";
40 | option java_multiple_files  = true;
41 | option objc_class_prefix    = "GPB";
42 | 
43 | // `Struct` represents a structured data value, consisting of fields
44 | // which map to dynamically typed values. In some languages, `Struct`
45 | // might be supported by a native representation. For example, in
46 | // scripting languages like JS a struct is represented as an
47 | // object. The details of that representation are described together
48 | // with the proto support for the language.
49 | //
50 | // The JSON representation for `Struct` is JSON object.
51 | message Struct {
52 |     // Unordered map of dynamically typed values.
53 |     map<string, Value> fields = 1;
54 | }
55 | 
56 | // `Value` represents a dynamically typed value which can be either
57 | // null, a number, a string, a boolean, a recursive struct value, or a
58 | // list of values. A producer of value is expected to set one of these
59 | // variants. Absence of any variant indicates an error.
60 | //
61 | // The JSON representation for `Value` is JSON value.
62 | message Value {
63 |     // The kind of value.
64 |     oneof kind {
65 |         // Represents a null value.
66 |         NullValue null_value = 1;
67 |         // Represents a double value.
68 |         double number_value = 2;
69 |         // Represents a string value.
70 |         string string_value = 3;
71 |         // Represents a boolean value.
72 |         bool bool_value = 4;
73 |         // Represents a structured value.
74 |         Struct struct_value = 5;
75 |         // Represents a repeated `Value`.
76 |         ListValue list_value = 6;
77 |     }
78 | }
79 | 
80 | // `NullValue` is a singleton enumeration to represent the null value for the
81 | // `Value` type union.
82 | //
83 | //  The JSON representation for `NullValue` is JSON `null`.
84 | enum NullValue {
85 |     // Null value.
86 |     NULL_VALUE = 0;
87 | }
88 | 
89 | // `ListValue` is a wrapper around a repeated field of values.
90 | //
91 | // The JSON representation for `ListValue` is JSON array.
92 | message ListValue {
93 |     // Repeated field of dynamically typed values.
94 |     repeated Value values = 1;
95 | }
96 | 


--------------------------------------------------------------------------------
/api/listeners/v1alpha1/chat_listener.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.listeners.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | import "listeners/v1alpha1/common.proto";
 7 | 
 8 | option go_package = "knoway.dev/api/listeners/v1alpha1";
 9 | 
10 | message ChatCompletionListener {
11 |     string name                     = 1;
12 |     repeated ListenerFilter filters = 2;
13 |     Log access_log                  = 3;
14 | }
15 | 


--------------------------------------------------------------------------------
/api/listeners/v1alpha1/common.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.listeners.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | 
 7 | option go_package = "knoway.dev/api/listeners/v1alpha1";
 8 | 
 9 | message ListenerFilter {
10 |     string name                = 1;
11 |     google.protobuf.Any config = 2;
12 | }
13 | 
14 | message Log {
15 |     bool enable = 1;
16 | }
17 | 


--------------------------------------------------------------------------------
/api/listeners/v1alpha1/image_listener.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.listeners.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | import "listeners/v1alpha1/common.proto";
 7 | 
 8 | option go_package = "knoway.dev/api/listeners/v1alpha1";
 9 | 
10 | message ImageListener {
11 |     string name                     = 1;
12 |     repeated ListenerFilter filters = 2;
13 |     Log access_log                  = 3;
14 | }
15 | 


--------------------------------------------------------------------------------
/api/route/v1alpha1/route.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.route.v1alpha1;
 4 | 
 5 | import "google/protobuf/any.proto";
 6 | import "google/protobuf/duration.proto";
 7 | 
 8 | option go_package = "knoway.dev/api/route/v1alpha1";
 9 | 
10 | message RouteFilter {
11 |     string name                = 1;
12 |     google.protobuf.Any config = 2;
13 | }
14 | 
15 | message StringMatch {
16 |     oneof match {
17 |         string exact  = 1;
18 |         string prefix = 2;
19 |     }
20 | }
21 | 
22 | message Match {
23 |     StringMatch model   = 1;
24 |     StringMatch message = 2;
25 | }
26 | 
27 | message RouteDestination {
28 |     string namespace      = 1;
29 |     string backend        = 2;
30 |     optional int32 weight = 3;
31 |     string cluster        = 4;
32 | }
33 | 
34 | message RouteTarget {
35 |     RouteDestination destination = 1;
36 | }
37 | 
38 | // See also:
39 | // Supported load balancers — envoy 1.34.0-dev-e3a97f documentation
40 | // https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/load_balancing/load_balancers#arch-overview-load-balancing-types
41 | enum LoadBalancePolicy {
42 |     LOAD_BALANCE_POLICY_UNSPECIFIED   = 0;
43 |     LOAD_BALANCE_POLICY_ROUND_ROBIN   = 1;
44 |     LOAD_BALANCE_POLICY_LEAST_REQUEST = 2;
45 | }
46 | 
47 | message RouteFallback {
48 |     // Only valid when the previous attempt has failed. Default: 0s
49 |     // (immediately).
50 |     optional google.protobuf.Duration pre_delay = 2;
51 |     // Only valid when the ongoing attempt has failed. Default: 0s
52 |     // (immediately).
53 |     optional google.protobuf.Duration post_delay = 3;
54 |     // Default: 3.
55 |     optional uint64 max_retries = 1;
56 | }
57 | 
58 | message Route {
59 |     string name                           = 1;
60 |     repeated Match matches                = 2;
61 |     repeated RouteFilter filters          = 3;
62 |     LoadBalancePolicy load_balance_policy = 4;
63 |     repeated RouteTarget targets          = 5;
64 |     optional RouteFallback fallback       = 6;
65 | }
66 | 


--------------------------------------------------------------------------------
/api/service/v1alpha1/apikey_auth.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | option go_package = "knoway.dev/api/service/v1alpha1";
 4 | 
 5 | package knoway.service.v1alpha1;
 6 | 
 7 | message APIKeyAuthRequest {
 8 |     string api_key = 1;
 9 | }
10 | 
11 | message APIKeyAuthResponse {
12 |     // is_valid required: If it is true, it means that the apikey is valid.
13 |     bool is_valid = 1;
14 |     // allow_models optional: If it is empty, it means that the apikey can
15 |     // access all models. If it is not empty, it means that the apikey can only
16 |     // access the specified models.
17 |     // The matching rules for each value follow the rules of glob.
18 |     repeated string allow_models = 2;
19 |     // api_key_id optional: It will be used in statistics to avoid leakage of
20 |     // sensitive information.
21 |     string api_key_id = 3;
22 |     // user_id optional: the apikey's owner, will be used in route matching.
23 |     string user_id = 4;
24 |     // deny_models optional: similar to allow_models, but the opposite.
25 |     // If it is not empty, it means that the apikey
26 |     // cannot access the specified models.
27 |     // The matching rules for each value follow the rules of glob.
28 |     // it has higher priority than allow_models.
29 |     repeated string deny_models = 5;
30 | }
31 | 
32 | service AuthService {
33 |     rpc APIKeyAuth(APIKeyAuthRequest) returns (APIKeyAuthResponse) {};
34 | }
35 | 


--------------------------------------------------------------------------------
/api/service/v1alpha1/apikey_auth_grpc.pb.go:
--------------------------------------------------------------------------------
  1 | // Code generated by protoc-gen-go-grpc. DO NOT EDIT.
  2 | // versions:
  3 | // - protoc-gen-go-grpc v1.2.0
  4 | // - protoc             (unknown)
  5 | // source: service/v1alpha1/apikey_auth.proto
  6 | 
  7 | package v1alpha1
  8 | 
  9 | import (
 10 | 	context "context"
 11 | 
 12 | 	grpc "google.golang.org/grpc"
 13 | 	codes "google.golang.org/grpc/codes"
 14 | 	status "google.golang.org/grpc/status"
 15 | )
 16 | 
 17 | // This is a compile-time assertion to ensure that this generated file
 18 | // is compatible with the grpc package it is being compiled against.
 19 | // Requires gRPC-Go v1.32.0 or later.
 20 | const _ = grpc.SupportPackageIsVersion7
 21 | 
 22 | // AuthServiceClient is the client API for AuthService service.
 23 | //
 24 | // For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
 25 | type AuthServiceClient interface {
 26 | 	APIKeyAuth(ctx context.Context, in *APIKeyAuthRequest, opts ...grpc.CallOption) (*APIKeyAuthResponse, error)
 27 | }
 28 | 
 29 | type authServiceClient struct {
 30 | 	cc grpc.ClientConnInterface
 31 | }
 32 | 
 33 | func NewAuthServiceClient(cc grpc.ClientConnInterface) AuthServiceClient {
 34 | 	return &authServiceClient{cc}
 35 | }
 36 | 
 37 | func (c *authServiceClient) APIKeyAuth(ctx context.Context, in *APIKeyAuthRequest, opts ...grpc.CallOption) (*APIKeyAuthResponse, error) {
 38 | 	out := new(APIKeyAuthResponse)
 39 | 	err := c.cc.Invoke(ctx, "/knoway.service.v1alpha1.AuthService/APIKeyAuth", in, out, opts...)
 40 | 	if err != nil {
 41 | 		return nil, err
 42 | 	}
 43 | 	return out, nil
 44 | }
 45 | 
 46 | // AuthServiceServer is the server API for AuthService service.
 47 | // All implementations must embed UnimplementedAuthServiceServer
 48 | // for forward compatibility
 49 | type AuthServiceServer interface {
 50 | 	APIKeyAuth(context.Context, *APIKeyAuthRequest) (*APIKeyAuthResponse, error)
 51 | 	mustEmbedUnimplementedAuthServiceServer()
 52 | }
 53 | 
 54 | // UnimplementedAuthServiceServer must be embedded to have forward compatible implementations.
 55 | type UnimplementedAuthServiceServer struct {
 56 | }
 57 | 
 58 | func (UnimplementedAuthServiceServer) APIKeyAuth(context.Context, *APIKeyAuthRequest) (*APIKeyAuthResponse, error) {
 59 | 	return nil, status.Errorf(codes.Unimplemented, "method APIKeyAuth not implemented")
 60 | }
 61 | func (UnimplementedAuthServiceServer) mustEmbedUnimplementedAuthServiceServer() {}
 62 | 
 63 | // UnsafeAuthServiceServer may be embedded to opt out of forward compatibility for this service.
 64 | // Use of this interface is not recommended, as added methods to AuthServiceServer will
 65 | // result in compilation errors.
 66 | type UnsafeAuthServiceServer interface {
 67 | 	mustEmbedUnimplementedAuthServiceServer()
 68 | }
 69 | 
 70 | func RegisterAuthServiceServer(s grpc.ServiceRegistrar, srv AuthServiceServer) {
 71 | 	s.RegisterService(&AuthService_ServiceDesc, srv)
 72 | }
 73 | 
 74 | func _AuthService_APIKeyAuth_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
 75 | 	in := new(APIKeyAuthRequest)
 76 | 	if err := dec(in); err != nil {
 77 | 		return nil, err
 78 | 	}
 79 | 	if interceptor == nil {
 80 | 		return srv.(AuthServiceServer).APIKeyAuth(ctx, in)
 81 | 	}
 82 | 	info := &grpc.UnaryServerInfo{
 83 | 		Server:     srv,
 84 | 		FullMethod: "/knoway.service.v1alpha1.AuthService/APIKeyAuth",
 85 | 	}
 86 | 	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
 87 | 		return srv.(AuthServiceServer).APIKeyAuth(ctx, req.(*APIKeyAuthRequest))
 88 | 	}
 89 | 	return interceptor(ctx, in, info, handler)
 90 | }
 91 | 
 92 | // AuthService_ServiceDesc is the grpc.ServiceDesc for AuthService service.
 93 | // It's only intended for direct use with grpc.RegisterService,
 94 | // and not to be introspected or modified (even as a copy)
 95 | var AuthService_ServiceDesc = grpc.ServiceDesc{
 96 | 	ServiceName: "knoway.service.v1alpha1.AuthService",
 97 | 	HandlerType: (*AuthServiceServer)(nil),
 98 | 	Methods: []grpc.MethodDesc{
 99 | 		{
100 | 			MethodName: "APIKeyAuth",
101 | 			Handler:    _AuthService_APIKeyAuth_Handler,
102 | 		},
103 | 	},
104 | 	Streams:  []grpc.StreamDesc{},
105 | 	Metadata: "service/v1alpha1/apikey_auth.proto",
106 | }
107 | 


--------------------------------------------------------------------------------
/api/service/v1alpha1/usage_stats.proto:
--------------------------------------------------------------------------------
 1 | syntax = "proto3";
 2 | 
 3 | package knoway.service.v1alpha1;
 4 | 
 5 | option go_package = "knoway.dev/api/service/v1alpha1";
 6 | 
 7 | message UsageReportRequest {
 8 |     string api_key_id = 1;
 9 |     // user_model_name: the name of the model that the user is using, such as
10 |     // "kebe/mnist".
11 |     string user_model_name = 2;
12 |     // upstream_model_name: the name of the model that the gateway sends the
13 |     // request to, such as "kebe-mnist".
14 |     string upstream_model_name = 3;
15 | 
16 |     message UsageImage {
17 |         uint64 width   = 1;
18 |         uint64 height  = 2;
19 |         uint64 numbers = 3;
20 |         string quality = 4;
21 |         string style   = 5;
22 |     }
23 | 
24 |     message Usage {
25 |         uint64 input_tokens      = 1;
26 |         uint64 output_tokens     = 2;
27 |         UsageImage input_images  = 3;
28 |         UsageImage output_images = 4;
29 |     }
30 |     Usage usage = 4;
31 | 
32 |     enum Mode {
33 |         MODE_UNSPECIFIED = 0;
34 |         // The MODE_PER_REQUEST mode means that each time a request is received,
35 |         // the usage of the request will be included.
36 |         // If the server fails to process, statistical data may be lost.
37 |         MODE_PER_REQUEST = 1;
38 |     }
39 |     Mode mode = 5;
40 | }
41 | 
42 | message UsageReportResponse {
43 |     // accepted required: If it is true, it means that the report is successful.
44 |     bool accepted = 1;
45 | }
46 | 
47 | service UsageStatsService {
48 |     rpc UsageReport(UsageReportRequest) returns (UsageReportResponse) {}
49 | }
50 | 


--------------------------------------------------------------------------------
/api/v1alpha1/common_types.go:
--------------------------------------------------------------------------------
 1 | package v1alpha1
 2 | 
 3 | type Header struct {
 4 | 	Key   string `json:"key,omitempty"`
 5 | 	Value string `json:"value,omitempty"`
 6 | }
 7 | 
 8 | // HeaderFromSource represents the source of a set of ConfigMaps or Secrets
 9 | type HeaderFromSource struct {
10 | 	// An optional identifier to prepend to each key in the ref.
11 | 	Prefix string `json:"prefix,omitempty"`
12 | 	// Type of the source (ConfigMap or Secret)
13 | 	RefType ValueFromType `json:"refType,omitempty"`
14 | 	// Name of the source
15 | 	RefName string `json:"refName,omitempty"`
16 | }
17 | 
18 | // ValueFromType defines the type of source for headers.
19 | // +kubebuilder:validation:Enum=ConfigMap;Secret
20 | type ValueFromType string
21 | 
22 | const (
23 | 	// ConfigMap indicates that the header source is a ConfigMap.
24 | 	ConfigMap ValueFromType = "ConfigMap"
25 | 	// Secret indicates that the header source is a Secret.
26 | 	Secret ValueFromType = "Secret"
27 | )
28 | 
29 | // StatusEnum defines the possible statuses for the LLMBackend, ImageGenerationBackend, and other types.
30 | type StatusEnum string
31 | 
32 | const (
33 | 	Unknown StatusEnum = "Unknown"
34 | 	Healthy StatusEnum = "Healthy"
35 | 	Failed  StatusEnum = "Failed"
36 | )
37 | 
38 | type Provider string
39 | 
40 | const (
41 | 	ProviderOpenAI Provider = "OpenAI"
42 | 	ProviderVLLM   Provider = "vLLM"
43 | 	ProviderOllama Provider = "Ollama"
44 | )
45 | 
46 | type BackendType string
47 | 
48 | const (
49 | 	BackendTypeLLM             BackendType = "LLM"
50 | 	BackendTypeImageGeneration BackendType = "ImageGeneration"
51 | )
52 | 


--------------------------------------------------------------------------------
/api/v1alpha1/groupversion_info.go:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2024.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */
16 | 
17 | // Package v1alpha1 contains API Schema definitions for the llm v1alpha1 API group
18 | // +kubebuilder:object:generate=true
19 | // +groupName=llm.knoway.dev
20 | package v1alpha1
21 | 
22 | import (
23 | 	"k8s.io/apimachinery/pkg/runtime/schema"
24 | 	"sigs.k8s.io/controller-runtime/pkg/scheme"
25 | )
26 | 
27 | var (
28 | 	// GroupVersion is group version used to register these objects
29 | 	GroupVersion = schema.GroupVersion{Group: "llm.knoway.dev", Version: "v1alpha1"}
30 | 
31 | 	// SchemeBuilder is used to add go types to the GroupVersionKind scheme
32 | 	SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion}
33 | 
34 | 	// AddToScheme adds the types in this group-version to the given scheme.
35 | 	AddToScheme = SchemeBuilder.AddToScheme
36 | )
37 | 


--------------------------------------------------------------------------------
/changes/v0.1/CHANGELOG-v0.1.0-rc4.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.1.0-rc4 Change logs
 3 | 
 4 | ## Change since v0.1.0-rc3
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - fix: properly handle status codes for ApiKeyAuth service call(!55) by @nekoayaka.zhang
11 | - Fix codefreeze version(!56) by @nicole.li
12 | 
13 | 
14 | #### Feature
15 | 
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/changes/v0.1/CHANGELOG-v0.1.0-rc5.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.1.0-rc5 Change logs
 3 | 
 4 | ## Change since v0.1.0-rc4
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - fix: rare to reproduce panic when dealing with type asserting through generic helpers and slice type(!57) by @nekoayaka.zhang
11 | - fix: data race when calling filters(!58) by @nekoayaka.zhang
12 | - release 0.1 merge main(!59) by @nicole.li
13 | 
14 | 
15 | #### Feature
16 | 
17 | 
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/changes/v0.1/CHANGELOG-v0.1.0.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.1.0 Change logs
 3 | 
 4 | ## Change since v0.1.0-rc3
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - fix: properly handle status codes for ApiKeyAuth service call(!55) by @nekoayaka.zhang
11 | - Fix codefreeze version(!56) by @nicole.li
12 | - fix: rare to reproduce panic when dealing with type asserting through generic helpers and slice type(!57) by @nekoayaka.zhang
13 | - fix: data race when calling filters(!58) by @nekoayaka.zhang
14 | - release 0.1 merge main(!59) by @nicole.li
15 | - Auto cherry-pick !61 to release-0.1(!63) by @ndx-robot
16 | 
17 | 
18 | #### Feature
19 | 
20 | 
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/changes/v0.1/CHANGELOG-v0.1.1.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.1.1 Change logs
 3 | 
 4 | ## Change since v0.1.0
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - Auto cherry-pick !67 to release-0.1(!68) by @ndx-robot
11 | 
12 | 
13 | #### Feature
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/changes/v0.1/CHANGELOG-v0.1.2.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.1.2 Change logs
 3 | 
 4 | ## Change since v0.1.1
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | 
11 | 
12 | #### Feature
13 | 
14 | 
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/changes/v0.2/CHANGELOG-v0.2.0-rc0.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.2.0-rc0 Change logs
 3 | 
 4 | ## Change since v0.1.2
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - feat: dedicated denied error for denied(!61) by @nekoayaka.zhang
11 | - fix: vllm error handling(!65) by @nekoayaka.zhang
12 | - > fix stream missing content-type(!67) by @kebe.liu
13 | - Revert "feat: support metering usages for images generations"(!70) by @nekoayaka.zhang
14 | - update ci image(!72) by @nicole.li
15 | - feat: add rate limits(!74) by @nicole.li
16 | - fix status equal(!76) by @nicole.li
17 | - feat: add lb for model route(!77) by @xiaowu.zhu
18 | - >fix base cluster not found & image chat not found(!79) by @nicole.li
19 | - + support config listener via config file(!80) by @kebe.liu
20 | - + add config_dump endpoint for debug(!81) by @kebe.liu
21 | - feat: supported ratelimit redis(!82) by @nicole.li
22 | - fix(fallback): not handling default LB policy & not handling invalid content-type with errored status code(!84) by @nekoayaka.zhang
23 | - > fix duplicated requests send due to fallback(!87) by @kebe.liu
24 | 
25 | 
26 | #### Feature
27 | 
28 | - feat: FromString util support ptr(!62) by @nekoayaka.zhang
29 | - feat: image listener(!69) by @nekoayaka.zhang
30 | - feat: size config(!71) by @nekoayaka.zhang
31 | - feat: added ModelRoute CRD(!73) by @nekoayaka.zhang
32 | - chore(route): align route.proto fields with new LB and retry ModelRoute CRD design(!75) by @nekoayaka.zhang
33 | - feat(controller): model route(!78) by @nekoayaka.zhang
34 | - feat: fallback(!83) by @nekoayaka.zhang
35 | 
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/changes/v0.3/CHANGELOG-v0.3.0-rc0.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.3.0-rc0 Change logs
 3 | 
 4 | ## Change since v0.2.0-rc0
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - + remove llm request params(!85) by @nicole.li
11 | - refactor: use manager -> entity pattern(!88) by @nekoayaka.zhang
12 | - > rename ratelimit to filters(!89) by @kebe.liu
13 | - > fix 500 when ratelimited(!90) by @kebe.liu
14 | 
15 | 
16 | #### Feature
17 | 
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/changes/v0.3/CHANGELOG-v0.3.0-rc1.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # v0.3.0-rc1 Change logs
 3 | 
 4 | ## Change since v0.3.0-rc0
 5 | 
 6 | ### Changes by Kind
 7 | 
 8 | #### Bug
 9 | 
10 | - Auto cherry-pick !91 to release-0.3(!92) by @ndx-robot
11 | 
12 | 
13 | #### Feature
14 | 
15 | 
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/cmd/admin/admin.go:
--------------------------------------------------------------------------------
  1 | package admin
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"log/slog"
  6 | 	"net"
  7 | 	"net/http"
  8 | 	"time"
  9 | 
 10 | 	"knoway.dev/pkg/bootkit"
 11 | 
 12 | 	"google.golang.org/protobuf/encoding/protojson"
 13 | 
 14 | 	"knoway.dev/api/admin/v1alpha1"
 15 | 
 16 | 	"github.com/gorilla/mux"
 17 | 	"github.com/samber/lo"
 18 | 	"google.golang.org/protobuf/proto"
 19 | 	"google.golang.org/protobuf/types/known/anypb"
 20 | 
 21 | 	clustermanager "knoway.dev/pkg/clusters/manager"
 22 | 	"knoway.dev/pkg/listener"
 23 | 	routemanager "knoway.dev/pkg/route/manager"
 24 | )
 25 | 
 26 | type debugListener struct {
 27 | 	staticListeners []*anypb.Any
 28 | }
 29 | 
 30 | func NewAdminListener(staticListeners []*anypb.Any) (listener.Listener, error) {
 31 | 	return &debugListener{staticListeners: staticListeners}, nil
 32 | }
 33 | 
 34 | func (d *debugListener) Drain(ctx context.Context) error {
 35 | 	return nil
 36 | }
 37 | 
 38 | func (d *debugListener) HasDrained() bool {
 39 | 	return false
 40 | }
 41 | 
 42 | func sliceToAny[T proto.Message](s []T) []*anypb.Any {
 43 | 	anySlice := make([]*anypb.Any, 0, len(s))
 44 | 
 45 | 	for _, v := range s {
 46 | 		a, err := anypb.New(v)
 47 | 		if err != nil {
 48 | 			slog.Error("failed to convert to any", "err", err)
 49 | 			continue
 50 | 		}
 51 | 
 52 | 		anySlice = append(anySlice, a)
 53 | 	}
 54 | 
 55 | 	return anySlice
 56 | }
 57 | 
 58 | // configDump serves the current clusters, routes and static listeners as
 59 | // indented protobuf JSON.
 60 | func (d *debugListener) configDump(writer http.ResponseWriter, request *http.Request) {
 61 | 	cd := &v1alpha1.ConfigDump{
 62 | 		Clusters:  sliceToAny(clustermanager.DebugDumpAllClusters()),
 63 | 		Routes:    sliceToAny(routemanager.DebugDumpAllRoutes()),
 64 | 		Listeners: d.staticListeners,
 65 | 	}
 66 | 
 67 | 	// Only pretty-printing is switched on; all other marshal options keep
 68 | 	// their zero value. lo.Must1 panics if marshaling fails.
 69 | 	bs := lo.Must1(protojson.MarshalOptions{
 70 | 		Multiline: true,
 71 | 		Indent:    "  ",
 72 | 	}.Marshal(cd))
 73 | 
 74 | 	// Declare the payload type explicitly; otherwise net/http sniffs the body
 75 | 	// and labels the JSON as text/plain.
 76 | 	writer.Header().Set("Content-Type", "application/json")
 77 | 	_, _ = writer.Write(bs)
 78 | }
 79 | 
 80 | func (d *debugListener) RegisterRoutes(mux *mux.Router) error {
 81 | 	mux.HandleFunc("/config_dump", d.configDump)
 82 | 	return nil
 83 | }
 84 | 
 85 | func NewAdminServer(_ context.Context, staticListeners []*anypb.Any, addr string, lifecycle bootkit.LifeCycle) error {
 86 | 	m := listener.NewMux()
 87 | 	m.Register(NewAdminListener(staticListeners))
 88 | 
 89 | 	server, err := m.BuildServer(&http.Server{Addr: addr, ReadTimeout: time.Minute})
 90 | 	if err != nil {
 91 | 		return err
 92 | 	}
 93 | 
 94 | 	ln, err := net.Listen("tcp", addr)
 95 | 	if err != nil {
 96 | 		return err
 97 | 	}
 98 | 
 99 | 	lifecycle.Append(bootkit.LifeCycleHook{
100 | 		OnStart: func(ctx context.Context) error {
101 | 			slog.Info("Starting admin server ...", "addr", ln.Addr().String())
102 | 
103 | 			if err := server.Serve(ln); err != nil && err != http.ErrServerClosed {
104 | 				return err
105 | 			}
106 | 			return nil
107 | 		},
108 | 		OnStop: func(ctx context.Context) error {
109 | 			slog.Info("Stopping admin server ...")
110 | 
111 | 			if err := server.Shutdown(ctx); err != nil {
112 | 				return err
113 | 			}
114 | 
115 | 			slog.Info("Admin server stopped gracefully.")
116 | 			return nil
117 | 		},
118 | 	})
119 | 
120 | 	return nil
121 | }
122 | 


--------------------------------------------------------------------------------
/cmd/gateway/devClusters.go:
--------------------------------------------------------------------------------
 1 | package gateway
 2 | 
 3 | import (
 4 | 	"github.com/samber/lo"
 5 | 	"google.golang.org/protobuf/types/known/anypb"
 6 | 
 7 | 	clusters "knoway.dev/api/clusters/v1alpha1"
 8 | 	filters "knoway.dev/api/filters/v1alpha1"
 9 | 	"knoway.dev/pkg/bootkit"
10 | 	clustermanager "knoway.dev/pkg/clusters/manager"
11 | 	routemanager "knoway.dev/pkg/route/manager"
12 | )
13 | 
14 | var StaticClustersConfig = map[string]*clusters.Cluster{
15 | 	"openai/gpt-3.5-turbo": {
16 | 		Type:              clusters.ClusterType_LLM,
17 | 		Name:              "openai/gpt-3.5-turbo",
18 | 		Provider:          clusters.ClusterProvider_OPEN_AI,
19 | 		LoadBalancePolicy: clusters.LoadBalancePolicy_ROUND_ROBIN,
20 | 		Upstream: &clusters.Upstream{
21 | 			Url: "https://openrouter.ai/api/v1/chat/completions",
22 | 			Headers: []*clusters.Upstream_Header{
23 | 				{
24 | 					Key:   "Authorization",
25 | 					Value: "Bearer sk-or-v1-",
26 | 				},
27 | 			},
28 | 		},
29 | 		TlsConfig: nil,
30 | 		Filters: []*clusters.ClusterFilter{
31 | 			{
32 | 				Name: "openai-request-handler",
33 | 				Config: func() *anypb.Any {
34 | 					return lo.Must(anypb.New(&filters.OpenAIRequestHandlerConfig{}))
35 | 				}(),
36 | 			},
37 | 			{
38 | 				Name: "openai-response-handler",
39 | 				Config: func() *anypb.Any {
40 | 					return lo.Must(anypb.New(&filters.OpenAIResponseHandlerConfig{}))
41 | 				}(),
42 | 			},
43 | 		},
44 | 	},
45 | }
46 | 
47 | func StaticRegisterClusters(clusterDetails map[string]*clusters.Cluster, lifecycle bootkit.LifeCycle) error {
48 | 	for _, c := range clusterDetails {
49 | 		if err := clustermanager.UpsertAndRegisterCluster(c, lifecycle); err != nil {
50 | 			return err
51 | 		}
52 | 		if err := routemanager.RegisterBaseRouteWithConfig(routemanager.InitDirectModelRoute(c.GetName()), lifecycle); err != nil {
53 | 			return err
54 | 		}
55 | 	}
56 | 
57 | 	return nil
58 | }
59 | 


--------------------------------------------------------------------------------
/cmd/gateway/proxy.go:
--------------------------------------------------------------------------------
 1 | package gateway
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"errors"
 6 | 	"fmt"
 7 | 	"log/slog"
 8 | 	"net"
 9 | 	"net/http"
10 | 	"time"
11 | 
12 | 	"google.golang.org/protobuf/types/known/anypb"
13 | 
14 | 	"google.golang.org/protobuf/proto"
15 | 
16 | 	"knoway.dev/api/listeners/v1alpha1"
17 | 	"knoway.dev/pkg/bootkit"
18 | 	"knoway.dev/pkg/listener"
19 | 	"knoway.dev/pkg/listener/manager/chat"
20 | 	"knoway.dev/pkg/listener/manager/image"
21 | )
22 | 
23 | // StartGateway builds one HTTP server for every listener described in cfg
24 | // and appends start/stop hooks for it to lifecycle. listenerAddr defaults to
25 | // ":8080" when empty; an empty cfg or an unknown listener type is an error.
26 | func StartGateway(_ context.Context, lifecycle bootkit.LifeCycle, listenerAddr string, cfg []*anypb.Any) error {
27 | 	if listenerAddr == "" {
28 | 		listenerAddr = ":8080"
29 | 	}
30 | 	if len(cfg) == 0 {
31 | 		return errors.New("no listener found")
32 | 	}
33 | 	mux := listener.NewMux()
34 | 
35 | 	for _, c := range cfg {
36 | 		obj, err := anypb.UnmarshalNew(c, proto.UnmarshalOptions{})
37 | 		if err != nil {
38 | 			return err
39 | 		}
40 | 		switch obj.(type) {
41 | 		case *v1alpha1.ChatCompletionListener:
42 | 			mux.Register(chat.NewOpenAIChatListenerConfigs(obj, lifecycle))
43 | 		case *v1alpha1.ImageListener:
44 | 			mux.Register(image.NewOpenAIImageListenerConfigs(obj, lifecycle))
45 | 		default:
46 | 			return fmt.Errorf("%s is not a valid listener", c.GetTypeUrl())
47 | 		}
48 | 	}
49 | 
50 | 	server, err := mux.BuildServer(&http.Server{Addr: listenerAddr, ReadTimeout: time.Minute})
51 | 	if err != nil {
52 | 		return err
53 | 	}
54 | 
55 | 	ln, err := net.Listen("tcp", listenerAddr)
56 | 	if err != nil {
57 | 		return err
58 | 	}
59 | 
60 | 	lifecycle.Append(bootkit.LifeCycleHook{
61 | 		OnStart: func(ctx context.Context) error {
62 | 			slog.Info("Starting gateway ...", "addr", ln.Addr().String())
63 | 			// errors.Is also recognizes a wrapped ErrServerClosed after Shutdown.
64 | 			if err := server.Serve(ln); err != nil && !errors.Is(err, http.ErrServerClosed) {
65 | 				return err
66 | 			}
67 | 			return nil
68 | 		},
69 | 		OnStop: func(ctx context.Context) error {
70 | 			slog.Info("Stopping gateway ...")
71 | 			if err := server.Shutdown(ctx); err != nil {
72 | 				return err
73 | 			}
74 | 			slog.Info("Gateway stopped gracefully.")
75 | 			return nil
76 | 		},
77 | 	})
78 | 
79 | 	return nil
80 | }
81 | 


--------------------------------------------------------------------------------
/cmd/main.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2024.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | */
 16 | 
 17 | package main
 18 | 
 19 | import (
 20 | 	"context"
 21 | 	"flag"
 22 | 	"log/slog"
 23 | 	"os"
 24 | 	"time"
 25 | 
 26 | 	"buf.build/go/protoyaml"
 27 | 	"github.com/samber/lo"
 28 | 	"google.golang.org/protobuf/types/known/anypb"
 29 | 	"sigs.k8s.io/yaml"
 30 | 
 31 | 	"knoway.dev/cmd/admin"
 32 | 
 33 | 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 34 | 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
 35 | 
 36 | 	knowaydevv1alpha1 "knoway.dev/api/v1alpha1"
 37 | 
 38 | 	"knoway.dev/cmd/gateway"
 39 | 	"knoway.dev/cmd/server"
 40 | 	"knoway.dev/config"
 41 | 	"knoway.dev/pkg/bootkit"
 42 | 
 43 | 	// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
 44 | 	// to ensure that exec-entrypoint and run can make use of them.
 45 | 	_ "k8s.io/client-go/plugin/pkg/client/auth"
 46 | 	// +kubebuilder:scaffold:imports
 47 | )
 48 | 
 49 | func init() { // populates the shared client-go scheme before main runs
 50 | 	utilruntime.Must(clientgoscheme.AddToScheme(clientgoscheme.Scheme))
 51 | 	// Add the knoway v1alpha1 API types to the same shared scheme.
 52 | 	utilruntime.Must(knowaydevv1alpha1.AddToScheme(clientgoscheme.Scheme))
 53 | 	// +kubebuilder:scaffold:scheme
 54 | }
 55 | 
 56 | // main parses the command-line flags, loads the YAML configuration, and
 57 | // registers the controller (or static clusters), the gateway, and the admin
 58 | // server with the bootkit application before starting it.
 59 | func main() {
 60 | 	var metricsAddr, probeAddr, listenerAddr, adminAddr, configPath string
 61 | 
 62 | 	flag.StringVar(&listenerAddr, "gateway-listener-address", ":8080", "The address the gateway listener binds to.")
 63 | 	flag.StringVar(&adminAddr, "admin-listener-address", "127.0.0.1:9080", "The address the admin listener binds to.")
 64 | 	flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
 65 | 	flag.StringVar(&metricsAddr, "metrics-bind-address", "0", "The address the metric endpoint binds to. "+
 66 | 		"Use the port :8080. If not set, it will be 0 in order to disable the metrics server")
 67 | 	flag.StringVar(&configPath, "config", "config/config.yaml", "Path to the configuration file")
 68 | 	flag.Parse()
 69 | 
 70 | 	cfg, err := config.LoadConfig(configPath)
 71 | 	if err != nil {
 72 | 		slog.Error("Failed to load configuration", "error", err)
 73 | 		// Exit non-zero so the shell or container runtime sees the startup failure.
 74 | 		os.Exit(1)
 75 | 	}
 76 | 
 77 | 	app := bootkit.New(bootkit.StartTimeout(time.Second * 10)) //nolint:mnd
 78 | 
 79 | 	logLevel := slog.LevelInfo
 80 | 	if cfg.Debug {
 81 | 		logLevel = slog.LevelDebug
 82 | 	}
 83 | 
 84 | 	slog.SetDefault(slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
 85 | 		Level: logLevel,
 86 | 	})))
 87 | 
 88 | 	// Development-only switch: when true, static clusters are registered instead of the controller.
 89 | 	devStaticServer := false
 90 | 
 91 | 	if devStaticServer {
 92 | 		app.Add(func(_ context.Context, lifeCycle bootkit.LifeCycle) error {
 93 | 			return gateway.StaticRegisterClusters(gateway.StaticClustersConfig, lifeCycle)
 94 | 		})
 95 | 	} else {
 96 | 		// Default path: start the controller with the metrics and probe addresses from the flags.
 97 | 		app.Add(func(ctx context.Context, lifeCycle bootkit.LifeCycle) error {
 98 | 			return server.StartController(ctx, lifeCycle,
 99 | 				metricsAddr,
100 | 				probeAddr,
101 | 				cfg.Controller)
102 | 		})
103 | 	}
104 | 
105 | 	staticListeners := toAnySlice(cfg.StaticListeners)
106 | 
107 | 	app.Add(func(ctx context.Context, lifeCycle bootkit.LifeCycle) error {
108 | 		return gateway.StartGateway(ctx, lifeCycle,
109 | 			listenerAddr,
110 | 			staticListeners)
111 | 	})
112 | 	app.Add(func(ctx context.Context, lifeCycle bootkit.LifeCycle) error {
113 | 		return admin.NewAdminServer(ctx, staticListeners, adminAddr, lifeCycle)
114 | 	})
115 | 
116 | 	app.Start()
117 | }
118 | 
119 | // toAnySlice converts every static listener map into an anypb.Any by
120 | // marshalling it to YAML and decoding that YAML with protoyaml.
121 | // The lo.Must* helpers are expected to panic when either step fails.
122 | func toAnySlice(cfg []map[string]interface{}) []*anypb.Any {
123 | 	converted := make([]*anypb.Any, len(cfg))
124 | 
125 | 	for i, listener := range cfg {
126 | 		raw := lo.Must1(yaml.Marshal(listener))
127 | 		msg := &anypb.Any{}
128 | 		lo.Must0(protoyaml.Unmarshal(raw, msg))
129 | 		converted[i] = msg
130 | 	}
131 | 
132 | 	return converted
133 | }
134 | 


--------------------------------------------------------------------------------
/config/config.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"os"
 6 | 
 7 | 	"gopkg.in/yaml.v3"
 8 | )
 9 | 
10 | type ControllerConfig struct { // controller options read from the "controller" section of the config file
11 | 	EnableLeaderElection bool `yaml:"enable_leader_election" json:"enable_leader_election"`
12 | 	SecureMetrics        bool `yaml:"secure_metrics" json:"secure_metrics"`
13 | 	EnableHTTP2          bool `yaml:"enable_http2" json:"enable_http_2"` // NOTE(review): JSON key "enable_http_2" differs from YAML key "enable_http2" — confirm this is intentional
14 | }
15 | 
16 | type Config struct { // top-level configuration decoded by LoadConfig
17 | 	Debug      bool             `yaml:"debug" json:"debug"`           // cmd/main.go switches the logger to debug level when true
18 | 	Controller ControllerConfig `yaml:"controller" json:"controller"` // cmd/main.go passes this to server.StartController
19 | 	// KubeConfig is the path to the kubeconfig file, used for local development, if empty, in-cluster config will be used.
20 | 	KubeConfig string `yaml:"kubeConfig" json:"kubeConfig"`
21 | 	// StaticListeners holds the raw listener definitions; cmd/main.go converts each entry into an anypb.Any.
22 | 	StaticListeners []map[string]interface{} `yaml:"staticListeners" json:"staticListeners"`
23 | }
24 | 
25 | // LoadConfig loads the configuration from the specified YAML file
26 | func LoadConfig(path string) (*Config, error) {
27 | 	file, err := os.Open(path)
28 | 	if err != nil {
29 | 		return nil, fmt.Errorf("failed to open config file: %w", err)
30 | 	}
31 | 	defer file.Close()
32 | 
33 | 	var cfg Config
34 | 
35 | 	decoder := yaml.NewDecoder(file)
36 | 	if err := decoder.Decode(&cfg); err != nil {
37 | 		return nil, fmt.Errorf("failed to decode config file: %w", err)
38 | 	}
39 | 
40 | 	return &cfg, nil
41 | }
42 | 


--------------------------------------------------------------------------------
/config/config.yaml:
--------------------------------------------------------------------------------
 1 | debug: true
 2 | controller:
 3 |   secure_metrics: false
 4 |   enable_http2: false
 5 | kubeConfig: ""
 6 | staticListeners:
 7 |   - "@type": type.googleapis.com/knoway.listeners.v1alpha1.ChatCompletionListener
 8 |     name: openai-chat
 9 |     filters:
10 |       - name: api-key-auth
11 |         config:
12 |           "@type": type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig
13 |           authServer:
14 |             url: localhost:8083
15 |             timeout: 3s
16 |       - config:
17 |           "@type": type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig
18 |           statsServer:
19 |             url: localhost:8083
20 |             timeout: 3s
21 |       # - config:
22 |       #     "@type": type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig
23 |       #     policies:
24 |       #       - basedOn: USER_ID
25 |       #         duration: 30s
26 | 
27 |     accessLog:
28 |       enable: true
29 |   - "@type": type.googleapis.com/knoway.listeners.v1alpha1.ImageListener
30 |     name: openai-image
31 |     filters:
32 |       - name: api-key-auth
33 |         config:
34 |           "@type": type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig
35 |           authServer:
36 |             url: localhost:8083
37 |             timeout: 3s
38 |       - config:
39 |           "@type": type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig
40 |           statsServer:
41 |             url: localhost:8083
42 |             timeout: 3s
43 |     accessLog:
44 |       enable: true
45 | 


--------------------------------------------------------------------------------
/config/crd/kustomization.yaml:
--------------------------------------------------------------------------------
 1 | # This kustomization.yaml is not intended to be run by itself,
 2 | # since it depends on service name and namespace that are out of this kustomize package.
 3 | # It should be run by config/default
 4 | resources:
 5 | - bases/llm.knoway.dev_llmbackends.yaml
 6 | - bases/llm.knoway.dev_imagegenerationbackends.yaml
 7 | - bases/llm.knoway.dev_modelroutes.yaml
 8 | # +kubebuilder:scaffold:crdkustomizeresource
 9 | 
10 | patches:
11 | # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix.
12 | # patches here are for enabling the conversion webhook for each CRD
13 | # +kubebuilder:scaffold:crdkustomizewebhookpatch
14 | 
15 | # [CERTMANAGER] To enable cert-manager, uncomment all the sections with [CERTMANAGER] prefix.
16 | # patches here are for enabling the CA injection for each CRD
17 | #- path: patches/cainjection_in_llmbackends.yaml
18 | # +kubebuilder:scaffold:crdkustomizecainjectionpatch
19 | 
20 | # [WEBHOOK] To enable webhook, uncomment the following section
21 | # the following config is for teaching kustomize how to do kustomization for CRDs.
22 | 
23 | #configurations:
24 | #- kustomizeconfig.yaml
25 | 


--------------------------------------------------------------------------------
/config/crd/kustomizeconfig.yaml:
--------------------------------------------------------------------------------
 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD
 2 | nameReference:
 3 | - kind: Service
 4 |   version: v1
 5 |   fieldSpecs:
 6 |   - kind: CustomResourceDefinition
 7 |     version: v1
 8 |     group: apiextensions.k8s.io
 9 |     path: spec/conversion/webhook/clientConfig/service/name
10 | 
11 | namespace:
12 | - kind: CustomResourceDefinition
13 |   version: v1
14 |   group: apiextensions.k8s.io
15 |   path: spec/conversion/webhook/clientConfig/service/namespace
16 |   create: false
17 | 
18 | varReference:
19 | - path: metadata/annotations
20 | 


--------------------------------------------------------------------------------
/config/rbac/imagegenerationbackend_editor_role.yaml:
--------------------------------------------------------------------------------
 1 | # This rule is not used by the project knoway itself.
 2 | # It is provided to allow the cluster admin to help manage permissions for users.
 3 | #
 4 | # Grants permissions to create, update, and delete resources within the llm.knoway.dev API group.
 5 | # This role is intended for users who need to manage these resources
 6 | # but should not control RBAC or manage permissions for others.
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   labels:
12 |     app.kubernetes.io/name: knoway
13 |     app.kubernetes.io/managed-by: kustomize
14 |   name: imagegenerationbackend-editor-role
15 | rules:
16 | - apiGroups:
17 |   - llm.knoway.dev
18 |   resources:
19 |   - imagegenerationbackends
20 |   verbs:
21 |   - create
22 |   - delete
23 |   - get
24 |   - list
25 |   - patch
26 |   - update
27 |   - watch
28 | - apiGroups:
29 |   - llm.knoway.dev
30 |   resources:
31 |   - imagegenerationbackends/status
32 |   verbs:
33 |   - get
34 | 


--------------------------------------------------------------------------------
/config/rbac/imagegenerationbackend_viewer_role.yaml:
--------------------------------------------------------------------------------
 1 | # This rule is not used by the project knoway itself.
 2 | # It is provided to allow the cluster admin to help manage permissions for users.
 3 | #
 4 | # Grants read-only access to llm.knoway.dev resources.
 5 | # This role is intended for users who need visibility into these resources
 6 | # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   labels:
12 |     app.kubernetes.io/name: knoway
13 |     app.kubernetes.io/managed-by: kustomize
14 |   name: imagegenerationbackend-viewer-role
15 | rules:
16 | - apiGroups:
17 |   - llm.knoway.dev
18 |   resources:
19 |   - imagegenerationbackends
20 |   verbs:
21 |   - get
22 |   - list
23 |   - watch
24 | - apiGroups:
25 |   - llm.knoway.dev
26 |   resources:
27 |   - imagegenerationbackends/status
28 |   verbs:
29 |   - get
30 | 


--------------------------------------------------------------------------------
/config/rbac/llmbackend_editor_role.yaml:
--------------------------------------------------------------------------------
 1 | # permissions for end users to edit llmbackends.
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | kind: ClusterRole
 4 | metadata:
 5 |   labels:
 6 |     app.kubernetes.io/name: knoway
 7 |     app.kubernetes.io/managed-by: kustomize
 8 |   name: llmbackend-editor-role
 9 | rules:
10 | - apiGroups:
11 |   - llm.knoway.dev
12 |   resources:
13 |   - llmbackends
14 |   verbs:
15 |   - create
16 |   - delete
17 |   - get
18 |   - list
19 |   - patch
20 |   - update
21 |   - watch
22 | - apiGroups:
23 |   - llm.knoway.dev
24 |   resources:
25 |   - llmbackends/status
26 |   verbs:
27 |   - get
28 | 


--------------------------------------------------------------------------------
/config/rbac/llmbackend_viewer_role.yaml:
--------------------------------------------------------------------------------
 1 | # permissions for end users to view llmbackends.
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | kind: ClusterRole
 4 | metadata:
 5 |   labels:
 6 |     app.kubernetes.io/name: knoway
 7 |     app.kubernetes.io/managed-by: kustomize
 8 |   name: llmbackend-viewer-role
 9 | rules:
10 | - apiGroups:
11 |   - llm.knoway.dev
12 |   resources:
13 |   - llmbackends
14 |   verbs:
15 |   - get
16 |   - list
17 |   - watch
18 | - apiGroups:
19 |   - llm.knoway.dev
20 |   resources:
21 |   - llmbackends/status
22 |   verbs:
23 |   - get
24 | 


--------------------------------------------------------------------------------
/config/rbac/modelroute_admin_role.yaml:
--------------------------------------------------------------------------------
 1 | # This rule is not used by the project knoway itself.
 2 | # It is provided to allow the cluster admin to help manage permissions for users.
 3 | #
 4 | # Grants full permissions ('*') over llm.knoway.dev.
 5 | # This role is intended for users authorized to modify roles and bindings within the cluster,
 6 | # enabling them to delegate specific permissions to other users or groups as needed.
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   labels:
12 |     app.kubernetes.io/name: knoway
13 |     app.kubernetes.io/managed-by: kustomize
14 |   name: modelroute-admin-role
15 | rules:
16 | - apiGroups:
17 |   - llm.knoway.dev
18 |   resources:
19 |   - modelroutes
20 |   verbs:
21 |   - '*'
22 | - apiGroups:
23 |   - llm.knoway.dev
24 |   resources:
25 |   - modelroutes/status
26 |   verbs:
27 |   - get
28 | 


--------------------------------------------------------------------------------
/config/rbac/modelroute_editor_role.yaml:
--------------------------------------------------------------------------------
 1 | # This rule is not used by the project knoway itself.
 2 | # It is provided to allow the cluster admin to help manage permissions for users.
 3 | #
 4 | # Grants permissions to create, update, and delete resources within the llm.knoway.dev API group.
 5 | # This role is intended for users who need to manage these resources
 6 | # but should not control RBAC or manage permissions for others.
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   labels:
12 |     app.kubernetes.io/name: knoway
13 |     app.kubernetes.io/managed-by: kustomize
14 |   name: modelroute-editor-role
15 | rules:
16 | - apiGroups:
17 |   - llm.knoway.dev
18 |   resources:
19 |   - modelroutes
20 |   verbs:
21 |   - create
22 |   - delete
23 |   - get
24 |   - list
25 |   - patch
26 |   - update
27 |   - watch
28 | - apiGroups:
29 |   - llm.knoway.dev
30 |   resources:
31 |   - modelroutes/status
32 |   verbs:
33 |   - get
34 | 


--------------------------------------------------------------------------------
/config/rbac/modelroute_viewer_role.yaml:
--------------------------------------------------------------------------------
 1 | # This rule is not used by the project knoway itself.
 2 | # It is provided to allow the cluster admin to help manage permissions for users.
 3 | #
 4 | # Grants read-only access to llm.knoway.dev resources.
 5 | # This role is intended for users who need visibility into these resources
 6 | # without permissions to modify them. It is ideal for monitoring purposes and limited-access viewing.
 7 | 
 8 | apiVersion: rbac.authorization.k8s.io/v1
 9 | kind: ClusterRole
10 | metadata:
11 |   labels:
12 |     app.kubernetes.io/name: knoway
13 |     app.kubernetes.io/managed-by: kustomize
14 |   name: modelroute-viewer-role
15 | rules:
16 | - apiGroups:
17 |   - llm.knoway.dev
18 |   resources:
19 |   - modelroutes
20 |   verbs:
21 |   - get
22 |   - list
23 |   - watch
24 | - apiGroups:
25 |   - llm.knoway.dev
26 |   resources:
27 |   - modelroutes/status
28 |   verbs:
29 |   - get
30 | 


--------------------------------------------------------------------------------
/config/rbac/role.yaml:
--------------------------------------------------------------------------------
 1 | ---
 2 | apiVersion: rbac.authorization.k8s.io/v1
 3 | kind: ClusterRole
 4 | metadata:
 5 |   name: manager-role
 6 | rules:
 7 | - apiGroups:
 8 |   - llm.knoway.dev
 9 |   resources:
10 |   - imagegenerationbackends
11 |   - llmbackends
12 |   - modelroutes
13 |   verbs:
14 |   - create
15 |   - delete
16 |   - get
17 |   - list
18 |   - patch
19 |   - update
20 |   - watch
21 | - apiGroups:
22 |   - llm.knoway.dev
23 |   resources:
24 |   - imagegenerationbackends/finalizers
25 |   - llmbackends/finalizers
26 |   - modelroutes/finalizers
27 |   verbs:
28 |   - update
29 | - apiGroups:
30 |   - llm.knoway.dev
31 |   resources:
32 |   - imagegenerationbackends/status
33 |   - llmbackends/status
34 |   - modelroutes/status
35 |   verbs:
36 |   - get
37 |   - patch
38 |   - update
39 | 


--------------------------------------------------------------------------------
/config/samples/kustomization.yaml:
--------------------------------------------------------------------------------
1 | ## Append samples of your project ##
2 | resources:
3 | - llm_v1alpha1_llmbackend.yaml
4 | - llm_v1alpha1_imagegenerationbackend.yaml
5 | - llm_v1alpha1_modelroute.yaml
6 | # +kubebuilder:scaffold:manifestskustomizesamples
7 | 


--------------------------------------------------------------------------------
/config/samples/llm_v1alpha1_imagegenerationbackend.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: llm.knoway.dev/v1alpha1
 2 | kind: ImageGenerationBackend
 3 | metadata:
 4 |   labels:
 5 |     app.kubernetes.io/name: knoway
 6 |     app.kubernetes.io/managed-by: kustomize
 7 |   name: imagegenerationbackend-sample
 8 | spec:
 9 |   provider: OpenAI
10 |   name: sd-3
11 |   upstream:
12 |     baseUrl: "https://api.openai.com/v1"
13 |     headers:
14 |       - key: "Authorization"
15 |         value: "Bearer sk-or-v1-xxxxxxxxxx"
16 |     timeout: 300 # ms
17 |     defaultParams:
18 |       openai:
19 |         'n': '1'
20 |         quality: 'standard'
21 |     overrideParams:
22 |       openai:
23 |         # upstream model
24 |         model: "stabilityai/sd-3"
25 |   filters:
26 |     - custom:
27 |         pluginName: "examplePlugin"
28 |         pluginVersion: "1.0.0"
29 | 


--------------------------------------------------------------------------------
/config/samples/llm_v1alpha1_llmbackend.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: llm.knoway.dev/v1alpha1
 2 | kind: LLMBackend
 3 | metadata:
 4 |   labels:
 5 |     app.kubernetes.io/name: knoway
 6 |     app.kubernetes.io/managed-by: kustomize
 7 |   name: llmbackend-sample
 8 | spec:
 9 |   #  type: public # public | private | ...
10 |   provider: OpenAI
11 |   modelName: gpt-3.5-turbo
12 |   upstream:
13 |     baseUrl: "https://openrouter.ai/api/v1"
14 |     headers:
15 |       - key: "Authorization"
16 |         value: "Bearer sk-or-v1-xxxxxxxxxx"
17 |     timeout: 300 # ms
18 |     defaultParams:
19 |       openai:
20 |         temperature: "0.5"
21 |         max_tokens: 100
22 |     overrideParams:
23 |       openai:
24 |         # upstream model
25 |         model: "openai/gpt-3.5-turbo"
26 |     removeParamKeys:
27 |       - negative_prompt
28 |   filters:
29 |     - custom:
30 |         pluginName: "examplePlugin"
31 |         pluginVersion: "1.0.0"
32 | # future:
33 | #  maxToken: 242444
34 | #  capability:
35 | #    stream: true
36 | #    image: true
37 | #    base64: true
38 | #status:
39 | #  conditions:
40 | #    - config-validator
41 | #    - endpoint-check
42 | #    - secret-check
43 | 


--------------------------------------------------------------------------------
/config/samples/llm_v1alpha1_modelroute.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: llm.knoway.dev/v1alpha1
 2 | kind: ModelRoute
 3 | metadata:
 4 |   labels:
 5 |     app.kubernetes.io/name: knoway
 6 |     app.kubernetes.io/managed-by: kustomize
 7 |   name: modelroute-example
 8 | spec:
 9 |   modelName: deepseek-r1
10 |   rateLimit:
11 |     rules:
12 |       - match:
13 |           value:
14 |             prefix: sk-prj-123
15 |         limit: 100
16 |         basedOn: APIKey
17 |         duration: 60
18 |       - match:
19 |           value:
20 |             exact: u-4587
21 |         limit: 100
22 |         basedOn: APIKey
23 |         duration: 60
24 |       - limit: 1
25 |         basedOn: UserID
26 |         duration: 60
27 |   route:
28 |     loadBalancePolicy: "WeightedRoundRobin"
29 |     targets:
30 |       - destination:
31 |           backend: deepseek-r1
32 |           namespace: public
33 |           weight: 1
34 |       - destination:
35 |           backend: deepseek-r1-4090
36 |           namespace: public
37 |           weight: 2
38 |   fallback:
39 |     preDelay: 5s
40 |     postDelay: 5s
41 |     maxRetries: 3
42 | 


--------------------------------------------------------------------------------
/cspell.config.yaml:
--------------------------------------------------------------------------------
  1 | version: "0.2"
  2 | ignorePaths: []
  3 | dictionaryDefinitions: []
  4 | dictionaries: []
  5 | words:
  6 |   - anypb
  7 |   - apierrors
  8 |   - apikey
  9 |   - apimachinery
 10 |   - bmatcuk
 11 |   - bodyclose
 12 |   - bootkit
 13 |   - bufconn
 14 |   - bufnet
 15 |   - Causef
 16 |   - clientcmd
 17 |   - clientgoscheme
 18 |   - clusterfilters
 19 |   - clustermanager
 20 |   - containedctx
 21 |   - contextcheck
 22 |   - copyloopvar
 23 |   - cyclop
 24 |   - dall
 25 |   - daocloud
 26 |   - depguard
 27 |   - doublestar
 28 |   - Drainable
 29 |   - dupl
 30 |   - durationcheck
 31 |   - durationpb
 32 |   - Equalf
 33 |   - err113
 34 |   - errcheck
 35 |   - errchkjson
 36 |   - errname
 37 |   - errorlint
 38 |   - evanphx
 39 |   - execinquery
 40 |   - exhaustive
 41 |   - exhaustruct
 42 |   - exportloopref
 43 |   - fatcontext
 44 |   - filtersv1alpha1
 45 |   - finalizer
 46 |   - finalizers
 47 |   - Finalzer
 48 |   - forcetypeassert
 49 |   - funlen
 50 |   - gci
 51 |   - genclient
 52 |   - ginkgolinter
 53 |   - gochecknoglobals
 54 |   - gochecknoinits
 55 |   - gocognit
 56 |   - goconst
 57 |   - gocritic
 58 |   - gocyclo
 59 |   - godot
 60 |   - godox
 61 |   - gofmt
 62 |   - gofumpt
 63 |   - goimports
 64 |   - gomnd
 65 |   - goopenai
 66 |   - gosec
 67 |   - gosimple
 68 |   - govet
 69 |   - healthz
 70 |   - imagegenerationbackend
 71 |   - imagegenerationbackends
 72 |   - ineffassign
 73 |   - ireturn
 74 |   - jsonpatch
 75 |   - kebe
 76 |   - knoway
 77 |   - knowaydevv1alpha1
 78 |   - kubebuilder
 79 |   - kubeconfig
 80 |   - listenerfilters
 81 |   - lll
 82 |   - llmbackend
 83 |   - llmbackends
 84 |   - llmv1alpha1
 85 |   - Loggable
 86 |   - maintidx
 87 |   - Marshallers
 88 |   - metav1
 89 |   - metricsserver
 90 |   - misspell
 91 |   - modelroute
 92 |   - modelroutes
 93 |   - multierror
 94 |   - nakedret
 95 |   - nekomeowww
 96 |   - nestif
 97 |   - nilerr
 98 |   - nilfunc
 99 |   - nilnil
100 |   - nlreturn
101 |   - nolint
102 |   - nolintlint
103 |   - nosniff
104 |   - nosprintfhostport
105 |   - Ollama
106 |   - openrouter
107 |   - paralleltest
108 |   - perfsprint
109 |   - prealloc
110 |   - predeclared
111 |   - Preflights
112 |   - printcolumn
113 |   - protogetter
114 |   - protojson
115 |   - protoutils
116 |   - ptypes
117 |   - qwen
118 |   - readyz
119 |   - reassign
120 |   - registrycluster
121 |   - registryfilters
122 |   - registryroute
123 |   - revive
124 |   - routemanager
125 |   - routev1alpha1
126 |   - rroute
127 |   - rueidis
128 |   - samber
129 |   - sashabaranov
130 |   - servicev1alpha1
131 |   - stabilityai
132 |   - staticcheck
133 |   - Statusable
134 |   - Statuser
135 |   - stoewer
136 |   - strcase
137 |   - stretchr
138 |   - structpb
139 |   - stylecheck
140 |   - subresource
141 |   - tagalign
142 |   - tagliatelle
143 |   - testpackage
144 |   - tolerations
145 |   - ttlcache
146 |   - typecheck
147 |   - unconvert
148 |   - unmarshalled
149 |   - Unmarshaller
150 |   - Unmarshallers
151 |   - unparam
152 |   - unused
153 |   - usestdlibvars
154 |   - utilruntime
155 |   - varnamelen
156 |   - vllm
157 |   - webp
158 |   - whitespace
159 |   - wrapcheck
160 |   - wsl
161 | ignoreWords: []
162 | import: []
163 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module knoway.dev
 2 | 
 3 | go 1.23.0
 4 | 
 5 | toolchain go1.23.2
 6 | 
 7 | require (
 8 | 	buf.build/go/protoyaml v0.3.1
 9 | 	github.com/bmatcuk/doublestar/v4 v4.7.1
10 | 	github.com/evanphx/json-patch/v5 v5.9.0
11 | 	github.com/golang/protobuf v1.5.4
12 | 	github.com/gorilla/mux v1.8.1
13 | 	github.com/hashicorp/go-multierror v1.1.1
14 | 	github.com/nekomeowww/fo v1.4.0
15 | 	github.com/nekomeowww/xo v1.12.0
16 | 	github.com/redis/rueidis v1.0.55
17 | 	github.com/samber/lo v1.49.1
18 | 	github.com/samber/mo v1.13.0
19 | 	github.com/sashabaranov/go-openai v1.35.7
20 | 	github.com/stoewer/go-strcase v1.3.0
21 | 	github.com/stretchr/testify v1.10.0
22 | 	go.opentelemetry.io/otel v1.29.0
23 | 	golang.org/x/image v0.24.0
24 | 	google.golang.org/grpc v1.65.0
25 | 	google.golang.org/protobuf v1.36.0
26 | 	gopkg.in/yaml.v3 v3.0.1
27 | 	k8s.io/api v0.30.0
28 | 	k8s.io/apimachinery v0.30.0
29 | 	k8s.io/client-go v0.30.0
30 | 	sigs.k8s.io/controller-runtime v0.18.2
31 | 	sigs.k8s.io/yaml v1.3.0
32 | )
33 | 
34 | require (
35 | 	buf.build/gen/go/bufbuild/protovalidate/protocolbuffers/go v1.36.0-20241127180247-a33202765966.1 // indirect
36 | 	cel.dev/expr v0.18.0 // indirect
37 | 	github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
38 | 	github.com/beorn7/perks v1.0.1 // indirect
39 | 	github.com/bufbuild/protovalidate-go v0.8.0 // indirect
40 | 	github.com/cespare/xxhash/v2 v2.3.0 // indirect
41 | 	github.com/davecgh/go-spew v1.1.1 // indirect
42 | 	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
43 | 	github.com/evanphx/json-patch v4.12.0+incompatible // indirect
44 | 	github.com/fsnotify/fsnotify v1.7.0 // indirect
45 | 	github.com/go-logr/logr v1.4.2 // indirect
46 | 	github.com/go-logr/zapr v1.3.0 // indirect
47 | 	github.com/go-openapi/jsonpointer v0.19.6 // indirect
48 | 	github.com/go-openapi/jsonreference v0.20.2 // indirect
49 | 	github.com/go-openapi/swag v0.22.3 // indirect
50 | 	github.com/gogo/protobuf v1.3.2 // indirect
51 | 	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
52 | 	github.com/google/cel-go v0.22.1 // indirect
53 | 	github.com/google/gnostic-models v0.6.8 // indirect
54 | 	github.com/google/go-cmp v0.7.0 // indirect
55 | 	github.com/google/gofuzz v1.2.0 // indirect
56 | 	github.com/google/uuid v1.6.0 // indirect
57 | 	github.com/hashicorp/errwrap v1.0.0 // indirect
58 | 	github.com/imdario/mergo v0.3.6 // indirect
59 | 	github.com/josharian/intern v1.0.0 // indirect
60 | 	github.com/json-iterator/go v1.1.12 // indirect
61 | 	github.com/mailru/easyjson v0.7.7 // indirect
62 | 	github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
63 | 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
64 | 	github.com/modern-go/reflect2 v1.0.2 // indirect
65 | 	github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
66 | 	github.com/pkg/errors v0.9.1 // indirect
67 | 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
68 | 	github.com/prometheus/client_golang v1.16.0 // indirect
69 | 	github.com/prometheus/client_model v0.4.0 // indirect
70 | 	github.com/prometheus/common v0.44.0 // indirect
71 | 	github.com/prometheus/procfs v0.12.0 // indirect
72 | 	github.com/shopspring/decimal v1.4.0 // indirect
73 | 	github.com/spf13/pflag v1.0.5 // indirect
74 | 	go.uber.org/multierr v1.11.0 // indirect
75 | 	go.uber.org/zap v1.27.0 // indirect
76 | 	golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
77 | 	golang.org/x/net v0.35.0 // indirect
78 | 	golang.org/x/oauth2 v0.20.0 // indirect
79 | 	golang.org/x/sys v0.30.0 // indirect
80 | 	golang.org/x/term v0.29.0 // indirect
81 | 	golang.org/x/text v0.22.0 // indirect
82 | 	golang.org/x/time v0.3.0 // indirect
83 | 	gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
84 | 	google.golang.org/genproto/googleapis/api v0.0.0-20240826202546-f6391c0de4c7 // indirect
85 | 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect
86 | 	gopkg.in/inf.v0 v0.9.1 // indirect
87 | 	gopkg.in/yaml.v2 v2.4.0 // indirect
88 | 	k8s.io/apiextensions-apiserver v0.30.0 // indirect
89 | 	k8s.io/klog/v2 v2.120.1 // indirect
90 | 	k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
91 | 	k8s.io/utils v0.0.0-20230726121419-3b25d923346b // indirect
92 | 	sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
93 | 	sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
94 | )
95 | 


--------------------------------------------------------------------------------
/hack/boilerplate.go.txt:
--------------------------------------------------------------------------------
 1 | /*
 2 | Copyright 2024.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | */


--------------------------------------------------------------------------------
/internal/controller/backends.go:
--------------------------------------------------------------------------------
  1 | package controller
  2 | 
  3 | import (
  4 | 	"context"
  5 | 
  6 | 	apierrors "k8s.io/apimachinery/pkg/api/errors"
  7 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
  8 | 	"k8s.io/apimachinery/pkg/types"
  9 | 	"sigs.k8s.io/controller-runtime/pkg/client"
 10 | 
 11 | 	knowaydevv1alpha1 "knoway.dev/api/v1alpha1"
 12 | )
 13 | 
 14 | type Backend interface { // common view over the backend API object wrappers in this file
 15 | 	GetType() knowaydevv1alpha1.BackendType              // backend kind the wrapper represents
 16 | 	GetObjectObjectMeta() metav1.ObjectMeta              // the wrapped object's ObjectMeta, returned by value
 17 | 	GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] // accessor over the wrapped object's status
 18 | 	GetModelName() string                                // name computed by modelNameOrNamespacedName
 19 | }
 20 | 
 21 | var _ Backend = (*LLMBackend)(nil) // compile-time check that *LLMBackend implements Backend
 22 | 
 23 | type LLMBackend struct { // adapts the LLMBackend API object to the Backend interface
 24 | 	*knowaydevv1alpha1.LLMBackend
 25 | }
 26 | 
 27 | func (b *LLMBackend) GetType() knowaydevv1alpha1.BackendType { // always BackendTypeLLM
 28 | 	return knowaydevv1alpha1.BackendTypeLLM
 29 | }
 30 | 
 31 | func (b *LLMBackend) GetObjectObjectMeta() metav1.ObjectMeta { // returns the embedded object's ObjectMeta by value
 32 | 	return b.LLMBackend.ObjectMeta
 33 | }
 34 | 
 35 | func (b *LLMBackend) GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] {
 36 | 	return &LLMBackendStatus{LLMBackendStatus: &b.Status} // points at the wrapped object's own Status, so setters write through
 37 | }
 38 | 
 39 | func (b *LLMBackend) GetModelName() string {
 40 | 	return modelNameOrNamespacedName(b.LLMBackend) // helper is defined outside this view; NOTE(review): exact fallback rule not visible here
 41 | }
 42 | 
 43 | func BackendFromLLMBackend(llmBackend *knowaydevv1alpha1.LLMBackend) Backend { // wraps the given pointer without copying the object
 44 | 	return &LLMBackend{
 45 | 		LLMBackend: llmBackend,
 46 | 	}
 47 | }
 48 | 
 49 | // LLMBackendStatus adapts a pointer to an LLMBackend's status to the
 50 | // Statusable accessor set.
 51 | type LLMBackendStatus struct {
 52 | 	*knowaydevv1alpha1.LLMBackendStatus
 53 | }
 54 | 
 55 | // GetStatus returns the current status value.
 56 | func (st *LLMBackendStatus) GetStatus() knowaydevv1alpha1.StatusEnum {
 57 | 	return st.LLMBackendStatus.Status
 58 | }
 59 | 
 60 | // SetStatus overwrites the status value.
 61 | func (st *LLMBackendStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) {
 62 | 	st.LLMBackendStatus.Status = status
 63 | }
 64 | 
 65 | // GetConditions returns the current condition slice (not a copy).
 66 | func (st *LLMBackendStatus) GetConditions() []metav1.Condition {
 67 | 	return st.LLMBackendStatus.Conditions
 68 | }
 69 | 
 70 | // SetConditions replaces the condition slice.
 71 | func (st *LLMBackendStatus) SetConditions(conditions []metav1.Condition) {
 72 | 	st.LLMBackendStatus.Conditions = conditions
 73 | }
 68 | 
 69 | // Compile-time assertion that *ImageGenerationBackend satisfies Backend.
 70 | var _ Backend = (*ImageGenerationBackend)(nil)
 71 | 
 72 | // ImageGenerationBackend wraps the ImageGenerationBackend API object so it can
 73 | // be handled through the Backend interface.
 74 | type ImageGenerationBackend struct {
 75 | 	*knowaydevv1alpha1.ImageGenerationBackend
 76 | }
 77 | 
 78 | // GetType identifies the wrapped object as an image-generation backend.
 79 | func (img *ImageGenerationBackend) GetType() knowaydevv1alpha1.BackendType {
 80 | 	return knowaydevv1alpha1.BackendTypeImageGeneration
 81 | }
 82 | 
 83 | // GetObjectObjectMeta returns the wrapped object's metadata by value.
 84 | func (img *ImageGenerationBackend) GetObjectObjectMeta() metav1.ObjectMeta {
 85 | 	meta := img.ImageGenerationBackend.ObjectMeta
 86 | 
 87 | 	return meta
 88 | }
 89 | 
 90 | // GetStatus returns an accessor backed by a pointer to the wrapped object's
 91 | // Status, so writes through it land on the object itself.
 92 | func (img *ImageGenerationBackend) GetStatus() Statusable[knowaydevv1alpha1.StatusEnum] {
 93 | 	status := &img.ImageGenerationBackend.Status
 94 | 
 95 | 	return &ImageGenerationBackendStatus{ImageGenerationBackendStatus: status}
 96 | }
 97 | 
 98 | // GetModelName resolves the model name via modelNameOrNamespacedName.
 99 | func (img *ImageGenerationBackend) GetModelName() string {
100 | 	return modelNameOrNamespacedName(img.ImageGenerationBackend)
101 | }
102 | 
103 | // BackendFromImageGenerationBackend wraps imageGenerationBackend as a Backend;
104 | // the pointer is stored as-is, not copied.
105 | func BackendFromImageGenerationBackend(imageGenerationBackend *knowaydevv1alpha1.ImageGenerationBackend) Backend {
106 | 	wrapped := &ImageGenerationBackend{ImageGenerationBackend: imageGenerationBackend}
107 | 
108 | 	return wrapped
109 | }
 96 | 
 97 | // ImageGenerationBackendStatus adapts a pointer to an ImageGenerationBackend's
 98 | // status to the Statusable accessor set.
 99 | type ImageGenerationBackendStatus struct {
100 | 	*knowaydevv1alpha1.ImageGenerationBackendStatus
101 | }
102 | 
103 | // GetStatus returns the current status value.
104 | func (st *ImageGenerationBackendStatus) GetStatus() knowaydevv1alpha1.StatusEnum {
105 | 	return st.ImageGenerationBackendStatus.Status
106 | }
107 | 
108 | // SetStatus overwrites the status value.
109 | func (st *ImageGenerationBackendStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) {
110 | 	st.ImageGenerationBackendStatus.Status = status
111 | }
112 | 
113 | // GetConditions returns the current condition slice (not a copy).
114 | func (st *ImageGenerationBackendStatus) GetConditions() []metav1.Condition {
115 | 	return st.ImageGenerationBackendStatus.Conditions
116 | }
117 | 
118 | // SetConditions replaces the condition slice.
119 | func (st *ImageGenerationBackendStatus) SetConditions(conditions []metav1.Condition) {
120 | 	st.ImageGenerationBackendStatus.Conditions = conditions
121 | }
116 | 
117 | func getBackendFromNamespacedName(ctx context.Context, kubeClient client.Client, namespacedName types.NamespacedName) (Backend, error) {
118 | 	var llmBackend knowaydevv1alpha1.LLMBackend
119 | 
120 | 	err := kubeClient.Get(ctx, namespacedName, &llmBackend)
121 | 	if err != nil && !apierrors.IsNotFound(err) {
122 | 		return nil, err
123 | 	}
124 | 	if err == nil {
125 | 		return BackendFromLLMBackend(&llmBackend), nil
126 | 	}
127 | 
128 | 	var imageGenerationBackend knowaydevv1alpha1.ImageGenerationBackend
129 | 
130 | 	err = kubeClient.Get(ctx, namespacedName, &imageGenerationBackend)
131 | 	if err != nil && !apierrors.IsNotFound(err) {
132 | 		return nil, err
133 | 	}
134 | 	if err == nil {
135 | 		return BackendFromImageGenerationBackend(&imageGenerationBackend), nil
136 | 	}
137 | 
138 | 	return nil, nil
139 | }
140 | 


--------------------------------------------------------------------------------
/internal/controller/common_test.go:
--------------------------------------------------------------------------------
 1 | package controller
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/samber/lo"
 7 | 	"github.com/stretchr/testify/assert"
 8 | 	"github.com/stretchr/testify/require"
 9 | 	"google.golang.org/protobuf/types/known/structpb"
10 | 
11 | 	"knoway.dev/api/v1alpha1"
12 | )
13 | 
14 | // TestProcessStruct_OpenAIChatParams checks that parseModelParams turns OpenAI
15 | // chat parameters into the expected structpb values: numeric strings become
16 | // numbers and nested options become nested structs.
17 | func TestProcessStruct_OpenAIChatParams(t *testing.T) {
18 | 	tests := []struct {
19 | 		name        string
20 | 		input       *v1alpha1.ModelParams
21 | 		expected    map[string]*structpb.Value
22 | 		expectError bool
23 | 	}{
24 | 		{
25 | 			name: "Valid ChatRequest with new params",
26 | 			input: &v1alpha1.ModelParams{
27 | 				OpenAI: &v1alpha1.OpenAIParam{
28 | 					CommonParams: v1alpha1.CommonParams{
29 | 						Model:       "gpt-3.5-turbo",
30 | 						Temperature: lo.ToPtr("0.7"),
31 | 					},
32 | 					MaxTokens:           lo.ToPtr(100),
33 | 					MaxCompletionTokens: lo.ToPtr(200),
34 | 					TopP:                lo.ToPtr("0.3"),
35 | 					Stream:              lo.ToPtr(true),
36 | 					StreamOptions: &v1alpha1.StreamOptions{
37 | 						IncludeUsage: lo.ToPtr(true),
38 | 					},
39 | 				},
40 | 			},
41 | 			expected: map[string]*structpb.Value{
42 | 				"model":                 structpb.NewStringValue("gpt-3.5-turbo"),
43 | 				"temperature":           structpb.NewNumberValue(0.7),
44 | 				"max_tokens":            structpb.NewNumberValue(100),
45 | 				"max_completion_tokens": structpb.NewNumberValue(200),
46 | 				"top_p":                 structpb.NewNumberValue(0.3),
47 | 				"stream":                structpb.NewBoolValue(true),
48 | 				"stream_options": structpb.NewStructValue(&structpb.Struct{
49 | 					Fields: map[string]*structpb.Value{
50 | 						"include_usage": structpb.NewBoolValue(true),
51 | 					},
52 | 				}),
53 | 			},
54 | 			expectError: false,
55 | 		},
56 | 	}
57 | 
58 | 	for _, tt := range tests {
59 | 		t.Run(tt.name, func(t *testing.T) {
60 | 			params := make(map[string]*structpb.Value)
61 | 
62 | 			// Call the function under test
63 | 			err := parseModelParams(tt.input, params)
64 | 
65 | 			if tt.expectError {
66 | 				assert.Error(t, err)
67 | 
68 | 				return
69 | 			}
70 | 
71 | 			require.NoError(t, err)
72 | 
73 | 			// Compare plain Go values instead of the protobuf messages themselves:
74 | 			// generated messages carry internal bookkeeping fields, so a
75 | 			// reflect.DeepEqual-based assertion can fail on equal payloads.
76 | 			want := (&structpb.Struct{Fields: tt.expected}).AsMap()
77 | 			got := (&structpb.Struct{Fields: params}).AsMap()
78 | 			assert.Equal(t, want, got)
79 | 		})
80 | 	}
81 | }
72 | 


--------------------------------------------------------------------------------
/internal/controller/controller_test.go:
--------------------------------------------------------------------------------
 1 | package controller
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"testing"
 6 | 
 7 | 	ctrl "sigs.k8s.io/controller-runtime"
 8 | 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 9 | )
10 | 
11 | // TestMain installs a development-mode zap logger into controller-runtime
12 | // before the package tests run.
13 | //
14 | // The zap options are bound to the default flag set so they can be passed to
15 | // the test binary. Flags have not been parsed yet when TestMain starts, so
16 | // flag.Parse is called explicitly before the logger is built; otherwise the
17 | // bound flags could never influence the options used by zap.New.
18 | func TestMain(m *testing.M) {
19 | 	copts := zap.Options{
20 | 		Development: true,
21 | 	}
22 | 
23 | 	copts.BindFlags(flag.CommandLine)
24 | 	flag.Parse()
25 | 	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&copts)))
26 | 
27 | 	// On Go 1.15+ the testing package exits with m.Run's result once TestMain returns.
28 | 	m.Run()
29 | }
21 | 


--------------------------------------------------------------------------------
/internal/controller/imagegenerationbackend_controller_test.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2024.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | */
 16 | 
 17 | package controller
 18 | 
 19 | import (
 20 | 	"context"
 21 | 	"testing"
 22 | 
 23 | 	"github.com/samber/lo"
 24 | 	"github.com/stretchr/testify/require"
 25 | 	"sigs.k8s.io/controller-runtime/pkg/client"
 26 | 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 27 | 
 28 | 	"knoway.dev/api/v1alpha1"
 29 | 
 30 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 31 | )
 32 | 
 33 | // TestImageGenerationBackendReconciler_Reconcile seeds a fake client per case,
 34 | // runs a single Reconcile call and checks the returned error and that the
 35 | // object can still be read back.
 36 | //
 37 | // Fixture closures receive the subtest's *testing.T. They run inside t.Run, and
 38 | // calling Fatalf on the parent test from a subtest's goroutine is not supported
 39 | // by the testing package.
 40 | func TestImageGenerationBackendReconciler_Reconcile(t *testing.T) {
 41 | 	tests := []struct {
 42 | 		name        string
 43 | 		setupClient func(*testing.T, client.Client) client.Client
 44 | 		request     reconcile.Request
 45 | 		expectError bool
 46 | 		validate    func(*testing.T, client.Client)
 47 | 	}{
 48 | 		{
 49 | 			name: "Valid resource reconciled",
 50 | 			setupClient: func(t *testing.T, cl client.Client) client.Client {
 51 | 				t.Helper()
 52 | 				resource := &v1alpha1.ImageGenerationBackend{
 53 | 					ObjectMeta: metav1.ObjectMeta{
 54 | 						Name:      "test-model",
 55 | 						Namespace: "default",
 56 | 					},
 57 | 					Spec: v1alpha1.ImageGenerationBackendSpec{
 58 | 						ModelName: lo.ToPtr("test-model"),
 59 | 						Upstream: v1alpha1.ImageGenerationBackendUpstream{
 60 | 							BaseURL: "xx/v1",
 61 | 						},
 62 | 						Filters: nil,
 63 | 					},
 64 | 					Status: v1alpha1.ImageGenerationBackendStatus{},
 65 | 				}
 66 | 				err := cl.Create(context.Background(), resource)
 67 | 				if err != nil {
 68 | 					t.Fatalf("failed to create resource: %v", err)
 69 | 				}
 70 | 				return cl
 71 | 			},
 72 | 			request: reconcile.Request{
 73 | 				NamespacedName: client.ObjectKey{
 74 | 					Namespace: "default",
 75 | 					Name:      "test-model",
 76 | 				},
 77 | 			},
 78 | 			expectError: false,
 79 | 			validate: func(t *testing.T, cl client.Client) {
 80 | 				t.Helper()
 81 | 				resource := &v1alpha1.ImageGenerationBackend{}
 82 | 				err := cl.Get(context.Background(), client.ObjectKey{
 83 | 					Namespace: "default",
 84 | 					Name:      "test-model",
 85 | 				}, resource)
 86 | 				require.NoError(t, err)
 87 | 			},
 88 | 		},
 89 | 	}
 90 | 
 91 | 	for _, tt := range tests {
 92 | 		t.Run(tt.name, func(t *testing.T) {
 93 | 			fakeClient := tt.setupClient(t, NewFakeClientWithStatus())
 94 | 			reconciler := &ImageGenerationBackendReconciler{
 95 | 				Client: fakeClient,
 96 | 			}
 97 | 
 98 | 			_, err := reconciler.Reconcile(context.TODO(), tt.request)
 99 | 			if tt.expectError {
100 | 				require.Error(t, err)
101 | 			} else {
102 | 				require.NoError(t, err)
103 | 			}
104 | 
105 | 			if tt.validate != nil {
106 | 				tt.validate(t, fakeClient)
107 | 			}
108 | 		})
109 | 	}
110 | }
103 | 


--------------------------------------------------------------------------------
/internal/controller/llmbackend_controller_test.go:
--------------------------------------------------------------------------------
  1 | /*
  2 | Copyright 2024.
  3 | 
  4 | Licensed under the Apache License, Version 2.0 (the "License");
  5 | you may not use this file except in compliance with the License.
  6 | You may obtain a copy of the License at
  7 | 
  8 |     http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 | Unless required by applicable law or agreed to in writing, software
 11 | distributed under the License is distributed on an "AS IS" BASIS,
 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | See the License for the specific language governing permissions and
 14 | limitations under the License.
 15 | */
 16 | 
 17 | package controller
 18 | 
 19 | import (
 20 | 	"context"
 21 | 	"testing"
 22 | 
 23 | 	"github.com/samber/lo"
 24 | 	"github.com/stretchr/testify/require"
 25 | 
 26 | 	"k8s.io/apimachinery/pkg/runtime"
 27 | 	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 28 | 
 29 | 	"knoway.dev/api/v1alpha1"
 30 | 
 31 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 32 | 	"sigs.k8s.io/controller-runtime/pkg/client"
 33 | 	"sigs.k8s.io/controller-runtime/pkg/reconcile"
 34 | )
 35 | 
 36 | // TestLLMBackendReconciler_Reconcile seeds a fake client per case, runs a
 37 | // single Reconcile call and checks the returned error and that the object can
 38 | // still be read back.
 39 | //
 40 | // Fixture closures receive the subtest's *testing.T. They run inside t.Run, and
 41 | // calling Fatalf on the parent test from a subtest's goroutine is not supported
 42 | // by the testing package.
 43 | func TestLLMBackendReconciler_Reconcile(t *testing.T) {
 44 | 	tests := []struct {
 45 | 		name        string
 46 | 		setupClient func(*testing.T, client.Client) client.Client
 47 | 		request     reconcile.Request
 48 | 		expectError bool
 49 | 		validate    func(*testing.T, client.Client)
 50 | 	}{
 51 | 		{
 52 | 			name: "Valid resource reconciled",
 53 | 			setupClient: func(t *testing.T, cl client.Client) client.Client {
 54 | 				t.Helper()
 55 | 				resource := &v1alpha1.LLMBackend{
 56 | 					ObjectMeta: metav1.ObjectMeta{
 57 | 						Name:      "test-model",
 58 | 						Namespace: "default",
 59 | 					},
 60 | 					Spec: v1alpha1.LLMBackendSpec{
 61 | 						ModelName: lo.ToPtr("test-model"),
 62 | 						Upstream: v1alpha1.BackendUpstream{
 63 | 							BaseURL: "xx/v1",
 64 | 						},
 65 | 						Filters: nil,
 66 | 					},
 67 | 					Status: v1alpha1.LLMBackendStatus{},
 68 | 				}
 69 | 				err := cl.Create(context.Background(), resource)
 70 | 				if err != nil {
 71 | 					t.Fatalf("failed to create resource: %v", err)
 72 | 				}
 73 | 				return cl
 74 | 			},
 75 | 			request: reconcile.Request{
 76 | 				NamespacedName: client.ObjectKey{
 77 | 					Namespace: "default",
 78 | 					Name:      "test-model",
 79 | 				},
 80 | 			},
 81 | 			expectError: false,
 82 | 			validate: func(t *testing.T, cl client.Client) {
 83 | 				t.Helper()
 84 | 				resource := &v1alpha1.LLMBackend{}
 85 | 				err := cl.Get(context.Background(), client.ObjectKey{
 86 | 					Namespace: "default",
 87 | 					Name:      "test-model",
 88 | 				}, resource)
 89 | 				require.NoError(t, err)
 90 | 			},
 91 | 		},
 92 | 	}
 93 | 
 94 | 	for _, tt := range tests {
 95 | 		t.Run(tt.name, func(t *testing.T) {
 96 | 			fakeClient := tt.setupClient(t, NewFakeClientWithStatus())
 97 | 			reconciler := &LLMBackendReconciler{
 98 | 				Client: fakeClient,
 99 | 			}
100 | 
101 | 			_, err := reconciler.Reconcile(context.TODO(), tt.request)
102 | 			if tt.expectError {
103 | 				require.Error(t, err)
104 | 			} else {
105 | 				require.NoError(t, err)
106 | 			}
107 | 
108 | 			if tt.validate != nil {
109 | 				tt.validate(t, fakeClient)
110 | 			}
111 | 		})
112 | 	}
113 | }
106 | 
107 | // NewFakeClientWithStatus builds a controller-runtime fake client on the test
108 | // scheme and wraps it so that Status() hands back a FakeStatusWriter.
109 | func NewFakeClientWithStatus() client.Client {
110 | 	inner := fake.NewClientBuilder().
111 | 		WithScheme(createTestScheme()).
112 | 		Build()
113 | 
114 | 	return &FakeClientWithStatus{Client: inner}
115 | }
112 | 
113 | // FakeClientWithStatus decorates a client.Client, overriding only Status().
114 | type FakeClientWithStatus struct {
115 | 	client.Client
116 | }
117 | 
118 | // Status returns a FakeStatusWriter that shares the wrapped client.
119 | func (c *FakeClientWithStatus) Status() client.StatusWriter {
120 | 	writer := &FakeStatusWriter{Client: c.Client}
121 | 
122 | 	return writer
123 | }
120 | 
121 | // FakeStatusWriter is a test stand-in for a status writer: Update is forwarded
122 | // to the wrapped client's regular Update, while Create and Patch are
123 | // unimplemented and panic.
124 | type FakeStatusWriter struct {
125 | 	client.Client
126 | }
127 | 
128 | // Create is not implemented and always panics.
129 | func (w *FakeStatusWriter) Create(_ context.Context, _ client.Object, _ client.Object, _ ...client.SubResourceCreateOption) error {
130 | 	panic("implement me")
131 | }
132 | 
133 | // Update writes obj through the wrapped client's Update; the sub-resource
134 | // options are ignored.
135 | func (w *FakeStatusWriter) Update(ctx context.Context, obj client.Object, _ ...client.SubResourceUpdateOption) error {
136 | 	return w.Client.Update(ctx, obj)
137 | }
138 | 
139 | // Patch is not implemented and always panics.
140 | func (w *FakeStatusWriter) Patch(_ context.Context, _ client.Object, _ client.Patch, _ ...client.SubResourcePatchOption) error {
141 | 	panic("implement me")
142 | }
136 | 
137 | // createTestScheme returns a fresh runtime.Scheme with the v1alpha1 API types
138 | // registered. It panics if registration fails, so a broken scheme surfaces
139 | // immediately instead of as confusing fake-client errors later in a test.
140 | func createTestScheme() *runtime.Scheme {
141 | 	scheme := runtime.NewScheme()
142 | 	if err := v1alpha1.AddToScheme(scheme); err != nil {
143 | 		panic("failed to register v1alpha1 types in test scheme: " + err.Error())
144 | 	}
145 | 
146 | 	return scheme
147 | }
143 | 


--------------------------------------------------------------------------------
/internal/controller/route.go:
--------------------------------------------------------------------------------
 1 | package controller
 2 | 
 3 | import (
 4 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 5 | 
 6 | 	knowaydevv1alpha1 "knoway.dev/api/v1alpha1"
 7 | )
 8 | 
 9 | // ModelRouteStatus adapts a pointer to a ModelRoute's status to the accessor
10 | // methods declared by RouteStatusable.
11 | type ModelRouteStatus struct {
12 | 	*knowaydevv1alpha1.ModelRouteStatus
13 | }
14 | 
15 | // GetStatus returns the current status value.
16 | func (rs *ModelRouteStatus) GetStatus() knowaydevv1alpha1.StatusEnum {
17 | 	return rs.ModelRouteStatus.Status
18 | }
19 | 
20 | // SetStatus overwrites the status value.
21 | func (rs *ModelRouteStatus) SetStatus(status knowaydevv1alpha1.StatusEnum) {
22 | 	rs.ModelRouteStatus.Status = status
23 | }
24 | 
25 | // GetConditions returns the current condition slice (not a copy).
26 | func (rs *ModelRouteStatus) GetConditions() []metav1.Condition {
27 | 	return rs.ModelRouteStatus.Conditions
28 | }
29 | 
30 | // SetConditions replaces the condition slice.
31 | func (rs *ModelRouteStatus) SetConditions(conditions []metav1.Condition) {
32 | 	rs.ModelRouteStatus.Conditions = conditions
33 | }
34 | 
35 | // GetTargetsStatus returns the per-target status slice (not a copy).
36 | func (rs *ModelRouteStatus) GetTargetsStatus() []knowaydevv1alpha1.ModelRouteStatusTarget {
37 | 	return rs.ModelRouteStatus.Targets
38 | }
39 | 
40 | // SetTargetsStatus replaces the per-target status slice.
41 | func (rs *ModelRouteStatus) SetTargetsStatus(targets []knowaydevv1alpha1.ModelRouteStatusTarget) {
42 | 	rs.ModelRouteStatus.Targets = targets
43 | }
36 | 


--------------------------------------------------------------------------------
/internal/controller/status.go:
--------------------------------------------------------------------------------
 1 | package controller
 2 | 
 3 | import (
 4 | 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 5 | 
 6 | 	llmv1alpha1 "knoway.dev/api/v1alpha1"
 7 | )
 8 | 
 9 | type Statusable[S any] interface { // Statusable is the accessor set for a resource status: a status value of type S plus a condition list.
10 | 	GetStatus() S // current status value
11 | 	SetStatus(status S) // replaces the status value
12 | 	GetConditions() []metav1.Condition // current condition list
13 | 	SetConditions(conditions []metav1.Condition) // replaces the condition list
14 | }
15 | 
16 | type RouteStatusable[S any] interface { // RouteStatusable extends Statusable with per-target route status.
17 | 	Statusable[S] // status value and conditions
18 | 
19 | 	GetTargetsStatus() []llmv1alpha1.ModelRouteStatusTarget // current per-target status list
20 | 	SetTargetsStatus(targets []llmv1alpha1.ModelRouteStatusTarget) // replaces the per-target status list
21 | }
22 | 


--------------------------------------------------------------------------------
/license-lint.yml:
--------------------------------------------------------------------------------
 1 | unrestricted_licenses:
 2 |   - Apache-2.0
 3 |   - CC-BY-3.0
 4 |   - ISC
 5 |   - AFL-2.1
 6 |   - AFL-3.0
 7 |   - Artistic-1.0
 8 |   - Artistic-2.0
 9 |   - Apache-1.1
10 |   - BSD-1-Clause
11 |   - BSD-2-Clause
12 |   - BSD-3-Clause
13 |   - 0BSD
14 |   - FTL
15 |   - LPL-1.02
16 |   - MS-PL
17 |   - MIT
18 |   - NCSA
19 |   - OpenSSL
20 |   - PHP-3.0
21 |   - TCP-wrappers
22 |   - W3C
23 |   - Xnet
24 |   - Zlib
25 | 
26 | reciprocal_licenses:
27 |   - CC0-1.0
28 |   - APSL-2.0
29 |   - CDDL-1.0
30 |   - CDDL-1.1
31 |   - CPL-1.0
32 |   - EPL-1.0
33 |   - IPL-1.0
34 |   - MPL-1.0
35 |   - MPL-1.1
36 |   - MPL-2.0
37 |   - MPL-2.0-no-copyleft-exception
38 |   - Ruby
39 | 
40 | restricted_licenses:
41 |   - GPL-1.0-only
42 |   - GPL-1.0-or-later
43 |   - GPL-2.0-only
44 |   - GPL-2.0-or-later
45 |   - GPL-3.0-only
46 |   - GPL-3.0-or-later
47 |   - LGPL-2.0-only
48 |   - LGPL-2.0-or-later
49 |   - LGPL-2.1-only
50 |   - LGPL-2.1-or-later
51 |   - LGPL-3.0-only
52 |   - LGPL-3.0-or-later
53 |   - NPL-1.0
54 |   - NPL-1.1
55 |   - OSL-1.0
56 |   - OSL-1.1
57 |   - OSL-2.0
58 |   - OSL-2.1
59 |   - OSL-3.0
60 |   - QPL-1.0
61 |   - Sleepycat
62 | 
63 | allowlisted_modules:
64 | # BSD: https://github.com/gogo/protobuf/blob/master/LICENSE
65 | - github.com/gogo/protobuf
66 | 
67 | # MIT: https://github.com/kubernetes-sigs/yaml/blob/master/LICENSE
68 | - sigs.k8s.io/yaml
69 | 


--------------------------------------------------------------------------------
/manifests/knoway/.helmignore:
--------------------------------------------------------------------------------
 1 | # Patterns to ignore when building packages.
 2 | # This supports shell glob matching, relative path matching, and
 3 | # negation (prefixed with !). Only one pattern per line.
 4 | .DS_Store
 5 | # Common VCS dirs
 6 | .git/
 7 | .gitignore
 8 | .bzr/
 9 | .bzrignore
10 | .hg/
11 | .hgignore
12 | .svn/
13 | # Common backup files
14 | *.swp
15 | *.bak
16 | *.tmp
17 | *.orig
18 | *~
19 | # Various IDEs
20 | .project
21 | .idea/
22 | *.tmproj
23 | .vscode/
24 | 


--------------------------------------------------------------------------------
/manifests/knoway/Chart.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v2
 2 | name: knoway
 3 | description: A Helm chart to deploy Knoway and its related components into Kubernetes
 4 | 
 5 | # A chart can be either an 'application' or a 'library' chart.
 6 | #
 7 | # Application charts are a collection of templates that can be packaged into versioned archives
 8 | # to be deployed.
 9 | #
10 | # Library charts provide useful utilities or functions for the chart developer. They're included as
11 | # a dependency of application charts to inject those utilities and functions into the rendering
12 | # pipeline. Library charts do not define any templates and therefore cannot be deployed.
13 | type: application
14 | 
15 | # This is the chart version. This version number should be incremented each time you make changes
16 | # to the chart and its templates, including the app version.
17 | # Versions are expected to follow Semantic Versioning (https://semver.org/)
18 | version: 0.1.0 # also the default gateway image tag: templates/_helpers.tpl passes .Chart.Version as defaultTag
19 | 
20 | # This is the version number of the application being deployed. This version number should be
21 | # incremented each time you make changes to the application. Versions are not expected to
22 | # follow Semantic Versioning. They should reflect the version the application is using.
23 | # It is recommended to use it with quotes.
24 | appVersion: "1.16.0" # NOTE(review): looks like a scaffold placeholder rather than a Knoway release; confirm
25 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/_commons.tpl:
--------------------------------------------------------------------------------
 1 | {{/*
 2 | Common Template
 3 | */}}
 4 | 
 5 | {{/*
 6 | common.images.pullSecrets: renders an imagePullSecrets list with one "- name: <entry>" item per entry of .Values.global.imagePullSecrets followed by .Values.imagePullSecrets; renders nothing when both are empty.
 7 | */}}
 8 | {{- define "common.images.pullSecrets" -}}
 9 |     {{- $pullSecrets := list }}
10 | 
11 |     {{- if .Values.global -}}
12 |         {{- range .Values.global.imagePullSecrets -}}
13 |             {{- $pullSecrets = append $pullSecrets . -}}
14 |         {{- end -}}
15 |     {{- end -}}
16 |     {{- range .Values.imagePullSecrets -}}
17 |         {{- $pullSecrets = append $pullSecrets . -}}
18 |     {{- end -}}
19 | 
20 |     {{- if (not (empty $pullSecrets)) }}
21 | imagePullSecrets:
22 |         {{- range $pullSecrets }}
23 |   - name: {{ . }}
24 |         {{- end }}
25 |     {{- end }}
26 | {{- end -}}
27 | 
28 | {{/*
29 | common.images.resources: renders .Values.resources as YAML; when that is empty, falls back to .Values.global.resources; renders nothing when neither is set.
30 | */}}
31 | {{- define "common.images.resources" -}}
32 | 
33 |     {{- if .Values.resources }}
34 | {{ toYaml .Values.resources }}
35 |     {{- else if .Values.global }}
36 |             {{- if .Values.global.resources }}
37 | {{ toYaml .Values.global.resources }}
38 |             {{- end }}
39 |     {{- end }}
40 | 
41 | {{- end -}}
42 | 
43 | {{/*
44 | Build an image reference of the form [registry/]repository:tag.
45 | Registry: .imageRoot.registry when set, else .global.imageRegistry, else omitted. Tag: .imageRoot.tag when set, else .defaultTag.
46 | Usage:     {{ include "common.images.image" ( dict "imageRoot" .imageRootPath "global" .globalPath "defaultTag" .tagPath) }}
47 | */}}
48 | {{- define "common.images.image" -}}
49 | {{- $repo := .imageRoot.repository -}}
50 | {{- $tag := .imageRoot.tag | default .defaultTag -}}
51 | {{- $registry := "" -}}
52 | {{- if .global -}}
53 |     {{- $registry = .global.imageRegistry | default "" -}}
54 | {{- end -}}
55 | {{- $registry = .imageRoot.registry | default $registry -}}
56 | {{- if $registry -}}
57 |     {{- printf "%s/%s:%s" $registry $repo $tag -}}
58 | {{- else -}}
59 |     {{- printf "%s:%s" $repo $tag -}}
60 | {{- end -}}
61 | {{- end -}}
68 | 
69 | {{/*
70 | replicas: .Values.replicas when it is set to a non-zero value; otherwise 2 when .Values.global.high_available is truthy and 1 when it is not.
71 | */}}
72 | {{- define "replicas" -}}
73 | {{- if .Values.replicas -}}
74 | {{ .Values.replicas }}
75 | {{- else if .Values.global.high_available -}}
76 | 2
77 | {{- else -}}
78 | 1
79 | {{- end -}}
80 | {{- end -}}
72 | 
73 | {{/*
74 | hpa.min_replicas: 2 when .Values.global.high_available is truthy, otherwise 1.
75 | */}}
76 | {{- define "hpa.min_replicas" -}}
77 | {{- if .Values.global.high_available -}}
78 | 2
79 | {{- else -}}
80 | 1
81 | {{- end -}}
82 | {{- end -}}
76 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/_helpers.tpl:
--------------------------------------------------------------------------------
1 | {{/*
2 | knoway.gateway.image: gateway image reference built by common.images.image from .Values.gateway.image and .Values.global, with .Chart.Version as the default tag.
3 | */}}
4 | 
5 | {{- define "knoway.gateway.image" -}}
6 | {{ include "common.images.image" (dict "imageRoot" .Values.gateway.image "global" .Values.global "defaultTag" .Chart.Version) }}
7 | {{- end -}}
8 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/clusterrole.yaml:
--------------------------------------------------------------------------------
 1 | {{- /*
 2 | ClusterRole for the chart's ServiceAccount (bound by the ClusterRoleBinding of the same name).
 3 | ClusterRole is a cluster-scoped kind, so metadata.namespace is not set.
 4 | NOTE(review): the name is the release name only, so two releases with the same name in different namespaces would share this object.
 5 | NOTE(review): the second rule grants create/update/patch on secrets and other core resources cluster-wide; confirm that scope is needed.
 6 | */ -}}
 7 | apiVersion: rbac.authorization.k8s.io/v1
 8 | kind: ClusterRole
 9 | metadata:
10 |   name: {{ .Values.fullNameOverride | default .Release.Name }}
11 |   labels:
12 |     app: {{ .Values.fullNameOverride | default .Release.Name }}
13 | rules:
14 |   - apiGroups:
15 |       - "llm.knoway.dev"
16 |     resources:
17 |       - "*"
18 |     verbs:
19 |       - "*"
20 |   - apiGroups:
21 |       - ""
22 |     resources:
23 |       - secrets
24 |       - configmaps
25 |       - services
26 |       - pods
27 |       - namespaces
28 |     verbs:
29 |       - get
30 |       - list
31 |       - watch
32 |       - create
33 |       - update
34 |       - patch


--------------------------------------------------------------------------------
/manifests/knoway/templates/clusterrolebinding.yaml:
--------------------------------------------------------------------------------
 1 | {{- /*
 2 | Binds the chart's ClusterRole to its ServiceAccount in the release namespace.
 3 | ClusterRoleBinding is a cluster-scoped kind, so metadata.namespace is not set; only the subject carries a namespace.
 4 | */ -}}
 5 | apiVersion: rbac.authorization.k8s.io/v1
 6 | kind: ClusterRoleBinding
 7 | metadata:
 8 |   name: {{ .Values.fullNameOverride | default .Release.Name }}
 9 |   labels:
10 |     app: {{ .Values.fullNameOverride | default .Release.Name }}
11 | roleRef:
12 |   apiGroup: rbac.authorization.k8s.io
13 |   kind: ClusterRole
14 |   name: {{ .Values.fullNameOverride | default .Release.Name }}
15 | subjects:
16 |   - kind: ServiceAccount
17 |     name: {{ .Values.fullNameOverride | default .Release.Name }}
18 |     namespace: {{ .Release.Namespace }}
16 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/configmap.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: v1
 2 | kind: ConfigMap {{- /* Holds config.yaml; the gateway Deployment mounts this ConfigMap read-only at /app/config. */}}
 3 | metadata:
 4 |   name: {{ .Values.fullNameOverride | default .Release.Name }}
 5 |   namespace: {{ .Release.Namespace }}
 6 |   labels:
 7 |     app: {{ .Values.fullNameOverride | default .Release.Name }}
 8 | data:
 9 |   config.yaml: |- {{- /* Two static listeners (chat, image) rendered with the same filter chain from .Values.config. */}}
10 |     debug: {{.Values.debug }}
11 |     staticListeners:
12 |       - '@type': type.googleapis.com/knoway.listeners.v1alpha1.ChatCompletionListener
13 |         name: openai-chat
14 |         filters:
15 |           - name: api-key-auth
16 |             config:
17 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig
18 |               authServer:
19 |                 url: {{ .Values.config.auth_server.url }}
20 |                 timeout: {{ .Values.config.auth_server.timeout }}
21 |           - config: {{- /* NOTE(review): unlike api-key-auth above, this filter and the rate-limit one have no name; confirm that is intended. */}}
22 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig
23 |               statsServer:
24 |                 url: {{ .Values.config.stats_server.url }}
25 |                 timeout: {{ .Values.config.stats_server.timeout }}
26 |           {{- if .Values.config.rate_limit.enable }}
27 |           - config:
28 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig
29 |               policies: {{- toYaml .Values.config.rate_limit.policies | nindent 16 }}
30 |           {{- end }}
31 |         accessLog: {{- toYaml .Values.config.log.access_log | nindent 10 }}
32 |       - '@type': type.googleapis.com/knoway.listeners.v1alpha1.ImageListener
33 |         name: openai-image {{- /* Filter chain below repeats the openai-chat listener's; keep the two in sync when editing. */}}
34 |         filters:
35 |           - name: api-key-auth
36 |             config:
37 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig
38 |               authServer:
39 |                 url: {{ .Values.config.auth_server.url }}
40 |                 timeout: {{ .Values.config.auth_server.timeout }}
41 |           - config:
42 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig
43 |               statsServer:
44 |                 url: {{ .Values.config.stats_server.url }}
45 |                 timeout: {{ .Values.config.stats_server.timeout }}
46 |           {{- if .Values.config.rate_limit.enable }}
47 |           - config:
48 |               '@type': type.googleapis.com/knoway.filters.v1alpha1.RateLimitConfig
49 |               policies: {{- toYaml .Values.config.rate_limit.policies | nindent 16 }}
50 |           {{- end }}
51 |         accessLog: {{- toYaml .Values.config.log.access_log | nindent 10 }}
52 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/deployment.yaml:
--------------------------------------------------------------------------------
 1 | apiVersion: apps/v1
 2 | kind: Deployment {{- /* Gateway Deployment: one "proxy" container configured from the chart's ConfigMap. */}}
 3 | metadata:
 4 |   name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
 5 |   namespace: {{ .Release.Namespace }}
 6 |   labels:
 7 |     app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
 8 | spec:
 9 |   replicas: {{ include "replicas" . }} {{- /* NOTE(review): rendered even when global.use_hpa enables the HPA that targets this Deployment; confirm that is intended. */}}
10 |   selector:
11 |     matchLabels:
12 |       app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
13 |   strategy:
14 |     rollingUpdate:
15 |       maxSurge: 50%
16 |       maxUnavailable: 50%
17 |     type: RollingUpdate
18 |   template:
19 |     metadata:
20 |       labels:
21 |         app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
22 |     spec:
23 |       {{- include "common.images.pullSecrets" . | indent 6 }}
24 |       {{- if and (.Values.global.nodeSelector) (not .Values.gateway.nodeSelector) }}
25 |       nodeSelector: {{- /* global value, used only when gateway.nodeSelector is empty; affinity and tolerations below follow the same rule */}}
26 | {{ toYaml .Values.global.nodeSelector | indent 8 }}
27 |     {{- end }}
28 |     {{- if .Values.gateway.nodeSelector }}
29 |       nodeSelector:
30 | {{ toYaml .Values.gateway.nodeSelector | indent 8 }}
31 |     {{- end }}
32 |     {{- if and (.Values.global.affinity) (not .Values.gateway.affinity) }}
33 |       affinity:
34 | {{ toYaml .Values.global.affinity | indent 8 }}
35 |     {{- end }}
36 |     {{- if .Values.gateway.affinity }}
37 |       affinity:
38 | {{ toYaml .Values.gateway.affinity | indent 8 }}
39 |     {{- end }}
40 |     {{- if and (.Values.global.tolerations) (not .Values.gateway.tolerations) }}
41 |       tolerations:
42 | {{ toYaml .Values.global.tolerations | indent 8 }}
43 |     {{- end }}
44 |     {{- if .Values.gateway.tolerations }}
45 |       tolerations:
46 | {{ toYaml .Values.gateway.tolerations | indent 8 }}
47 |     {{- end }}
48 |       dnsPolicy: ClusterFirst
49 |       containers:
50 |         - name: proxy
51 |           image: {{ template "knoway.gateway.image" . }}
52 |           imagePullPolicy: {{ .Values.global.imagePullPolicy }}
53 |           {{- if include "common.images.resources" . }}
54 |           resources: {{- /* from .Values.resources, else .Values.global.resources (see common.images.resources) */}}
55 |           {{- include "common.images.resources" . | indent 12 }}
56 |           {{- end }}
57 |           ports:
58 |             - containerPort: 8080
59 |               name: http
60 |           volumeMounts:
61 |             - readOnly: true
62 |               mountPath: /app/config
63 |               name: config
64 |           readinessProbe:
65 |             httpGet:
66 |               path: /readyz
67 |               port: 8081 {{- /* NOTE(review): 8081 is not listed under ports above; confirm the gateway serves /readyz on it. */}}
68 |             initialDelaySeconds: 10
69 |             periodSeconds: 15
70 |       serviceAccountName: {{ .Values.fullNameOverride | default .Release.Name }}
71 |       volumes:
72 |         - name: config
73 |           configMap:
74 |             name: {{ .Values.fullNameOverride | default .Release.Name }}
75 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/hpa.yaml:
--------------------------------------------------------------------------------
 1 | {{ if .Values.global.use_hpa }}{{- /* Rendered only when global.use_hpa is set; uses autoscaling/v2 when the cluster advertises it, otherwise autoscaling/v1. */}}
 2 | {{ if .Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" -}}
 3 | apiVersion: autoscaling/v2
 4 | {{ else }}
 5 | apiVersion: autoscaling/v1
 6 | {{ end }}
 7 | kind: HorizontalPodAutoscaler
 8 | metadata:
 9 |   labels:
10 |     app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
11 |   name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
12 |   namespace: {{ .Release.Namespace }}
13 | spec:
14 |   maxReplicas: 2 {{- /* NOTE(review): fixed ceiling; it equals minReplicas when global.high_available is true (hpa.min_replicas renders 2), leaving no room to scale. */}}
15 |   {{ if .Capabilities.APIVersions.Has "autoscaling/v2/HorizontalPodAutoscaler" -}}
16 |   metrics:
17 |     - resource:
18 |         name: cpu
19 |         target:
20 |           averageUtilization: 80
21 |           type: Utilization
22 |       type: Resource
23 |   {{ else }}
24 |   targetCPUUtilizationPercentage: 80
25 |   {{ end }}
26 |   minReplicas: {{ include "hpa.min_replicas" . }}
27 |   scaleTargetRef:
28 |     apiVersion: apps/v1
29 |     kind: Deployment
30 |     name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
31 | {{ end }}
32 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/service.yaml:
--------------------------------------------------------------------------------
# ClusterIP Service for the gateway pods.
# The name and the `app` label/selector are "<fullNameOverride or release name>-gateway".
apiVersion: v1
kind: Service
metadata:
  name: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
spec:
  type: ClusterIP
  ports:
    # No targetPort is set, so traffic is forwarded to the same port number
    # (8080) on the selected pods.
    - port: 8080
      protocol: TCP
      name: http
  selector:
    app: {{ .Values.fullNameOverride | default .Release.Name }}-gateway
16 | 


--------------------------------------------------------------------------------
/manifests/knoway/templates/serviceaccount.yaml:
--------------------------------------------------------------------------------
# ServiceAccount named after fullNameOverride, falling back to the release name.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ .Values.fullNameOverride | default .Release.Name }}
  namespace: {{ .Release.Namespace }}
  labels:
    app: {{ .Values.fullNameOverride | default .Release.Name }}
8 | 


--------------------------------------------------------------------------------
/manifests/knoway/values.yaml:
--------------------------------------------------------------------------------
 1 | global:
 2 |   imageRegistry: release.daocloud.io
 3 |   # If you do want to specify resources, uncomment the following
 4 |   # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
 5 |   #  requests:
 6 |   #    cpu: 300m
 7 |   #    memory: 200Mi
 8 |   #  limits:
 9 |   #    cpu: 900m
10 |   #    memory: 200Mi
11 |   resources: {}
12 |   imagePullPolicy: IfNotPresent
13 |   # -- Global Docker registry secret names as an array
14 |   imagePullSecrets: []
15 |   high_available: false
16 |   # Global node selector
17 |   # If set, this will apply to all knoway components
18 |   # Individual components can override this with their own node selector
19 |   nodeSelector: {}
20 |   # Global tolerations
21 |   # If set, this will apply to all knoway components
22 |   # Individual components can override this with their own tolerations
23 |   tolerations: []
24 |   # Global affinity
25 |   # If set, this will apply to all knoway components
26 |   # Individual components can override this with their own affinity
27 |   affinity: {}
28 | 
29 | fullNameOverride: ""
30 | 
31 | debug: false
32 | config:
33 |   auth_server:
34 |     url: ''
35 |     timeout: 3s
36 |   stats_server:
37 |     url: ''
38 |     timeout: 3s
39 |   log:
40 |     access_log:
41 |       enable: true
42 |   rate_limit:
43 |     enable: false
44 |     policies: []
45 | 
46 | gateway:
47 |   image:
48 |     registry: ''
49 |     repository: knoway/knoway-gateway
50 |     tag:
51 |   nodeSelector: {}
52 |   tolerations: []
53 |   affinity: {}
54 | 


--------------------------------------------------------------------------------
/pkg/bootkit/lifecycle.go:
--------------------------------------------------------------------------------
 1 | package bootkit
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"sync"
 6 | )
 7 | 
// Compile-time check that LifeCycleHook satisfies lifeCycler.
var _ lifeCycler = LifeCycleHook{}

// lifeCycler is the internal contract for anything that can be started and
// stopped with a context.
type lifeCycler interface {
	// Start runs the start logic and returns its error, if any.
	Start(ctx context.Context) error
	// Stop runs the stop logic and returns its error, if any.
	Stop(ctx context.Context) error
}
14 | 
15 | type LifeCycleHook struct {
16 | 	OnStart func(ctx context.Context) error
17 | 	OnStop  func(ctx context.Context) error
18 | }
19 | 
20 | func (l LifeCycleHook) Start(ctx context.Context) error {
21 | 	if l.OnStart == nil {
22 | 		return nil
23 | 	}
24 | 
25 | 	return l.OnStart(ctx)
26 | }
27 | 
28 | func (l LifeCycleHook) Stop(ctx context.Context) error {
29 | 	if l.OnStop == nil {
30 | 		return nil
31 | 	}
32 | 
33 | 	return l.OnStop(ctx)
34 | }
35 | 
// LifeCycle is the registration surface handed to components: it only allows
// appending hooks.
type LifeCycle interface {
	// Append registers hook with the lifecycle.
	Append(hook LifeCycleHook)
}
39 | 
40 | type lifeCycle struct {
41 | 	hooks []lifeCycler
42 | 
43 | 	mutex sync.Mutex
44 | }
45 | 
46 | func (l *lifeCycle) GetHooks() []lifeCycler {
47 | 	l.mutex.Lock()
48 | 	defer l.mutex.Unlock()
49 | 
50 | 	return append([]lifeCycler{}, l.hooks...) // 返回一个拷贝，避免外部修改
51 | }
52 | 
53 | func (l *lifeCycle) Append(hook LifeCycleHook) {
54 | 	l.mutex.Lock()
55 | 	defer l.mutex.Unlock()
56 | 
57 | 	l.hooks = append(l.hooks, hook)
58 | }
59 | 
60 | type EmptyLifeCycle struct{}
61 | 
62 | func (*EmptyLifeCycle) Append(LifeCycleHook) {}
63 | 
64 | func newLifeCycle() *lifeCycle {
65 | 	return &lifeCycle{
66 | 		hooks: make([]lifeCycler, 0),
67 | 	}
68 | }
69 | 
70 | func NewEmptyLifeCycle() LifeCycle {
71 | 	return &EmptyLifeCycle{}
72 | }
73 | 


--------------------------------------------------------------------------------
/pkg/bootkit/lifecycle_test.go:
--------------------------------------------------------------------------------
 1 | package bootkit
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"testing"
 6 | 
 7 | 	"github.com/stretchr/testify/assert"
 8 | 	"github.com/stretchr/testify/require"
 9 | )
10 | 
11 | func TestLifeCycleHook_Start(t *testing.T) {
12 | 	t.Parallel()
13 | 
14 | 	l := LifeCycleHook{}
15 | 
16 | 	require.NotPanics(t, func() {
17 | 		err := l.Start(context.Background())
18 | 		assert.NoError(t, err)
19 | 	})
20 | 
21 | 	l = LifeCycleHook{
22 | 		OnStart: func(ctx context.Context) error {
23 | 			return nil
24 | 		},
25 | 	}
26 | 
27 | 	require.NotPanics(t, func() {
28 | 		err := l.Start(context.Background())
29 | 		assert.NoError(t, err)
30 | 	})
31 | }
32 | 
33 | func TestLifeCycleHook_Stop(t *testing.T) {
34 | 	t.Parallel()
35 | 
36 | 	l := LifeCycleHook{}
37 | 
38 | 	require.NotPanics(t, func() {
39 | 		err := l.Stop(context.Background())
40 | 		assert.NoError(t, err)
41 | 	})
42 | 
43 | 	l = LifeCycleHook{
44 | 		OnStop: func(ctx context.Context) error {
45 | 			return nil
46 | 		},
47 | 	}
48 | 
49 | 	require.NotPanics(t, func() {
50 | 		err := l.Stop(context.Background())
51 | 		assert.NoError(t, err)
52 | 	})
53 | }
54 | 
55 | func TestLifeCycle_Append(t *testing.T) {
56 | 	t.Parallel()
57 | 
58 | 	l := newLifeCycle()
59 | 
60 | 	l.Append(LifeCycleHook{})
61 | 	l.Append(LifeCycleHook{})
62 | 
63 | 	assert.Len(t, l.hooks, 2)
64 | }
65 | 


--------------------------------------------------------------------------------
/pkg/bootkit/options.go:
--------------------------------------------------------------------------------
 1 | package bootkit
 2 | 
 3 | import "time"
 4 | 
 5 | type bootkitOptions struct {
 6 | 	startTimeout time.Duration
 7 | 	stopTimeout  time.Duration
 8 | }
 9 | 
10 | type bootkitApplyOptions struct {
11 | 	bootkit *bootkitOptions
12 | }
13 | 
14 | type Option interface {
15 | 	apply(options *bootkitApplyOptions)
16 | }
17 | 
18 | type startTimeoutOption time.Duration
19 | 
20 | func (t startTimeoutOption) apply(m *bootkitApplyOptions) {
21 | 	m.bootkit.startTimeout = time.Duration(t)
22 | }
23 | 
24 | func StartTimeout(duration time.Duration) Option {
25 | 	return startTimeoutOption(duration)
26 | }
27 | 
28 | type stopTimeoutOption time.Duration
29 | 
30 | func (t stopTimeoutOption) apply(m *bootkitApplyOptions) {
31 | 	m.bootkit.stopTimeout = time.Duration(t)
32 | }
33 | 
34 | func StopTimeout(duration time.Duration) Option {
35 | 	return stopTimeoutOption(duration)
36 | }
37 | 


--------------------------------------------------------------------------------
/pkg/bootkit/options_test.go:
--------------------------------------------------------------------------------
 1 | package bootkit
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 	"time"
 6 | 
 7 | 	"github.com/stretchr/testify/assert"
 8 | )
 9 | 
10 | func TestStartTimeout(t *testing.T) {
11 | 	t.Parallel()
12 | 
13 | 	option := StartTimeout(time.Second * 100)
14 | 	applyOptions := &bootkitApplyOptions{&bootkitOptions{}}
15 | 	option.apply(applyOptions)
16 | 
17 | 	assert.Equal(t, time.Second*100, applyOptions.bootkit.startTimeout)
18 | }
19 | 
20 | func TestStopTimeout(t *testing.T) {
21 | 	t.Parallel()
22 | 
23 | 	option := StopTimeout(time.Second * 100)
24 | 	applyOptions := &bootkitApplyOptions{&bootkitOptions{}}
25 | 	option.apply(applyOptions)
26 | 
27 | 	assert.Equal(t, time.Second*100, applyOptions.bootkit.stopTimeout)
28 | }
29 | 


--------------------------------------------------------------------------------
/pkg/clusters/filters/openai/request.go:
--------------------------------------------------------------------------------
  1 | package openai
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"encoding/json"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"net/http"
 10 | 	"net/url"
 11 | 	"strings"
 12 | 
 13 | 	"github.com/samber/lo"
 14 | 	"google.golang.org/protobuf/types/known/anypb"
 15 | 
 16 | 	v1alpha1clusters "knoway.dev/api/clusters/v1alpha1"
 17 | 	"knoway.dev/api/filters/v1alpha1"
 18 | 	"knoway.dev/pkg/bootkit"
 19 | 	clusterfilters "knoway.dev/pkg/clusters/filters"
 20 | 	"knoway.dev/pkg/object"
 21 | 	"knoway.dev/pkg/protoutils"
 22 | )
 23 | 
 24 | func NewRequestHandlerWithConfig(cfg *anypb.Any, _ bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) {
 25 | 	c, err := protoutils.FromAny(cfg, &v1alpha1.OpenAIRequestHandlerConfig{})
 26 | 	if err != nil {
 27 | 		return nil, fmt.Errorf("invalid config type %T", cfg)
 28 | 	}
 29 | 
 30 | 	return &requestHandler{
 31 | 		cfg: c,
 32 | 	}, nil
 33 | }
 34 | 
// Compile-time checks that requestHandler implements the request-side cluster
// filter interfaces.
var _ clusterfilters.ClusterFilterRequestModifier = (*requestHandler)(nil)
var _ clusterfilters.ClusterFilterUpstreamRequestMarshaller = (*requestHandler)(nil)

// requestHandler is the OpenAI request-side cluster filter. It rewrites the
// LLM request according to the cluster settings and builds the upstream HTTP
// request.
type requestHandler struct {
	clusterfilters.IsClusterFilter

	// cfg is the decoded handler configuration.
	cfg *v1alpha1.OpenAIRequestHandlerConfig
}
 43 | 
 44 | func (f *requestHandler) RequestModifier(ctx context.Context, cluster *v1alpha1clusters.Cluster, request object.LLMRequest) (object.LLMRequest, error) {
 45 | 	err := request.SetModel(cluster.GetName())
 46 | 	if err != nil {
 47 | 		return request, err
 48 | 	}
 49 | 
 50 | 	err = request.SetDefaultParams(cluster.GetUpstream().GetDefaultParams())
 51 | 	if err != nil {
 52 | 		return request, err
 53 | 	}
 54 | 
 55 | 	err = request.SetOverrideParams(cluster.GetUpstream().GetOverrideParams())
 56 | 	if err != nil {
 57 | 		return request, err
 58 | 	}
 59 | 
 60 | 	err = request.RemoveParamKeys(cluster.GetUpstream().GetRemoveParamKeys())
 61 | 	if err != nil {
 62 | 		return request, err
 63 | 	}
 64 | 
 65 | 	return request, nil
 66 | }
 67 | 
 68 | func (f *requestHandler) MarshalUpstreamRequest(ctx context.Context, cluster *v1alpha1clusters.Cluster, llmRequest object.LLMRequest, request *http.Request) (*http.Request, error) {
 69 | 	upstreamURL := cluster.GetUpstream().GetUrl()
 70 | 	upstreamURL = strings.TrimSuffix(upstreamURL, "/")
 71 | 
 72 | 	switch llmRequest.GetRequestType() {
 73 | 	case object.RequestTypeChatCompletions:
 74 | 		upstreamURL += "/chat/completions"
 75 | 	case object.RequestTypeCompletions:
 76 | 		upstreamURL += "/completions"
 77 | 	case object.RequestTypeImageGenerations:
 78 | 		upstreamURL += "/images/generations"
 79 | 	default:
 80 | 		panic("unknown request type: " + string(llmRequest.GetRequestType()))
 81 | 	}
 82 | 
 83 | 	parsedUpstreamURL, err := url.Parse(upstreamURL)
 84 | 	if err != nil {
 85 | 		return nil, err
 86 | 	}
 87 | 
 88 | 	jsonBody, err := json.Marshal(llmRequest)
 89 | 	if err != nil {
 90 | 		return nil, err
 91 | 	}
 92 | 
 93 | 	if request == nil {
 94 | 		request, err = http.NewRequestWithContext(ctx, http.MethodPost, upstreamURL, bytes.NewReader(jsonBody))
 95 | 		if err != nil {
 96 | 			return nil, err
 97 | 		}
 98 | 	} else {
 99 | 		request.URL = parsedUpstreamURL
100 | 		request.Method = http.MethodPost
101 | 		request.Body = io.NopCloser(bytes.NewReader(jsonBody))
102 | 	}
103 | 
104 | 	request.Header.Set("Content-Type", "application/json")
105 | 	// Apply headers
106 | 	if llmRequest.IsStream() { // non stream
107 | 		request.Header.Set("Accept", "text/event-stream")
108 | 		request.Header.Set("Cache-Control", "no-cache")
109 | 		request.Header.Set("Connection", "keep-alive")
110 | 	}
111 | 
112 | 	// Apply user-defined headers
113 | 	lo.ForEach(cluster.GetUpstream().GetHeaders(), func(h *v1alpha1clusters.Upstream_Header, _ int) {
114 | 		request.Header.Set(h.GetKey(), h.GetValue())
115 | 	})
116 | 
117 | 	return request, nil
118 | }
119 | 


--------------------------------------------------------------------------------
/pkg/clusters/filters/openai/response.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"bufio"
 5 | 	"bytes"
 6 | 	"context"
 7 | 	"fmt"
 8 | 	"io"
 9 | 	"net/http"
10 | 	"strings"
11 | 
12 | 	v1alpha12 "knoway.dev/api/clusters/v1alpha1"
13 | 
14 | 	"google.golang.org/protobuf/types/known/anypb"
15 | 
16 | 	"knoway.dev/api/filters/v1alpha1"
17 | 	"knoway.dev/pkg/bootkit"
18 | 	clusterfilters "knoway.dev/pkg/clusters/filters"
19 | 	"knoway.dev/pkg/object"
20 | 	"knoway.dev/pkg/protoutils"
21 | 	"knoway.dev/pkg/types/openai"
22 | )
23 | 
24 | func NewResponseHandlerWithConfig(cfg *anypb.Any, _ bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) {
25 | 	c, err := protoutils.FromAny(cfg, &v1alpha1.OpenAIResponseHandlerConfig{})
26 | 	if err != nil {
27 | 		return nil, fmt.Errorf("invalid config type %T", cfg)
28 | 	}
29 | 
30 | 	return &responseHandler{
31 | 		cfg: c,
32 | 	}, nil
33 | }
34 | 
35 | var _ clusterfilters.ClusterFilterResponseUnmarshaller = (*responseHandler)(nil)
36 | var _ clusterfilters.ClusterFilterResponseModifier = (*responseHandler)(nil)
37 | 
38 | type responseHandler struct {
39 | 	cfg *v1alpha1.OpenAIResponseHandlerConfig
40 | 	clusterfilters.ClusterFilter
41 | }
42 | 
43 | func (f *responseHandler) UnmarshalResponseBody(ctx context.Context, cluster *v1alpha12.Cluster, req object.LLMRequest, rawResponse *http.Response, reader *bufio.Reader, pre object.LLMResponse) (object.LLMResponse, error) {
44 | 	contentType := rawResponse.Header.Get("Content-Type")
45 | 
46 | 	switch req.GetRequestType() {
47 | 	case
48 | 		object.RequestTypeChatCompletions,
49 | 		object.RequestTypeCompletions:
50 | 		switch {
51 | 		case strings.HasPrefix(contentType, "application/json"):
52 | 			return openai.NewChatCompletionResponse(req, rawResponse, reader)
53 | 		case strings.HasPrefix(contentType, "text/event-stream"):
54 | 			return openai.NewChatCompletionStreamResponse(req, rawResponse, reader)
55 | 		default:
56 | 			break
57 | 		}
58 | 	case
59 | 		object.RequestTypeImageGenerations:
60 | 		switch {
61 | 		case strings.HasPrefix(contentType, "application/json"):
62 | 			return openai.NewImageGenerationsResponse(ctx, req, rawResponse, reader,
63 | 				openai.NewImageGenerationsResponseWithUsage(cluster.GetMeteringPolicy()),
64 | 			)
65 | 		default:
66 | 			break
67 | 		}
68 | 	default:
69 | 		return nil, fmt.Errorf("unsupported request type %s", req.GetRequestType())
70 | 	}
71 | 
72 | 	if rawResponse.StatusCode >= http.StatusBadRequest {
73 | 		tryReadBody := new(bytes.Buffer)
74 | 
75 | 		_, err := tryReadBody.ReadFrom(rawResponse.Body)
76 | 		if err != nil {
77 | 			return nil, fmt.Errorf("failed to read body: %w", err)
78 | 		}
79 | 
80 | 		rawResponse.Body.Close()
81 | 		rawResponse.Body = io.NopCloser(bytes.NewBuffer(tryReadBody.Bytes()))
82 | 
83 | 		return nil, fmt.Errorf("upstream returned status code %d with body %s", rawResponse.StatusCode, tryReadBody.String())
84 | 	}
85 | 
86 | 	return nil, fmt.Errorf("unsupported content type %s", contentType)
87 | }
88 | 
89 | func (f *responseHandler) ResponseModifier(ctx context.Context, cluster *v1alpha12.Cluster, request object.LLMRequest, response object.LLMResponse) (object.LLMResponse, error) {
90 | 	err := response.SetModel(cluster.GetName())
91 | 	if err != nil {
92 | 		return response, err
93 | 	}
94 | 
95 | 	return response, nil
96 | }
97 | 


--------------------------------------------------------------------------------
/pkg/clusters/interface.go:
--------------------------------------------------------------------------------
 1 | package clusters
 2 | 
 3 | import (
 4 | 	"context"
 5 | 
 6 | 	"knoway.dev/api/clusters/v1alpha1"
 7 | 	"knoway.dev/pkg/object"
 8 | )
 9 | 
// Cluster is a configured upstream that can serve LLM requests.
type Cluster interface {
	// GetClusterType returns the cluster's type.
	GetClusterType() v1alpha1.ClusterType
	// GetClusterConfig returns the cluster's configuration message.
	GetClusterConfig() *v1alpha1.Cluster
	// DoUpstreamRequest handles req against the upstream and returns the
	// response or an error.
	DoUpstreamRequest(ctx context.Context, req object.LLMRequest) (object.LLMResponse, error)
}
15 | 


--------------------------------------------------------------------------------
/pkg/clusters/manager/cluster.go:
--------------------------------------------------------------------------------
  1 | package manager
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"log/slog"
  6 | 	"sync"
  7 | 
  8 | 	"github.com/samber/lo"
  9 | 	"github.com/samber/mo"
 10 | 
 11 | 	"knoway.dev/api/clusters/v1alpha1"
 12 | 	"knoway.dev/pkg/bootkit"
 13 | 	clusters2 "knoway.dev/pkg/clusters"
 14 | 	cluster "knoway.dev/pkg/clusters/cluster"
 15 | 	"knoway.dev/pkg/metadata"
 16 | 	"knoway.dev/pkg/object"
 17 | )
 18 | 
// clusterRegister is the package-wide registry used by the package-level
// helper functions; it is created in init via InitClusterRegister.
var clusterRegister *Register
 20 | 
 21 | func HandleRequest(ctx context.Context, clusterName string, request object.LLMRequest) (object.LLMResponse, error) {
 22 | 	foundCluster, ok := clusterRegister.FindClusterByName(clusterName)
 23 | 	if !ok {
 24 | 		return nil, object.NewErrorModelNotFoundOrNotAccessible(request.GetModel())
 25 | 	}
 26 | 
 27 | 	rMeta := metadata.RequestMetadataFromCtx(ctx)
 28 | 	rMeta.SelectedCluster = mo.Some(foundCluster)
 29 | 
 30 | 	resp, err := foundCluster.DoUpstreamRequest(ctx, request)
 31 | 	if err != nil {
 32 | 		// Cluster will ensure that error will always be LLMError
 33 | 		return resp, err
 34 | 	}
 35 | 	if resp.GetError() != nil {
 36 | 		return resp, resp.GetError()
 37 | 	}
 38 | 
 39 | 	return resp, err
 40 | }
 41 | 
// RemoveCluster deletes the cluster with cluster's name from the package
// registry.
func RemoveCluster(cluster *v1alpha1.Cluster) {
	clusterRegister.DeleteCluster(cluster.GetName())
}

// UpsertAndRegisterCluster builds a cluster from the given config and stores
// it in the package registry; see Register.UpsertAndRegisterCluster.
func UpsertAndRegisterCluster(cluster *v1alpha1.Cluster, lifecycle bootkit.LifeCycle) error {
	return clusterRegister.UpsertAndRegisterCluster(cluster, lifecycle)
}
 49 | 
 50 | func ListModels() []*v1alpha1.Cluster {
 51 | 	if clusterRegister == nil {
 52 | 		return nil
 53 | 	}
 54 | 
 55 | 	return clusterRegister.ListModels()
 56 | }
 57 | 
 58 | func init() { //nolint:gochecknoinits
 59 | 	if clusterRegister == nil {
 60 | 		InitClusterRegister()
 61 | 	}
 62 | }
 63 | 
// Register stores clusters by name together with the config each one was
// built from.
type Register struct {
	// clusters maps a cluster name to the runnable cluster.
	clusters map[string]clusters2.Cluster
	// clustersDetails maps a cluster name to the config it was created from.
	clustersDetails map[string]*v1alpha1.Cluster
	// clustersLock guards both maps.
	clustersLock sync.RWMutex
}

// RegisterOptions carries registry options.
// NOTE(review): no use of this type is visible in this file - confirm it is
// still needed.
type RegisterOptions struct {
	DevConfig bool
}
 73 | 
 74 | func NewClusterRegister() *Register {
 75 | 	r := &Register{
 76 | 		clusters:        make(map[string]clusters2.Cluster),
 77 | 		clustersDetails: make(map[string]*v1alpha1.Cluster),
 78 | 		clustersLock:    sync.RWMutex{},
 79 | 	}
 80 | 
 81 | 	return r
 82 | }
 83 | 
 84 | func InitClusterRegister() {
 85 | 	c := NewClusterRegister()
 86 | 	clusterRegister = c
 87 | }
 88 | 
 89 | func (cr *Register) DeleteCluster(name string) {
 90 | 	cr.clustersLock.Lock()
 91 | 	defer cr.clustersLock.Unlock()
 92 | 
 93 | 	delete(cr.clusters, name)
 94 | 	delete(cr.clustersDetails, name)
 95 | 	slog.Info("remove cluster", "name", name)
 96 | }
 97 | 
 98 | func (cr *Register) FindClusterByName(name string) (clusters2.Cluster, bool) {
 99 | 	cr.clustersLock.RLock()
100 | 	defer cr.clustersLock.RUnlock()
101 | 
102 | 	c, ok := cr.clusters[name]
103 | 
104 | 	return c, ok
105 | }
106 | 
107 | func (cr *Register) UpsertAndRegisterCluster(c *v1alpha1.Cluster, lifecycle bootkit.LifeCycle) error {
108 | 	cr.clustersLock.Lock()
109 | 	defer cr.clustersLock.Unlock()
110 | 
111 | 	name := c.GetName()
112 | 
113 | 	newCluster, err := cluster.NewWithConfigs(c, lifecycle)
114 | 	if err != nil {
115 | 		return err
116 | 	}
117 | 
118 | 	cr.clustersDetails[c.GetName()] = c
119 | 	cr.clusters[name] = newCluster
120 | 
121 | 	slog.Info("register cluster", "name", name)
122 | 
123 | 	return nil
124 | }
125 | 
126 | func (cr *Register) ListModels() []*v1alpha1.Cluster {
127 | 	cr.clustersLock.RLock()
128 | 	defer cr.clustersLock.RUnlock()
129 | 
130 | 	clusters := make([]*v1alpha1.Cluster, 0, len(cr.clusters))
131 | 	for _, cluster := range cr.clustersDetails {
132 | 		clusters = append(clusters, cluster)
133 | 	}
134 | 
135 | 	return clusters
136 | }
137 | 
138 | func (cr *Register) dumpAllClusters() []*v1alpha1.Cluster {
139 | 	cr.clustersLock.RLock()
140 | 	defer cr.clustersLock.RUnlock()
141 | 
142 | 	return lo.Values(clusterRegister.clustersDetails)
143 | }
144 | 
// DebugDumpAllClusters returns the configs of all clusters in the package
// registry. Unlike ListModels it does not check for a nil registry.
func DebugDumpAllClusters() []*v1alpha1.Cluster {
	return clusterRegister.dumpAllClusters()
}
148 | 


--------------------------------------------------------------------------------
/pkg/constants/config.go:
--------------------------------------------------------------------------------
1 | package constants
2 | 
// TestClusterConfigPath is the relative path of the cluster config file
// presumably used by tests - confirm against callers.
const TestClusterConfigPath = "dist/load_config.json"

// DefaultClusterConfigPath is the relative path of the default cluster config
// file.
const DefaultClusterConfigPath = "config/default_config.json"
5 | 


--------------------------------------------------------------------------------
/pkg/constants/listener.go:
--------------------------------------------------------------------------------
1 | package constants
2 | 
3 | import "time"
4 | 
const (
	// DefaultDrainWaitTime is the default drain wait duration (30 seconds).
	// NOTE(review): presumably how long listeners wait while draining -
	// confirm against callers.
	DefaultDrainWaitTime = 30 * time.Second
)
8 | 


--------------------------------------------------------------------------------
/pkg/filters/config.go:
--------------------------------------------------------------------------------
  1 | package filters
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"net/http"
  6 | 
  7 | 	"knoway.dev/pkg/object"
  8 | 	"knoway.dev/pkg/utils"
  9 | )
 10 | 
 11 | const (
 12 | 	ListenerFilterResultTypeSucceeded = iota
 13 | 	ListenerFilterResultTypeFailed
 14 | 	ListenerFilterResultTypeSkipped
 15 | )
 16 | 
 17 | type RequestFilterResult struct {
 18 | 	// Type Succeeded, Failed, or Skipped
 19 | 	Type  int
 20 | 	Error error
 21 | }
 22 | 
 23 | func (r RequestFilterResult) IsFailed() bool {
 24 | 	return r.Type == ListenerFilterResultTypeFailed
 25 | }
 26 | 
 27 | func (r RequestFilterResult) IsSkipped() bool {
 28 | 	return r.Type == ListenerFilterResultTypeSkipped
 29 | }
 30 | 
 31 | func (r RequestFilterResult) IsSSucceeded() bool {
 32 | 	return r.Type == ListenerFilterResultTypeSucceeded
 33 | }
 34 | 
 35 | func NewOK() RequestFilterResult {
 36 | 	return RequestFilterResult{Type: ListenerFilterResultTypeSucceeded}
 37 | }
 38 | 
 39 | func NewFailed(err error) RequestFilterResult {
 40 | 	return RequestFilterResult{Type: ListenerFilterResultTypeFailed, Error: err}
 41 | }
 42 | 
// RequestFilter is the marker interface every filter implements. Its only
// method is unexported, so outside packages satisfy it by embedding
// IsRequestFilter.
type RequestFilter interface {
	isRequestFilter()
}

// Compile-time check that IsRequestFilter satisfies RequestFilter.
var _ RequestFilter = IsRequestFilter{}

// IsRequestFilter is the embeddable marker that makes a type a RequestFilter.
type IsRequestFilter struct{}

func (IsRequestFilter) isRequestFilter() {}

// OnRequestPreFilter is a filter hook that receives only the source HTTP
// request.
type OnRequestPreFilter interface {
	RequestFilter

	OnRequestPre(ctx context.Context, sourceHTTPRequest *http.Request) RequestFilterResult
}

// OnCompletionRequestFilter is a filter hook that receives a completion
// request together with the source HTTP request.
type OnCompletionRequestFilter interface {
	RequestFilter

	OnCompletionRequest(ctx context.Context, request object.LLMRequest, sourceHTTPRequest *http.Request) RequestFilterResult
}

// OnImageGenerationsRequestFilter is a filter hook that receives an image
// generations request together with the source HTTP request.
type OnImageGenerationsRequestFilter interface {
	RequestFilter

	OnImageGenerationsRequest(ctx context.Context, request object.LLMRequest, sourceHTTPRequest *http.Request) RequestFilterResult
}

// OnCompletionResponseFilter is a filter hook that receives a completion
// request and its response.
type OnCompletionResponseFilter interface {
	RequestFilter

	OnCompletionResponse(ctx context.Context, request object.LLMRequest, response object.LLMResponse) RequestFilterResult
}

// OnCompletionStreamResponseFilter is a filter hook that receives a streamed
// completion response together with one of its chunks.
type OnCompletionStreamResponseFilter interface {
	RequestFilter

	OnCompletionStreamResponse(ctx context.Context, request object.LLMRequest, response object.LLMStreamResponse, responseChunk object.LLMChunkResponse) RequestFilterResult
}

// OnImageGenerationsResponseFilter is a filter hook that receives an image
// generations request and its response.
type OnImageGenerationsResponseFilter interface {
	RequestFilter

	OnImageGenerationsResponse(ctx context.Context, request object.LLMRequest, response object.LLMResponse) RequestFilterResult
}

// OnResponsePostFilter is a filter hook that receives the HTTP request, the
// response value and the error. Unlike the other hooks it returns no result.
type OnResponsePostFilter interface {
	RequestFilter

	OnResponsePost(ctx context.Context, request *http.Request, response any, err error)
}
 94 | 
// RequestFilters is a list of filters. Each method below returns a view of the
// list typed as one hook interface, produced by utils.TypeAssertFrom.
// NOTE(review): presumably only the elements implementing that interface are
// returned - confirm against utils.TypeAssertFrom.
type RequestFilters []RequestFilter

// OnRequestPreFilters returns the list as OnRequestPreFilter values.
func (r RequestFilters) OnRequestPreFilters() []OnRequestPreFilter {
	return utils.TypeAssertFrom[RequestFilter, OnRequestPreFilter](r)
}

// OnCompletionRequestFilters returns the list as OnCompletionRequestFilter values.
func (r RequestFilters) OnCompletionRequestFilters() []OnCompletionRequestFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionRequestFilter](r)
}

// OnImageGenerationsRequestFilters returns the list as OnImageGenerationsRequestFilter values.
func (r RequestFilters) OnImageGenerationsRequestFilters() []OnImageGenerationsRequestFilter {
	return utils.TypeAssertFrom[RequestFilter, OnImageGenerationsRequestFilter](r)
}

// OnCompletionResponseFilters returns the list as OnCompletionResponseFilter values.
func (r RequestFilters) OnCompletionResponseFilters() []OnCompletionResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionResponseFilter](r)
}

// OnCompletionStreamResponseFilters returns the list as OnCompletionStreamResponseFilter values.
func (r RequestFilters) OnCompletionStreamResponseFilters() []OnCompletionStreamResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnCompletionStreamResponseFilter](r)
}

// OnImageGenerationsResponseFilters returns the list as OnImageGenerationsResponseFilter values.
func (r RequestFilters) OnImageGenerationsResponseFilters() []OnImageGenerationsResponseFilter {
	return utils.TypeAssertFrom[RequestFilter, OnImageGenerationsResponseFilter](r)
}

// OnResponsePostFilters returns the list as OnResponsePostFilter values.
func (r RequestFilters) OnResponsePostFilters() []OnResponsePostFilter {
	return utils.TypeAssertFrom[RequestFilter, OnResponsePostFilter](r)
}
124 | 


--------------------------------------------------------------------------------
/pkg/filters/ratelimit/redis.go:
--------------------------------------------------------------------------------
  1 | package ratelimit
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"log/slog"
  6 | 	"strconv"
  7 | 	"time"
  8 | )
  9 | 
 10 | //nolint:dupword
 11 | var redisRateLimitScript = `
 12 | -- KEYS[1]: rate limit key
 13 | -- ARGV[1]: limit (max tokens)
 14 | -- ARGV[2]: window in milliseconds
 15 | -- ARGV[3]: current timestamp in milliseconds
 16 | -- ARGV[4]: precision multiplier
 17 | 
 18 | local function init_bucket(limit, now)
 19 |     return {
 20 |         tokens = limit,
 21 |         last_update = now,
 22 |         limit = limit
 23 |     }
 24 | end
 25 | 
 26 | local key = KEYS[1]
 27 | local limit = tonumber(ARGV[1])
 28 | local window_ms = tonumber(ARGV[2])
 29 | local now = tonumber(ARGV[3])
 30 | local precision = tonumber(ARGV[4])
 31 | 
 32 | -- Calculate bucket parameters
 33 | local capacity = limit * precision
 34 | local fill_rate = capacity / window_ms -- tokens per millisecond
 35 | 
 36 | -- Get or create bucket atomically
 37 | local bucket = redis.call('HGETALL', key)
 38 | local state = {}
 39 | if #bucket == 0 then
 40 |     state = init_bucket(capacity, now)
 41 | else
 42 |     -- Convert array to hash
 43 |     for i = 1, #bucket, 2 do
 44 |         state[bucket[i]] = tonumber(bucket[i + 1])
 45 |     end
 46 | 
 47 |     -- Handle rate limit changes
 48 |     if state.limit ~= capacity then
 49 |         state = init_bucket(capacity, now)
 50 |     end
 51 | end
 52 | 
 53 | -- Calculate available tokens
 54 | local elapsed_ms = now - state.last_update
 55 | local new_tokens = math.min(capacity, state.tokens + (elapsed_ms * fill_rate))
 56 | 
 57 | -- Attempt to consume token
 58 | local allowed = 0
 59 | if new_tokens >= precision then
 60 |     new_tokens = new_tokens - precision
 61 |     allowed = 1
 62 | end
 63 | 
 64 | -- Update bucket state
 65 | local ttl = math.max(300000, math.ceil(window_ms * 2)) -- Set TTL to max(5min, 2x window) for safety
 66 | redis.call('HMSET', key,
 67 |     'tokens', new_tokens,
 68 |     'last_update', now,
 69 |     'limit', capacity
 70 | )
 71 | redis.call('PEXPIRE', key, ttl)
 72 | 
 73 | return allowed
 74 | `
 75 | 
 76 | func (rl *RateLimiter) checkBucketRedis(key string, window time.Duration, limit int) (bool, error) {
 77 | 	now := time.Now().UnixMilli() // 使用毫秒精度
 78 | 	windowMs := window.Milliseconds()
 79 | 
 80 | 	cmd := rl.redisClient.B().Eval().Script(redisRateLimitScript).
 81 | 		Numkeys(1).
 82 | 		Key(key).
 83 | 		Arg(
 84 | 			strconv.Itoa(limit),
 85 | 			strconv.FormatInt(windowMs, 10),
 86 | 			strconv.FormatInt(now, 10),
 87 | 			strconv.Itoa(precision),
 88 | 		).
 89 | 		Build()
 90 | 
 91 | 	result := rl.redisClient.Do(context.Background(), cmd)
 92 | 	if err := result.NonRedisError(); err != nil {
 93 | 		slog.ErrorContext(context.Background(), "redis error", append(rl.logCommonAttrs(), slog.Any("error", err))...)
 94 | 		return false, err
 95 | 	}
 96 | 
 97 | 	allowed, err := result.AsInt64()
 98 | 	if err != nil {
 99 | 		slog.ErrorContext(context.Background(), "failed to parse redis result", append(rl.logCommonAttrs(), slog.Any("error", err))...)
100 | 		return false, err
101 | 	}
102 | 
103 | 	return allowed != 0, nil
104 | }
105 | 


--------------------------------------------------------------------------------
/pkg/listener/handler.go:
--------------------------------------------------------------------------------
 1 | package listener
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | )
 6 | 
 7 | type HandlerFunc func(writer http.ResponseWriter, request *http.Request) (any, error)
 8 | 
 9 | type Middleware func(HandlerFunc) HandlerFunc
10 | 
11 | func WithMiddlewares(middlewares ...Middleware) Middleware {
12 | 	return func(next HandlerFunc) HandlerFunc {
13 | 		for i := len(middlewares) - 1; i >= 0; i-- {
14 | 			next = middlewares[i](next)
15 | 		}
16 | 
17 | 		return next
18 | 	}
19 | }
20 | 
21 | func HTTPHandlerFunc(fn HandlerFunc) http.HandlerFunc {
22 | 	return func(writer http.ResponseWriter, request *http.Request) {
23 | 		_, _ = fn(writer, request)
24 | 	}
25 | }
26 | 


--------------------------------------------------------------------------------
/pkg/listener/listener.go:
--------------------------------------------------------------------------------
 1 | package listener
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"errors"
 6 | 	"net/http"
 7 | 	"sync"
 8 | 
 9 | 	"github.com/gorilla/mux"
10 | )
11 | 
// Drainable is implemented by components that can be drained.
type Drainable interface {
	// Drain starts draining, bounded by ctx, and returns any error.
	Drain(ctx context.Context) error
	// HasDrained reports whether draining has completed.
	HasDrained() bool
}

// Listener is a drainable component that can register its routes on a router.
type Listener interface {
	Drainable

	// RegisterRoutes adds the listener's routes to mux.
	RegisterRoutes(mux *mux.Router) error
}
22 | 
23 | type Mux struct {
24 | 	*mux.Router
25 | 
26 | 	errors []error
27 | 	mutex  sync.Mutex
28 | }
29 | 
30 | func NewMux() *Mux {
31 | 	return &Mux{
32 | 		Router: mux.NewRouter(),
33 | 		errors: make([]error, 0),
34 | 	}
35 | }
36 | 
37 | func (r *Mux) Error() error {
38 | 	r.mutex.Lock()
39 | 	defer r.mutex.Unlock()
40 | 
41 | 	return errors.Join(r.errors...)
42 | }
43 | 
44 | func (r *Mux) Register(listener Listener, err error) *Mux {
45 | 	if err != nil {
46 | 		r.mutex.Lock()
47 | 		r.errors = append(r.errors, err)
48 | 		r.mutex.Unlock()
49 | 
50 | 		return r
51 | 	}
52 | 
53 | 	err = listener.RegisterRoutes(r.Router)
54 | 	if err != nil {
55 | 		r.mutex.Lock()
56 | 		r.errors = append(r.errors, err)
57 | 		r.mutex.Unlock()
58 | 	}
59 | 
60 | 	return r
61 | }
62 | 
63 | func (r *Mux) BuildServer(server *http.Server) (*http.Server, error) {
64 | 	if len(r.errors) > 0 {
65 | 		return nil, r.Error()
66 | 	}
67 | 
68 | 	server.Handler = r.Router
69 | 
70 | 	return server, nil
71 | }
72 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/chat/chat_completions.go:
--------------------------------------------------------------------------------
 1 | package chat
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"knoway.dev/pkg/metadata"
 7 | 	"knoway.dev/pkg/object"
 8 | 	"knoway.dev/pkg/types/openai"
 9 | )
10 | 
11 | func (l *OpenAIChatListener) unmarshalChatCompletionsRequestToLLMRequest(request *http.Request) (object.LLMRequest, error) {
12 | 	llmRequest, err := openai.NewChatCompletionRequest(request)
13 | 	if err != nil {
14 | 		return nil, err
15 | 	}
16 | 
17 | 	if llmRequest.GetModel() == "" {
18 | 		return nil, openai.NewErrorMissingModel()
19 | 	}
20 | 
21 | 	rMeta := metadata.RequestMetadataFromCtx(request.Context())
22 | 	rMeta.RequestModel = llmRequest.GetModel()
23 | 
24 | 	return llmRequest, nil
25 | }
26 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/chat/completions.go:
--------------------------------------------------------------------------------
 1 | package chat
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"knoway.dev/pkg/metadata"
 7 | 	"knoway.dev/pkg/object"
 8 | 	"knoway.dev/pkg/types/openai"
 9 | )
10 | 
11 | func (l *OpenAIChatListener) unmarshalCompletionsRequestToLLMRequest(request *http.Request) (object.LLMRequest, error) {
12 | 	llmRequest, err := openai.NewCompletionsRequest(request)
13 | 	if err != nil {
14 | 		return nil, err
15 | 	}
16 | 
17 | 	if llmRequest.GetModel() == "" {
18 | 		return nil, openai.NewErrorMissingModel()
19 | 	}
20 | 
21 | 	rMeta := metadata.RequestMetadataFromCtx(request.Context())
22 | 	rMeta.RequestModel = llmRequest.GetModel()
23 | 
24 | 	return llmRequest, nil
25 | }
26 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/chat/listener.go:
--------------------------------------------------------------------------------
  1 | package chat
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"sync"
  7 | 
  8 | 	"github.com/gorilla/mux"
  9 | 	"github.com/samber/lo/mutable"
 10 | 	"google.golang.org/protobuf/proto"
 11 | 
 12 | 	"knoway.dev/api/listeners/v1alpha1"
 13 | 	"knoway.dev/pkg/bootkit"
 14 | 	"knoway.dev/pkg/constants"
 15 | 	"knoway.dev/pkg/filters"
 16 | 	"knoway.dev/pkg/listener"
 17 | 	"knoway.dev/pkg/registry/config"
 18 | 	"knoway.dev/pkg/types/openai"
 19 | 	"knoway.dev/pkg/utils"
 20 | )
 21 | 
// Compile-time checks that OpenAIChatListener satisfies the listener
// interfaces.
var _ listener.Listener = (*OpenAIChatListener)(nil)
var _ listener.Drainable = (*OpenAIChatListener)(nil)

// OpenAIChatListener serves the /v1/chat/completions, /v1/completions and
// /v1/models routes registered in RegisterRoutes.
type OpenAIChatListener struct {
	// cfg is the listener configuration this instance was built from.
	cfg *v1alpha1.ChatCompletionListener
	// filters holds the request filters in the order given by cfg.
	filters filters.RequestFilters
	// reversedFilters is a reversed copy of filters.
	reversedFilters filters.RequestFilters
	// cancellable is handed to the cancellable middleware and used by Drain.
	cancellable *listener.CancellableRequestMap

	// mutex guards drained.
	mutex   sync.RWMutex
	drained bool
}
 34 | 
 35 | func NewOpenAIChatListenerConfigs(cfg proto.Message, lifecycle bootkit.LifeCycle) (listener.Listener, error) {
 36 | 	c, ok := cfg.(*v1alpha1.ChatCompletionListener)
 37 | 	if !ok {
 38 | 		return nil, fmt.Errorf("invalid config type %T", cfg)
 39 | 	}
 40 | 
 41 | 	l := &OpenAIChatListener{
 42 | 		cfg:         c,
 43 | 		cancellable: listener.NewCancellableRequestMap(),
 44 | 	}
 45 | 
 46 | 	lifecycle.Append(bootkit.LifeCycleHook{
 47 | 		OnStop: l.Drain,
 48 | 	})
 49 | 
 50 | 	for _, fc := range c.GetFilters() {
 51 | 		f, err := config.NewRequestFilterWithConfig(fc.GetName(), fc.GetConfig(), lifecycle)
 52 | 		if err != nil {
 53 | 			return nil, err
 54 | 		}
 55 | 
 56 | 		l.filters = append(l.filters, f)
 57 | 	}
 58 | 
 59 | 	l.reversedFilters = utils.Clone(l.filters)
 60 | 	mutable.Reverse(l.reversedFilters)
 61 | 
 62 | 	return l, nil
 63 | }
 64 | 
 65 | func (l *OpenAIChatListener) RegisterRoutes(mux *mux.Router) error {
 66 | 	middlewares := listener.WithMiddlewares(
 67 | 		listener.WithCancellable(l.cancellable),
 68 | 		listener.WithInitMetadata(),
 69 | 		listener.WithAccessLog(l.cfg.GetAccessLog().GetEnable()),
 70 | 		listener.WithRequestTimer(),
 71 | 		listener.WithOptions(),
 72 | 		listener.WithResponseHandler(openai.ResponseHandler()),
 73 | 		listener.WithRecoverWithError(),
 74 | 		listener.WithRejectAfterDrainedWithError(l),
 75 | 	)
 76 | 
 77 | 	mux.HandleFunc("/v1/chat/completions", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalChatCompletionsRequestToLLMRequest))))
 78 | 	mux.HandleFunc("/v1/completions", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalCompletionsRequestToLLMRequest))))
 79 | 	mux.HandleFunc("/v1/models", listener.HTTPHandlerFunc(middlewares(l.listModels)))
 80 | 
 81 | 	return nil
 82 | }
 83 | 
 84 | func (l *OpenAIChatListener) HasDrained() bool {
 85 | 	l.mutex.RLock()
 86 | 	defer l.mutex.RUnlock()
 87 | 
 88 | 	return l.drained
 89 | }
 90 | 
 91 | func (l *OpenAIChatListener) Drain(ctx context.Context) error {
 92 | 	l.mutex.Lock()
 93 | 	l.drained = true
 94 | 	l.mutex.Unlock()
 95 | 
 96 | 	l.cancellable.CancelAllAfterWithContext(ctx, constants.DefaultDrainWaitTime)
 97 | 
 98 | 	return nil
 99 | }
100 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/chat/models.go:
--------------------------------------------------------------------------------
 1 | package chat
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 	"sort"
 6 | 	"strings"
 7 | 
 8 | 	"github.com/samber/lo"
 9 | 	goopenai "github.com/sashabaranov/go-openai"
10 | 
11 | 	v1alpha4 "knoway.dev/api/clusters/v1alpha1"
12 | 	clustermanager "knoway.dev/pkg/clusters/manager"
13 | 	"knoway.dev/pkg/filters/auth"
14 | 	"knoway.dev/pkg/metadata"
15 | )
16 | 
17 | func ClustersToOpenAIModels(clusters []*v1alpha4.Cluster) []goopenai.Model {
18 | 	res := make([]goopenai.Model, 0)
19 | 	for _, c := range clusters {
20 | 		res = append(res, ClusterToOpenAIModel(c))
21 | 	}
22 | 
23 | 	return res
24 | }
25 | 
26 | func ClusterToOpenAIModel(c *v1alpha4.Cluster) goopenai.Model {
27 | 	// from https://platform.openai.com/docs/api-reference/models/object
28 | 	return goopenai.Model{
29 | 		CreatedAt: c.GetCreated(),
30 | 		ID:        c.GetName(),
31 | 		// The object type, which is always "model".
32 | 		Object:  "model",
33 | 		OwnedBy: c.GetProvider().String(),
34 | 		// todo
35 | 		Permission: nil,
36 | 		Root:       "",
37 | 		Parent:     "",
38 | 	}
39 | }
40 | 
41 | func (l *OpenAIChatListener) listModels(writer http.ResponseWriter, request *http.Request) (any, error) {
42 | 	for _, f := range l.filters.OnRequestPreFilters() {
43 | 		fResult := f.OnRequestPre(request.Context(), request)
44 | 		if fResult.IsFailed() {
45 | 			return nil, fResult.Error
46 | 		}
47 | 	}
48 | 
49 | 	clusters := clustermanager.ListModels()
50 | 
51 | 	// auth filters
52 | 	rMeta := metadata.RequestMetadataFromCtx(request.Context())
53 | 
54 | 	if rMeta.EnabledAuthFilter {
55 | 		if rMeta.AuthInfo != nil {
56 | 			clusters = lo.Filter(clusters, func(item *v1alpha4.Cluster, index int) bool {
57 | 				return auth.CanAccessModel(item.GetName(), rMeta.AuthInfo.GetAllowModels(), rMeta.AuthInfo.GetDenyModels())
58 | 			})
59 | 		}
60 | 	}
61 | 
62 | 	sort.Slice(clusters, func(i, j int) bool {
63 | 		return strings.Compare(clusters[i].GetName(), clusters[j].GetName()) < 0
64 | 	})
65 | 
66 | 	ms := ClustersToOpenAIModels(clusters)
67 | 	body := goopenai.ModelsList{
68 | 		Models: ms,
69 | 	}
70 | 
71 | 	return body, nil
72 | }
73 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/image/image_generations.go:
--------------------------------------------------------------------------------
 1 | package image
 2 | 
 3 | import (
 4 | 	"net/http"
 5 | 
 6 | 	"knoway.dev/pkg/metadata"
 7 | 	"knoway.dev/pkg/object"
 8 | 	"knoway.dev/pkg/types/openai"
 9 | )
10 | 
11 | func (l *OpenAIImageListener) unmarshalImageGenerationsRequestToImageGenerationRequest(request *http.Request) (object.LLMRequest, error) {
12 | 	llmRequest, err := openai.NewImageGenerationsRequest(request)
13 | 	if err != nil {
14 | 		return nil, err
15 | 	}
16 | 
17 | 	if llmRequest.GetModel() == "" {
18 | 		return nil, openai.NewErrorMissingModel()
19 | 	}
20 | 
21 | 	rMeta := metadata.RequestMetadataFromCtx(request.Context())
22 | 	rMeta.RequestModel = llmRequest.GetModel()
23 | 
24 | 	return llmRequest, nil
25 | }
26 | 


--------------------------------------------------------------------------------
/pkg/listener/manager/image/listener.go:
--------------------------------------------------------------------------------
 1 | package image
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"fmt"
 6 | 	"sync"
 7 | 
 8 | 	"github.com/gorilla/mux"
 9 | 	"github.com/samber/lo/mutable"
10 | 	"google.golang.org/protobuf/proto"
11 | 
12 | 	"knoway.dev/api/listeners/v1alpha1"
13 | 	"knoway.dev/pkg/bootkit"
14 | 	"knoway.dev/pkg/constants"
15 | 	"knoway.dev/pkg/filters"
16 | 	"knoway.dev/pkg/listener"
17 | 	"knoway.dev/pkg/registry/config"
18 | 	"knoway.dev/pkg/types/openai"
19 | 	"knoway.dev/pkg/utils"
20 | )
21 | 
// Compile-time checks that OpenAIImageListener satisfies the listener
// interfaces.
var _ listener.Listener = (*OpenAIImageListener)(nil)
var _ listener.Drainable = (*OpenAIImageListener)(nil)

// OpenAIImageListener serves the /v1/images/generations route registered in
// RegisterRoutes.
type OpenAIImageListener struct {
	// cfg is the listener configuration this instance was built from.
	cfg *v1alpha1.ImageListener
	// filters holds the request filters in the order given by cfg.
	filters filters.RequestFilters
	// reversedFilters is a reversed copy of filters.
	reversedFilters filters.RequestFilters
	// cancellable is handed to the cancellable middleware and used by Drain.
	cancellable *listener.CancellableRequestMap

	// mutex guards drained.
	mutex   sync.RWMutex
	drained bool
}
34 | 
35 | func NewOpenAIImageListenerConfigs(cfg proto.Message, lifecycle bootkit.LifeCycle) (listener.Listener, error) {
36 | 	c, ok := cfg.(*v1alpha1.ImageListener)
37 | 	if !ok {
38 | 		return nil, fmt.Errorf("invalid config type %T", cfg)
39 | 	}
40 | 
41 | 	l := &OpenAIImageListener{
42 | 		cfg:         c,
43 | 		cancellable: listener.NewCancellableRequestMap(),
44 | 	}
45 | 
46 | 	lifecycle.Append(bootkit.LifeCycleHook{
47 | 		OnStop: l.Drain,
48 | 	})
49 | 
50 | 	for _, fc := range c.GetFilters() {
51 | 		f, err := config.NewRequestFilterWithConfig(fc.GetName(), fc.GetConfig(), lifecycle)
52 | 		if err != nil {
53 | 			return nil, err
54 | 		}
55 | 
56 | 		l.filters = append(l.filters, f)
57 | 	}
58 | 
59 | 	l.reversedFilters = utils.Clone(l.filters)
60 | 	mutable.Reverse(l.reversedFilters)
61 | 
62 | 	return l, nil
63 | }
64 | 
65 | func (l *OpenAIImageListener) RegisterRoutes(mux *mux.Router) error {
66 | 	middlewares := listener.WithMiddlewares(
67 | 		listener.WithCancellable(l.cancellable),
68 | 		listener.WithInitMetadata(),
69 | 		listener.WithAccessLog(l.cfg.GetAccessLog().GetEnable()),
70 | 		listener.WithRequestTimer(),
71 | 		listener.WithOptions(),
72 | 		listener.WithResponseHandler(openai.ResponseHandler()),
73 | 		listener.WithRecoverWithError(),
74 | 		listener.WithRejectAfterDrainedWithError(l),
75 | 	)
76 | 
77 | 	mux.HandleFunc("/v1/images/generations", listener.HTTPHandlerFunc(middlewares(listener.CommonListenerHandler(l.filters, l.reversedFilters, l.unmarshalImageGenerationsRequestToImageGenerationRequest))))
78 | 
79 | 	return nil
80 | }
81 | 
82 | func (l *OpenAIImageListener) HasDrained() bool {
83 | 	l.mutex.RLock()
84 | 	defer l.mutex.RUnlock()
85 | 
86 | 	return l.drained
87 | }
88 | 
89 | func (l *OpenAIImageListener) Drain(ctx context.Context) error {
90 | 	l.mutex.Lock()
91 | 	l.drained = true
92 | 	l.mutex.Unlock()
93 | 
94 | 	l.cancellable.CancelAllAfterWithContext(ctx, constants.DefaultDrainWaitTime)
95 | 
96 | 	return nil
97 | }
98 | 


--------------------------------------------------------------------------------
/pkg/metadata/metadata.go:
--------------------------------------------------------------------------------
 1 | package metadata
 2 | 
 3 | import (
 4 | 	"context"
 5 | 	"net/http"
 6 | 	"time"
 7 | 
 8 | 	"github.com/samber/mo"
 9 | 
10 | 	"knoway.dev/api/clusters/v1alpha1"
11 | 	servicev1alpha1 "knoway.dev/api/service/v1alpha1"
12 | 	"knoway.dev/pkg/clusters"
13 | 	"knoway.dev/pkg/object"
14 | 	"knoway.dev/pkg/route"
15 | )
16 | 
// RequestMetadata is the per-request record stored in the request context by
// InitMetadataContext and retrieved with RequestMetadataFromCtx. The same
// pointer is handed to every caller, so writes are visible to all holders.
type RequestMetadata struct {
	// RequestModel is the model name requested by the user. It is used to
	// route to the correct cluster and corresponding model, much like
	// server_name in nginx or a virtual host in Apache.
	RequestModel string
	RequestAt    time.Time
	// ResponseModel is the model name that the user expects to receive.
	// In many scenarios this is the same as RequestModel, except for
	// auto-routed models, where RequestModel could be `auto` and the
	// Cluster (and therefore the model name) is selected based on the
	// request payload / inference difficulty.
	ResponseModel string
	RespondAt     time.Time

	// Egress related metadata
	StatusCode   int
	ErrorMessage string

	// Auth related metadata
	EnabledAuthFilter bool                                // Set in AuthFilter
	AuthInfo          *servicev1alpha1.APIKeyAuthResponse // Set in AuthFilter

	// SelectedCluster is the cluster that the request is routed to
	SelectedCluster mo.Option[clusters.Cluster]

	// Upstream related metadata
	UpstreamProvider             v1alpha1.ClusterProvider // Set in Cluster Manager
	UpstreamResponseStatusCode   int                      // Set in Cluster Manager
	UpstreamResponseHeader       mo.Option[http.Header]   // Set in Cluster Manager
	UpstreamResponseErrorMessage string                   // Set in Cluster Manager
	// UpstreamRequestModel is the model name that the gateway sends to the
	// upstream provider, generally matching how the Cluster overrides the
	// `model` parameter in the request payload.
	UpstreamRequestModel string    // Set in Cluster Manager
	UpstreamRequestAt    time.Time // Set in Cluster Manager
	// UpstreamResponseModel is the model name that the upstream provider
	// responds with. As explained for ResponseModel, when
	// UpstreamRequestModel is set to `auto` the actual model name will
	// differ from UpstreamRequestModel, because load-balancing or generic
	// model routing is done by the upstream provider.
	UpstreamResponseModel string    // Set in Cluster Manager
	UpstreamRespondAt     time.Time // Set in Cluster Manager
	// UpstreamFirstValidChunkAt is set in the Listener because, when a
	// stream of data is read and handled, the response has already been
	// made and processed by the Cluster and has left the scope of the
	// Cluster Manager. Marshalling and writing to the connection's IO
	// writer is done by the Listener, which is therefore the only actor
	// that knows when the first valid chunk of data is received.
	UpstreamFirstValidChunkAt time.Time // Set in Listener

	// Overall usage consumption
	LLMUpstreamTokensUsage mo.Option[object.LLMTokensUsage]
	LLMUpstreamImagesUsage mo.Option[object.LLMImagesUsage]

	// MatchRoute is the route selected for this request.
	MatchRoute route.Route
}
72 | 
73 | // RequestMetadataFromCtx retrieves RequestMetadata from context
74 | // Note: The returned pointer allows direct access and modification of the underlying RequestMetadata
75 | // Be careful when modifying the properties as they are shared across the request context
76 | func RequestMetadataFromCtx(ctx context.Context) *RequestMetadata {
77 | 	props, pok := ctx.Value(metadataKey{}).(*metadata)
78 | 	if !pok {
79 | 		return nil
80 | 	}
81 | 
82 | 	return props.request
83 | }
84 | 
85 | func InitMetadataContext(request *http.Request) context.Context {
86 | 	return context.WithValue(request.Context(), metadataKey{}, &metadata{
87 | 		request: &RequestMetadata{},
88 | 	})
89 | }
90 | 
// metadataKey is the unexported context key under which the metadata holder
// is stored.
type metadataKey struct{}

// metadata is the value stored in the context; it holds the per-request
// RequestMetadata.
type metadata struct {
	request *RequestMetadata
}
96 | 


--------------------------------------------------------------------------------
/pkg/object/completion.go:
--------------------------------------------------------------------------------
  1 | package object
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"encoding/json"
  6 | 	"net/http"
  7 | 
  8 | 	structpb "github.com/golang/protobuf/ptypes/struct"
  9 | 
 10 | 	"knoway.dev/pkg/types/sse"
 11 | )
 12 | 
// RequestType names the kind of API request, as returned by
// LLMRequest.GetRequestType.
type RequestType string

// Known request types.
const (
	RequestTypeChatCompletions  RequestType = "chat_completions"
	RequestTypeCompletions      RequestType = "completions"
	RequestTypeImageGenerations RequestType = "image_generations"
)
 20 | 
// LLMRequest is the gateway's view of an incoming request: it exposes the
// requested model, parameter manipulation, the request type and the
// underlying HTTP request.
type LLMRequest interface {
	// IsStream reports whether the request is a streaming request.
	IsStream() bool
	// GetModel returns the model name carried by the request.
	GetModel() string
	// SetModel replaces the model name carried by the request.
	SetModel(modelName string) error

	// Parameter manipulation on the request payload.
	SetOverrideParams(params map[string]*structpb.Value) error
	SetDefaultParams(params map[string]*structpb.Value) error
	RemoveParamKeys(keys []string) error

	// GetRequestType returns the kind of API request.
	GetRequestType() RequestType
	// GetRawRequest returns the underlying HTTP request.
	GetRawRequest() *http.Request
}
 33 | 
// LLMResponse is a JSON-marshalable response that exposes its request ID,
// usage, error and model name.
type LLMResponse interface {
	json.Marshaler

	// IsStream reports whether the response is a streaming response.
	IsStream() bool
	GetRequestID() string
	GetUsage() LLMUsage
	GetError() LLMError

	// GetModel and SetModel read and replace the model name on the response.
	GetModel() string
	SetModel(modelName string) error
}
 45 | 
 46 | func IsLLMResponse(r any) bool {
 47 | 	_, ok := r.(LLMResponse)
 48 | 	return ok
 49 | }
 50 | 
// LLMStreamResponse is an LLMResponse that is consumed chunk by chunk.
type LLMStreamResponse interface {
	LLMResponse

	// IsEOF reports whether the end of the stream has been reached.
	IsEOF() bool
	// NextChunk returns the next chunk of the stream.
	NextChunk() (LLMChunkResponse, error)
	// WaitUntilEOF returns a channel that yields an LLMStreamResponse.
	// NOTE(review): presumably delivered once the stream has ended — confirm
	// against the implementations.
	WaitUntilEOF() <-chan LLMStreamResponse
	// OnChunk registers cb as a chunk callback.
	OnChunk(cb func(ctx context.Context, stream LLMStreamResponse, chunk LLMChunkResponse))
}
 59 | 
 60 | func IsLLMStreamResponse(r any) bool {
 61 | 	_, ok := r.(LLMStreamResponse)
 62 | 	if ok {
 63 | 		return true
 64 | 	}
 65 | 
 66 | 	llmResp, ok := r.(LLMStreamResponse)
 67 | 
 68 | 	return ok && llmResp.IsStream()
 69 | }
 70 | 
// LLMChunkResponse is a single JSON-marshalable chunk of a streamed response.
type LLMChunkResponse interface {
	json.Marshaler

	// Predicates describing the chunk.
	IsFirst() bool
	IsEmpty() bool
	IsDone() bool
	IsUsage() bool
	// GetResponse returns the stream response associated with this chunk.
	GetResponse() LLMStreamResponse

	// GetModel and SetModel read and replace the model name on the chunk.
	GetModel() string
	SetModel(modelName string) error
	GetUsage() LLMUsage

	// ToServerSentEvent converts the chunk into a server-sent event.
	ToServerSentEvent() (*sse.Event, error)
}
 86 | 
// LLMUsage is a marker interface for usage records. Its only method is
// unexported, so types outside this package satisfy it by embedding a type
// from this package that provides the method.
type LLMUsage interface {
	isLLMUsage()
}
 90 | 
// LLMTokensUsage is an LLMUsage that reports token counts.
type LLMTokensUsage interface {
	LLMUsage

	// Token counters: total, completion and prompt.
	GetTotalTokens() uint64
	GetCompletionTokens() uint64
	GetPromptTokens() uint64
}
 98 | 
 99 | func AsLLMTokensUsage(u LLMUsage) (LLMTokensUsage, bool) {
100 | 	t, ok := u.(LLMTokensUsage)
101 | 	return t, ok
102 | }
103 | 
// Compile-time check that *IsLLMUsage satisfies LLMUsage.
var _ LLMUsage = (*IsLLMUsage)(nil)

// IsLLMUsage is an empty struct that provides the unexported marker method
// required by LLMUsage.
type IsLLMUsage struct{}

// isLLMUsage is the marker method; it does nothing.
func (IsLLMUsage) isLLMUsage() {}
109 | 


--------------------------------------------------------------------------------
/pkg/object/error.go:
--------------------------------------------------------------------------------
 1 | package object
 2 | 
 3 | import "encoding/json"
 4 | 
// LLMError is an error that can be marshalled to and from JSON and exposes a
// code, a message and a status.
type LLMError interface {
	json.Marshaler
	json.Unmarshaler
	error

	GetCode() string
	GetMessage() string
	// GetStatus returns the status as an int.
	// NOTE(review): presumably the HTTP status code — confirm with implementers.
	GetStatus() int
}
14 | 
15 | func IsLLMError(err error) bool {
16 | 	// Assert with interface, cannot use errors.As
17 | 	_, ok := err.(LLMError) //nolint:errorlint
18 | 	return ok
19 | }
20 | 
21 | func AsLLMError(err error) LLMError {
22 | 	if IsLLMError(err) {
23 | 		llmError, _ := err.(LLMError) //nolint:errorlint
24 | 		return llmError
25 | 	}
26 | 
27 | 	return nil
28 | }
29 | 


--------------------------------------------------------------------------------
/pkg/object/images.go:
--------------------------------------------------------------------------------
 1 | package object
 2 | 
// ImageGenerationsUsageImage describes one image in an image usage record:
// its width, height, style and quality.
type ImageGenerationsUsageImage interface {
	GetWidth() uint64
	GetHeight() uint64
	GetStyle() string
	GetQuality() string
}
 9 | 
// LLMImagesUsage is an LLMUsage that reports output images.
type LLMImagesUsage interface {
	LLMUsage

	// GetOutputImages returns the output images covered by this usage record.
	GetOutputImages() []ImageGenerationsUsageImage
}
15 | 
16 | func AsLLMImagesUsage(u LLMUsage) (LLMImagesUsage, bool) {
17 | 	t, ok := u.(LLMImagesUsage)
18 | 	return t, ok
19 | }
20 | 


--------------------------------------------------------------------------------
/pkg/observation/attributes.go:
--------------------------------------------------------------------------------
 1 | package observation
 2 | 
 3 | import (
 4 | 	"strings"
 5 | 
 6 | 	"go.opentelemetry.io/otel/attribute"
 7 | )
 8 | 
// AttributeKey is a generic, dot-separated key that can be used both as a
// metrics label (see AsLabelKey) and as a tracing attribute (see AsAttribute).
type AttributeKey string
11 | 
12 | // AsAttribute converts the AttributeKey to an OpenTelemetry attribute.Key.
13 | func (a AttributeKey) AsAttribute() attribute.Key {
14 | 	return attribute.Key(a)
15 | }
16 | 
17 | // AsLabelKey converts the AttributeKey to an OpenTelemetry label key.
18 | func (a AttributeKey) AsLabelKey() string {
19 | 	return formatLabel(string(a))
20 | }
21 | 
// formatLabel replaces every "." in label with "_".
func formatLabel(label string) string {
	segments := strings.Split(label, ".")

	return strings.Join(segments, "_")
}
25 | 
// Attribute keys shared by metrics and tracing.
var (
	// Request attributes.
	LLMRequestType    = AttributeKey("llm.request.type")
	LLMRequestStream  = AttributeKey("llm.request.stream")
	LLMRequestModel   = AttributeKey("llm.request.model")
	LLMRequestHeaders = AttributeKey("llm.request.headers")

	// Response attributes.
	LLMResponseModel        = AttributeKey("llm.response.model")
	LLMResponseCode         = AttributeKey("llm.response.code")
	LLMResponseErrorMessage = AttributeKey("llm.response.error_message")
	LLMResponseHeaders      = AttributeKey("llm.response.headers")
	LLMResponseDuration     = AttributeKey("llm.response.duration")

	// Token usage attributes.
	LLMTokenType             = AttributeKey("llm.usage.token_type")
	LLMUsageTotalTokens      = AttributeKey("llm.usage.total_tokens")
	LLMUsageCompletionTokens = AttributeKey("llm.usage.completion_tokens")
	LLMUsagePromptTokens     = AttributeKey("llm.usage.prompt_tokens")

	// Authentication attributes.
	KnowayAuthInfoAPIKey = AttributeKey("knoway.auth.apikey")
	KnowayAuthInfoUser   = AttributeKey("knoway.auth.user")
)
46 | 
// LLMTokenTypeEnum enumerates token types.
// NOTE(review): presumably the values recorded under LLMTokenType — confirm
// against the metric call sites.
type LLMTokenTypeEnum string

// Known token types.
const (
	PromptTokenType     LLMTokenTypeEnum = "prompt"
	CompletionTokenType LLMTokenTypeEnum = "completion"
)
53 | 


--------------------------------------------------------------------------------
/pkg/protoutils/proto.go:
--------------------------------------------------------------------------------
 1 | package protoutils
 2 | 
 3 | import (
 4 | 	"reflect"
 5 | 
 6 | 	"google.golang.org/protobuf/proto"
 7 | 	"google.golang.org/protobuf/types/known/anypb"
 8 | )
 9 | 
10 | func TypeURLOrDie(obj proto.Message) string {
11 | 	a, err := anypb.New(obj)
12 | 	if err != nil {
13 | 		panic(err)
14 | 	}
15 | 
16 | 	return a.GetTypeUrl()
17 | }
18 | 
19 | func FromAny[T proto.Message](a *anypb.Any, prototype T) (T, error) {
20 | 	newObj, _ := reflect.New(reflect.TypeOf(prototype).Elem()).Interface().(T)
21 | 	if err := a.UnmarshalTo(newObj); err != nil {
22 | 		return newObj, err
23 | 	}
24 | 
25 | 	return newObj, nil
26 | }
27 | 


--------------------------------------------------------------------------------
/pkg/redis/client.go:
--------------------------------------------------------------------------------
 1 | package redis
 2 | 
 3 | import (
 4 | 	"github.com/redis/rueidis"
 5 | )
 6 | 
 7 | func NewRedisClient(url string) (rueidis.Client, error) {
 8 | 	redisClient, err := rueidis.NewClient(rueidis.MustParseURL(url))
 9 | 	if err != nil {
10 | 		return nil, err
11 | 	}
12 | 
13 | 	return redisClient, nil
14 | }
15 | 


--------------------------------------------------------------------------------
/pkg/registry/config/registry.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 
 6 | 	"google.golang.org/protobuf/types/known/anypb"
 7 | 
 8 | 	filtersv1alpha1 "knoway.dev/api/filters/v1alpha1"
 9 | 	"knoway.dev/pkg/bootkit"
10 | 	clusterfilters "knoway.dev/pkg/clusters/filters"
11 | 	"knoway.dev/pkg/clusters/filters/openai"
12 | 	"knoway.dev/pkg/filters"
13 | 	"knoway.dev/pkg/filters/auth"
14 | 	"knoway.dev/pkg/filters/ratelimit"
15 | 	"knoway.dev/pkg/filters/usage"
16 | 	"knoway.dev/pkg/protoutils"
17 | )
18 | 
// Filter constructor registries, keyed by the protobuf type URL of the
// filter's configuration message. Both maps are populated in init.
var (
	// requestFilters holds constructors for listener request filters.
	requestFilters = map[string]func(cfg *anypb.Any, lifecycle bootkit.LifeCycle) (filters.RequestFilter, error){}

	// clustersFilters holds constructors for cluster filters.
	clustersFilters = map[string]func(cfg *anypb.Any, lifecycle bootkit.LifeCycle) (clusterfilters.ClusterFilter, error){}
)
24 | 
25 | func ClusterDefaultFilters(lifecycle bootkit.LifeCycle) []clusterfilters.ClusterFilter {
26 | 	res := make([]clusterfilters.ClusterFilter, 0)
27 | 
28 | 	pb, _ := anypb.New(&filtersv1alpha1.OpenAIRequestHandlerConfig{})
29 | 	reqMar, _ := NewClusterFilterWithConfig("global", pb, lifecycle)
30 | 	res = append(res, reqMar)
31 | 
32 | 	responsePb, _ := anypb.New(&filtersv1alpha1.OpenAIResponseHandlerConfig{})
33 | 	respMar, _ := NewClusterFilterWithConfig("global", responsePb, lifecycle)
34 | 	res = append(res, respMar)
35 | 
36 | 	return res
37 | }
38 | 
39 | func init() {
40 | 	requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.APIKeyAuthConfig{})] = auth.NewWithConfig
41 | 	requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.RateLimitConfig{})] = ratelimit.NewWithConfig
42 | 	requestFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.UsageStatsConfig{})] = usage.NewWithConfig
43 | 
44 | 	// internal base Filters
45 | 	clustersFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.OpenAIRequestHandlerConfig{})] = openai.NewRequestHandlerWithConfig
46 | 	clustersFilters[protoutils.TypeURLOrDie(&filtersv1alpha1.OpenAIResponseHandlerConfig{})] = openai.NewResponseHandlerWithConfig
47 | }
48 | 
49 | func NewRequestFilterWithConfig(name string, cfg *anypb.Any, lifecycle bootkit.LifeCycle) (filters.RequestFilter, error) {
50 | 	if f, ok := requestFilters[cfg.GetTypeUrl()]; ok {
51 | 		return f(cfg, lifecycle)
52 | 	}
53 | 
54 | 	return nil, fmt.Errorf("unknown listener filter %q, %s", name, cfg.GetTypeUrl())
55 | }
56 | 
57 | func NewClusterFilterWithConfig(name string, cfg *anypb.Any, lifecycle bootkit.LifeCycle) (clusterfilters.ClusterFilter, error) {
58 | 	if f, ok := clustersFilters[cfg.GetTypeUrl()]; ok {
59 | 		return f(cfg, lifecycle)
60 | 	}
61 | 
62 | 	return nil, fmt.Errorf("unknown cluster filter %q, %s", name, cfg.GetTypeUrl())
63 | }
64 | 
65 | // NewRequestFiltersKeys returns the keys of the requestFilters map
66 | func NewRequestFiltersKeys() []string {
67 | 	keys := make([]string, 0, len(requestFilters))
68 | 	for k := range requestFilters {
69 | 		keys = append(keys, k)
70 | 	}
71 | 
72 | 	return keys
73 | }
74 | 
75 | // NewClustersFiltersKeys returns the keys of the clustersFilters map
76 | func NewClustersFiltersKeys() []string {
77 | 	keys := make([]string, 0, len(clustersFilters))
78 | 	for k := range clustersFilters {
79 | 		keys = append(keys, k)
80 | 	}
81 | 
82 | 	return keys
83 | }
84 | 


--------------------------------------------------------------------------------
/pkg/registry/config/registry_test.go:
--------------------------------------------------------------------------------
 1 | package config
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/stretchr/testify/assert"
 7 | 	"github.com/stretchr/testify/require"
 8 | )
 9 | 
10 | func TestNewRequestFiltersKeys(t *testing.T) {
11 | 	checkKeys := func(expectedKeys []string, actualKeys []string) {
12 | 		require.NotEmpty(t, actualKeys)
13 | 
14 | 		for _, expectedKey := range expectedKeys {
15 | 			found := false
16 | 
17 | 			for _, key := range actualKeys {
18 | 				if key == expectedKey {
19 | 					found = true
20 | 					break
21 | 				}
22 | 			}
23 | 
24 | 			assert.True(t, found)
25 | 		}
26 | 	}
27 | 
28 | 	expectedRequestFiltersKeys := []string{
29 | 		"type.googleapis.com/knoway.filters.v1alpha1.APIKeyAuthConfig",
30 | 		"type.googleapis.com/knoway.filters.v1alpha1.UsageStatsConfig",
31 | 	}
32 | 	keys := NewRequestFiltersKeys()
33 | 	checkKeys(expectedRequestFiltersKeys, keys)
34 | 
35 | 	expectedClustersFiltersKeys := []string{
36 | 		"type.googleapis.com/knoway.filters.v1alpha1.OpenAIRequestHandlerConfig",
37 | 		"type.googleapis.com/knoway.filters.v1alpha1.OpenAIResponseHandlerConfig",
38 | 	}
39 | 	cKeys := NewClustersFiltersKeys()
40 | 	checkKeys(expectedClustersFiltersKeys, cKeys)
41 | }
42 | 


--------------------------------------------------------------------------------
/pkg/route/manager/manager.go:
--------------------------------------------------------------------------------
  1 | package manager
  2 | 
import (
	"context"
	"log/slog"
	"sort"
	"sync"

	"knoway.dev/pkg/bootkit"
	"knoway.dev/pkg/metadata"
	"knoway.dev/pkg/object"

	"knoway.dev/api/route/v1alpha1"
	"knoway.dev/pkg/route"
	rroute "knoway.dev/pkg/route/route"

	"github.com/samber/lo"
)
 18 | 
// Package-level route state. All of it is guarded by routeLock.
var (
	// matchRouteRegistry holds match routes by name; on a name collision these
	// take precedence over routeRegistry entries (see mergeRoutes).
	matchRouteRegistry = make(map[string]route.Route)
	// routeRegistry holds base routes by name.
	routeRegistry = make(map[string]route.Route)

	// routes is the merged view of both registries that MatchRoute iterates.
	routes    = make([]route.Route, 0)
	routeLock sync.RWMutex
)
 26 | 
 27 | func InitDirectModelRoute(modelName string) *v1alpha1.Route {
 28 | 	return &v1alpha1.Route{
 29 | 		Name: modelName,
 30 | 		Matches: []*v1alpha1.Match{
 31 | 			{
 32 | 				Model: &v1alpha1.StringMatch{
 33 | 					Match: &v1alpha1.StringMatch_Exact{
 34 | 						Exact: modelName,
 35 | 					},
 36 | 				},
 37 | 			},
 38 | 		},
 39 | 		Targets: []*v1alpha1.RouteTarget{
 40 | 			{
 41 | 				Destination: &v1alpha1.RouteDestination{
 42 | 					Cluster: modelName,
 43 | 				},
 44 | 			},
 45 | 		},
 46 | 		Filters: nil, // todo future
 47 | 	}
 48 | }
 49 | 
 50 | func RegisterMatchRouteWithConfig(cfg *v1alpha1.Route, lifecycle bootkit.LifeCycle) error {
 51 | 	routeLock.Lock()
 52 | 	defer routeLock.Unlock()
 53 | 
 54 | 	r, err := rroute.NewWithConfig(cfg, lifecycle)
 55 | 	if err != nil {
 56 | 		return err
 57 | 	}
 58 | 
 59 | 	matchRouteRegistry[cfg.GetName()] = r
 60 | 	routes = mergeRoutes()
 61 | 
 62 | 	slog.Info("register match route", "name", cfg.GetName())
 63 | 
 64 | 	return nil
 65 | }
 66 | 
 67 | func RemoveMatchRoute(rName string) {
 68 | 	routeLock.Lock()
 69 | 	defer routeLock.Unlock()
 70 | 
 71 | 	delete(matchRouteRegistry, rName)
 72 | 	routes = mergeRoutes()
 73 | 
 74 | 	slog.Info("remove match route", "name", rName)
 75 | }
 76 | 
 77 | func RegisterBaseRouteWithConfig(cfg *v1alpha1.Route, lifecycle bootkit.LifeCycle) error {
 78 | 	routeLock.Lock()
 79 | 	defer routeLock.Unlock()
 80 | 
 81 | 	r, err := rroute.NewWithConfig(cfg, lifecycle)
 82 | 	if err != nil {
 83 | 		return err
 84 | 	}
 85 | 
 86 | 	routeRegistry[cfg.GetName()] = r
 87 | 
 88 | 	if _, exists := matchRouteRegistry[cfg.GetName()]; exists {
 89 | 		slog.Info("route exists in matchRouteRegistry, skipping base route registration", "name", cfg.GetName())
 90 | 		return nil
 91 | 	}
 92 | 
 93 | 	routes = mergeRoutes()
 94 | 
 95 | 	slog.Info("register base route", "name", cfg.GetName())
 96 | 
 97 | 	return nil
 98 | }
 99 | 
100 | func RemoveBaseRoute(rName string) {
101 | 	routeLock.Lock()
102 | 	defer routeLock.Unlock()
103 | 
104 | 	delete(routeRegistry, rName)
105 | 	routes = mergeRoutes()
106 | 
107 | 	slog.Info("remove base route", "name", rName)
108 | }
109 | 
110 | func mergeRoutes() []route.Route {
111 | 	uniqueRoutes := make(map[string]route.Route)
112 | 
113 | 	for k, v := range matchRouteRegistry {
114 | 		uniqueRoutes[k] = v
115 | 	}
116 | 
117 | 	for k, v := range routeRegistry {
118 | 		if _, exists := uniqueRoutes[k]; !exists {
119 | 			uniqueRoutes[k] = v
120 | 		}
121 | 	}
122 | 
123 | 	return lo.Values(uniqueRoutes)
124 | }
125 | 
126 | func MatchRoute(ctx context.Context, request object.LLMRequest) route.Route {
127 | 	routeLock.RLock()
128 | 	defer routeLock.RUnlock()
129 | 
130 | 	for _, r := range routes {
131 | 		if r.Match(ctx, request) {
132 | 			return r
133 | 		}
134 | 	}
135 | 
136 | 	return nil
137 | }
138 | 
139 | func HandleRequest(ctx context.Context, llmRequest object.LLMRequest) (object.LLMResponse, error) {
140 | 	route := MatchRoute(ctx, llmRequest)
141 | 	if route == nil {
142 | 		return nil, object.NewErrorModelNotFoundOrNotAccessible(llmRequest.GetModel())
143 | 	}
144 | 
145 | 	rMeta := metadata.RequestMetadataFromCtx(ctx)
146 | 	rMeta.MatchRoute = route
147 | 
148 | 	return route.HandleRequest(ctx, llmRequest)
149 | }
150 | 
151 | func DebugDumpAllRoutes() []*v1alpha1.Route {
152 | 	routeLock.Lock()
153 | 	defer routeLock.Unlock()
154 | 
155 | 	return lo.Map(routes, func(r route.Route, _ int) *v1alpha1.Route {
156 | 		return r.GetRouteConfig()
157 | 	})
158 | }
159 | 


--------------------------------------------------------------------------------
/pkg/route/route.go:
--------------------------------------------------------------------------------
 1 | package route
 2 | 
 3 | import (
 4 | 	"context"
 5 | 
 6 | 	routev1alpha1 "knoway.dev/api/route/v1alpha1"
 7 | 	"knoway.dev/pkg/object"
 8 | )
 9 | 
// Route is a single routing target for LLM requests: it decides whether it
// applies to a request and, if so, handles it.
type Route interface {
	// Match reports whether this route applies to the given request.
	Match(ctx context.Context, request object.LLMRequest) bool
	// HandleRequest processes the request and returns the response, or an
	// error when the request could not be handled.
	HandleRequest(ctx context.Context, request object.LLMRequest) (object.LLMResponse, error)

	// GetRouteConfig returns the route configuration associated with this route.
	GetRouteConfig() *routev1alpha1.Route
}
19 | 


--------------------------------------------------------------------------------
/pkg/types/openai/chat_completions_request_test.go:
--------------------------------------------------------------------------------
  1 | package openai
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"context"
  6 | 	"encoding/json"
  7 | 	"net/http"
  8 | 	"testing"
  9 | 
 10 | 	"github.com/samber/lo"
 11 | 	"github.com/stretchr/testify/assert"
 12 | 	"github.com/stretchr/testify/require"
 13 | 	"google.golang.org/protobuf/types/known/structpb"
 14 | )
 15 | 
 16 | func TestSetModel(t *testing.T) {
 17 | 	httpRequest, err := http.NewRequestWithContext(context.Background(), http.MethodPost, "http://example.com", bytes.NewBufferString(`
 18 | {
 19 |     "model": "some",
 20 |     "messages": [
 21 |         {
 22 |             "role": "user",
 23 |             "content": "hi"
 24 |         }
 25 |     ]
 26 | }
 27 | `))
 28 | 	require.NoError(t, err)
 29 | 
 30 | 	request, err := NewChatCompletionRequest(httpRequest)
 31 | 	require.NoError(t, err)
 32 | 
 33 | 	newModel := lo.RandomString(10, lo.LettersCharset)
 34 | 
 35 | 	err = request.SetModel(newModel)
 36 | 	require.NoError(t, err)
 37 | 	assert.Equal(t, newModel, request.GetModel())
 38 | 
 39 | 	// Verify the body buffer has been updated
 40 | 	var body map[string]any
 41 | 
 42 | 	err = json.Unmarshal(lo.Must(json.Marshal(request)), &body)
 43 | 	require.NoError(t, err)
 44 | 	assert.Equal(t, newModel, body["model"])
 45 | 
 46 | 	messages := []map[string]any{
 47 | 		{
 48 | 			"role":    "user",
 49 | 			"content": "hi",
 50 | 		},
 51 | 	}
 52 | 
 53 | 	newMessages, ok := body["messages"].([]interface{})
 54 | 	require.True(t, ok)
 55 | 	assert.Equal(t, len(messages), len(newMessages))
 56 | 
 57 | 	for i, msg := range messages {
 58 | 		newMessageMap, ok := newMessages[i].(map[string]interface{})
 59 | 		require.True(t, ok)
 60 | 
 61 | 		assert.Equal(t, msg["role"], newMessageMap["role"])
 62 | 		assert.Equal(t, msg["content"], newMessageMap["content"])
 63 | 	}
 64 | }
 65 | 
 66 | func TestSetDefaultParams(t *testing.T) {
 67 | 	body := []byte(`{
 68 | 		"model": "gpt-4",
 69 | 		"stream": false
 70 | 	}`)
 71 | 
 72 | 	req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body))
 73 | 	require.NoError(t, err)
 74 | 
 75 | 	chatReq, err := NewChatCompletionRequest(req)
 76 | 	require.NoError(t, err)
 77 | 
 78 | 	params := map[string]*structpb.Value{
 79 | 		"model":       structpb.NewStringValue("openai/gpt-4"),
 80 | 		"stream":      structpb.NewBoolValue(true),
 81 | 		"temperature": structpb.NewNumberValue(0.7),
 82 | 		"max_tokens":  structpb.NewNumberValue(100),
 83 | 	}
 84 | 
 85 | 	err = chatReq.SetDefaultParams(params)
 86 | 	require.NoError(t, err)
 87 | 
 88 | 	assert.Equal(t, false, chatReq.bodyParsed["stream"])
 89 | 	assert.Equal(t, "gpt-4", chatReq.bodyParsed["model"])
 90 | 	assert.InDelta(t, 0.7, chatReq.bodyParsed["temperature"], 0.0001)
 91 | 	assert.InDelta(t, 100.0, chatReq.bodyParsed["max_tokens"], 0.0001)
 92 | }
 93 | 
 94 | func TestSetOverrideParams(t *testing.T) {
 95 | 	body := []byte(`{
 96 | 		"model": "gpt-4",
 97 | 		"stream": false,
 98 | 		"temperature": 0.5,
 99 | 		"max_tokens": 200
100 | 	}`)
101 | 
102 | 	req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body))
103 | 	require.NoError(t, err)
104 | 
105 | 	chatReq, err := NewChatCompletionRequest(req)
106 | 	require.NoError(t, err)
107 | 
108 | 	params := map[string]*structpb.Value{
109 | 		"model":       structpb.NewStringValue("openai/gpt-4"),
110 | 		"stream":      structpb.NewBoolValue(true),
111 | 		"temperature": structpb.NewNumberValue(0.7),
112 | 		"max_tokens":  structpb.NewNumberValue(100),
113 | 		"stream_options": structpb.NewStructValue(&structpb.Struct{
114 | 			Fields: map[string]*structpb.Value{
115 | 				"include_usage": structpb.NewBoolValue(true),
116 | 			},
117 | 		}),
118 | 	}
119 | 
120 | 	err = chatReq.SetOverrideParams(params)
121 | 	require.NoError(t, err)
122 | 
123 | 	assert.Equal(t, "openai/gpt-4", chatReq.bodyParsed["model"])
124 | 	assert.InDelta(t, 0.7, chatReq.bodyParsed["temperature"], 0.0001)
125 | 	assert.InDelta(t, 100.0, chatReq.bodyParsed["max_tokens"], 0.0001)
126 | 
127 | 	assert.Equal(t, true, chatReq.bodyParsed["stream"])
128 | 	assert.Equal(t, map[string]any{
129 | 		"include_usage": true,
130 | 	}, chatReq.bodyParsed["stream_options"])
131 | }
132 | 


--------------------------------------------------------------------------------
/pkg/types/openai/chat_completions_response.go:
--------------------------------------------------------------------------------
  1 | package openai
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"bytes"
  6 | 	"encoding/json"
  7 | 	"fmt"
  8 | 	"net/http"
  9 | 
 10 | 	"knoway.dev/pkg/object"
 11 | 	"knoway.dev/pkg/utils"
 12 | )
 13 | 
 14 | var _ object.LLMResponse = (*ChatCompletionsResponse)(nil)
 15 | 
// ChatCompletionsResponse is a non-streaming chat completions response read
// from an upstream HTTP response.
//
// The exported fields are extracted from the response by processBytes.
// MarshalJSON on the pointer type returns the stored body bytes verbatim, so
// the json tags below are not what is emitted when a *ChatCompletionsResponse
// is marshalled.
type ChatCompletionsResponse struct {
	Status int                   `json:"status"`          // HTTP status code of the upstream response.
	Model  string                `json:"model"`           // Value of "model" in the response body.
	Usage  *ChatCompletionsUsage `json:"usage,omitempty"` // Parsed "usage" object; nil when the body has none.
	Error  *ErrorResponse        `json:"error,omitempty"` // Set when an error response could be extracted from the body.
	Stream bool                  `json:"stream"`          // Not assigned by the constructor or processBytes.

	request          object.LLMRequest // Request this response belongs to.
	responseBody     json.RawMessage   // Body bytes; returned as-is by MarshalJSON.
	bodyParsed       map[string]any    // responseBody decoded into a generic map.
	outgoingResponse *http.Response    // The *http.Response passed to the constructor.
}
 28 | 
 29 | func NewChatCompletionResponse(request object.LLMRequest, response *http.Response, reader *bufio.Reader) (*ChatCompletionsResponse, error) {
 30 | 	resp := new(ChatCompletionsResponse)
 31 | 
 32 | 	buffer := new(bytes.Buffer)
 33 | 
 34 | 	_, err := buffer.ReadFrom(reader)
 35 | 	if err != nil {
 36 | 		return nil, err
 37 | 	}
 38 | 
 39 | 	err = resp.processBytes(buffer.Bytes(), response)
 40 | 	if err != nil {
 41 | 		return nil, fmt.Errorf("failed to unmarshal response: %w", err)
 42 | 	}
 43 | 
 44 | 	resp.request = request
 45 | 	resp.outgoingResponse = response
 46 | 
 47 | 	return resp, nil
 48 | }
 49 | 
 50 | func (r *ChatCompletionsResponse) processBytes(bs []byte, response *http.Response) error {
 51 | 	if r == nil {
 52 | 		return nil
 53 | 	}
 54 | 
 55 | 	r.responseBody = bs
 56 | 	r.Status = response.StatusCode
 57 | 
 58 | 	var body map[string]any
 59 | 
 60 | 	err := json.Unmarshal(bs, &body)
 61 | 	if err != nil {
 62 | 		return fmt.Errorf("failed to unmarshal response body: %w", err)
 63 | 	}
 64 | 
 65 | 	r.bodyParsed = body
 66 | 
 67 | 	r.Model = utils.GetByJSONPath[string](body, "{ .model }")
 68 | 	usageMap := utils.GetByJSONPath[map[string]any](body, "{ .usage }")
 69 | 
 70 | 	r.Usage, err = utils.FromMap[ChatCompletionsUsage](usageMap)
 71 | 	if err != nil {
 72 | 		return fmt.Errorf("failed to unmarshal usage: %w", err)
 73 | 	}
 74 | 
 75 | 	errorResponse, err := unmarshalErrorResponseFromParsedBody(body, response, bs)
 76 | 	if err != nil {
 77 | 		return err
 78 | 	}
 79 | 	if errorResponse != nil {
 80 | 		r.Error = errorResponse
 81 | 	}
 82 | 
 83 | 	return nil
 84 | }
 85 | 
// MarshalJSON returns the stored response body bytes unchanged instead of
// encoding the struct fields.
func (r *ChatCompletionsResponse) MarshalJSON() ([]byte, error) {
	return r.responseBody, nil
}

// IsStream always reports false for this response type.
func (r *ChatCompletionsResponse) IsStream() bool {
	return false
}

// GetRequestID is not implemented yet and always returns an empty string.
func (r *ChatCompletionsResponse) GetRequestID() string {
	// TODO: implement
	return ""
}

// GetModel returns the model name currently recorded on the response.
func (r *ChatCompletionsResponse) GetModel() string {
	return r.Model
}
102 | 
103 | func (r *ChatCompletionsResponse) SetModel(model string) error {
104 | 	if r.Error == nil {
105 | 		var err error
106 | 
107 | 		r.responseBody, r.bodyParsed, err = modifyBytesBodyAndParsed(r.responseBody, NewReplace("/model", model))
108 | 		if err != nil {
109 | 			return err
110 | 		}
111 | 	}
112 | 
113 | 	r.Model = model
114 | 
115 | 	return nil
116 | }
117 | 
118 | func (r *ChatCompletionsResponse) GetUsage() object.LLMUsage {
119 | 	return r.Usage
120 | }
121 | 
122 | func (r *ChatCompletionsResponse) GetError() object.LLMError {
123 | 	if r.Error != nil {
124 | 		return r.Error
125 | 	}
126 | 
127 | 	return nil
128 | }
129 | 


--------------------------------------------------------------------------------
/pkg/types/openai/common_test.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 
 6 | 	"github.com/samber/lo"
 7 | 	"github.com/stretchr/testify/assert"
 8 | 	"github.com/stretchr/testify/require"
 9 | )
10 | 
11 | func TestParseImageGenerationsSizeString(t *testing.T) {
12 | 	size, err := parseImageGenerationsSizeString(nil)
13 | 	require.NoError(t, err)
14 | 	require.Nil(t, size)
15 | 
16 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr(""))
17 | 	require.Error(t, err)
18 | 	require.Nil(t, size)
19 | 	require.EqualError(t, err, "empty size string")
20 | 
21 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024"))
22 | 	require.NoError(t, err)
23 | 	require.NotNil(t, size)
24 | 	assert.Equal(t, uint64(1024), size.Width)
25 | 	assert.Equal(t, uint64(1024), size.Height)
26 | 
27 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x"))
28 | 	require.Error(t, err)
29 | 	require.Nil(t, size)
30 | 	require.EqualError(t, err, "invalid height `` in \"size\" value `1024x`")
31 | 
32 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("x1024"))
33 | 	require.Error(t, err)
34 | 	require.Nil(t, size)
35 | 	require.EqualError(t, err, "invalid width `` in \"size\" value `x1024`")
36 | 
37 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024x1024"))
38 | 	require.Error(t, err)
39 | 	require.Nil(t, size)
40 | 	require.EqualError(t, err, "invalid `1024x1024x1024` in \"size\" value: too many parts")
41 | 
42 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("1024"))
43 | 	require.Error(t, err)
44 | 	require.Nil(t, size)
45 | 	require.EqualError(t, err, "invalid `1024` in \"size\" value")
46 | 
47 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("1024x1024x"))
48 | 	require.Error(t, err)
49 | 	require.Nil(t, size)
50 | 	require.EqualError(t, err, "invalid `1024x1024x` in \"size\" value: too many parts")
51 | 
52 | 	size, err = parseImageGenerationsSizeString(lo.ToPtr("testx1024"))
53 | 	require.Error(t, err)
54 | 	require.Nil(t, size)
55 | 	require.EqualError(t, err, "invalid width `test` in \"size\" value `testx1024`")
56 | }
57 | 


--------------------------------------------------------------------------------
/pkg/types/openai/event.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
// Event is the name of an event carried in an OpenAI-style event payload.
type Event string

const (
	// EventError marks a payload that reports an error.
	EventError Event = "error"
)

// ErrorEvent is the JSON payload pairing an event name with the error it
// reports.
type ErrorEvent struct {
	Event Event `json:"event"` // Event name, e.g. EventError.
	Error Error `json:"error"` // The error being reported.
}
13 | 


--------------------------------------------------------------------------------
/pkg/types/openai/http.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"errors"
 5 | 	"log/slog"
 6 | 	"net/http"
 7 | 
 8 | 	"knoway.dev/pkg/metadata"
 9 | 	"knoway.dev/pkg/utils"
10 | )
11 | 
var (
	// SkipStreamResponse is a sentinel error. When ResponseHandler receives an
	// error matching it, the handler writes nothing to the client and records
	// status 200 on the request metadata, on the assumption that the stream
	// handler has already produced the response.
	SkipStreamResponse = errors.New("skip writing stream response") //nolint:errname,stylecheck
)
15 | 
16 | func ResponseHandler() func(resp any, err error, writer http.ResponseWriter, request *http.Request) {
17 | 	return func(resp any, err error, writer http.ResponseWriter, request *http.Request) {
18 | 		rMeta := metadata.RequestMetadataFromCtx(request.Context())
19 | 
20 | 		if err == nil {
21 | 			if resp != nil {
22 | 				rMeta.StatusCode = http.StatusOK
23 | 
24 | 				utils.WriteJSONForHTTP(http.StatusOK, resp, writer)
25 | 			}
26 | 
27 | 			return
28 | 		}
29 | 
30 | 		if errors.Is(err, SkipStreamResponse) {
31 | 			// NOTICE: special case where the response is already handled by the stream
32 | 			// handler as we assume the stream handler will handle the response as
33 | 			// status code 200 OK.
34 | 			rMeta.StatusCode = http.StatusOK
35 | 
36 | 			return
37 | 		}
38 | 
39 | 		openAIError := NewErrorFromLLMError(err)
40 | 		if openAIError.FromUpstream {
41 | 			slog.Error("upstream returned an error",
42 | 				"status", openAIError.Status,
43 | 				"code", openAIError.ErrorBody.Code,
44 | 				"message", openAIError.ErrorBody.Message,
45 | 				"type", openAIError.ErrorBody.Type,
46 | 			)
47 | 		} else if openAIError.Status >= http.StatusInternalServerError {
48 | 			slog.Error("failed to handle request", "error", openAIError, "cause", openAIError.Cause, "source_error", err.Error())
49 | 		}
50 | 
51 | 		rMeta.StatusCode = openAIError.Status
52 | 		rMeta.ErrorMessage = openAIError.Error()
53 | 
54 | 		utils.WriteJSONForHTTP(openAIError.Status, openAIError, writer)
55 | 	}
56 | }
57 | 


--------------------------------------------------------------------------------
/pkg/types/openai/image_generations_request_test.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"context"
 6 | 	"net/http"
 7 | 	"testing"
 8 | 
 9 | 	"github.com/stretchr/testify/assert"
10 | 	"github.com/stretchr/testify/require"
11 | 	"google.golang.org/protobuf/types/known/structpb"
12 | )
13 | 
14 | func TestImageGenerationsSetDefaultParams(t *testing.T) {
15 | 	body := []byte(`{
16 | 		"model": "public/sd-3",
17 | 		"n": 3,
18 | 		"size": "1024x1792"
19 | 	}`)
20 | 
21 | 	req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body))
22 | 	require.NoError(t, err)
23 | 
24 | 	chatReq, err := NewImageGenerationsRequest(req)
25 | 	require.NoError(t, err)
26 | 
27 | 	params := map[string]*structpb.Value{
28 | 		"model":   structpb.NewStringValue("openai/dall-e-3"),
29 | 		"n":       structpb.NewNumberValue(1),
30 | 		"style":   structpb.NewStringValue("natural"),
31 | 		"quality": structpb.NewStringValue("hd"),
32 | 		"size":    structpb.NewStringValue("1792x1024"),
33 | 	}
34 | 
35 | 	err = chatReq.SetDefaultParams(params)
36 | 	require.NoError(t, err)
37 | 
38 | 	assert.Equal(t, "public/sd-3", chatReq.bodyParsed["model"])
39 | 	assert.InDelta(t, 3.0, chatReq.bodyParsed["n"], 0.0001)
40 | 	assert.Equal(t, "natural", chatReq.bodyParsed["style"])
41 | 	assert.Equal(t, "hd", chatReq.bodyParsed["quality"])
42 | 	assert.Equal(t, "1024x1792", chatReq.bodyParsed["size"])
43 | }
44 | 
45 | func TestImageGenerationsSetOverrideParams(t *testing.T) {
46 | 	body := []byte(`{
47 | 		"model": "public/sd-3",
48 | 		"n": 3,
49 | 		"style": "natural"
50 | 	}`)
51 | 
52 | 	req, err := http.NewRequestWithContext(context.TODO(), http.MethodPost, "/api/v1", bytes.NewReader(body))
53 | 	require.NoError(t, err)
54 | 
55 | 	chatReq, err := NewImageGenerationsRequest(req)
56 | 	require.NoError(t, err)
57 | 
58 | 	params := map[string]*structpb.Value{
59 | 		"model": structpb.NewStringValue("openai/dall-e-3"),
60 | 		"n":     structpb.NewNumberValue(1),
61 | 		"size":  structpb.NewStringValue("1792x1024"),
62 | 	}
63 | 
64 | 	err = chatReq.SetOverrideParams(params)
65 | 	require.NoError(t, err)
66 | 
67 | 	assert.Equal(t, "openai/dall-e-3", chatReq.bodyParsed["model"])
68 | 	assert.InDelta(t, 1.0, chatReq.bodyParsed["n"], 0.0001)
69 | 	assert.Equal(t, "natural", chatReq.bodyParsed["style"])
70 | }
71 | 


--------------------------------------------------------------------------------
/pkg/types/openai/jsonpatch.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 
 6 | 	"github.com/samber/lo"
 7 | )
 8 | 
// JSONPatchOperation is the "op" member of a JSON Patch operation.
type JSONPatchOperation string

// Operations that can be built with the constructors in this file.
const (
	JSONPatchOperationAdd     JSONPatchOperation = "add"
	JSONPatchOperationRemove  JSONPatchOperation = "remove"
	JSONPatchOperationReplace JSONPatchOperation = "replace"
)

// JSONPatchOperationObject is one entry of a JSON Patch document.
//
// Value is tagged omitempty, so a nil Value is left out of the encoded
// operation entirely.
type JSONPatchOperationObject struct {
	Operation JSONPatchOperation `json:"op"`
	Path      string             `json:"path"`
	Value     any                `json:"value,omitempty"`
}
22 | 
23 | func NewPatches(operations ...*JSONPatchOperationObject) []byte {
24 | 	return lo.Must(json.Marshal(operations))
25 | }
26 | 
27 | func NewReplace(path string, to any) *JSONPatchOperationObject {
28 | 	return &JSONPatchOperationObject{
29 | 		Operation: JSONPatchOperationReplace,
30 | 		Path:      path,
31 | 		Value:     to,
32 | 	}
33 | }
34 | 
35 | func NewAdd(path string, value any) *JSONPatchOperationObject {
36 | 	return &JSONPatchOperationObject{
37 | 		Operation: JSONPatchOperationAdd,
38 | 		Path:      path,
39 | 		Value:     value,
40 | 	}
41 | }
42 | 
43 | func NewRemove(path string) *JSONPatchOperationObject {
44 | 	return &JSONPatchOperationObject{
45 | 		Operation: JSONPatchOperationRemove,
46 | 		Path:      path,
47 | 	}
48 | }
49 | 


--------------------------------------------------------------------------------
/pkg/types/openai/jsonpatch_test.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 	"testing"
 6 | 
 7 | 	jsonpatch "github.com/evanphx/json-patch/v5"
 8 | 	"github.com/samber/lo"
 9 | 	"github.com/stretchr/testify/require"
10 | )
11 | 
12 | func TestJSONPatchReplace(t *testing.T) {
13 | 	patch, err := jsonpatch.DecodePatch(NewPatches(
14 | 		NewReplace("/model", "gpt-3.5-turbo"),
15 | 	))
16 | 	require.NoError(t, err)
17 | 
18 | 	patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{
19 | 		"model": "gpt-3.5",
20 | 	})))
21 | 	require.NoError(t, err)
22 | 
23 | 	require.JSONEq(t, `{"model":"gpt-3.5-turbo"}`, string(patched))
24 | }
25 | 
26 | func TestJSONPatchAdd(t *testing.T) {
27 | 	patch, err := jsonpatch.DecodePatch(NewPatches(
28 | 		NewAdd("/stream_options", map[string]any{
29 | 			"include_usage": true,
30 | 		}),
31 | 	))
32 | 	require.NoError(t, err)
33 | 
34 | 	patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{
35 | 		"model": "gpt-3.5",
36 | 	})))
37 | 	require.NoError(t, err)
38 | 
39 | 	require.JSONEq(t, `{"model":"gpt-3.5","stream_options":{"include_usage":true}}`, string(patched))
40 | }
41 | 
42 | func TestJSONPatchRemove(t *testing.T) {
43 | 	patch, err := jsonpatch.DecodePatch(NewPatches(
44 | 		NewRemove("/model"),
45 | 	))
46 | 	require.NoError(t, err)
47 | 
48 | 	patched, err := patch.Apply(lo.Must(json.Marshal(map[string]interface{}{
49 | 		"model": "gpt-3.5",
50 | 	})))
51 | 	require.NoError(t, err)
52 | 
53 | 	require.JSONEq(t, `{}`, string(patched))
54 | }
55 | 


--------------------------------------------------------------------------------
/pkg/types/openai/testdata/GoogleSampleWebpImage.webp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/GoogleSampleWebpImage.webp


--------------------------------------------------------------------------------
/pkg/types/openai/testdata/SampleGIFImage_135kbmb.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SampleGIFImage_135kbmb.gif


--------------------------------------------------------------------------------
/pkg/types/openai/testdata/SampleJPGImage_100kbmb.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SampleJPGImage_100kbmb.jpg


--------------------------------------------------------------------------------
/pkg/types/openai/testdata/SamplePNGImage_100kbmb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/knoway-dev/knoway/76080a36d3e84786d710f9952657d71df4fce873/pkg/types/openai/testdata/SamplePNGImage_100kbmb.png


--------------------------------------------------------------------------------
/pkg/types/openai/usage.go:
--------------------------------------------------------------------------------
 1 | package openai
 2 | 
 3 | import (
 4 | 	"knoway.dev/pkg/object"
 5 | 	"knoway.dev/pkg/utils"
 6 | )
 7 | 
// CompletionTokensDetails is the "completion_tokens_details" breakdown of a
// chat completions usage object.
type CompletionTokensDetails struct {
	AcceptedPredictionTokens uint64 `json:"accepted_prediction_tokens"` // When using Predicted Outputs, the number of tokens in the prediction that appeared in the completion.
	AudioTokens              uint64 `json:"audio_tokens"`               // Audio tokens generated by the model.
	ReasoningTokens          uint64 `json:"reasoning_tokens"`           // Tokens generated by the model for reasoning.
	RejectedPredictionTokens uint64 `json:"rejected_prediction_tokens"` // When using Predicted Outputs, the number of tokens in the prediction that did not appear in the completion. However, like reasoning tokens, these tokens are still counted in the total completion tokens for purposes of billing, output, and context window limits.
}

// PromptTokensDetails is the "prompt_tokens_details" breakdown of a chat
// completions usage object.
type PromptTokensDetails struct {
	AudioTokens  uint64 `json:"audio_tokens"`  // Audio input tokens present in the prompt (per the OpenAI usage object).
	CachedTokens uint64 `json:"cached_tokens"` // Cached tokens present in the prompt (per the OpenAI usage object).
}
19 | 
// Compile-time checks that *ChatCompletionsUsage satisfies the usage interfaces.
var _ object.LLMUsage = (*ChatCompletionsUsage)(nil)
var _ object.LLMTokensUsage = (*ChatCompletionsUsage)(nil)

// ChatCompletionsUsage is the token usage reported for a chat completions
// response.
type ChatCompletionsUsage struct {
	object.IsLLMUsage

	TotalTokens             uint64                   `json:"total_tokens,omitempty"`              // Total number of tokens used in the request (prompt + completion).
	CompletionTokens        uint64                   `json:"completion_tokens,omitempty"`         // Number of tokens in the generated completion.
	PromptTokens            uint64                   `json:"prompt_tokens,omitempty"`             // Number of tokens in the prompt.
	CompletionTokensDetails *CompletionTokensDetails `json:"completion_tokens_details,omitempty"` // Breakdown of tokens used in a completion.
	PromptTokensDetails     *PromptTokensDetails     `json:"prompt_tokens_details,omitempty"`     // Breakdown of tokens used in the prompt.
}

// GetTotalTokens returns TotalTokens. The receiver must not be nil.
func (u *ChatCompletionsUsage) GetTotalTokens() uint64 {
	return u.TotalTokens
}

// GetCompletionTokens returns CompletionTokens. The receiver must not be nil.
func (u *ChatCompletionsUsage) GetCompletionTokens() uint64 {
	return u.CompletionTokens
}

// GetPromptTokens returns PromptTokens. The receiver must not be nil.
func (u *ChatCompletionsUsage) GetPromptTokens() uint64 {
	return u.PromptTokens
}

// GetOutputImages always returns an empty, non-nil slice: chat completions
// usage carries no image information.
func (u *ChatCompletionsUsage) GetOutputImages() []object.ImageGenerationsUsageImage {
	return make([]object.ImageGenerationsUsageImage, 0)
}
48 | 
// Compile-time check that *ImageGenerationsUsageImage satisfies the interface.
var _ object.ImageGenerationsUsageImage = (*ImageGenerationsUsageImage)(nil)

// ImageGenerationsUsageImage describes one generated image for usage
// accounting.
type ImageGenerationsUsageImage struct {
	Width   uint64 `json:"width,omitempty"`   // Width of the generated image.
	Height  uint64 `json:"height,omitempty"`  // Height of the generated image.
	Style   string `json:"style,omitempty"`   // Style of the generated image.
	Quality string `json:"quality,omitempty"` // Quality of the generated image.
}

// GetWidth returns Width. The receiver must not be nil.
func (i *ImageGenerationsUsageImage) GetWidth() uint64 {
	return i.Width
}

// GetHeight returns Height. The receiver must not be nil.
func (i *ImageGenerationsUsageImage) GetHeight() uint64 {
	return i.Height
}

// GetStyle returns Style. The receiver must not be nil.
func (i *ImageGenerationsUsageImage) GetStyle() string {
	return i.Style
}

// GetQuality returns Quality. The receiver must not be nil.
func (i *ImageGenerationsUsageImage) GetQuality() string {
	return i.Quality
}
73 | 
// Compile-time checks that *ImageGenerationsUsage satisfies the usage interfaces.
var _ object.LLMUsage = (*ImageGenerationsUsage)(nil)
var _ object.LLMImagesUsage = (*ImageGenerationsUsage)(nil)

// ImageGenerationsUsage is the usage reported for an image generations
// response: one entry per generated image and no token counts.
type ImageGenerationsUsage struct {
	object.IsLLMUsage

	Images []*ImageGenerationsUsageImage `json:"images,omitempty"` // Usage information for each generated image.
}

// GetTotalTokens always returns 0; image usage is not measured in tokens here.
func (u *ImageGenerationsUsage) GetTotalTokens() uint64 {
	return 0
}

// GetCompletionTokens always returns 0.
func (u *ImageGenerationsUsage) GetCompletionTokens() uint64 {
	return 0
}

// GetPromptTokens always returns 0.
func (u *ImageGenerationsUsage) GetPromptTokens() uint64 {
	return 0
}

// GetOutputImages returns Images converted to the interface element type via
// utils.TypeAssertFrom. The receiver must not be nil.
func (u *ImageGenerationsUsage) GetOutputImages() []object.ImageGenerationsUsageImage {
	return utils.TypeAssertFrom[*ImageGenerationsUsageImage, object.ImageGenerationsUsageImage](u.Images)
}
98 | 


--------------------------------------------------------------------------------
/pkg/types/sse/event.go:
--------------------------------------------------------------------------------
 1 | package sse
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"fmt"
 6 | 	"io"
 7 | 
 8 | 	"knoway.dev/pkg/utils"
 9 | )
10 | 
// Event represents a Server-Sent Event.
// SSE explanation: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#event_stream_format
//
// All fields are raw bytes that MarshalTo writes out verbatim. MarshalTo emits
// ID, Event and Retry only when Data is non-empty, and writes nothing at all
// when both Data and Comment are empty.
type Event struct {
	// ID is used to set the EventSource object's last event ID value.
	ID []byte
	// Data field is for the message. When the EventSource receives multiple consecutive lines
	// that begin with data:, it concatenates them, inserting a newline character between each one.
	// Trailing newlines are removed.
	Data []byte
	// Event is a string identifying the type of event described. If this is specified, an event
	// will be dispatched on the browser to the listener for the specified event name; the website
	// source code should use addEventListener() to listen for named events. The onmessage handler
	// is called if no event name is specified for a message.
	Event []byte
	// Retry is the reconnection time. If the connection to the server is lost, the browser will
	// wait for the specified time before attempting to reconnect. This must be an integer, specifying
	// the reconnection time in milliseconds. If a non-integer value is specified, the field is ignored.
	Retry []byte
	// Comment line can be used to prevent connections from timing out; a server can send a comment
	// periodically to keep the connection alive.
	Comment []byte
}
33 | 
34 | // MarshalTo marshals Event to given Writer.
35 | func (ev *Event) MarshalTo(w io.Writer) error {
36 | 	// Marshalling part is taken from: https://github.com/r3labs/sse/blob/c6d5381ee3ca63828b321c16baa008fd6c0b4564/http.go#L16
37 | 	if len(ev.Data) == 0 && len(ev.Comment) == 0 {
38 | 		return nil
39 | 	}
40 | 
41 | 	defer utils.SafeFlush(w)
42 | 
43 | 	if len(ev.Data) > 0 { //nolint:nestif
44 | 		if len(ev.ID) > 0 {
45 | 			if _, err := fmt.Fprintf(w, "id: %s\n", ev.ID); err != nil {
46 | 				return err
47 | 			}
48 | 		}
49 | 
50 | 		sd := bytes.Split(ev.Data, []byte("\n"))
51 | 		for i := range sd {
52 | 			if _, err := fmt.Fprintf(w, "data: %s\n", sd[i]); err != nil {
53 | 				return err
54 | 			}
55 | 		}
56 | 
57 | 		if len(ev.Event) > 0 {
58 | 			if _, err := fmt.Fprintf(w, "event: %s\n", ev.Event); err != nil {
59 | 				return err
60 | 			}
61 | 		}
62 | 
63 | 		if len(ev.Retry) > 0 {
64 | 			if _, err := fmt.Fprintf(w, "retry: %s\n", ev.Retry); err != nil {
65 | 				return err
66 | 			}
67 | 		}
68 | 	}
69 | 
70 | 	if len(ev.Comment) > 0 {
71 | 		if _, err := fmt.Fprintf(w, ": %s\n", ev.Comment); err != nil {
72 | 			return err
73 | 		}
74 | 	}
75 | 
76 | 	if _, err := fmt.Fprint(w, "\n"); err != nil {
77 | 		return err
78 | 	}
79 | 
80 | 	return nil
81 | }
82 | 


--------------------------------------------------------------------------------
/pkg/utils/crd_common_hash.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"crypto/sha256"
 5 | 	"encoding/hex"
 6 | 	"strings"
 7 | )
 8 | 
 9 | func CalcKeysHash(keys []string) string {
10 | 	if len(keys) == 0 {
11 | 		return ""
12 | 	}
13 | 
14 | 	h := sha256.New()
15 | 	h.Write([]byte(strings.Join(keys, "/")))
16 | 	bs := h.Sum(nil)
17 | 
18 | 	return hex.EncodeToString(bs)[:8]
19 | }
20 | 


--------------------------------------------------------------------------------
/pkg/utils/http.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"encoding/json"
 5 | 	"net/http"
 6 | )
 7 | 
 8 | func SafeFlush(writer any) {
 9 | 	f, ok := writer.(http.Flusher)
10 | 	if ok && f != nil {
11 | 		f.Flush()
12 | 	}
13 | }
14 | 
15 | func WriteJSONForHTTP(status int, resp any, writer http.ResponseWriter) {
16 | 	bs, _ := json.Marshal(resp) //nolint:errchkjson
17 | 
18 | 	writer.Header().Set("Content-Type", "application/json; charset=utf-8")
19 | 	writer.WriteHeader(status)
20 | 
21 | 	SafeFlush(writer)
22 | 
23 | 	_, _ = writer.Write(bs)
24 | }
25 | 
26 | func WriteEventStreamHeadersForHTTP(writer http.ResponseWriter) {
27 | 	writer.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
28 | 	writer.Header().Set("Cache-Control", "no-cache")
29 | 	writer.Header().Set("Connection", "keep-alive")
30 | 	writer.Header().Set("Transfer-Encoding", "chunked")
31 | 	writer.WriteHeader(http.StatusOK)
32 | 
33 | 	SafeFlush(writer)
34 | }
35 | 


--------------------------------------------------------------------------------
/pkg/utils/json.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"encoding/json"
 6 | 	"io"
 7 | 
 8 | 	"k8s.io/client-go/util/jsonpath"
 9 | )
10 | 
11 | func GetByJSONPathWithoutConvert(input any, template string) (string, error) {
12 | 	j := jsonpath.New("document")
13 | 	j.AllowMissingKeys(true)
14 | 
15 | 	err := j.Parse(template)
16 | 	if err != nil {
17 | 		return "", err
18 | 	}
19 | 
20 | 	buffer := new(bytes.Buffer)
21 | 
22 | 	err = j.Execute(buffer, input)
23 | 	if err != nil {
24 | 		return "", err
25 | 	}
26 | 
27 | 	return buffer.String(), nil
28 | }
29 | 
30 | func GetByJSONPath[T any](input any, template string) T {
31 | 	var empty T
32 | 
33 | 	result, err := GetByJSONPathWithoutConvert(input, template)
34 | 	if err != nil {
35 | 		return empty
36 | 	}
37 | 
38 | 	return FromStringOrEmpty[T](result)
39 | }
40 | 
41 | func ReadAsJSONWithClose(readCloser io.ReadCloser) (*bytes.Buffer, map[string]any, error) {
42 | 	defer func() {
43 | 		_ = readCloser.Close()
44 | 	}()
45 | 
46 | 	buffer, jsonMap, err := ReadAsJSON(readCloser)
47 | 	if err != nil {
48 | 		return buffer, jsonMap, err
49 | 	}
50 | 
51 | 	return buffer, jsonMap, nil
52 | }
53 | 
54 | func ReadAsJSON(reader io.Reader) (*bytes.Buffer, map[string]any, error) {
55 | 	buffer := new(bytes.Buffer)
56 | 	jsonMap := make(map[string]any)
57 | 
58 | 	_, err := io.Copy(buffer, reader)
59 | 	if err != nil {
60 | 		return buffer, jsonMap, err
61 | 	}
62 | 
63 | 	err = json.Unmarshal(buffer.Bytes(), &jsonMap)
64 | 	if err != nil {
65 | 		return buffer, jsonMap, err
66 | 	}
67 | 
68 | 	return buffer, jsonMap, nil
69 | }
70 | 
// FromMap converts m into a *T by encoding the map to JSON and decoding that
// JSON into a new T.
//
// A nil or empty map yields (nil, nil). Encoding or decoding failures are
// returned as the error with a nil result.
func FromMap[T any, MK comparable, MV any](m map[MK]MV) (*T, error) {
	// len of a nil map is 0, so one check covers both nil and empty input.
	if len(m) == 0 {
		return nil, nil
	}

	encoded, err := json.Marshal(m)
	if err != nil {
		return nil, err
	}

	decoded := new(T)
	if err := json.Unmarshal(encoded, decoded); err != nil {
		return nil, err
	}

	return decoded, nil
}
94 | 


--------------------------------------------------------------------------------
/pkg/utils/json_test.go:
--------------------------------------------------------------------------------
  1 | package utils
  2 | 
  3 | import (
  4 | 	"testing"
  5 | 
  6 | 	"github.com/stretchr/testify/assert"
  7 | )
  8 | 
  9 | func TestJSONPathExecute(t *testing.T) {
 10 | 	t.Parallel()
 11 | 
 12 | 	t.Run("string", func(t *testing.T) {
 13 | 		t.Parallel()
 14 | 
 15 | 		type testCase struct {
 16 | 			name     string
 17 | 			payload  map[string]any
 18 | 			template string
 19 | 			expected any
 20 | 		}
 21 | 
 22 | 		testCases := []testCase{
 23 | 			{
 24 | 				name: "model",
 25 | 				payload: map[string]any{
 26 | 					"model": "gpt-4o",
 27 | 				},
 28 | 				template: "{ .model }",
 29 | 				expected: "gpt-4o",
 30 | 			},
 31 | 			{
 32 | 				name: "message role",
 33 | 				payload: map[string]any{
 34 | 					"model": "gpt-4o",
 35 | 					"messages": []any{
 36 | 						map[string]any{
 37 | 							"role":    "user",
 38 | 							"content": "Hello",
 39 | 						},
 40 | 					},
 41 | 				},
 42 | 				template: "{ .messages[0].role }",
 43 | 				expected: "user",
 44 | 			},
 45 | 			{
 46 | 				name: "message content",
 47 | 				payload: map[string]any{
 48 | 					"model": "gpt-4o",
 49 | 					"messages": []any{
 50 | 						map[string]any{
 51 | 							"role":    "user",
 52 | 							"content": "Hello",
 53 | 						},
 54 | 					},
 55 | 				},
 56 | 				template: "{ .messages[0].content }",
 57 | 				expected: "Hello",
 58 | 			},
 59 | 		}
 60 | 
 61 | 		for _, tc := range testCases {
 62 | 			t.Run(tc.name, func(t *testing.T) {
 63 | 				t.Parallel()
 64 | 				assert.Equal(t, tc.expected, GetByJSONPath[string](tc.payload, tc.template))
 65 | 			})
 66 | 		}
 67 | 	})
 68 | 
 69 | 	t.Run("number", func(t *testing.T) {
 70 | 		t.Parallel()
 71 | 
 72 | 		payload := map[string]any{
 73 | 			"code": 401,
 74 | 		}
 75 | 
 76 | 		assert.Equal(t, 401, GetByJSONPath[int](payload, "{ .code }"))
 77 | 	})
 78 | 
 79 | 	t.Run("null", func(t *testing.T) {
 80 | 		t.Parallel()
 81 | 
 82 | 		t.Run("unknown nil", func(t *testing.T) {
 83 | 			t.Parallel()
 84 | 
 85 | 			payload := map[string]any{
 86 | 				"code": nil,
 87 | 			}
 88 | 
 89 | 			assert.Equal(t, "", GetByJSONPath[string](payload, "{ .code }"))
 90 | 		})
 91 | 
 92 | 		t.Run("nil string", func(t *testing.T) {
 93 | 			t.Parallel()
 94 | 
 95 | 			type payload struct {
 96 | 				Code *string `json:"code"`
 97 | 			}
 98 | 
 99 | 			p := payload{
100 | 				Code: nil,
101 | 			}
102 | 
103 | 			assert.Equal(t, "", GetByJSONPath[string](p, "{ .code }"))
104 | 		})
105 | 	})
106 | }
107 | 


--------------------------------------------------------------------------------
/pkg/utils/lo.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import "github.com/samber/lo"
 4 | 
 5 | func FilterNonNil[T any](item T, _ int) bool {
 6 | 	return !lo.IsNil(item)
 7 | }
 8 | 
 9 | func MapTypeAssert[F any, T any](item F, _ int) T {
10 | 	val, _ := any(item).(T)
11 | 	return val
12 | }
13 | 
14 | func TypeAssertFrom[F any, T any](items []F) []T {
15 | 	filters := lo.Map(items, MapTypeAssert[F, T])
16 | 	return lo.Filter(filters, FilterNonNil)
17 | }
18 | 
// Clone returns a shallow copy of collection: a new slice of the same length
// holding the same elements. The result is non-nil even for a nil input.
func Clone[T any, Slice ~[]T](collection Slice) Slice {
	duplicate := make(Slice, len(collection))
	copy(duplicate, collection)

	return duplicate
}
24 | 


--------------------------------------------------------------------------------
/samples/api-key-server/config.yaml:
--------------------------------------------------------------------------------
 1 | api_keys:
 2 |   - api_key: "valid-api-key-1"
 3 |     is_valid: true
 4 |     allow_models:
 5 |       - "kebe/*"
 6 |       - "llama2-70b"
 7 |       - "openai/*"
 8 |     api_key_id: "1"
 9 |     user_id: "user-1"
10 |   - api_key: "valid-api-key-2"
11 |     is_valid: true
12 |     allow_models:
13 |       - "*"
14 |     api_key_id: "2"
15 |     user_id: "user-2"
16 |   - api_key: "invalid-api-key"
17 |     api_key_id: "3"
18 |     is_valid: false
19 |     allow_models: []
20 |     user_id: "xxx"


--------------------------------------------------------------------------------
/samples/api-key-server/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"context"
  5 | 	"fmt"
  6 | 	"log"
  7 | 	"log/slog"
  8 | 	"net"
  9 | 	"os"
 10 | 
 11 | 	"google.golang.org/grpc"
 12 | 	"gopkg.in/yaml.v3"
 13 | 
 14 | 	"knoway.dev/api/service/v1alpha1"
 15 | )
 16 | 
 17 | // APIKeyAuthResponse 结构体定义与之前相同
 18 | type APIKeyAuthResponse struct {
 19 | 	IsValid     bool     `yaml:"is_valid"`
 20 | 	AllowModels []string `yaml:"allow_models"`
 21 | 	APIKeyID    string   `yaml:"api_key_id"`
 22 | 	UserID      string   `yaml:"user_id"`
 23 | }
 24 | 
 25 | type APIKeyAuthServer struct {
 26 | 	v1alpha1.UnimplementedAuthServiceServer
 27 | 	ValidAPIKeys map[string]*APIKeyAuthResponse // 存储从 YAML 加载的 API Key 信息
 28 | }
 29 | 
 30 | // 从 YAML 文件加载 API Keys
 31 | func loadAPIKeysFromYAML(filePath string) (map[string]*APIKeyAuthResponse, error) {
 32 | 	var apiKeyConfig struct {
 33 | 		APIKeys []struct {
 34 | 			APIKey      string   `yaml:"api_key"`
 35 | 			IsValid     bool     `yaml:"is_valid"`
 36 | 			AllowModels []string `yaml:"allow_models"`
 37 | 			APIKeyID    string   `yaml:"api_key_id"`
 38 | 			UserID      string   `yaml:"user_id"`
 39 | 		} `yaml:"api_keys"`
 40 | 	}
 41 | 
 42 | 	// 读取文件内容
 43 | 	data, err := os.ReadFile(filePath)
 44 | 	if err != nil {
 45 | 		return nil, fmt.Errorf("failed to read YAML file: %w", err)
 46 | 	}
 47 | 
 48 | 	// 解析 YAML 数据
 49 | 	if err := yaml.Unmarshal(data, &apiKeyConfig); err != nil {
 50 | 		return nil, fmt.Errorf("failed to unmarshal YAML data: %w", err)
 51 | 	}
 52 | 
 53 | 	// 将解析的数据转换为 map 结构
 54 | 	validAPIKeys := make(map[string]*APIKeyAuthResponse)
 55 | 	for _, apiKey := range apiKeyConfig.APIKeys {
 56 | 		validAPIKeys[apiKey.APIKey] = &APIKeyAuthResponse{
 57 | 			IsValid:     apiKey.IsValid,
 58 | 			AllowModels: apiKey.AllowModels,
 59 | 			APIKeyID:    apiKey.APIKeyID,
 60 | 			UserID:      apiKey.UserID,
 61 | 		}
 62 | 	}
 63 | 
 64 | 	return validAPIKeys, nil
 65 | }
 66 | 
 67 | // APIKeyAuth 处理 API Key 验证请求
 68 | func (s *APIKeyAuthServer) APIKeyAuth(ctx context.Context, req *v1alpha1.APIKeyAuthRequest) (*v1alpha1.APIKeyAuthResponse, error) {
 69 | 	// 从 YAML 加载的 API Keys
 70 | 	if res, exists := s.ValidAPIKeys[req.GetApiKey()]; exists {
 71 | 		// 返回相应的认证结果
 72 | 		return &v1alpha1.APIKeyAuthResponse{
 73 | 			IsValid:     res.IsValid,
 74 | 			AllowModels: res.AllowModels,
 75 | 			ApiKeyId:    res.APIKeyID,
 76 | 			UserId:      res.UserID,
 77 | 		}, nil
 78 | 	}
 79 | 
 80 | 	// 如果无效的 API Key 返回错误响应
 81 | 	return &v1alpha1.APIKeyAuthResponse{
 82 | 		IsValid:     false,
 83 | 		AllowModels: []string{},
 84 | 	}, nil
 85 | }
 86 | 
 87 | func main() {
 88 | 	// 从 YAML 文件加载 API Key 配置
 89 | 	validAPIKeys, err := loadAPIKeysFromYAML("samples/api-key-server/config.yaml")
 90 | 	if err != nil {
 91 | 		log.Fatalf("Error loading API keys from YAML: %v", err)
 92 | 	}
 93 | 
 94 | 	// 创建 gRPC 服务器实例
 95 | 	server := grpc.NewServer()
 96 | 
 97 | 	// 创建 APIKeyAuthServer 实例并注册 API Keys
 98 | 	authServer := &APIKeyAuthServer{
 99 | 		ValidAPIKeys: validAPIKeys,
100 | 	}
101 | 
102 | 	// 注册 AuthService 服务
103 | 	v1alpha1.RegisterAuthServiceServer(server, authServer)
104 | 
105 | 	// 监听指定端口
106 | 	listener, err := net.Listen("tcp", ":50051") //nolint:gosec
107 | 	if err != nil {
108 | 		log.Fatalf("failed to listen: %v", err)
109 | 	}
110 | 
111 | 	// 启动 gRPC 服务器
112 | 	slog.Info("Starting APIKeyAuthServer on port 50051...")
113 | 	if err := server.Serve(listener); err != nil {
114 | 		log.Fatalf("failed to serve: %v", err)
115 | 	}
116 | }
117 | 


--------------------------------------------------------------------------------
/scripts/build-or-download-binaries.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | CUR_DIR=$(
 6 |     cd -- "$(dirname "$0")" >/dev/null 2>&1
 7 |     pwd -P
 8 | )
 9 | 
10 | PLATFORMS=${PLATFORMS:-linux/amd64}
11 | APP=${APP:-knoway-gateway}
12 | 
13 | for p in $(echo ${PLATFORMS} | tr "," " "); do
14 |     GOOS=$(echo ${p} | cut -d "/" -f 1)
15 |     GOARCH=$(echo ${p} | cut -d "/" -f 2)
16 |     dist=${CUR_DIR}/../out/$p/
17 |     mkdir -p ${dist}
18 |     echo "building ${APP} for ${GOOS}/${GOARCH}"
19 |     CGO_ENABLED=0 GOOS=${GOOS} GOARCH=${GOARCH} go build -ldflags "-s -w" -o ${CUR_DIR}/../out/$p/${APP} ${CUR_DIR}/../cmd
20 | done
21 | 


--------------------------------------------------------------------------------
/scripts/code-freeze.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | CUR_DIR=$(
 6 |     cd -- "$(dirname "$0")" >/dev/null 2>&1
 7 |     pwd -P
 8 | )
 9 | 
10 | export GITLAB_HOST=${GITLAB_HOST:-https://gitlab.daocloud.cn}
11 | 
12 | PURE_HOST=${GITLAB_HOST//https:\/\//}
13 | 
14 | glab auth login -t ${GITLAB_CI_TOKEN} -h ${PURE_HOST}
15 | glab auth status
16 | 
17 | minor_version=$(grep "MINOR_VERSION ?=" ${CUR_DIR}/../Makefile | sed -r 's/MINOR_VERSION \?= (.*)/\1/g' | xargs)
18 | 
19 | if [ -n "${NEXT_VERSION}" ] && ! echo ${NEXT_VERSION} | grep -E "^v\d+\.\d+$"; then
20 |     echo "Invalid NEXT_VERSION: ${NEXT_VERSION}, require running on v*.* branch"
21 |     exit 1
22 | else
23 |     major_number=$(echo ${minor_version} | awk -F. '{print $1}')
24 |     minor_number=$(echo ${minor_version} | awk -F. '{print $2}')
25 |     NEXT_VERSION=${major_number}.$((${minor_number} + 1))
26 | fi
27 | 
28 | if [ -n "${GITLAB_CI_TOKEN}" ]; then
29 |     git remote set-url origin https://gitlab-ci-token:${GITLAB_CI_TOKEN}@gitlab.daocloud.cn/ndx/ai/knoway.git
30 | fi
31 | 
32 | if git ls-remote --exit-code origin release-${minor_version} &>/dev/null; then
33 |     echo "release-${minor_version} branch already exists"
34 |     exit 1
35 | fi
36 | 
37 | if ! git config user.name; then
38 |     git config user.name "Auto Release Bot"
39 |     git config user.email "knoway-auto-release@daocloud.io"
40 | fi
41 | 
42 | git checkout -b release-${minor_version}
43 | 
44 | # change version
45 | if [ "$(uname)" = "Darwin" ]; then
46 |     sed -i "" "s/MINOR_VERSION ?=.*/MINOR_VERSION ?= ${NEXT_VERSION}/g" ${CUR_DIR}/../Makefile
47 | else
48 |     sed -i "s/MINOR_VERSION ?=.*/MINOR_VERSION ?= ${NEXT_VERSION}/g" ${CUR_DIR}/../Makefile
49 | fi
50 | 
51 | # push release branch
52 | git push origin release-${minor_version}
53 | 
54 | # create label
55 | glab label create --color="#ed9121" -n cherry-pick-release-${minor_version}
56 | 
57 | git checkout ${CI_BUILD_REF_NAME}
58 | 
59 | git add ${CUR_DIR}/../Makefile
60 | git commit -m "Code freeze and bump MINOR_VERSION to ${NEXT_VERSION}"
61 | 
62 | # push origin branch
63 | git push origin ${CI_BUILD_REF_NAME}
64 | 


--------------------------------------------------------------------------------
/scripts/copy-crds.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | set -o errexit
 5 | set -o nounset
 6 | set -o pipefail
 7 | 
 8 | temp=$(mktemp)
 9 | new_version() {
10 |     echo '{{- if semverCompare ">=1.23.0-0" .Capabilities.KubeVersion.GitVersion }}' >$2
11 |     cat $1 >>$2
12 |     echo '{{- else }}' >>$2
13 | }
14 | old_version() {
15 |     cat $1 >>$2
16 |     echo '{{- end }}' >>$2
17 | }
18 | 
19 | if [[ "" == $(cat $1 | yq '.. | select(has("x-kubernetes-validations"))') ]]; then
20 |     echo "no x-kubernetes-validations found, skip"
21 |     cp $1 $2
22 |     exit
23 | fi
24 | 
25 | f=$(basename $1)
26 | new_version $1 $2/${f}
27 | 
28 | cat $1 | yq 'del(.. | select(has("x-kubernetes-validations")).x-kubernetes-validations)' >$temp
29 | 
30 | old_version $temp $2/${f}
31 | 
32 | rm -f ${temp}
33 | 


--------------------------------------------------------------------------------
/scripts/gen-change-logs.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | CUR_DIR=$(
 6 |     cd -- "$(dirname "$0")" >/dev/null 2>&1
 7 |     pwd -P
 8 | )
 9 | 
10 | export GITLAB_HOST=${GITLAB_HOST:-https://gitlab.daocloud.cn}
11 | PURE_HOST=${GITLAB_HOST//https:\/\//}
12 | 
13 | glab auth login -t ${GITLAB_CI_TOKEN} -h ${PURE_HOST}
14 | glab auth status
15 | 
16 | CUR_VERSION=${CUR_VERSION:-v0.0.0}
17 | 
18 | OUTFILE=${1:-${CUR_DIR}/../changes/CHANGELOG-${CUR_VERSION}.md}
19 | 
20 | mkdir -p $(dirname ${OUTFILE})
21 | 
22 | getallmrs() {
23 |     git fetch origin --tags ${PRE_VERSION} &>/dev/null
24 |     git log ${CI_BUILD_REF_NAME} ^${PRE_VERSION} | grep -E '\(![^\)]+\)$' | sed -r 's/.*\(\!(.*)\)$/\1/g' | uniq | sort
25 | }
26 | 
27 | features=""
28 | bugs=""
29 | 
30 | for mr in $(getallmrs); do
31 |     cont=$(glab mr view ${mr})
32 |     title=$(echo "${cont}" | grep -E '^title:' | sed 's/title:\t//g')
33 |     author=$(echo "${cont}" | grep -E '^author:' | sed 's/author:\t//g')
34 |     labels=$(echo "${cont}" | grep -E '^labels:' | sed 's/labels:\t//g')
35 |     if echo "${labels}" | grep -E 'kind/feature' &>/dev/null; then
36 |         echo "mr ${mr} is a feature"
37 |         features+="- ${title}(!${mr}) by @${author}
38 | "
39 |     else
40 |         echo "mr ${mr} is not a feature"
41 |         bugs+="- ${title}(!${mr}) by @${author}
42 | "
43 |     fi
44 | done
45 | 
46 | echo "
47 | # ${CUR_VERSION} Change logs
48 | 
49 | ## Change since ${PRE_VERSION}
50 | 
51 | ### Changes by Kind
52 | 
53 | #### Bug
54 | 
55 | ${bugs}
56 | 
57 | #### Feature
58 | 
59 | ${features}
60 | 
61 | " >${OUTFILE}
62 | 


--------------------------------------------------------------------------------
/scripts/gen-check.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | SCRIPT_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
 8 | 
 9 | DIFFPROTO="${SCRIPT_ROOT}"
10 | make gen
11 | if [ "$(git status --porcelain | wc -l)" -eq "0" ]; then
12 |     echo "${DIFFPROTO} up to date."
13 | else
14 |     echo "${DIFFPROTO} is out of date. Please run make gen to update codes for the proto files."
15 |     echo "Diff files:"
16 |     git status --porcelain
17 |     git diff
18 |     exit 1
19 | fi
20 | 


--------------------------------------------------------------------------------
/scripts/pr-status-manage.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # 这个脚本不止做 Cherry Pick，后期可能还会集成其它功能，比如 Coverage 检测？
 4 | # 所以名字叫 PR Status Manage
 5 | 
 6 | set -ex
 7 | 
 8 | export GITLAB_HOST=${GITLAB_HOST:-https://gitlab.daocloud.cn}
 9 | 
10 | PURE_HOST=${GITLAB_HOST//https:\/\//}
11 | 
12 | APIREPO=${CI_PROJECT_PATH//\//%2F}
13 | 
14 | glab auth login -t ${GITLAB_CI_TOKEN} -h ${PURE_HOST}
15 | glab auth status
16 | 
17 | git fetch
18 | 
# cherrypickto <target-branch>
#
# Cherry-picks every commit of merge request !${CI_MERGE_REQUEST_IID} onto a
# new branch cut from <target-branch>, force-pushes that branch and opens a
# merge request for it (as a draft while the source MR is not merged).
# When a commit does not apply, an issue assigned to the MR author is created
# instead (unless one already exists) and the function returns.
cherrypickto() {
    target_branch=$1
    branch=cherrypick-${CI_MERGE_REQUEST_IID}-to-${target_branch}
    git branch -D "${branch}" || true # force delete local branch if exists
    git checkout "${target_branch}"   # checkout to target branch
    git checkout -b "${branch}"       # checkout to a new branch

    # Commit ids of the merge request; the sed expression reverses the line
    # order of the API output before the commits are applied.
    commits=$(glab api "projects/${APIREPO}/merge_requests/${CI_MERGE_REQUEST_IID}/commits" | jq '.[].id' -r | sed '1!G;h;$!d')
    echo '```' >/tmp/cherry-pick.log
    echo "Auto cherry-pick !${CI_MERGE_REQUEST_IID} to ${target_branch} failed!" >>/tmp/cherry-pick.log
    git config user.name "Auto Cherry-pick Bot"
    git config user.email "cherry-pick-bot@daocloud.io"
    for commit in ${commits}; do
        if ! git cherry-pick "${commit}" --allow-empty &>>/tmp/cherry-pick.log; then
            echo "cherry-pick ${commit} failed"
            echo '```' >>/tmp/cherry-pick.log
            cat /tmp/cherry-pick.log
            # Leave the work tree clean; otherwise the checkout for the next
            # cherry-pick label fails on the unfinished cherry-pick.
            git cherry-pick --abort || true
            # check if already exists failed issue
            if glab issue list --in title --search "[manually cherry-pick required] !${CI_MERGE_REQUEST_IID}" | grep "!${CI_MERGE_REQUEST_IID}"; then
                echo "Issue already exists, skip"
                return
            fi
            author=$(glab mr view "${CI_MERGE_REQUEST_IID}" | awk '/^author:/ {print $2}')
            # create an issue if cherry-pick failed.
            glab issue create \
                --title "[manually cherry-pick required] !${CI_MERGE_REQUEST_IID} Auto cherry-pick to ${target_branch} error" \
                --description "$(cat /tmp/cherry-pick.log)" \
                --assignee "${author}"
            return
        fi
    done
    title="Auto cherry-pick !${CI_MERGE_REQUEST_IID} to ${target_branch}"
    mr_state=$(glab mr view "${CI_MERGE_REQUEST_IID}" | awk '/^state:/ {print $2}')
    if [[ "${mr_state}" != "merged" ]]; then
        # if mr is not merged, mark new mr as draft to avoid merge it by mistake
        title="Draft: ${title}"
    fi
    # Tracing is switched off while the token is on a command line so that it
    # is not echoed into the job log.
    set +x
    git remote set-url origin "https://gitlab-ci-token:${GITLAB_CI_TOKEN}@${PURE_HOST}/${CI_PROJECT_PATH}.git"
    set -x
    git push origin "${branch}" -f
    if ! glab mr list --source-branch="${branch}" --target-branch="${target_branch}" | grep "Auto cherry-pick"; then
        # stderr is captured too, so the note shows glab's full output.
        res=$(glab mr create --no-editor \
            --remove-source-branch \
            --source-branch "${branch}" \
            --target-branch "${target_branch}" \
            --title "${title}" \
            --label auto-cherry-picked \
            --description "Auto cherry-pick from !${CI_MERGE_REQUEST_IID}" 2>&1)
        glab mr note "${CI_MERGE_REQUEST_IID}" -m "### Auto cherry-picked!<br>${res}"
    else
        echo "MR already exists, skip"
    fi
}
72 | 
# Runs cherrypickto for every "cherry-pick-<branch>" label found in the
# comma-separated ${CI_MERGE_REQUEST_LABELS}. Exits with status 1 when a
# label names a branch that does not exist on origin.
cherrypick() {
    if [ -z "${CI_MERGE_REQUEST_LABELS}" ]; then
        echo "No cherry-pick labels found."
        return
    fi
    # Split on commas only, so labels containing spaces or glob characters are
    # kept intact instead of being word-split and expanded.
    local labels label target
    IFS=',' read -r -a labels <<<"${CI_MERGE_REQUEST_LABELS}"
    for label in "${labels[@]}"; do
        if [[ ${label} == cherry-pick-* ]]; then
            # Strip only the leading prefix; the previous global replacement
            # also removed "cherry-pick-" from inside the branch name.
            target=${label#cherry-pick-}
            if ! git rev-list -n 1 "origin/${target}" >/dev/null; then
                echo "target branch ${target} not exists" >&2
                exit 1
            fi
            cherrypickto "${target}"
        fi
    done
}

cherrypick
91 | 


--------------------------------------------------------------------------------
/scripts/release-version.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | set -ex
  4 | 
  5 | CUR_DIR=$(
  6 |     cd -- "$(dirname "$0")" >/dev/null 2>&1
  7 |     pwd -P
  8 | )
  9 | 
 10 | MINOR_VERSION=$(echo ${CI_BUILD_REF_NAME} | sed -r 's/^release-(.*)$/\1/g')
 11 | 
 12 | if [[ ${CI_BUILD_REF_NAME} == ${MINOR_VERSION} || ${MINOR_VERSION} == "" ]]; then
 13 |     echo "Invalid branch name: ${CI_BUILD_REF_NAME}, require running on release-.* branch"
 14 |     exit 1
 15 | fi
 16 | 
 17 | if [ -z "${PRE_VERSION}" ]; then
 18 |     echo you must specify PRE_VERSION var >>/dev/stderr
 19 |     exit 1
 20 | fi
 21 | 
 22 | if [ -z "${PATCH_VERSION}" ]; then
 23 |     echo you must specify NEXT_VERSION var >>/dev/stderr
 24 |     exit 1
 25 | fi
 26 | 
 27 | CUR_VERSION=v${MINOR_VERSION}.${PATCH_VERSION}
 28 | 
 29 | SHORT_VERSION=v${MINOR_VERSION}
 30 | 
 31 | echo "VERSION is ${CUR_VERSION}"
 32 | echo "SHORT_VERSION is ${SHORT_VERSION}"
 33 | 
 34 | if [ "${PRE_VERSION}" = "${CUR_VERSION}" ]; then
 35 |     echo PRE_VERSION should not be same as knoway in current version >>/dev/stderr
 36 |     exit 1
 37 | fi
 38 | 
 39 | git fetch
 40 | 
 41 | if ! git rev-list ${PRE_VERSION} >/dev/null; then
 42 |     echo "${PRE_VERSION} tag not exists" >/dev/stderr
 43 |     exit 1
 44 | fi
 45 | 
 46 | if [ -n "${CI_BUILD_REF_NAME}" ]; then
 47 |     git checkout ${CI_BUILD_REF_NAME}
 48 | fi
 49 | 
 50 | # todo release notes
 51 | #cd ${CUR_DIR}/../tools/gen-release-notes
 52 | #mkdir -p ${CUR_DIR}/../changes/${SHORT_VERSION}
 53 | #go run . --oldRelease ${PRE_VERSION} --newRelease ${CUR_VERSION} --notes ${CUR_DIR}/../ --outDir ${CUR_DIR}/../changes/${SHORT_VERSION}
 54 | 
 55 | CUR_VERSION=${CUR_VERSION} bash ${CUR_DIR}/gen-change-logs.sh ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md
 56 | 
 57 | cd ${CUR_DIR}/..
 58 | 
 59 | if ! git config user.name; then
 60 |     git config user.name "Auto Release Bot"
 61 |     git config user.email "knoway-auto-release@daocloud.io"
 62 | fi
 63 | 
 64 | # we no need to sync api repo any more
 65 | # sh ${CUR_DIR}/sync-api-repo.sh ${CUR_VERSION}
 66 | 
 67 | cd ${CUR_DIR}/..
 68 | 
 69 | git add .
 70 | 
 71 | git commit -m "Release ${CUR_VERSION} and add release notes"
 72 | 
 73 | cat ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md | git tag -a ${CUR_VERSION} -F-
 74 | 
 75 | if [ -n "${GITLAB_CI_TOKEN}" ]; then
 76 |     git remote set-url origin https://gitlab-ci-token:${GITLAB_CI_TOKEN}@gitlab.daocloud.cn/ndx/ai/knoway.git
 77 | fi
 78 | 
 79 | # push to release branch
 80 | if [ -z "${CI_BUILD_REF_NAME}" ]; then
 81 |     git push origin $(git rev-parse --abbrev-ref HEAD)
 82 | else
 83 |     git push origin ${CI_BUILD_REF_NAME}
 84 | fi
 85 | 
 86 | COMMIT=$(git rev-parse HEAD)
 87 | 
 88 | # Push release notes to main branch also
 89 | git checkout main
 90 | git cherry-pick ${COMMIT}
 91 | git push origin main
 92 | 
 93 | # push tag
 94 | git push origin ${CUR_VERSION}
 95 | 
 96 | curl -s -v \
 97 |     -H "PRIVATE-TOKEN: ${GITLAB_CI_TOKEN}" \
 98 |     -H 'Content-Type: application/json' \
 99 |     'https://gitlab.daocloud.cn/api/v4/projects/ndx%2Fai%2Fknoway/releases' \
100 |     -X POST \
101 |     -d "$(echo '{}' | jq \
102 |         --arg name "Release ${CUR_VERSION}" \
103 |         --arg tag_name "${CUR_VERSION}" \
104 |         --arg description "$(cat ${CUR_DIR}/../changes/${SHORT_VERSION}/CHANGELOG-${CUR_VERSION}.md)" \
105 |         '.name = $name | .tag_name = $tag_name | .description = $description')"
106 | 


--------------------------------------------------------------------------------
/scripts/run-make-gen.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | CUR_DIR=$(
 6 |     cd -- "$(dirname "$0")" >/dev/null 2>&1
 7 |     pwd -P
 8 | )
 9 | 
10 | git fetch origin ${CI_BUILD_REF_NAME}
11 | git checkout ${CI_BUILD_REF_NAME}
12 | 
13 | cd ${CUR_DIR}/..
14 | make gen
15 | 
16 | git rev-parse --abbrev-ref HEAD
17 | git status
18 | 
19 | if [ "$(git status --porcelain | wc -l)" -eq "0" ]; then
20 |     echo "${DIFFPROTO} up to date."
21 |     exit 0
22 | fi
23 | 
24 | if [ -n "${GITLAB_TOKEN}" ]; then
25 |     git remote set-url origin https://gitlab-ci-token:${GITLAB_TOKEN}@gitlab.daocloud.cn/${CI_PROJECT_PATH}.git
26 | fi
27 | 
28 | if ! git config user.name; then
29 |     git config user.name "Auto Gen Bot"
30 |     git config user.email "auto-gen-bot@daocloud.io"
31 | fi
32 | 
33 | git add .
34 | 
35 | git commit -m "Auto run gen code"
36 | 
37 | git push origin ${CI_BUILD_REF_NAME}
38 | 


--------------------------------------------------------------------------------
/scripts/trivy.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | # ignore VULNEEABILITY CVE-2022-1996 it will fix at k8s.io/api next release
 8 | # ignore unfixed  VULNEEABILITY
 9 | 
10 | TRIVY_DB_REPOSITORY=${TRIVY_DB_REPOSITORY:-ghcr.io/aquasecurity/trivy-db}
11 | 
12 | trivy fs --scanners secret --secret-config ./.trivycert.yaml --exit-code 1 ./
13 | 
14 | # The parameters that this shell receives look like this ：
15 | # HIGH,CRITICAL release-ci.daocloud.io/mspider/mspider:v0.8.3-47-gd3ac6536  release-ci.daocloud.io/mspider/mspider-api-server:v0.8.3-47-gd3ac6536
16 | # so need use firtParameter parameter to skip first Parameter HIGH,CRITICAL than trivy images
17 | firtParameter=1
18 | for i in "$@"; do
19 |     if (($firtParameter == 1)); then
20 |         ((firtParameter = $firtParameter + 1))
21 |     else
22 |         trivy image --skip-dirs istio.io/istio --ignore-unfixed --db-repository=${TRIVY_DB_REPOSITORY} --exit-code 1 --severity $1 $i
23 |     fi
24 | done
25 | 


--------------------------------------------------------------------------------
/scripts/unit-test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | set -x
 8 | PATH2TEST=(./pkg/... ./internal/...)
 9 | tmpDir=$(mktemp -d)
10 | mergeF="${tmpDir}/merge.out"
11 | rm -f ${mergeF}
12 | for ((i = 0; i < ${#PATH2TEST[@]}; i++)); do
13 |     ls $tmpDir
14 |     cov_file="${tmpDir}/$i.cover"
15 |     GOMAXPROCS=8 go test --race --v -covermode=atomic -coverpkg=${PATH2TEST[i]} -coverprofile=${cov_file} ${PATH2TEST[i]} # $(go list ${PATH2TEST[i]})
16 |     cat $cov_file | grep -v mode: >>${mergeF} || echo no coverage found
17 | done
18 | #merge them
19 | header=$(head -n1 "${tmpDir}/0.cover")
20 | echo "${header}" >coverage.out
21 | cat ${mergeF} >>coverage.out
22 | go tool cover -func=coverage.out
23 | rm -rf coverage.out ${tmpDir} ${mergeF}
24 | 


--------------------------------------------------------------------------------
/scripts/util.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | set -u
 5 | set -o pipefail
 6 | 
 7 | # This script holds featuregate bash variables and utility functions.
 8 | 
 9 | # This function installs a Go tools by 'go get' command.
10 | # Parameters:
11 | #  - $1: package name, such as "sigs.k8s.io/controller-tools/cmd/controller-gen"
12 | #  - $2: package version, such as "v0.4.1"
13 | # Note:
14 | #   Since 'go get' command will resolve and add dependencies to current module, that may update 'go.mod' and 'go.sum' file.
15 | #   So we use a temporary directory to install the tools.
16 | function util::install_tools() {
17 |     local package="$1"
18 |     local version="$2"
19 | 
20 |     temp_path=$(mktemp -d)
21 |     pushd "${temp_path}" >/dev/null
22 |     GO111MODULE=on go install "${package}"@"${version}"
23 |     GOPATH=$(go env GOPATH | awk -F ':' '{print $1}')
24 |     export PATH=$PATH:$GOPATH/bin
25 |     popd >/dev/null
26 |     rm -rf "${temp_path}"
27 | }
28 | 


--------------------------------------------------------------------------------
/scripts/verify-license.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -ex
 4 | 
 5 | CONFIG_PATH=${CONFIG_PATH:-$(dirname "${BASH_SOURCE[0]}")/..}
 6 | 
 7 | if license-lint -config ${CONFIG_PATH}/license-lint.yml; then
 8 |     echo "✅ License lint succeeded"
 9 | else
10 |     echo # print one empty line, separate from warning messages.
11 |     echo '❌ Please review the above error messages.'
12 |     exit 1
13 | fi
14 | 


--------------------------------------------------------------------------------
/scripts/verify-staticcheck.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -o errexit
 4 | set -o nounset
 5 | set -o pipefail
 6 | 
 7 | REPO_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
 8 | GOLANGCI_LINT_PKG="github.com/golangci/golangci-lint/cmd/golangci-lint"
 9 | GOLANGCI_LINT_VER="v1.62.2"
10 | 
11 | cd "${REPO_ROOT}"
12 | source "scripts/util.sh"
13 | 
14 | command golangci-lint &>/dev/null || util::install_tools ${GOLANGCI_LINT_PKG} ${GOLANGCI_LINT_VER}
15 | 
16 | golangci-lint --version
17 | 
18 | if golangci-lint run -v --timeout=5m; then
19 |     echo '✅ Congratulations! All Go source files have passed staticcheck.'
20 | else
21 |     echo '❌ Staticcheck failed. Please review the warnings above.'
22 |     echo '💡 Tip: If these warnings are unclear, you can file an issue for help.'
23 |     exit 1
24 | fi
25 | 


--------------------------------------------------------------------------------