├── .github ├── .testcoverage.yml ├── ISSUE_TEMPLATE │ ├── 100_feature_request.yml │ ├── 200_bug_report.yml │ └── 300_RFC.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── chart-ci.yml │ ├── docker-build-images.yml │ ├── docker-push-images.yml │ ├── installation-tests.yml │ ├── lint-and-tests.yml │ ├── python-aibrix-kvcache-tests.yml │ ├── python-aibrix-tests.yml │ └── release-build.yaml ├── .gitignore ├── .golangci.yml ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── Makefile ├── PROJECT ├── README.md ├── SECURITY.md ├── api ├── autoscaling │ └── v1alpha1 │ │ ├── groupversion_info.go │ │ ├── podautoscaler_types.go │ │ ├── podautoscaler_types_test.go │ │ └── zz_generated.deepcopy.go ├── model │ └── v1alpha1 │ │ ├── groupversion_info.go │ │ ├── modeladapter_types.go │ │ └── zz_generated.deepcopy.go └── orchestration │ └── v1alpha1 │ ├── condition.go │ ├── groupversion_info.go │ ├── kvcache_types.go │ ├── podset_types.go │ ├── raycluster_type.go │ ├── rayclusterfleet_types.go │ ├── rayclusterreplicaset_types.go │ ├── roleset_types.go │ ├── stormservice_types.go │ └── zz_generated.deepcopy.go ├── benchmarks ├── README.md ├── benchmark.py ├── client │ ├── README.md │ ├── __init__.py │ ├── analyze.py │ ├── client.py │ └── utils.py ├── config.yaml ├── generator │ ├── .gitignore │ ├── __init__.py │ ├── dataset_generator │ │ ├── README.md │ │ ├── __init__.py │ │ ├── converter.py │ │ ├── dataset-examples.png │ │ ├── multiturn_prefix_sharing_dataset.py │ │ ├── synthetic_prefix_sharing_dataset.py │ │ ├── synthetic_prompt.py │ │ ├── util.py │ │ └── utility.py │ └── workload_generator │ │ ├── README.md │ │ ├── __init__.py │ │ ├── config │ │ └── examples │ │ │ ├── completion-len-config.json │ │ │ ├── prompt-len-config.json │ │ │ └── traffic-config.json │ │ ├── distribution.py │ │ ├── sample_request.py │ │ ├── utility.py │ │ ├── utils.py │ │ ├── workload-plot-example.png │ │ └── workload_generator.py ├── image │ ├── aibrix-benchmark-client.png │ 
├── aibrix-benchmark-component.png │ ├── aibrix-benchmark-dataset.png │ └── aibrix-benchmark-workload.png ├── plot │ ├── aibrix0.1-downloader.ipynb │ ├── aibrix0.1-lora.ipynb │ ├── aibrix0.1-routing.ipynb │ └── aibrix0.3-routing_vtc-basic-vs-random.ipynb ├── requirements.txt └── scenarios │ ├── autoscaling │ ├── README.md │ ├── deepseek-llm-7b-chat │ │ ├── apa.yaml │ │ ├── deploy.yaml │ │ ├── hpa.yaml │ │ ├── kpa.yaml │ │ ├── optimizer-kpa.yaml │ │ └── svc.yaml │ ├── overnight_run.sh │ ├── plot-everything.py │ ├── plot_per_pod.py │ ├── requirements_bench_pa.txt │ ├── run-test.sh │ ├── workload-configs │ │ └── predefined │ │ │ ├── completion-len-configs │ │ │ ├── HighFast.json │ │ │ ├── HighSlow.json │ │ │ ├── LowFast.json │ │ │ └── LowSlow.json │ │ │ ├── prompt-len-configs │ │ │ ├── HighFast.json │ │ │ ├── HighSlow.json │ │ │ ├── LowFast.json │ │ │ └── LowSlow.json │ │ │ └── traffic-configs │ │ │ ├── HighFast.json │ │ │ ├── HighSlow.json │ │ │ ├── LowFast.json │ │ │ └── LowSlow.json │ └── workload │ │ ├── 25min_up_and_down │ │ ├── 25min_up_and_down.jsonl │ │ └── plot-synthetic-from-csv-file-synthetic2.pdf │ │ ├── 5s.jsonl │ │ ├── 8min_up_and_down.jsonl │ │ ├── one_request.jsonl │ │ └── ten_requests.jsonl │ ├── gateway │ ├── 7b.yaml │ ├── README.md │ ├── benchmark.py │ └── client.py │ ├── kvcache │ └── README.md │ ├── lora │ ├── README.md │ └── benchmark.py │ └── utils │ ├── check_k8s_is_ready.py │ ├── count_num_pods.py │ ├── set_num_replicas.py │ └── streaming_pod_log_to_file.py ├── build └── container │ ├── .dockerignore │ ├── Dockerfile │ ├── Dockerfile.gateway │ ├── Dockerfile.kvcache │ ├── Dockerfile.metadata │ ├── Dockerfile.runtime │ └── Dockerfile.runtime.dockerignore ├── cmd ├── controllers │ └── main.go ├── kvcache-watcher │ └── main.go └── plugins │ └── main.go ├── config ├── .gitignore ├── crd │ ├── autoscaling │ │ ├── autoscaling.aibrix.ai_podautoscalers.yaml │ │ └── kustomization.yaml │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ ├── model │ │ 
├── kustomization.yaml │ │ └── model.aibrix.ai_modeladapters.yaml │ └── orchestration │ │ ├── kustomization.yaml │ │ ├── orchestration.aibrix.ai_kvcaches.yaml │ │ ├── orchestration.aibrix.ai_podsets.yaml │ │ ├── orchestration.aibrix.ai_rayclusterfleets.yaml │ │ ├── orchestration.aibrix.ai_rayclusterreplicasets.yaml │ │ ├── orchestration.aibrix.ai_rolesets.yaml │ │ └── orchestration.aibrix.ai_stormservices.yaml ├── default │ ├── kustomization.yaml │ ├── manager_webhook_patch.yaml │ └── webhookcainjection_patch.yaml ├── dependency │ ├── envoy-gateway │ │ ├── envoy_gateway_patch.yaml │ │ └── kustomization.yaml │ ├── kuberay-operator │ │ ├── README.md │ │ ├── crds │ │ │ ├── ray.io_rayclusters.yaml │ │ │ ├── ray.io_rayjobs.yaml │ │ │ └── ray.io_rayservices.yaml │ │ ├── kustomization.yaml │ │ └── templates │ │ │ ├── deployment.yaml │ │ │ ├── leader_election_role.yaml │ │ │ ├── leader_election_role_binding.yaml │ │ │ ├── multiple_namespaces_role.yaml │ │ │ ├── multiple_namespaces_rolebinding.yaml │ │ │ ├── ray_rayjob_editor_role.yaml │ │ │ ├── ray_rayjob_viewer_role.yaml │ │ │ ├── ray_rayservice_editor_role.yaml │ │ │ ├── ray_rayservice_viewer_role.yaml │ │ │ ├── role.yaml │ │ │ ├── rolebinding.yaml │ │ │ ├── service.yaml │ │ │ └── serviceaccount.yaml │ └── kustomization.yaml ├── experimentals │ └── gpu-optimizer │ │ ├── gateway-plugin │ │ └── kustomization.yaml │ │ └── kustomization.yaml ├── gateway │ ├── gateway-plugin │ │ ├── gateway-plugin.yaml │ │ └── kustomization.yaml │ ├── gateway.yaml │ └── kustomization.yaml ├── gpu-optimizer │ ├── deployment.yaml │ ├── kustomization.yaml │ ├── rbac.yaml │ └── service.yaml ├── internalcert │ ├── kustomization.yaml │ └── secret.yaml ├── job │ ├── k8s_job_rbac.yaml │ └── kustomization.yaml ├── manager │ ├── kustomization.yaml │ └── manager.yaml ├── metadata │ ├── job_template_patch.yaml │ ├── kustomization.yaml │ ├── metadata.yaml │ ├── redis.yaml │ ├── s3-env-patch.yaml │ └── tos-env-patch.yaml ├── namespace │ ├── 
kustomization.yaml │ └── namespace.yaml ├── overlays │ ├── dev │ │ ├── gateway-plugin │ │ │ └── kustomization.yaml │ │ ├── gpu-optimizer │ │ │ └── kustomization.yaml │ │ └── manager │ │ │ └── kustomization.yaml │ ├── release │ │ ├── envoy_proxy_patch.yaml │ │ ├── gateway_plugins_patch.yaml │ │ ├── kustomization.yaml │ │ └── pdb.yaml │ ├── vke-dev │ │ ├── gateway-plugin │ │ │ ├── gateway_plugins_patch.yaml │ │ │ └── kustomization.yaml │ │ ├── gpu-optimizer │ │ │ └── kustomization.yaml │ │ └── manager │ │ │ └── kustomization.yaml │ ├── vke-ipv6 │ │ ├── envoy_proxy_patch.yaml │ │ └── kustomization.yaml │ └── vke │ │ ├── default │ │ ├── envoy_proxy_patch.yaml │ │ ├── gateway_plugins_patch.yaml │ │ └── kustomization.yaml │ │ └── dependency │ │ ├── envoy_gateway_patch.yaml │ │ └── kustomization.yaml ├── prometheus │ ├── kustomization.yaml │ └── monitor.yaml ├── rbac │ ├── autoscaling │ │ ├── autoscaling_podautoscaler_editor_role.yaml │ │ ├── autoscaling_podautoscaler_viewer_role.yaml │ │ └── kustomization.yaml │ ├── controller-manager │ │ ├── kustomization.yaml │ │ ├── leader_election_role.yaml │ │ ├── leader_election_role_binding.yaml │ │ ├── role.yaml │ │ ├── role_binding.yaml │ │ └── service_account_controller_manager.yaml │ ├── gateway │ │ ├── kustomization.yaml │ │ ├── role_binding_gateway.yaml │ │ ├── role_gateway.yaml │ │ └── service_account_gateway.yaml │ ├── kustomization.yaml │ ├── model │ │ ├── kustomization.yaml │ │ ├── model_modeladapter_editor_role.yaml │ │ └── model_modeladapter_viewer_role.yaml │ └── orchestration │ │ ├── kustomization.yaml │ │ ├── orchestration_kvcache_editor_role.yaml │ │ ├── orchestration_kvcache_viewer_role.yaml │ │ ├── orchestration_rayclusterfleet_editor_role.yaml │ │ ├── orchestration_rayclusterfleet_viewer_role.yaml │ │ ├── orchestration_rayclusterreplicaset_editor_role.yaml │ │ ├── orchestration_rayclusterreplicaset_viewer_role.yaml │ │ ├── orchestration_roleset_editor_role.yaml │ │ ├── orchestration_roleset_viewer_role.yaml │ │ 
├── orchestration_stormservice_editor_role.yaml │ │ └── orchestration_stormservice_viewer_role.yaml ├── samples │ ├── autoscaling_v1alpha1_demo_nginx.yaml │ ├── autoscaling_v1alpha1_kpa.yaml │ ├── autoscaling_v1alpha1_mock_llama.yaml │ ├── autoscaling_v1alpha1_mock_llama_apa.yaml │ ├── autoscaling_v1alpha1_podautoscaler.yaml │ ├── kustomization.yaml │ ├── model_v1alpha1_modeladapter.yaml │ ├── orchestration_v1alpha1_kvcache.yaml │ ├── orchestration_v1alpha1_rayclusterfleet.yaml │ ├── orchestration_v1alpha1_rayclusterreplicaset.yaml │ ├── orchestration_v1alpha1_roleset.yaml │ └── orchestration_v1alpha1_stormservice.yaml ├── standalone │ ├── autoscaler-controller │ │ ├── kustomization.yaml │ │ └── patch.yaml │ ├── distributed-inference-controller │ │ ├── kustomization.yaml │ │ └── patch.yaml │ ├── kv-cache-controller │ │ ├── kustomization.yaml │ │ └── patch.yaml │ ├── model-adapter-controller │ │ ├── kustomization.yaml │ │ └── patch.yaml │ └── stormservice-controller │ │ ├── kustomization.yaml │ │ └── patch.yaml ├── test │ ├── README.md │ ├── gateway │ │ ├── kustomization.yaml │ │ └── vtc-test-env-patch.yaml │ └── kustomization.yaml └── webhook │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ ├── manifests.yaml │ └── service.yaml ├── deployment └── terraform │ ├── .gitignore │ ├── gcp │ ├── .terraform-docs.yml │ ├── .terraform.lock.hcl │ ├── README.md │ ├── cluster │ │ ├── data.tf │ │ ├── main.tf │ │ ├── outputs.tf │ │ ├── terraform.tf │ │ └── variables.tf │ ├── data.tf │ ├── docs.md │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── providers.tf │ ├── terraform.tf │ ├── terraform.tfvars.example │ └── variables.tf │ ├── go.mod │ ├── go.sum │ ├── kubernetes │ ├── .terraform.lock.hcl │ ├── README.md │ ├── data.tf │ ├── locals.tf │ ├── main.tf │ ├── outputs.tf │ ├── terraform.tf │ └── variables.tf │ └── tests │ └── gcp_test.go ├── development ├── README.md ├── app │ ├── Dockerfile │ ├── Makefile │ ├── README.md │ ├── app.py │ ├── config.json │ ├── config │ │ ├── 
heterogeneous │ │ │ ├── kustomization.yaml │ │ │ └── simulator_a40 │ │ │ │ ├── kustomization.yaml │ │ │ │ ├── patch_deployment_a40.yaml │ │ │ │ ├── patch_podautoscaler_a40.yaml │ │ │ │ ├── rename_deployment_a40.json │ │ │ │ └── rename_podautoscaler_a40.json │ │ ├── mock │ │ │ ├── api-key-patch.yaml │ │ │ ├── components.yaml │ │ │ └── kustomization.yaml │ │ ├── simulator │ │ │ ├── kustomization.yaml │ │ │ ├── patch_deployment_a100.yaml │ │ │ ├── patch_podautoscaler_a100.yaml │ │ │ ├── rename_deployment_a100.json │ │ │ └── rename_podautoscaler_a100.json │ │ ├── templates │ │ │ ├── deployment │ │ │ │ ├── deployment.yaml │ │ │ │ └── kustomization.yaml │ │ │ └── podautoscaler │ │ │ │ ├── kustomization.yaml │ │ │ │ ├── podautoscaler_apa.yaml │ │ │ │ ├── podautoscaler_hpa.yaml │ │ │ │ └── podautoscaler_kpa.yaml │ │ └── vke │ │ │ └── kustomization.yaml │ ├── entrypoint.sh │ ├── requirements.txt │ ├── simulator.py │ ├── test_app.py │ └── zscaler_root_ca.crt ├── tutorials │ ├── batch │ │ └── README.md │ ├── distributed │ │ ├── README.md │ │ ├── fleet-autoscaling.yaml │ │ ├── fleet-mock.yaml │ │ ├── fleet.yaml │ │ ├── nvkind-single-node.yaml │ │ ├── nvkind-two-nodes.yaml │ │ ├── raycluster-head-no-gpu.yaml │ │ ├── raycluster.yaml │ │ └── rayjob.yaml │ ├── kvcache │ │ └── kvcache.yaml │ ├── lora │ │ ├── README.md │ │ ├── deployment.yaml │ │ ├── model_adapter.yaml │ │ └── model_adapter_api_key.yaml │ ├── metrics │ │ └── service-monitor.yaml │ ├── podautoscaler │ │ ├── README.md │ │ ├── hpa.yaml │ │ └── pa.yaml │ └── runtime │ │ ├── README.md │ │ ├── runtime-hf-download.yaml │ │ ├── runtime-s3-download.yaml │ │ └── runtime-tos-download.yaml └── vllm │ ├── README.md │ ├── kind-config.yaml │ ├── linux │ └── kustomization.yaml │ └── macos │ ├── components.yaml │ ├── deployment.yaml │ └── kustomization.yaml ├── dist └── chart │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── crds │ ├── autoscaling.aibrix.ai_podautoscalers.yaml │ ├── model.aibrix.ai_modeladapters.yaml │ ├── 
orchestration.aibrix.ai_kvcaches.yaml │ ├── orchestration.aibrix.ai_podsets.yaml │ ├── orchestration.aibrix.ai_rayclusterfleets.yaml │ ├── orchestration.aibrix.ai_rayclusterreplicasets.yaml │ ├── orchestration.aibrix.ai_rolesets.yaml │ └── orchestration.aibrix.ai_stormservices.yaml │ ├── stable.yaml │ ├── templates │ ├── _helpers.tpl │ ├── controller-manager │ │ ├── deployment.yaml │ │ ├── rbac.yaml │ │ └── service.yaml │ ├── gateway-instance │ │ └── gateway.yaml │ ├── gateway-plugin │ │ ├── deployment.yaml │ │ ├── envoy_extension_policy.yaml │ │ ├── httproute.yaml │ │ ├── rbac.yaml │ │ └── service.yaml │ ├── gpu-optimizer │ │ ├── deployment.yaml │ │ ├── rbac.yaml │ │ └── service.yaml │ ├── metadata-service │ │ ├── deployment.yaml │ │ ├── rbac.yaml │ │ ├── redis.yaml │ │ └── service.yaml │ ├── prometheus │ │ └── monitor.yaml │ └── webhook │ │ ├── secret.yaml │ │ ├── service.yaml │ │ └── webhooks.yaml │ ├── values.schema.json │ ├── values.yaml │ └── vke.yaml ├── docs ├── .gitattributes ├── Makefile ├── README.md ├── kv-event-sync-readme.md ├── make.bat ├── paper │ └── AIBrix_White_Paper_0219_2025.pdf ├── requirements-docs.txt └── source │ ├── assets │ ├── images │ │ ├── ai-engine-runtime-overview.png │ │ ├── aibrix-architecture-v1.jpeg │ │ ├── aibrix-dist-kv-cache-arch-overview.png │ │ ├── aibrix-dist-kv-cache-dashboard.png │ │ ├── aibrix-infinistore-arch-overview.png │ │ ├── aibrix-kvcache-offloading-arch-overview.png │ │ ├── aibrix-kvcache-profiling.png │ │ ├── autoscaler │ │ │ ├── aibrix-controller-manager-output.png │ │ │ ├── autoscaling_result.png │ │ │ ├── optimizer-based-autoscaling-70-results.png │ │ │ ├── optimizer-based-podautoscaler.png │ │ │ └── podautoscaler-describe.png │ │ ├── benchmark │ │ │ └── aibrix-benchmark-component-doc.png │ │ ├── cloud │ │ │ ├── lambda-cloud-installation.png │ │ │ ├── lambda-cloud-instance.png │ │ │ ├── lambda-cloud-ssh.png │ │ │ └── lambda-cloud-verify-installation.png │ │ ├── delete-namespace-stuck-1.png │ │ ├── 
delete-namespace-stuck-2.png │ │ ├── draft-release.png │ │ ├── gateway-design.png │ │ ├── heterogeneous-gpu-diagram.png │ │ ├── lora-controller-workflow.png │ │ ├── lora-sequence-diagram.png │ │ ├── lora-service-discovery-resources.png │ │ ├── mix-grain-orchestration.png │ │ ├── model-error.png │ │ ├── release-pipeline-manifests.png │ │ ├── release-pipeline-python-package.png │ │ ├── slo_routing │ │ │ ├── evaluation.png │ │ │ ├── motivation.png │ │ │ └── variation_comparison.png │ │ └── stormservice │ │ │ └── aibrix-stormservice-illustration.png │ └── logos │ │ └── aibrix-logo.jpeg │ ├── community │ ├── community.rst │ ├── contribution.rst │ └── research.rst │ ├── conf.py │ ├── designs │ ├── aibrix-autoscaler.rst │ ├── aibrix-engine-runtime.rst │ ├── aibrix-kvcache-offloading-framework.rst │ ├── aibrix-router.rst │ ├── aibrix-stormservice.rst │ └── architecture.rst │ ├── development │ ├── development.rst │ └── release.rst │ ├── features │ ├── autoscaling │ │ ├── autoscaling.rst │ │ ├── metric-based-autoscaling.rst │ │ └── optimizer-based-autoscaling.rst │ ├── batch-api.rst │ ├── benchmark-and-generator.rst │ ├── gateway-plugins.rst │ ├── heterogeneous-gpu.rst │ ├── kv-event-sync.rst │ ├── kvcache-offloading.rst │ ├── lora-dynamic-loading.rst │ ├── multi-engine.rst │ ├── multi-node-inference.rst │ └── runtime.rst │ ├── getting_started │ ├── advanced-k8s-examples.rst │ ├── faq.rst │ ├── installation │ │ ├── aws.rst │ │ ├── gcp.rst │ │ ├── installation.rst │ │ ├── lambda.rst │ │ └── mac-for-desktop.rst │ └── quickstart.rst │ ├── index.rst │ └── production │ └── observability.rst ├── go.mod ├── go.sum ├── hack ├── boilerplate.go.txt ├── ci │ └── kind-config.yaml ├── enable-kv-events.sh ├── lambda-cloud │ ├── README.md │ ├── install.sh │ ├── nvkind-cluster.yaml │ ├── setup.sh │ └── verify.sh ├── rdma │ ├── detect-gid-in-container.sh │ └── search-gid.sh ├── release │ ├── sync-dependency-images.sh │ └── sync-images.sh ├── tools.go ├── update-codegen.sh ├── 
verify-codegen.sh └── verify-crd-sync.sh ├── observability ├── grafana │ ├── AIBrix_Control_Plane_Runtime_Dashboard.json │ ├── AIBrix_Envoy_Gateway_Dashboard.json │ ├── AIBrix_Envoy_Gateway_Plugins_Dashboard.json │ └── AIBrix_vLLM_Engine_Dashboard.json └── monitor │ ├── envoy_metrics_service.yaml │ ├── service_monitor_controller_manager.yaml │ ├── service_monitor_gateway.yaml │ ├── service_monitor_gateway_plugin.yaml │ └── service_monitor_vllm.yaml ├── pkg ├── cache │ ├── README.md │ ├── build_verification_default_test.go │ ├── build_verification_zmq_test.go │ ├── cache_api.go │ ├── cache_impl.go │ ├── cache_init.go │ ├── cache_init_test.go │ ├── cache_log.go │ ├── cache_metrics.go │ ├── cache_profile.go │ ├── cache_test.go │ ├── cache_trace.go │ ├── errors.go │ ├── informers.go │ ├── kv_event_manager.go │ ├── kv_event_manager_validation_test.go │ ├── kv_event_manager_zmq.go │ ├── kvcache │ │ ├── README.md │ │ ├── endpoint.go │ │ ├── endpoint_test.go │ │ ├── event_types.go │ │ ├── metrics.go │ │ ├── metrics_test.go │ │ ├── msgpack_decoder.go │ │ ├── msgpack_decoder_test.go │ │ ├── msgpack_encoder.go │ │ ├── types.go │ │ ├── zmq_client.go │ │ ├── zmq_client_stub.go │ │ └── zmq_client_test.go │ ├── load_provider.go │ ├── model.go │ ├── model_gpu_profile.go │ ├── model_gpu_profile_test.go │ ├── output_predictor.go │ ├── output_predictor_test.go │ ├── pending_load_provider.go │ ├── pod.go │ ├── store_providers.go │ ├── store_providers_test.go │ ├── test_helpers.go │ ├── trace.go │ ├── trace_test.go │ └── utils.go ├── cert │ └── cert.go ├── client │ ├── applyconfiguration │ │ ├── autoscaling │ │ │ └── v1alpha1 │ │ │ │ ├── metricsource.go │ │ │ │ ├── podautoscaler.go │ │ │ │ ├── podautoscalerspec.go │ │ │ │ ├── podautoscalerstatus.go │ │ │ │ ├── scalingdecision.go │ │ │ │ └── subtargetselector.go │ │ ├── internal │ │ │ └── internal.go │ │ ├── model │ │ │ └── v1alpha1 │ │ │ │ ├── modeladapter.go │ │ │ │ ├── modeladapterspec.go │ │ │ │ └── modeladapterstatus.go │ │ ├── 
orchestration │ │ │ └── v1alpha1 │ │ │ │ ├── condition.go │ │ │ │ ├── disruptiontolerance.go │ │ │ │ ├── rayclusterfleet.go │ │ │ │ ├── rayclusterfleetcondition.go │ │ │ │ ├── rayclusterfleetspec.go │ │ │ │ ├── rayclusterfleetstatus.go │ │ │ │ ├── rayclusterreplicaset.go │ │ │ │ ├── rayclusterreplicasetspec.go │ │ │ │ ├── rayclusterreplicasetstatus.go │ │ │ │ ├── rayclustertemplatespec.go │ │ │ │ ├── rolesetspec.go │ │ │ │ ├── rolesettemplatespec.go │ │ │ │ ├── rolespec.go │ │ │ │ ├── rolestatus.go │ │ │ │ ├── roleupdatestrategy.go │ │ │ │ ├── schedulingstrategy.go │ │ │ │ ├── stormservice.go │ │ │ │ ├── stormservicespec.go │ │ │ │ ├── stormservicestatus.go │ │ │ │ └── stormserviceupdatestrategy.go │ │ └── utils.go │ ├── clientset │ │ └── versioned │ │ │ ├── clientset.go │ │ │ ├── fake │ │ │ ├── clientset_generated.go │ │ │ ├── doc.go │ │ │ └── register.go │ │ │ ├── scheme │ │ │ ├── doc.go │ │ │ └── register.go │ │ │ └── typed │ │ │ ├── autoscaling │ │ │ └── v1alpha1 │ │ │ │ ├── autoscaling_client.go │ │ │ │ ├── doc.go │ │ │ │ ├── fake │ │ │ │ ├── doc.go │ │ │ │ ├── fake_autoscaling_client.go │ │ │ │ └── fake_podautoscaler.go │ │ │ │ ├── generated_expansion.go │ │ │ │ └── podautoscaler.go │ │ │ ├── model │ │ │ └── v1alpha1 │ │ │ │ ├── doc.go │ │ │ │ ├── fake │ │ │ │ ├── doc.go │ │ │ │ ├── fake_model_client.go │ │ │ │ └── fake_modeladapter.go │ │ │ │ ├── generated_expansion.go │ │ │ │ ├── model_client.go │ │ │ │ └── modeladapter.go │ │ │ └── orchestration │ │ │ └── v1alpha1 │ │ │ ├── doc.go │ │ │ ├── fake │ │ │ ├── doc.go │ │ │ ├── fake_orchestration_client.go │ │ │ ├── fake_rayclusterfleet.go │ │ │ ├── fake_rayclusterreplicaset.go │ │ │ └── fake_stormservice.go │ │ │ ├── generated_expansion.go │ │ │ ├── orchestration_client.go │ │ │ ├── rayclusterfleet.go │ │ │ ├── rayclusterreplicaset.go │ │ │ └── stormservice.go │ ├── informers │ │ └── externalversions │ │ │ ├── autoscaling │ │ │ ├── interface.go │ │ │ └── v1alpha1 │ │ │ │ ├── interface.go │ │ │ │ └── 
podautoscaler.go │ │ │ ├── factory.go │ │ │ ├── generic.go │ │ │ ├── internalinterfaces │ │ │ └── factory_interfaces.go │ │ │ ├── model │ │ │ ├── interface.go │ │ │ └── v1alpha1 │ │ │ │ ├── interface.go │ │ │ │ └── modeladapter.go │ │ │ └── orchestration │ │ │ ├── interface.go │ │ │ └── v1alpha1 │ │ │ ├── interface.go │ │ │ ├── rayclusterfleet.go │ │ │ ├── rayclusterreplicaset.go │ │ │ └── stormservice.go │ └── listers │ │ ├── autoscaling │ │ └── v1alpha1 │ │ │ ├── expansion_generated.go │ │ │ └── podautoscaler.go │ │ ├── model │ │ └── v1alpha1 │ │ │ ├── expansion_generated.go │ │ │ └── modeladapter.go │ │ └── orchestration │ │ └── v1alpha1 │ │ ├── expansion_generated.go │ │ ├── rayclusterfleet.go │ │ ├── rayclusterreplicaset.go │ │ └── stormservice.go ├── config │ └── config.go ├── constants │ ├── kv_event_sync.go │ ├── kvcache.go │ └── model.go ├── controller │ ├── constants │ │ └── stormservice.go │ ├── controller.go │ ├── kvcache │ │ ├── backends │ │ │ ├── common.go │ │ │ ├── common_test.go │ │ │ ├── distributed.go │ │ │ ├── distributed_test.go │ │ │ ├── hpkv.go │ │ │ ├── hpkv_test.go │ │ │ ├── infinistore.go │ │ │ ├── infinistore_test.go │ │ │ ├── reconciler.go │ │ │ └── vineyard.go │ │ ├── kvcache_controller.go │ │ ├── kvcache_controller_ginkgo_test.go │ │ ├── kvcache_controller_test.go │ │ └── suite_test.go │ ├── modeladapter │ │ ├── README.md │ │ ├── modeladapter_controller.go │ │ ├── modeladapter_controller_test.go │ │ ├── modeladapter_controller_unit_tests.go │ │ ├── resources.go │ │ ├── resources_test.go │ │ ├── scheduling │ │ │ ├── bin_pack.go │ │ │ ├── least_adapters.go │ │ │ ├── least_latency.go │ │ │ ├── least_throughput.go │ │ │ ├── random.go │ │ │ └── scheduler.go │ │ ├── suite_test.go │ │ ├── utils.go │ │ └── utils_test.go │ ├── modelrouter │ │ └── modelrouter_controller.go │ ├── podautoscaler │ │ ├── aggregation │ │ │ └── aggregator.go │ │ ├── algorithm │ │ │ ├── algorithm.go │ │ │ ├── apa.go │ │ │ ├── apa_test.go │ │ │ ├── hpa.go │ │ │ ├── 
kpa.go │ │ │ ├── kpa_test.go │ │ │ └── mock_context_test.go │ │ ├── autoscaler.go │ │ ├── context │ │ │ └── context.go │ │ ├── hpa_resources.go │ │ ├── metrics │ │ │ ├── client.go │ │ │ ├── collector.go │ │ │ ├── fetcher.go │ │ │ └── utils.go │ │ ├── monitor │ │ │ ├── metrics.go │ │ │ ├── monitor.go │ │ │ └── monitor_test.go │ │ ├── podautoscaler_controller.go │ │ ├── types │ │ │ ├── annotations.go │ │ │ ├── core.go │ │ │ └── metrics.go │ │ ├── utils.go │ │ ├── workload_scale.go │ │ └── workload_scale_test.go │ ├── podset │ │ └── podset_controller.go │ ├── rayclusterfleet │ │ ├── progress.go │ │ ├── rayclusterfleet_controller.go │ │ ├── rayclusterfleet_controller_test.go │ │ ├── recreate.go │ │ ├── rollback.go │ │ ├── rolling.go │ │ ├── suite_test.go │ │ ├── sync.go │ │ └── util │ │ │ └── fleet.go │ ├── rayclusterreplicaset │ │ ├── rayclusterreplicaset_controller.go │ │ ├── rayclusterreplicaset_controller_test.go │ │ ├── rayclusterreplicaset_utils.go │ │ └── suite_test.go │ ├── roleset │ │ ├── podset_rollsyncer.go │ │ ├── roleset_controller.go │ │ ├── roleset_controller_test.go │ │ ├── rolesyncer.go │ │ ├── rolesyncer_test.go │ │ ├── rolling.go │ │ ├── suite_test.go │ │ ├── sync.go │ │ ├── utils.go │ │ └── utils_test.go │ ├── stormservice │ │ ├── revision.go │ │ ├── revision_test.go │ │ ├── rolesetoperations.go │ │ ├── rolesetoperations_test.go │ │ ├── stormservice_controller.go │ │ ├── stormservice_controller_test.go │ │ ├── suite_test.go │ │ ├── sync.go │ │ ├── sync_test.go │ │ ├── utils.go │ │ └── utils_test.go │ └── util │ │ ├── controller_utils.go │ │ ├── controller_utils_test.go │ │ ├── expectation │ │ ├── expectation.go │ │ └── expectation_test.go │ │ ├── history │ │ └── controller_history.go │ │ ├── orchestration │ │ ├── util.go │ │ └── util_test.go │ │ └── patch │ │ ├── json_patch.go │ │ ├── json_patch_test.go │ │ ├── patch_method.go │ │ └── patch_method_test.go ├── features │ └── features.go ├── kvevent │ ├── doc.go │ ├── errors.go │ ├── handler.go │ ├── 
handler_test.go │ ├── integration_test.go │ ├── interfaces.go │ ├── manager.go │ ├── manager_comprehensive_test.go │ ├── manager_test.go │ └── test_helpers.go ├── metrics │ ├── common.go │ ├── custom_metrics.go │ ├── custom_metrics_test.go │ ├── engine_fetcher.go │ ├── engine_fetcher_test.go │ ├── metrics.go │ ├── server.go │ ├── types.go │ ├── types_test.go │ ├── utils.go │ └── utils_test.go ├── plugins │ └── gateway │ │ ├── algorithms │ │ ├── README.md │ │ ├── algorithms_test.go │ │ ├── fallback.go │ │ ├── fallback_test.go │ │ ├── least_busy_time.go │ │ ├── least_busy_time_test.go │ │ ├── least_gpu_cache.go │ │ ├── least_gpu_cache_test.go │ │ ├── least_kv_cache.go │ │ ├── least_kv_cache_test.go │ │ ├── least_latency.go │ │ ├── least_latency_test.go │ │ ├── least_load.go │ │ ├── least_load_test.go │ │ ├── least_request.go │ │ ├── least_request_test.go │ │ ├── least_util.go │ │ ├── least_util_test.go │ │ ├── model_router_factory.go │ │ ├── pack_load.go │ │ ├── pd_disaggregation.go │ │ ├── pd_disaggregation_test.go │ │ ├── prefix_cache.go │ │ ├── prefix_cache_metrics_test.go │ │ ├── prefix_cache_new_test.go │ │ ├── prefix_cache_preble.go │ │ ├── prefix_cache_preble_test.go │ │ ├── prefix_cache_routing_test.go │ │ ├── prefix_cache_test.go │ │ ├── queue_router.go │ │ ├── random.go │ │ ├── router.go │ │ ├── router_test.go │ │ ├── slo.go │ │ ├── slo_test.go │ │ ├── throughput.go │ │ ├── throughput_test.go │ │ ├── tokenizer_pool.go │ │ ├── tokenizer_pool_metrics_test.go │ │ ├── tokenizer_pool_test.go │ │ ├── util.go │ │ ├── vtc.go │ │ └── vtc │ │ │ ├── token_estimator.go │ │ │ ├── token_estimator_test.go │ │ │ ├── token_tracker.go │ │ │ ├── token_tracker_test.go │ │ │ ├── vtc_basic.go │ │ │ ├── vtc_basic_test.go │ │ │ └── vtc_router.go │ │ ├── gateway.go │ │ ├── gateway_ratelimit.go │ │ ├── gateway_req_body.go │ │ ├── gateway_req_body_test.go │ │ ├── gateway_req_headers.go │ │ ├── gateway_rsp_body.go │ │ ├── gateway_rsp_headers.go │ │ ├── gateway_test.go │ │ ├── 
gateway_test_helpers.go │ │ ├── queue │ │ ├── queue_test.go │ │ ├── simple_queue.go │ │ ├── simple_queue_test.go │ │ └── slo_queue.go │ │ ├── ratelimiter │ │ ├── rate_limiter.go │ │ └── redis.go │ │ ├── types.go │ │ ├── util.go │ │ └── util_test.go ├── types │ ├── output_predictor.go │ ├── pod_list.go │ ├── router.go │ ├── router_context.go │ ├── router_context_test.go │ ├── router_queue.go │ └── types_test.go ├── utils │ ├── annotations.go │ ├── annotations_test.go │ ├── hash │ │ ├── hash.go │ │ └── hash_test.go │ ├── kvcache.go │ ├── kvcache_test.go │ ├── labels.go │ ├── labels_test.go │ ├── lrustore │ │ ├── lru_store.go │ │ ├── lru_store_test.go │ │ └── store.go │ ├── modeladapter.go │ ├── modeladapter_test.go │ ├── pod.go │ ├── pod_array.go │ ├── pod_array_test.go │ ├── pod_test.go │ ├── prefixcacheindexer │ │ ├── hash.go │ │ ├── hash_test.go │ │ ├── tree.go │ │ └── tree_test.go │ ├── raycluster.go │ ├── redis.go │ ├── registry.go │ ├── registry_test.go │ ├── sync_map.go │ ├── sync_map_test.go │ ├── syncprefixcacheindexer │ │ ├── README.md │ │ ├── events.go │ │ ├── sync_hash.go │ │ ├── sync_hash_bench_test.go │ │ ├── sync_hash_stress_test.go │ │ └── sync_hash_test.go │ ├── tokenizer │ │ ├── README.md │ │ ├── adapter_sglang.go │ │ ├── adapter_vllm.go │ │ ├── errors.go │ │ ├── interfaces.go │ │ ├── local_characters.go │ │ ├── local_tiktoken.go │ │ ├── remote_client.go │ │ ├── remote_client_test.go │ │ ├── remote_tokenizer.go │ │ ├── tokenizer.go │ │ ├── types.go │ │ └── utils.go │ ├── users.go │ ├── util.go │ ├── util_test.go │ └── utils_test.go └── webhook │ ├── deployment_webhook.go │ ├── kvcache_webhook.go │ ├── modeladapter_webhook.go │ ├── sidecar_injection.go │ └── stormservice_webhook.go ├── python ├── aibrix │ ├── README.md │ ├── aibrix │ │ ├── __init__.py │ │ ├── __version__.py │ │ ├── app.py │ │ ├── batch │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── constant.py │ │ │ ├── driver.py │ │ │ ├── job_driver.py │ │ │ ├── job_entity │ │ │ │ ├── 
__init__.py │ │ │ │ ├── batch_job.py │ │ │ │ ├── job_entity_manager.py │ │ │ │ └── k8s_transformer.py │ │ │ ├── job_manager.py │ │ │ ├── job_progress_manager.py │ │ │ ├── scheduler.py │ │ │ ├── storage │ │ │ │ ├── __init__.py │ │ │ │ ├── adapter.py │ │ │ │ ├── batch_metastore.py │ │ │ │ └── batch_storage.py │ │ │ └── worker.py │ │ ├── common │ │ │ ├── __init__.py │ │ │ └── errors.py │ │ ├── config.py │ │ ├── downloader │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── base.py │ │ │ ├── entity.py │ │ │ ├── huggingface.py │ │ │ ├── s3.py │ │ │ ├── tos.py │ │ │ └── utils.py │ │ ├── envs.py │ │ ├── gpu_optimizer │ │ │ ├── Makefile │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── load_monitor │ │ │ │ ├── __init__.py │ │ │ │ ├── clusterer.py │ │ │ │ ├── helpers.py │ │ │ │ ├── load_reader.py │ │ │ │ ├── monitor.py │ │ │ │ ├── profile_reader.py │ │ │ │ └── visualizer.py │ │ │ ├── optimizer │ │ │ │ ├── __init__.py │ │ │ │ ├── optimizer.py │ │ │ │ ├── profiling │ │ │ │ │ ├── README.md │ │ │ │ │ ├── benchmark.py │ │ │ │ │ ├── benchmark.sh │ │ │ │ │ ├── gen_benchmark_prompt.py │ │ │ │ │ ├── gen_profile.py │ │ │ │ │ ├── gpu_benchmark.py │ │ │ │ │ └── result │ │ │ │ │ │ ├── simulator-llama2-7b-a100.json │ │ │ │ │ │ ├── simulator-llama2-7b-a100.jsonl │ │ │ │ │ │ ├── simulator-llama2-7b-a100_obsoleted_v1.json │ │ │ │ │ │ ├── simulator-llama2-7b-a40.json │ │ │ │ │ │ └── simulator-llama2-7b-a40.jsonl │ │ │ │ ├── solver │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── melange │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── config_example.json │ │ │ │ │ │ ├── example.py │ │ │ │ │ │ ├── requirements.txt │ │ │ │ │ │ ├── runner.py │ │ │ │ │ │ ├── solver.py │ │ │ │ │ │ └── util.py │ │ │ │ └── types.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── logging.py │ │ ├── logger.py │ │ ├── metadata │ │ │ ├── __init__.py │ │ │ ├── api │ │ │ │ ├── __init__.py │ │ │ │ └── v1 │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── batch.py │ │ │ │ │ ├── files.py │ │ │ │ │ 
├── models.py │ │ │ │ │ └── users.py │ │ │ ├── app.py │ │ │ ├── cache │ │ │ │ ├── __init__.py │ │ │ │ ├── job.py │ │ │ │ └── utils.py │ │ │ ├── core │ │ │ │ ├── __init__.py │ │ │ │ ├── asyncio_thread.py │ │ │ │ ├── httpx_client.py │ │ │ │ └── kopf_operator.py │ │ │ ├── secret_gen.py │ │ │ └── setting │ │ │ │ ├── __init__.py │ │ │ │ ├── config.py │ │ │ │ ├── k8s_job_redis_patch.yaml │ │ │ │ ├── k8s_job_s3_patch.yaml │ │ │ │ ├── k8s_job_template.yaml │ │ │ │ ├── k8s_job_tos_patch.yaml │ │ │ │ ├── s3_secret_template.yaml │ │ │ │ └── tos_secret_template.yaml │ │ ├── metrics │ │ │ ├── __init__.py │ │ │ ├── engine_rules.py │ │ │ ├── http_collector.py │ │ │ ├── metrics.py │ │ │ └── standard_rules.py │ │ ├── openapi │ │ │ ├── __init__.py │ │ │ ├── engine │ │ │ │ ├── __init__.py │ │ │ │ ├── base.py │ │ │ │ └── vllm.py │ │ │ ├── model.py │ │ │ └── protocol.py │ │ ├── protos │ │ │ └── __init__.py │ │ ├── runtime │ │ │ ├── __init__.py │ │ │ ├── artifact_service.py │ │ │ └── downloaders.py │ │ └── storage │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── factory.py │ │ │ ├── local.py │ │ │ ├── reader.py │ │ │ ├── redis.py │ │ │ ├── s3.py │ │ │ ├── tos.py │ │ │ ├── types.py │ │ │ └── utils.py │ ├── poetry.lock │ ├── pyproject.toml │ ├── scripts │ │ ├── __init__.py │ │ ├── format.sh │ │ └── generate_secrets.py │ └── tests │ │ ├── __init__.py │ │ ├── batch │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_batch_storage_adapter.py │ │ ├── test_driver.py │ │ ├── test_e2e_abnormal_job_behavior.py │ │ ├── test_e2e_openai_batch_api.py │ │ ├── test_inference_client_integration.py │ │ ├── test_job_cache.py │ │ ├── test_job_entity.py │ │ ├── test_job_manager.py │ │ ├── test_k8s_job_persistence.py │ │ ├── test_k8s_job_transformer.py │ │ ├── test_rbac_setup.py │ │ ├── test_worker_s3_integration.py │ │ └── testdata │ │ │ ├── job_rbac.yaml │ │ │ ├── k8s_job_patch_unittest.yaml │ │ │ ├── s3_secret.yaml │ │ │ └── sample_job_input.jsonl │ │ ├── downloader │ │ ├── __init__.py │ │ ├── 
test_downloader_hf.py │ │ ├── test_downloader_s3.py │ │ ├── test_downloader_tos.py │ │ ├── test_downloader_tos_v1.py │ │ ├── test_entity.py │ │ └── test_utils.py │ │ ├── e2e │ │ ├── README.md │ │ ├── __init__.py │ │ └── test_batch_api.py │ │ ├── gpu_optimizer │ │ ├── __init__.py │ │ ├── test_datasetloadreader.py │ │ ├── test_gatewayloadreader.py │ │ └── test_gen_benchmark.py │ │ ├── metadata │ │ ├── test_app_integration.py │ │ ├── test_kopf_integration.py │ │ ├── test_models_api.py │ │ ├── test_secret_gen.py │ │ └── test_users_api.py │ │ ├── metrics │ │ ├── __init__.py │ │ ├── test_metrics.py │ │ ├── test_metrics_multi_engine.py │ │ └── test_metrics_passthrough_mode.py │ │ ├── openapi │ │ ├── __init__.py │ │ └── engine │ │ │ ├── __init__.py │ │ │ └── test_vllm_engine_lora_loading.py │ │ ├── storage │ │ ├── README.md │ │ ├── __init__.py │ │ ├── conftest.py │ │ ├── test_factory.py │ │ ├── test_local_storage.py │ │ ├── test_reader.py │ │ ├── test_redis_storage.py │ │ ├── test_s3_demo.py │ │ ├── test_storage.py │ │ └── test_utils.py │ │ ├── test_files_api.py │ │ └── test_logger.py └── aibrix_kvcache │ ├── .gitignore │ ├── .pre-commit-config.yaml │ ├── CMakeLists.txt │ ├── LICENSE │ ├── README.md │ ├── aibrix_kvcache │ ├── __init__.py │ ├── _custom_ops.py │ ├── cache_args.py │ ├── cache_handle.py │ ├── cache_hashable.py │ ├── cache_manager.py │ ├── common │ │ ├── __init__.py │ │ ├── absl_logging.py │ │ ├── async_base.py │ │ ├── cached_pyobject.py │ │ ├── object_pool.py │ │ └── threading.py │ ├── config.py │ ├── envs.py │ ├── l1 │ │ ├── __init__.py │ │ ├── eviction_policy │ │ │ ├── __init__.py │ │ │ ├── base_eviction_policy.py │ │ │ ├── fifo.py │ │ │ ├── lru.py │ │ │ └── s3fifo.py │ │ └── l1_cache.py │ ├── l2 │ │ ├── __init__.py │ │ ├── connectors │ │ │ ├── __init__.py │ │ │ ├── connector.py │ │ │ ├── hpkv.py │ │ │ ├── infinistore.py │ │ │ ├── mock.py │ │ │ ├── pris.py │ │ │ └── rocksdb.py │ │ ├── key_builders │ │ │ ├── __init__.py │ │ │ ├── hasher.py │ │ │ ├── 
hex_key_builder.py │ │ │ ├── key_builder.py │ │ │ ├── raw_key_builder.py │ │ │ ├── rolling_hash_key_builder.py │ │ │ └── simple_hash_key_builder.py │ │ ├── l2_cache.py │ │ ├── marshallers │ │ │ ├── __init__.py │ │ │ ├── marshaller.py │ │ │ ├── string_serializer.py │ │ │ ├── tensor_serializer.py │ │ │ └── zstd_compressor.py │ │ └── placement │ │ │ ├── __init__.py │ │ │ ├── placement.py │ │ │ └── simple_placement.py │ ├── memory │ │ ├── __init__.py │ │ ├── allocator.py │ │ ├── external_memory_region.py │ │ ├── memory_region.py │ │ └── ref_counted_obj.py │ ├── meta_service │ │ ├── __init__.py │ │ ├── meta_service.py │ │ └── redis_meta_service.py │ ├── metrics.py │ ├── profiling.py │ ├── spec.py │ ├── status.py │ ├── transport │ │ ├── __init__.py │ │ └── rdma.py │ ├── utils.py │ └── version.py │ ├── cmake │ └── utils.cmake │ ├── csrc │ ├── attention │ │ ├── attention_dtypes.h │ │ ├── attention_generic.cuh │ │ ├── dtype_bfloat16.cuh │ │ ├── dtype_float16.cuh │ │ ├── dtype_float32.cuh │ │ └── dtype_fp8.cuh │ ├── cache.h │ ├── cache_kernels.cu │ ├── core │ │ └── registration.h │ ├── quantization │ │ └── fp8 │ │ │ ├── amd │ │ │ └── quant_utils.cuh │ │ │ └── nvidia │ │ │ └── quant_utils.cuh │ └── torch_bindings.cpp │ ├── integration │ └── vllm │ │ ├── vllm_v0.10.2-aibrix-kvcache.patch │ │ ├── vllm_v0.8.5-aibrix-kvcache.patch │ │ └── vllm_v0.9.1-aibrix-kvcache.patch │ ├── pyproject.toml │ ├── requirements │ ├── build.txt │ ├── core.txt │ ├── dev.txt │ ├── lint.txt │ └── test.txt │ ├── scripts │ ├── check_aibrix_header.py │ └── format.sh │ ├── setup.py │ └── tests │ ├── __init__.py │ ├── conftest.py │ ├── pytest.ini │ ├── test_cache_handle.py │ ├── test_cache_manager.py │ ├── test_cache_ops.py │ ├── test_common.py │ ├── test_eviction_policies.py │ ├── test_group_aware_cache_manager.py │ ├── test_key_builder.py │ ├── test_l1cache.py │ ├── test_l2cache.py │ ├── test_memory_region.py │ ├── test_placement.py │ └── test_tensor_pool_allocator.py ├── samples ├── adapter │ ├── 
adapter-api-key.yaml │ ├── adapter-multi-replica.yaml │ ├── adapter-reliability-demo.yaml │ ├── adapter.yaml │ ├── base-api-key.yaml │ ├── base-without-runtime.yaml │ └── base.yaml ├── autoscaling │ ├── apa-resource.yaml │ ├── apa.yaml │ ├── deploy.yaml │ ├── hpa.yaml │ ├── kpa.yaml │ ├── optimizer-kpa.yaml │ ├── stormservice-pool.yaml │ └── stormservice-replica.yaml ├── deepseek-r1 │ ├── README.md │ ├── deepseek-r1-ai-runtime.yaml │ ├── deepseek-r1-autoscaling.yaml │ ├── deepseek-r1-huggingface.yaml │ ├── deepseek-r1-local-nvme.yaml │ ├── deepseek-r1-pvc.yaml │ ├── deepseek-r1-service.yaml │ ├── deepseek-r1-tcp.yaml │ └── static │ │ ├── AIBrix Engine Dashboard (vLLM)-1741078999667.json │ │ ├── deepseek-dashboard.png │ │ └── deepseek-deployment.png ├── disaggregation │ ├── sglang │ │ ├── README.md │ │ ├── pool.yaml │ │ ├── replica.yaml │ │ └── tp-1p1d.yaml │ └── vllm │ │ ├── 1p1d.yaml │ │ ├── README.md │ │ ├── disagg_proxy_server.py │ │ ├── pool.yaml │ │ ├── replica.yaml │ │ └── router.yaml ├── distributed │ ├── fleet-two-node.yaml │ └── fleet.yaml ├── heterogeneous │ ├── deepseek-coder-7b-l20-deployment.yaml │ ├── deepseek-coder-7b-l20-podautoscaler.yaml │ ├── deepseek-coder-7b-service.yaml │ ├── deepseek-coder-7b-v100-deployment.yaml │ ├── deepseek-coder-7b-v100-podautoscaler.yaml │ └── kustomization.yaml ├── kvcache │ ├── infinistore │ │ ├── kvcache.yaml │ │ ├── kvcache_customized.yaml │ │ └── vllm.yaml │ ├── l1cache │ │ └── vllm.yaml │ ├── profiling │ │ └── profiling_svc.yaml │ └── vineyard │ │ ├── deployment-tp.yaml │ │ ├── deployment.yaml │ │ ├── kvcache-tp.yaml │ │ └── kvcache.yaml ├── multimodality │ ├── vllm │ │ ├── README.md │ │ ├── dse-qwen2-2b.yaml │ │ ├── llava-7b.yaml │ │ ├── qwen-audio.yaml │ │ ├── qwen-vl.yaml │ │ └── send_file_base64.py │ └── xDiT │ │ ├── README.md │ │ ├── image-generation │ │ ├── aibrix_vke_kv_image_hunyuanDiT.yaml │ │ ├── aibrix_vke_kv_image_hunyuanDiT_parallel.yaml │ │ ├── aibrix_vke_kv_image_sd.yaml │ │ └── 
aibrix_vke_kv_image_sd_parallel.yaml │ │ ├── video-generation │ │ ├── aibrix_vke_staging_video_cogvideo_parallel.yaml │ │ └── aibrix_vke_staging_video_hunyuanvideo.yaml │ │ └── xDiT-integration │ │ └── xdit-52e74e88d2332281eefe68894af02f805a1d2b4f.patch ├── quickstart │ ├── model.yaml │ └── pd-model.yaml └── volcano-engine │ ├── README.md │ ├── autoscaler.yaml │ ├── deepseek-8b-kv-cluster.yaml │ ├── deepseek-8b-kv-direct.yaml │ ├── deepseek-8b-kv-dram.yaml │ ├── deepseek-8b-naive.yaml │ ├── deepseek-r1.yaml │ ├── grafana.yaml │ ├── hpa-r1.yaml │ ├── kvcache.yaml │ └── prefix-cache-routing.ipynb ├── scripts └── port-forward.sh └── test ├── README.md ├── e2e ├── e2e_test.go ├── model_adapter_test.go ├── openai_api_compatibility_test.go ├── routing_strategy_test.go ├── util.go └── vtc_routing_test.go ├── integration ├── controller │ ├── podautoscaler_test.go │ ├── podset_test.go │ ├── roleset_test.go │ ├── stormservice_test.go │ └── suit_test.go └── webhook │ ├── deployment_webhook_test.go │ ├── kvcache_webhook_test.go │ ├── modeladapter_test.go │ ├── stormservice_webhook_test.go │ └── suit_test.go ├── kv-event-sync-e2e.rst ├── regression ├── v0.2.1 │ ├── README.md │ ├── aibrix_kvcache_0.6.1.yaml │ ├── aibrix_naive.yaml │ ├── benchmark_output_20250323.zip │ ├── client.yaml │ ├── k8s_stack.yaml │ ├── plot.py │ ├── ps_k8s_stack.yaml │ └── ps_stack.yaml ├── v0.3.0 │ ├── README.md │ ├── aibrix_kvcache_dram.yaml │ ├── aibrix_kvcache_external.yaml │ ├── aibrix_naive.yaml │ ├── aibrix_naive_prefix_cache.yaml │ ├── benchmark_output_20250519.zip │ ├── client.yaml │ ├── figure_ttft_generation_time.png │ ├── infinistore-hostnetwork.yaml │ ├── infinistore.yaml │ ├── k8s_stack.yaml │ ├── kvcache.yaml │ ├── lmcache_helm_naive.yaml │ ├── lmcache_helm_stack.yaml │ ├── plot.py │ └── ps_stack.yaml └── v0.4.0 │ ├── Chart.yaml │ ├── README.md │ ├── benchmark-client.yaml │ ├── configs │ ├── sglang-disagg-base.yaml │ ├── sglang-non-disagg-base.yaml │ ├── vllm-disagg-base.yaml │ └── 
vllm-non-disagg-base.yaml │ ├── dynamo │ ├── disagg.yaml │ └── disagg_router.yaml │ ├── multi-engine │ ├── sglang-llama-8b.yaml │ └── vllm-llama-8b.yaml │ ├── templates │ ├── _helpers.tpl │ ├── sglang-disaggregated.yaml │ ├── sglang-non-disaggregated.yaml │ ├── vllm-disaggregated.yaml │ └── vllm-non-disaggregated.yaml │ └── values.yaml ├── run-e2e-tests.sh └── utils ├── utils.go ├── validation ├── hpa.go ├── pod.go ├── podautoscaler.go ├── podset.go ├── roleset.go └── stormservice.go └── wrapper ├── deployment.go ├── kvcache.go ├── modeladapter.go ├── podautoscaler.go ├── podset.go ├── roleset.go └── stormservice.go /.github/workflows/docker-build-images.yml: -------------------------------------------------------------------------------- 1 | name: Docker Build Images 2 | 3 | on: 4 | workflow_dispatch: # Allows manual trigger 5 | pull_request: 6 | branches: [ "main", "release-*" ] 7 | 8 | jobs: 9 | build: 10 | # This prevents the job from running as other steps cover its functionality. 11 | # We use 'if: false' to keep the file for future reference without deleting it. 
12 | if: false 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Check out code 16 | uses: actions/checkout@v4 17 | - name: Set up Docker Buildx 18 | uses: docker/setup-buildx-action@v3 19 | - name: Docker Build Container Images 20 | run: make docker-build-all 21 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | run: 2 | timeout: 5m 3 | allow-parallel-runners: true 4 | 5 | issues: 6 | # don't skip warning about doc comments 7 | # don't exclude the default set of lint 8 | exclude-use-default: false 9 | # restore some of the defaults 10 | # (fill in the rest as needed) 11 | exclude-rules: 12 | - path: "api/*" 13 | linters: 14 | - lll 15 | - path: "pkg/*" 16 | linters: 17 | - dupl 18 | - lll 19 | # disable lll check for flag descriptions 20 | - path: "cmd/controllers/main.go" 21 | linters: 22 | - lll 23 | linters: 24 | disable-all: true 25 | enable: 26 | - dupl 27 | - errcheck 28 | - exportloopref 29 | - goconst 30 | - gocyclo 31 | - gofmt 32 | - goimports 33 | - gosimple 34 | - govet 35 | - ineffassign 36 | - lll 37 | - misspell 38 | - nakedret 39 | # - prealloc 40 | - staticcheck 41 | - typecheck 42 | - unconvert 43 | # - unparam 44 | # - unused 45 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # AIBrix Community Code of Conduct 2 | 3 | AIBrix follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). 4 | 5 | ## Enforcement 6 | 7 | Violations of the Code of Conduct should be reported to steering@aibrix.ai. 8 | 9 | This email address is monitored by the AIBrix Steering Committee. This committee will handle the code of conduct issues and will coordinate with the CNCF. 
10 | 11 | The identity of individuals who report violations of the Code of Conduct will remain confidential. 12 | 13 | Steering Committee and Technical Oversight Committee members who do not follow or enforce the Code of Conduct in good faith may be reported to the CNCF staff via conduct@cncf.io. 14 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | This section shows which versions of AIBrix are currently supported with security updates. 6 | 7 | | Version | Supported | 8 | | ------- | ------------------ | 9 | | 0.2.x | :white_check_mark: | 10 | | 0.1.x | :x: | 11 | 12 | ## Reporting a Vulnerability 13 | 14 | Please contact maintainers@aibrix.ai to report a vulnerability. 15 | -------------------------------------------------------------------------------- /benchmarks/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/client/__init__.py -------------------------------------------------------------------------------- /benchmarks/generator/.gitignore: -------------------------------------------------------------------------------- 1 | output 2 | plot 3 | -------------------------------------------------------------------------------- /benchmarks/generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/generator/__init__.py -------------------------------------------------------------------------------- /benchmarks/generator/dataset_generator/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/generator/dataset_generator/__init__.py -------------------------------------------------------------------------------- /benchmarks/generator/dataset_generator/dataset-examples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/generator/dataset_generator/dataset-examples.png -------------------------------------------------------------------------------- /benchmarks/generator/workload_generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/generator/workload_generator/__init__.py -------------------------------------------------------------------------------- /benchmarks/generator/workload_generator/config/examples/completion-len-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 8, 3 | "mean": 169, 4 | "noise": 0.1, 5 | "period_len_ms": 300000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/generator/workload_generator/config/examples/prompt-len-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 15, 3 | "mean": 309, 4 | "noise": 0.1, 5 | "period_len_ms": 300000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/generator/workload_generator/config/examples/traffic-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 2, 3 | "mean": 6, 4 | "noise": 0.1, 5 | "period_len_ms": 300000, 6 | "only_rise": false 7 | } 
-------------------------------------------------------------------------------- /benchmarks/generator/workload_generator/workload-plot-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/generator/workload_generator/workload-plot-example.png -------------------------------------------------------------------------------- /benchmarks/image/aibrix-benchmark-client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/image/aibrix-benchmark-client.png -------------------------------------------------------------------------------- /benchmarks/image/aibrix-benchmark-component.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/image/aibrix-benchmark-component.png -------------------------------------------------------------------------------- /benchmarks/image/aibrix-benchmark-dataset.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/image/aibrix-benchmark-dataset.png -------------------------------------------------------------------------------- /benchmarks/image/aibrix-benchmark-workload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/image/aibrix-benchmark-workload.png -------------------------------------------------------------------------------- /benchmarks/requirements.txt: -------------------------------------------------------------------------------- 1 | 
PyYAML==6.0.3 2 | openai==2.3.0 3 | matplotlib==3.10.7 4 | transformers==4.57.0 5 | scipy==1.16.2 -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/deepseek-llm-7b-chat/apa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-llm-7b-chat-v100-apa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1' 11 | autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2' 12 | apa.autoscaling.aibrix.ai/window: 30s 13 | spec: 14 | scalingStrategy: APA 15 | minReplicas: 1 16 | maxReplicas: 8 17 | metricsSources: 18 | - metricSourceType: pod 19 | protocolType: http 20 | port: '8000' 21 | path: metrics 22 | targetMetric: gpu_cache_usage_perc 23 | targetValue: '0.5' 24 | scaleTargetRef: 25 | apiVersion: apps/v1 26 | kind: Deployment 27 | name: deepseek-llm-7b-chat -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/deepseek-llm-7b-chat/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-llm-7b-chat-hpa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | spec: 10 | scalingStrategy: HPA 11 | minReplicas: 1 12 | maxReplicas: 8 13 | metricsSources: 14 | - metricSourceType: pod 15 | protocolType: http 16 | port: '8000' 17 | path: /metrics 18 | targetMetric: gpu_cache_usage_perc 19 | targetValue: '50' 20 | scaleTargetRef: 21 | apiVersion: apps/v1 22 | kind: Deployment 23 | name: deepseek-llm-7b-chat -------------------------------------------------------------------------------- 
/benchmarks/scenarios/autoscaling/deepseek-llm-7b-chat/kpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-llm-7b-chat-kpa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 3m 11 | spec: 12 | scalingStrategy: KPA 13 | minReplicas: 1 14 | maxReplicas: 8 15 | metricsSources: 16 | - metricSourceType: pod 17 | protocolType: http 18 | port: '8000' 19 | path: metrics 20 | targetMetric: gpu_cache_usage_perc 21 | targetValue: '0.5' 22 | scaleTargetRef: 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | name: deepseek-llm-7b-chat -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/deepseek-llm-7b-chat/optimizer-kpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-llm-7b-chat-gpu-optimizer 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 11 | spec: 12 | scalingStrategy: KPA 13 | minReplicas: 1 14 | maxReplicas: 8 15 | metricsSources: 16 | - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 17 | metricSourceType: domain 18 | path: /metrics/default/deepseek-llm-7b-chat 19 | protocolType: http 20 | targetMetric: vllm:deployment_replicas 21 | targetValue: "100" 22 | scaleTargetRef: 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | name: deepseek-llm-7b-chat 26 | -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/deepseek-llm-7b-chat/svc.yaml: 
#!/bin/bash
#
# Invokes ./run-test.sh once per autoscaler, back to back, passing the workload
# path and the autoscaler name.
#
# Usage: overnight_run.sh <workload_path>
#
# The stdout/stderr of every run is collected in ./output.txt. The log is
# emptied once at startup and appended to afterwards, so the output of the
# earlier autoscalers is still available when the whole sequence has finished.

workload_path=$1
if [ -z "${workload_path}" ]; then
    echo "workload path is not given"
    echo "Usage: $0 <workload_path>"
    exit 1
fi

log_file="output.txt"
autoscalers=(hpa kpa apa optimizer-kpa)

# Start from an empty log so results of a previous invocation do not mix in.
: > "${log_file}"

for autoscaler in "${autoscalers[@]}"; do
    start_time=$(date +%s)
    echo "--------------------------------"
    echo "started experiment at $(date)"
    echo "autoscaler: ${autoscaler}"
    echo "workload: ${workload_path}"
    echo "The stdout/stderr is being logged in ./output.txt"
    # Mark where each run begins inside the shared log.
    echo "===== ${autoscaler} ($(date)) =====" >> "${log_file}"
    # Append (not truncate) and use the portable '>> file 2>&1' form, which
    # also works on bash 3.x where '&>>' is unavailable.
    ./run-test.sh "${workload_path}" "${autoscaler}" >> "${log_file}" 2>&1
    end_time=$(date +%s)
    echo "Done: Time taken: $((end_time - start_time)) seconds"
    echo "--------------------------------"
    # Pause between experiments before starting the next autoscaler.
    sleep 10
done
/benchmarks/scenarios/autoscaling/workload-configs/predefined/completion-len-configs/HighFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 34, 3 | "mean": 43, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/completion-len-configs/HighSlow.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 34, 3 | "mean": 43, 4 | "noise": 0.1, 5 | "period_len_ms": 300000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/completion-len-configs/LowFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 7.1, 3 | "mean": 43, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/completion-len-configs/LowSlow.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 7.1, 3 | "mean": 43, 4 | "noise": 0.1, 5 | "period_len_ms": 300000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/prompt-len-configs/HighFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 696, 3 | "mean": 1024, 4 | "noise": 0.1, 5 | "period_len_ms": 90000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/prompt-len-configs/HighSlow.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 696, 3 | "mean": 1024, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/prompt-len-configs/LowFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 174, 3 | "mean": 1024, 4 | "noise": 0.1, 5 | "period_len_ms": 90000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/prompt-len-configs/LowSlow.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 174, 3 | "mean": 1024, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/traffic-configs/HighFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 3.6, 3 | "mean": 12, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/traffic-configs/HighSlow.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 3.6, 3 | "mean": 12, 4 | "noise": 0.1, 5 | "period_len_ms": 840000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/traffic-configs/LowFast.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 1.08, 3 | 
"mean": 12, 4 | "noise": 0.1, 5 | "period_len_ms": 120000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload-configs/predefined/traffic-configs/LowSlow.json: -------------------------------------------------------------------------------- 1 | { 2 | "fluctuate": 1.08, 3 | "mean": 12, 4 | "noise": 0.1, 5 | "period_len_ms": 840000, 6 | "only_rise": false 7 | } -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload/25min_up_and_down/plot-synthetic-from-csv-file-synthetic2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/benchmarks/scenarios/autoscaling/workload/25min_up_and_down/plot-synthetic-from-csv-file-synthetic2.pdf -------------------------------------------------------------------------------- /benchmarks/scenarios/autoscaling/workload/one_request.jsonl: -------------------------------------------------------------------------------- 1 | {"timestamp": 0, "requests": [{"Prompt Length": 3, "Output Length": 22, "prompt": "Where is Beijing"}]} -------------------------------------------------------------------------------- /benchmarks/scenarios/kvcache/README.md: -------------------------------------------------------------------------------- 1 | ## KVCache Benchmark 2 | 3 | **Coming Soon!** 4 | 5 | Stay tuned, benchmark configurations and benchmarking steps will be released shortly. 
import argparse
import sys

from kubernetes import client, config

# Kube context and namespace this script has always targeted. They remain the
# defaults so existing invocations behave exactly as before; pass --context /
# --namespace (or the keyword arguments) to point the script elsewhere.
DEFAULT_CONTEXT = "ccr3aths9g2gqedu8asdg@41073177-kcu0mslcp5mhjsva38rpg"
DEFAULT_NAMESPACE = "default"


def scale_deployment(deployment_name, replicas, context=DEFAULT_CONTEXT, namespace=DEFAULT_NAMESPACE):
    """Patch ``spec.replicas`` of a Deployment.

    Args:
        deployment_name: Name of the Deployment to patch.
        replicas: Desired replica count written to ``spec.replicas``.
        context: Kubeconfig context handed to ``load_kube_config``. Per the
            kubernetes client documentation, ``None`` selects the kubeconfig's
            current context.
        namespace: Namespace that contains the Deployment.

    On any failure (kubeconfig loading or the API call) the error is printed
    and the process exits with status 1; nothing is raised to the caller.
    """
    try:
        config.load_kube_config(context=context)
        apps_v1 = client.AppsV1Api()
        apps_v1.patch_namespaced_deployment(
            name=deployment_name,
            namespace=namespace,
            body={"spec": {"replicas": replicas}},
        )
    except Exception as e:
        # Deliberately broad: this is a CLI helper, any failure is fatal.
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--deployment", required=True)
    parser.add_argument("--replicas", type=int, required=True)
    parser.add_argument(
        "--context",
        default=DEFAULT_CONTEXT,
        help="kubeconfig context to use (default: %(default)s)",
    )
    parser.add_argument(
        "--namespace",
        default=DEFAULT_NAMESPACE,
        help="namespace of the deployment (default: %(default)s)",
    )
    args = parser.parse_args()

    scale_deployment(
        args.deployment,
        args.replicas,
        context=args.context,
        namespace=args.namespace,
    )
    print(f"Deployment {args.deployment} scaled to {args.replicas} replica(s)")
3 | bin/ 4 | -------------------------------------------------------------------------------- /build/container/Dockerfile.runtime.dockerignore: -------------------------------------------------------------------------------- 1 | # More info: https://docs.docker.com/engine/reference/builder/#dockerignore-file 2 | # Ignore test 3 | python/aibrix/tests/ 4 | -------------------------------------------------------------------------------- /config/.gitignore: -------------------------------------------------------------------------------- 1 | # these files are auto-generated by kubebuilder 2 | # however, we reorganize files for easy sharing but break kubebuilder way managing files. 3 | # create a .gitignore to untrack this file 4 | 5 | crd/bases/** 6 | rbac/role.yaml 7 | -------------------------------------------------------------------------------- /config/crd/autoscaling/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - autoscaling.aibrix.ai_podautoscalers.yaml 3 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: CustomResourceDefinition 7 | version: v1 8 | group: apiextensions.k8s.io 9 | path: spec/conversion/webhook/clientConfig/service/name 10 | 11 | namespace: 12 | - kind: CustomResourceDefinition 13 | version: v1 14 | group: apiextensions.k8s.io 15 | path: spec/conversion/webhook/clientConfig/service/namespace 16 | create: false 17 | 18 | varReference: 19 | - path: metadata/annotations 20 | -------------------------------------------------------------------------------- /config/crd/model/kustomization.yaml: 
-------------------------------------------------------------------------------- 1 | resources: 2 | - model.aibrix.ai_modeladapters.yaml 3 | -------------------------------------------------------------------------------- /config/crd/orchestration/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - orchestration.aibrix.ai_rayclusterreplicasets.yaml 3 | - orchestration.aibrix.ai_rayclusterfleets.yaml 4 | - orchestration.aibrix.ai_kvcaches.yaml 5 | - orchestration.aibrix.ai_stormservices.yaml 6 | - orchestration.aibrix.ai_rolesets.yaml 7 | - orchestration.aibrix.ai_podsets.yaml -------------------------------------------------------------------------------- /config/default/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 9443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /config/dependency/envoy-gateway/envoy_gateway_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: envoy-gateway-config 5 | namespace: envoy-gateway-system 6 | data: 7 | envoy-gateway.yaml: | 8 | apiVersion: gateway.envoyproxy.io/v1alpha1 9 | kind: EnvoyGateway 10 | provider: 11 | type: Kubernetes 12 | gateway: 13 | controllerName: gateway.envoyproxy.io/gatewayclass-controller 14 | extensionApis: 15 | enableEnvoyPatchPolicy: true 
-------------------------------------------------------------------------------- /config/dependency/envoy-gateway/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - https://github.com/envoyproxy/gateway/releases/download/v1.2.8/install.yaml 5 | 6 | patches: 7 | - path: envoy_gateway_patch.yaml 8 | 9 | images: 10 | - name: envoyproxy/gateway 11 | newName: envoyproxy/gateway 12 | newTag: v1.2.8 -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/README.md: -------------------------------------------------------------------------------- 1 | ## KubeRay upstream manifests 2 | 3 | Use the command below to export the manifests from the Helm chart. After the manifests are generated, copy them into this folder. 4 | 5 | ```shell 6 | helm template kuberay-operator kuberay/kuberay-operator --namespace aibrix-system --version 1.2.1 --include-crds --set env[0].name=ENABLE_PROBES_INJECTION --set-string env[0].value=false --set fullnameOverride=kuberay-operator --set featureGates[0].name=RayClusterStatusConditions --set featureGates[0].enabled=true --output-dir ./config/dependency 7 | ``` 8 | 9 | If you use zsh, run the command as `noglob helm ...` so that zsh does not try to glob-expand the square brackets (e.g. `env[0]`). 10 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | # KubeRay helm package doesn't allow namespace setting at this moment, we can not export resources with namespace specified 4 | # The workaround is to use kustomize namespace override to create under aibrix-system namespace. 
5 | 6 | resources: 7 | - templates/deployment.yaml 8 | - templates/leader_election_role.yaml 9 | - templates/leader_election_role_binding.yaml 10 | - templates/ray_rayjob_editor_role.yaml 11 | - templates/ray_rayjob_viewer_role.yaml 12 | - templates/ray_rayservice_editor_role.yaml 13 | - templates/ray_rayservice_viewer_role.yaml 14 | - templates/role.yaml 15 | - templates/rolebinding.yaml 16 | - templates/service.yaml 17 | - templates/serviceaccount.yaml 18 | 19 | images: 20 | - name: quay.io/kuberay/operator 21 | newName: aibrix/kuberay-operator 22 | newTag: v1.2.1-patch-20250726 -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/leader_election_role.yaml 3 | kind: Role 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | metadata: 6 | labels: 7 | app.kubernetes.io/name: kuberay-operator 8 | helm.sh/chart: kuberay-operator-1.2.1 9 | app.kubernetes.io/instance: kuberay-operator 10 | app.kubernetes.io/managed-by: Helm 11 | name: kuberay-operator-leader-election 12 | rules: 13 | - apiGroups: 14 | - "" 15 | resources: 16 | - configmaps 17 | verbs: 18 | - get 19 | - list 20 | - watch 21 | - create 22 | - update 23 | - patch 24 | - delete 25 | - apiGroups: 26 | - "" 27 | resources: 28 | - configmaps/status 29 | verbs: 30 | - get 31 | - update 32 | - patch 33 | - apiGroups: 34 | - "" 35 | resources: 36 | - events 37 | verbs: 38 | - create 39 | - apiGroups: 40 | - coordination.k8s.io 41 | resources: 42 | - leases 43 | verbs: 44 | - create 45 | - get 46 | - list 47 | - update 48 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: 
kuberay-operator/templates/leader_election_role_binding.yaml 3 | kind: RoleBinding 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | metadata: 6 | labels: 7 | app.kubernetes.io/name: kuberay-operator 8 | helm.sh/chart: kuberay-operator-1.2.1 9 | app.kubernetes.io/instance: kuberay-operator 10 | app.kubernetes.io/managed-by: Helm 11 | name: kuberay-operator-leader-election 12 | subjects: 13 | - kind: ServiceAccount 14 | name: kuberay-operator 15 | namespace: aibrix-system 16 | roleRef: 17 | kind: Role 18 | name: kuberay-operator-leader-election 19 | apiGroup: rbac.authorization.k8s.io 20 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/multiple_namespaces_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/multiple_namespaces_role.yaml 3 | # Install Role for namespaces listed in watchNamespace. 4 | # This should be consistent with `role.yaml`, except for the `kind` field. 5 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/multiple_namespaces_rolebinding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/multiple_namespaces_rolebinding.yaml 3 | # Install RoleBinding for namespaces listed in watchNamespace. 4 | # This should be consistent with `rolebinding.yaml`, except for the `kind` field. 5 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/ray_rayjob_editor_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/ray_rayjob_editor_role.yaml 3 | # permissions for end users to edit rayjobs. 
4 | 5 | kind: ClusterRole 6 | apiVersion: rbac.authorization.k8s.io/v1 7 | metadata: 8 | labels: 9 | app.kubernetes.io/name: kuberay-operator 10 | helm.sh/chart: kuberay-operator-1.2.1 11 | app.kubernetes.io/instance: kuberay-operator 12 | app.kubernetes.io/managed-by: Helm 13 | name: rayjob-editor-role 14 | rules: 15 | - apiGroups: 16 | - ray.io 17 | resources: 18 | - rayjobs 19 | verbs: 20 | - create 21 | - delete 22 | - get 23 | - list 24 | - patch 25 | - update 26 | - watch 27 | - apiGroups: 28 | - ray.io 29 | resources: 30 | - rayjobs/status 31 | verbs: 32 | - get 33 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/ray_rayjob_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/ray_rayjob_viewer_role.yaml 3 | # permissions for end users to view rayjobs. 4 | 5 | kind: ClusterRole 6 | apiVersion: rbac.authorization.k8s.io/v1 7 | metadata: 8 | labels: 9 | app.kubernetes.io/name: kuberay-operator 10 | helm.sh/chart: kuberay-operator-1.2.1 11 | app.kubernetes.io/instance: kuberay-operator 12 | app.kubernetes.io/managed-by: Helm 13 | name: rayjob-viewer-role 14 | rules: 15 | - apiGroups: 16 | - ray.io 17 | resources: 18 | - rayjobs 19 | verbs: 20 | - get 21 | - list 22 | - watch 23 | - apiGroups: 24 | - ray.io 25 | resources: 26 | - rayjobs/status 27 | verbs: 28 | - get 29 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/ray_rayservice_editor_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/ray_rayservice_editor_role.yaml 3 | # permissions for end users to edit rayservices. 
4 | apiVersion: rbac.authorization.k8s.io/v1 5 | kind: ClusterRole 6 | metadata: 7 | name: rayservice-editor-role 8 | rules: 9 | - apiGroups: 10 | - ray.io 11 | resources: 12 | - rayservices 13 | verbs: 14 | - create 15 | - delete 16 | - get 17 | - list 18 | - patch 19 | - update 20 | - watch 21 | - apiGroups: 22 | - ray.io 23 | resources: 24 | - rayservices/status 25 | verbs: 26 | - get 27 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/ray_rayservice_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/ray_rayservice_viewer_role.yaml 3 | # permissions for end users to view rayservices. 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | kind: ClusterRole 6 | metadata: 7 | name: rayservice-viewer-role 8 | rules: 9 | - apiGroups: 10 | - ray.io 11 | resources: 12 | - rayservices 13 | verbs: 14 | - get 15 | - list 16 | - watch 17 | - apiGroups: 18 | - ray.io 19 | resources: 20 | - rayservices/status 21 | verbs: 22 | - get 23 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/rolebinding.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/rolebinding.yaml 3 | kind: ClusterRoleBinding 4 | apiVersion: rbac.authorization.k8s.io/v1 5 | metadata: 6 | labels: 7 | app.kubernetes.io/name: kuberay-operator 8 | helm.sh/chart: kuberay-operator-1.2.1 9 | app.kubernetes.io/instance: kuberay-operator 10 | app.kubernetes.io/managed-by: Helm 11 | name: kuberay-operator 12 | subjects: 13 | - kind: ServiceAccount 14 | name: kuberay-operator 15 | namespace: aibrix-system 16 | roleRef: 17 | kind: ClusterRole 18 | name: kuberay-operator 19 | apiGroup: rbac.authorization.k8s.io 20 | 
-------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/service.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/service.yaml 3 | apiVersion: v1 4 | kind: Service 5 | metadata: 6 | name: kuberay-operator 7 | labels: 8 | app.kubernetes.io/name: kuberay-operator 9 | helm.sh/chart: kuberay-operator-1.2.1 10 | app.kubernetes.io/instance: kuberay-operator 11 | app.kubernetes.io/managed-by: Helm 12 | spec: 13 | type: ClusterIP 14 | ports: 15 | - port: 8080 16 | targetPort: http 17 | protocol: TCP 18 | name: http 19 | selector: 20 | app.kubernetes.io/name: kuberay-operator 21 | app.kubernetes.io/instance: kuberay-operator 22 | -------------------------------------------------------------------------------- /config/dependency/kuberay-operator/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | # Source: kuberay-operator/templates/serviceaccount.yaml 3 | apiVersion: v1 4 | kind: ServiceAccount 5 | metadata: 6 | name: kuberay-operator 7 | labels: 8 | app.kubernetes.io/name: kuberay-operator 9 | helm.sh/chart: kuberay-operator-1.2.1 10 | app.kubernetes.io/instance: kuberay-operator 11 | app.kubernetes.io/managed-by: Helm 12 | -------------------------------------------------------------------------------- /config/dependency/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - envoy-gateway 5 | - kuberay-operator/crds/ray.io_rayclusters.yaml 6 | - kuberay-operator/crds/ray.io_rayjobs.yaml 7 | - kuberay-operator/crds/ray.io_rayservices.yaml 8 | -------------------------------------------------------------------------------- /config/experimentals/gpu-optimizer/gateway-plugin/kustomization.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | namespace: aibrix-system 5 | 6 | namePrefix: aibrix- 7 | 8 | resources: 9 | - ../../../gateway/gateway-plugin 10 | 11 | patches: 12 | - patch: |- # Use the '|' and '-' for inline patching 13 | apiVersion: apps/v1 14 | kind: Deployment 15 | metadata: 16 | name: gateway-plugins 17 | spec: 18 | template: 19 | spec: 20 | containers: 21 | - name: gateway-plugin 22 | env: 23 | - name: AIBRIX_GPU_OPTIMIZER_TRACING_FLAG 24 | value: "true" 25 | target: 26 | kind: Deployment 27 | name: gateway-plugins 28 | namespace: aibrix-system 29 | version: v1 30 | 31 | images: 32 | - name: gateway-plugins 33 | newName: aibrix/gateway-plugins 34 | newTag: nightly -------------------------------------------------------------------------------- /config/experimentals/gpu-optimizer/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - gateway-plugin 6 | 7 | -------------------------------------------------------------------------------- /config/gateway/gateway-plugin/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - gateway-plugin.yaml 3 | 4 | apiVersion: kustomize.config.k8s.io/v1beta1 5 | kind: Kustomization -------------------------------------------------------------------------------- /config/gateway/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - gateway.yaml 3 | - gateway-plugin 4 | 5 | apiVersion: kustomize.config.k8s.io/v1beta1 6 | kind: Kustomization 7 | 8 | labels: 9 | - pairs: 10 | app.kubernetes.io/component: aibrix-gateway-plugin
-------------------------------------------------------------------------------- /config/gpu-optimizer/deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: gpu-optimizer 5 | namespace: system 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: gpu-optimizer 11 | template: 12 | metadata: 13 | labels: 14 | app: gpu-optimizer 15 | spec: 16 | serviceAccountName: gpu-optimizer-sa 17 | automountServiceAccountToken: true 18 | containers: 19 | - name: gpu-optimizer 20 | image: aibrix/runtime:nightly 21 | command: ["python", "-m", "aibrix.gpu_optimizer.app"] 22 | ports: 23 | - containerPort: 8080 24 | resources: 25 | limits: 26 | cpu: 500m 27 | memory: 256Mi 28 | requests: 29 | cpu: 10m 30 | memory: 64Mi 31 | env: 32 | - name: REDIS_HOST 33 | value: aibrix-redis-master.aibrix-system.svc.cluster.local -------------------------------------------------------------------------------- /config/gpu-optimizer/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - deployment.yaml 3 | - service.yaml 4 | - rbac.yaml 5 | 6 | labels: 7 | - pairs: 8 | app.kubernetes.io/component: aibrix-gpu-optimizer -------------------------------------------------------------------------------- /config/gpu-optimizer/rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: gpu-optimizer-sa 5 | namespace: system 6 | --- 7 | apiVersion: rbac.authorization.k8s.io/v1 8 | kind: ClusterRole 9 | metadata: 10 | name: gpu-optimizer-clusterrole 11 | rules: 12 | - apiGroups: ["apps"] 13 | resources: ["deployments"] 14 | verbs: ["get", "list", "watch"] 15 | --- 16 | apiVersion: rbac.authorization.k8s.io/v1 17 | kind: ClusterRoleBinding 18 | metadata: 19 | name: gpu-optimizer-clusterrole-binding 20 | subjects: 21 | - kind: 
ServiceAccount 22 | name: gpu-optimizer-sa 23 | namespace: system 24 | roleRef: 25 | kind: ClusterRole 26 | name: gpu-optimizer-clusterrole 27 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /config/gpu-optimizer/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: gpu-optimizer 5 | namespace: system 6 | spec: 7 | selector: 8 | app: gpu-optimizer 9 | ports: 10 | - protocol: TCP 11 | port: 8080 12 | targetPort: 8080 13 | type: ClusterIP -------------------------------------------------------------------------------- /config/internalcert/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - secret.yaml 3 | -------------------------------------------------------------------------------- /config/internalcert/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: webhook-server-cert 5 | namespace: system 6 | -------------------------------------------------------------------------------- /config/job/k8s_job_rbac.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: job-reader-sa 5 | namespace: default 6 | --- 7 | # Role for the job pod's service account (job-reader-sa): read-only access to Jobs 8 | apiVersion: rbac.authorization.k8s.io/v1 9 | kind: Role 10 | metadata: 11 | name: job-reader-role 12 | namespace: default 13 | rules: 14 | - apiGroups: ["batch"] 15 | resources: ["jobs"] 16 | verbs: ["get"] # Get permissions only 17 | --- 18 | apiVersion: rbac.authorization.k8s.io/v1 19 | kind: RoleBinding 20 | metadata: 21 | name: job-reader-binding 22 | namespace: default 23 | subjects: 24 | - kind: ServiceAccount 25 | name: job-reader-sa 26 | namespace: default 27 | roleRef: 28 | kind: Role
29 | name: job-reader-role 30 | apiGroup: rbac.authorization.k8s.io -------------------------------------------------------------------------------- /config/job/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - k8s_job_rbac.yaml -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager.yaml 3 | apiVersion: kustomize.config.k8s.io/v1beta1 4 | kind: Kustomization 5 | 6 | labels: 7 | - pairs: 8 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /config/metadata/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - metadata.yaml 3 | - redis.yaml 4 | 5 | configMapGenerator: 6 | - name: metadata-config 7 | namespace: aibrix-system 8 | files: 9 | - job_template_patch.yaml 10 | 11 | patches: 12 | # Uncomment the following lines to enable S3 as the object store 13 | # - path: s3-env-patch.yaml 14 | # Uncomment the following lines to enable TOS as the object store 15 | # - path: tos-env-patch.yaml 16 | 17 | labels: 18 | - pairs: 19 | app.kubernetes.io/component: aibrix-metadata-service -------------------------------------------------------------------------------- /config/namespace/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - namespace.yaml 6 | -------------------------------------------------------------------------------- /config/namespace/namespace.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Namespace 3 | metadata: 4 | labels: 5 | 
control-plane: controller-manager 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: system -------------------------------------------------------------------------------- /config/overlays/dev/gateway-plugin/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | namespace: aibrix-system 5 | 6 | namePrefix: aibrix- 7 | 8 | resources: 9 | - ../../../gateway/gateway-plugin 10 | 11 | images: 12 | - name: gateway-plugins 13 | newName: aibrix/gateway-plugins 14 | newTag: nightly 15 | 16 | patches: 17 | - patch: |- # Use the '|' and '-' for inline patching 18 | apiVersion: apps/v1 19 | kind: Deployment 20 | metadata: 21 | name: gateway-plugins 22 | spec: 23 | template: 24 | spec: 25 | containers: 26 | - name: gateway-plugin 27 | args: 28 | - -v=5 29 | env: 30 | - name: AIBRIX_POD_METRIC_REFRESH_INTERVAL_MS 31 | value: "60000" 32 | - name: AIBRIX_GPU_OPTIMIZER_TRACING_FLAG 33 | value: "true" 34 | target: 35 | kind: Deployment 36 | name: gateway-plugins 37 | namespace: system 38 | version: v1 -------------------------------------------------------------------------------- /config/overlays/dev/gpu-optimizer/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | namespace: aibrix-system 5 | 6 | namePrefix: aibrix- 7 | 8 | resources: 9 | - ../../../gpu-optimizer 10 | 11 | patches: 12 | - patch: |- # Use the '|' and '-' for inline patching 13 | apiVersion: apps/v1 14 | kind: Deployment 15 | metadata: 16 | name: gpu-optimizer 17 | spec: 18 | template: 19 | spec: 20 | containers: 21 | - name: gpu-optimizer 22 | command: ["python", "-m", "aibrix.gpu_optimizer.app", "--debug"] 23 | target: 24 | kind: Deployment 25 | name: gpu-optimizer 26 | namespace: system 27 | version: v1
-------------------------------------------------------------------------------- /config/overlays/dev/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | namespace: aibrix-system 5 | 6 | namePrefix: aibrix- 7 | 8 | resources: 9 | - ../../../manager 10 | 11 | patches: 12 | - patch: |- # Use the '|' and '-' for inline patching 13 | apiVersion: apps/v1 14 | kind: Deployment 15 | metadata: 16 | name: controller-manager 17 | spec: 18 | template: 19 | spec: 20 | containers: 21 | - name: manager 22 | args: 23 | - --leader-elect 24 | - --health-probe-bind-address=:8081 25 | - --metrics-bind-address=0 26 | - -v=4 27 | target: 28 | kind: Deployment 29 | name: controller-manager 30 | namespace: system 31 | version: v1 -------------------------------------------------------------------------------- /config/overlays/release/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # this manifest is for public stable release. 2 | # Please override the tag with latest stable tags.
3 | apiVersion: kustomize.config.k8s.io/v1beta1 4 | kind: Kustomization 5 | 6 | resources: 7 | - ../../default 8 | - pdb.yaml 9 | 10 | patches: 11 | - path: envoy_proxy_patch.yaml 12 | - path: gateway_plugins_patch.yaml 13 | 14 | images: 15 | - name: quay.io/kuberay/operator 16 | newName: aibrix/kuberay-operator 17 | newTag: v1.2.1-patch-20250726 18 | - name: busybox 19 | newTag: stable 20 | - name: redis 21 | newTag: latest 22 | - name: aibrix/gateway-plugins 23 | newTag: v0.4.1 24 | - name: aibrix/metadata-service 25 | newTag: v0.4.1 26 | - name: aibrix/controller-manager 27 | newTag: v0.4.1 28 | - name: aibrix/runtime 29 | newTag: v0.4.1 30 | -------------------------------------------------------------------------------- /config/overlays/release/pdb.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: policy/v1 2 | kind: PodDisruptionBudget 3 | metadata: 4 | name: gateway-plugins-pdb 5 | namespace: aibrix-system 6 | spec: 7 | minAvailable: 1 8 | selector: 9 | matchLabels: 10 | app: gateway-plugins 11 | -------------------------------------------------------------------------------- /config/overlays/vke-dev/gateway-plugin/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../../dev/gateway-plugin 6 | 7 | patches: 8 | - path: gateway_plugins_patch.yaml 9 | 10 | images: 11 | - name: busybox 12 | newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/busybox 13 | newTag: stable 14 | - name: aibrix/gateway-plugins 15 | newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/gateway-plugins 16 | newTag: nightly -------------------------------------------------------------------------------- /config/overlays/vke-dev/gpu-optimizer/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 
2 | 3 | resources: 4 | - ../../dev/gpu-optimizer 5 | 6 | images: 7 | - name: aibrix/runtime 8 | newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/runtime 9 | newTag: nightly -------------------------------------------------------------------------------- /config/overlays/vke-dev/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - ../../dev/manager 5 | 6 | images: 7 | - name: aibrix/controller-manager 8 | newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/controller-manager 9 | newTag: nightly -------------------------------------------------------------------------------- /config/overlays/vke-ipv6/envoy_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: gateway.envoyproxy.io/v1alpha1 2 | kind: EnvoyProxy 3 | metadata: 4 | name: aibrix-custom-proxy-config 5 | namespace: aibrix-system 6 | spec: 7 | provider: 8 | kubernetes: 9 | envoyService: 10 | patch: 11 | type: StrategicMerge 12 | value: 13 | metadata: 14 | annotations: 15 | service.beta.kubernetes.io/volcengine-loadbalancer-address-type: "PRIVATE" 16 | spec: 17 | ipFamilies: 18 | - IPv4 19 | - IPv6 20 | ipFamilyPolicy: PreferDualStack # Changed from SingleStack to PreferDualStack 21 | envoyDeployment: 22 | pod: 23 | annotations: 24 | vci.vke.volcengine.com/pod-ip-family: dualstack -------------------------------------------------------------------------------- /config/overlays/vke-ipv6/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../vke/default 6 | 7 | patches: 8 | - path: envoy_proxy_patch.yaml 9 | -------------------------------------------------------------------------------- /config/overlays/vke/dependency/envoy_gateway_patch.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: envoy-gateway-config 5 | namespace: envoy-gateway-system 6 | data: 7 | envoy-gateway.yaml: | 8 | apiVersion: gateway.envoyproxy.io/v1alpha1 9 | kind: EnvoyGateway 10 | provider: 11 | kubernetes: 12 | shutdownManager: 13 | image: aibrix-cn-beijing.cr.volces.com/aibrix/gateway:v1.2.8 14 | type: Kubernetes 15 | gateway: 16 | controllerName: gateway.envoyproxy.io/gatewayclass-controller 17 | extensionApis: 18 | enableEnvoyPatchPolicy: true 19 | -------------------------------------------------------------------------------- /config/overlays/vke/dependency/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../../../dependency 6 | 7 | patches: 8 | - path: envoy_gateway_patch.yaml 9 | 10 | images: 11 | - name: envoyproxy/gateway 12 | newName: aibrix-cn-beijing.cr.volces.com/aibrix/gateway 13 | newTag: v1.2.8 14 | -------------------------------------------------------------------------------- /config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | # Prometheus Monitor Service (Metrics) 2 | apiVersion: monitoring.coreos.com/v1 3 | kind: ServiceMonitor 4 | metadata: 5 | labels: 6 | control-plane: controller-manager 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | name: controller-manager-metrics-monitor 10 | namespace: aibrix-system 11 | spec: 12 | endpoints: 13 | - path: /metrics 14 | port: http # Ensure this is the name of the port that exposes HTTP metrics 
15 | scheme: http 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | -------------------------------------------------------------------------------- /config/rbac/autoscaling/autoscaling_podautoscaler_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit podautoscalers. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: autoscaling-podautoscaler-editor-role 9 | rules: 10 | - apiGroups: 11 | - autoscaling.aibrix.ai 12 | resources: 13 | - podautoscalers 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - autoscaling.aibrix.ai 24 | resources: 25 | - podautoscalers/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/autoscaling/autoscaling_podautoscaler_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view podautoscalers. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: autoscaling-podautoscaler-viewer-role 9 | rules: 10 | - apiGroups: 11 | - autoscaling.aibrix.ai 12 | resources: 13 | - podautoscalers 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - autoscaling.aibrix.ai 20 | resources: 21 | - podautoscalers/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/autoscaling/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - autoscaling_podautoscaler_editor_role.yaml 3 | - autoscaling_podautoscaler_viewer_role.yaml 4 | 5 | labels: 6 | - pairs: 7 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /config/rbac/controller-manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | # The controller-manager service account, roles, and role bindings are common resources that can be reused. 2 | 3 | resources: 4 | - leader_election_role.yaml 5 | - leader_election_role_binding.yaml 6 | - role.yaml 7 | - role_binding.yaml 8 | - service_account_controller_manager.yaml 9 | 10 | labels: 11 | - pairs: 12 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /config/rbac/controller-manager/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election.
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: controller-manager-leader-election-role 9 | rules: 10 | - apiGroups: 11 | - "" 12 | resources: 13 | - configmaps 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - create 19 | - update 20 | - patch 21 | - delete 22 | - apiGroups: 23 | - coordination.k8s.io 24 | resources: 25 | - leases 26 | verbs: 27 | - get 28 | - list 29 | - watch 30 | - create 31 | - update 32 | - patch 33 | - delete 34 | - apiGroups: 35 | - "" 36 | resources: 37 | - events 38 | verbs: 39 | - create 40 | - patch 41 | -------------------------------------------------------------------------------- /config/rbac/controller-manager/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: controller-manager-leader-election-rolebinding 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: Role 11 | name: controller-manager-leader-election-role 12 | subjects: 13 | - kind: ServiceAccount 14 | name: controller-manager 15 | namespace: system 16 | -------------------------------------------------------------------------------- /config/rbac/controller-manager/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: controller-manager-rolebinding 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: controller-manager-role 12 | subjects: 13 | - kind: ServiceAccount 14 | name: controller-manager 15 | namespace: system 16 | 
-------------------------------------------------------------------------------- /config/rbac/controller-manager/service_account_controller_manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: controller-manager 8 | namespace: system 9 | -------------------------------------------------------------------------------- /config/rbac/gateway/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | # Unlike the other RBAC files, which are generated, the gateway plugin role, role binding, and service account files are maintained by hand. 3 | - service_account_gateway.yaml 4 | - role_gateway.yaml 5 | - role_binding_gateway.yaml 6 | 7 | labels: 8 | - pairs: 9 | app.kubernetes.io/component: aibrix-gateway-plugins -------------------------------------------------------------------------------- /config/rbac/gateway/role_binding_gateway.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: gateway-plugins-rolebinding 8 | roleRef: 9 | apiGroup: rbac.authorization.k8s.io 10 | kind: ClusterRole 11 | name: gateway-plugins-role 12 | subjects: 13 | - kind: ServiceAccount 14 | name: gateway-plugins 15 | namespace: system 16 | -------------------------------------------------------------------------------- /config/rbac/gateway/role_gateway.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: gateway-plugins-role 6 | rules: 7 | - apiGroups: 8 | - "" 9 | resources: 10 | - pods 11 | verbs: 12 | -
create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - model.aibrix.ai 21 | resources: 22 | - modeladapters 23 | verbs: 24 | - create 25 | - delete 26 | - get 27 | - list 28 | - patch 29 | - update 30 | - watch 31 | - apiGroups: 32 | - gateway.networking.k8s.io 33 | resources: 34 | - httproutes 35 | verbs: 36 | - create 37 | - delete 38 | - get 39 | - list 40 | - patch 41 | - update 42 | - watch -------------------------------------------------------------------------------- /config/rbac/gateway/service_account_gateway.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: gateway-plugins 8 | namespace: system -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | # All RBAC will be applied under this service account in 3 | # the deployment namespace. You may comment out this resource 4 | # if your manager will use a service account that exists at 5 | # runtime. Be sure to update RoleBinding and ClusterRoleBinding 6 | # subjects if changing service account names. 7 | - controller-manager 8 | # For each CRD, "Editor" and "Viewer" roles are scaffolded by 9 | # default, aiding admins in cluster management. Those roles are 10 | # not used by the Project itself. You can comment the following lines 11 | # if you do not want those helpers be installed with your Project. 12 | - orchestration 13 | - model 14 | - autoscaling 15 | # other components 16 | - gateway 17 | 18 | # TODO: technically, we should split above rbac yamls to separate components and then 19 | # attach component labels. 
We 20 | #labels: 21 | # - pairs: 22 | # app.kubernetes.io/component: aibrix-controller-manager 23 | -------------------------------------------------------------------------------- /config/rbac/model/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - model_modeladapter_editor_role.yaml 3 | - model_modeladapter_viewer_role.yaml 4 | 5 | labels: 6 | - pairs: 7 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /config/rbac/model/model_modeladapter_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit modeladapters. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: model-modeladapter-editor-role 9 | rules: 10 | - apiGroups: 11 | - model.aibrix.ai 12 | resources: 13 | - modeladapters 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - model.aibrix.ai 24 | resources: 25 | - modeladapters/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/model/model_modeladapter_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view modeladapters. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: model-modeladapter-viewer-role 9 | rules: 10 | - apiGroups: 11 | - model.aibrix.ai 12 | resources: 13 | - modeladapters 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - model.aibrix.ai 20 | resources: 21 | - modeladapters/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/orchestration/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - orchestration_rayclusterfleet_editor_role.yaml 3 | - orchestration_rayclusterfleet_viewer_role.yaml 4 | - orchestration_rayclusterreplicaset_editor_role.yaml 5 | - orchestration_rayclusterreplicaset_viewer_role.yaml 6 | - orchestration_stormservice_editor_role.yaml 7 | - orchestration_stormservice_viewer_role.yaml 8 | - orchestration_roleset_editor_role.yaml 9 | - orchestration_roleset_viewer_role.yaml 10 | 11 | labels: 12 | - pairs: 13 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_kvcache_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit kvcaches. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-kvcache-editor-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - kvcaches 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - orchestration.aibrix.ai 24 | resources: 25 | - kvcaches/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_kvcache_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view kvcaches. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-kvcache-viewer-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - kvcaches 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - orchestration.aibrix.ai 20 | resources: 21 | - kvcaches/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_rayclusterfleet_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit rayclusterfleets. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-rayclusterfleet-editor-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rayclusterfleets 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - orchestration.aibrix.ai 24 | resources: 25 | - rayclusterfleets/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_rayclusterfleet_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view rayclusterfleets. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-rayclusterfleet-viewer-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rayclusterfleets 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - orchestration.aibrix.ai 20 | resources: 21 | - rayclusterfleets/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_rayclusterreplicaset_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit rayclusterreplicasets. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-rayclusterreplicaset-editor-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rayclusterreplicasets 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - orchestration.aibrix.ai 24 | resources: 25 | - rayclusterreplicasets/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_rayclusterreplicaset_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view rayclusterreplicasets. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-rayclusterreplicaset-viewer-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rayclusterreplicasets 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - orchestration.aibrix.ai 20 | resources: 21 | - rayclusterreplicasets/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_roleset_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit rolesets. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-roleset-editor-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rolesets 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - orchestration.aibrix.ai 24 | resources: 25 | - rolesets/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_roleset_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view rolesets. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-roleset-viewer-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - rolesets 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - orchestration.aibrix.ai 20 | resources: 21 | - rolesets/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_stormservice_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit stormservices. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-stormservice-editor-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - stormservices 14 | verbs: 15 | - create 16 | - delete 17 | - get 18 | - list 19 | - patch 20 | - update 21 | - watch 22 | - apiGroups: 23 | - orchestration.aibrix.ai 24 | resources: 25 | - stormservices/status 26 | verbs: 27 | - get 28 | -------------------------------------------------------------------------------- /config/rbac/orchestration/orchestration_stormservice_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view stormservices. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | name: orchestration-stormservice-viewer-role 9 | rules: 10 | - apiGroups: 11 | - orchestration.aibrix.ai 12 | resources: 13 | - stormservices 14 | verbs: 15 | - get 16 | - list 17 | - watch 18 | - apiGroups: 19 | - orchestration.aibrix.ai 20 | resources: 21 | - stormservices/status 22 | verbs: 23 | - get 24 | -------------------------------------------------------------------------------- /config/samples/autoscaling_v1alpha1_demo_nginx.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: nginx-deployment 5 | namespace: default 6 | spec: 7 | replicas: 1 8 | selector: 9 | matchLabels: 10 | app: nginx 11 | template: 12 | metadata: 13 | labels: 14 | app: nginx 15 | spec: 16 | containers: 17 | - name: nginx 18 | image: nginx:latest 19 | ports: 20 | - containerPort: 80 21 | resources: 22 | limits: 23 | cpu: 500m 24 | requests: 25 | cpu: 200m 26 | --- 27 | apiVersion: v1 28 | kind: 
Service 29 | metadata: 30 | name: nginx-service 31 | spec: 32 | type: NodePort 33 | ports: 34 | - port: 80 35 | targetPort: 80 36 | nodePort: 30001 37 | selector: 38 | app: nginx -------------------------------------------------------------------------------- /config/samples/autoscaling_v1alpha1_kpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: podautoscaler-example-kpa 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | namespace: default 9 | spec: 10 | scaleTargetRef: 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | name: nginx-deployment 14 | minReplicas: 1 15 | maxReplicas: 10 16 | targetMetric: "CPU" 17 | targetValue: "10" 18 | scalingStrategy: "KPA" -------------------------------------------------------------------------------- /config/samples/autoscaling_v1alpha1_mock_llama.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: podautoscaler-example-mock-llama 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | annotations: 9 | autoscaling.aibrix.ai/max-scale-up-rate: "2" 10 | autoscaling.aibrix.ai/max-scale-down-rate: "2" 11 | kpa.autoscaling.aibrix.ai/stable-window: "60s" 12 | kpa.autoscaling.aibrix.ai/scale-down-delay: "60s" 13 | namespace: aibrix-system 14 | spec: 15 | scaleTargetRef: 16 | apiVersion: apps/v1 17 | kind: Deployment 18 | name: llama2-70b 19 | minReplicas: 1 20 | maxReplicas: 10 21 | targetMetric: "avg_prompt_throughput_toks_per_s" 22 | targetValue: "20" 23 | scalingStrategy: "KPA" -------------------------------------------------------------------------------- /config/samples/autoscaling_v1alpha1_mock_llama_apa.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: podautoscaler-example-mock-llama-apa 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | namespace: aibrix-system 9 | spec: 10 | scaleTargetRef: 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | name: llama2-70b 14 | minReplicas: 1 15 | maxReplicas: 10 16 | targetMetric: "avg_prompt_throughput_toks_per_s" 17 | targetValue: "20" 18 | scalingStrategy: "APA" -------------------------------------------------------------------------------- /config/samples/autoscaling_v1alpha1_podautoscaler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: podautoscaler-example 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | namespace: default 9 | spec: 10 | scaleTargetRef: 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | name: nginx-deployment 14 | minReplicas: 1 15 | maxReplicas: 10 16 | targetMetric: "CPU" 17 | targetValue: "10" 18 | scalingStrategy: "HPA" -------------------------------------------------------------------------------- /config/samples/kustomization.yaml: -------------------------------------------------------------------------------- 1 | ## Append samples of your project ## 2 | resources: 3 | - autoscaling_v1alpha1_podautoscaler.yaml 4 | - model_v1alpha1_modeladapter.yaml 5 | - orchestration_v1alpha1_rayclusterreplicaset.yaml 6 | - orchestration_v1alpha1_rayclusterfleet.yaml 7 | - orchestration_v1alpha1_kvcache.yaml 8 | - orchestration_v1alpha1_stormservice.yaml 9 | - orchestration_v1alpha1_roleset.yaml 10 | #+kubebuilder:scaffold:manifestskustomizesamples 11 | -------------------------------------------------------------------------------- 
/config/samples/model_v1alpha1_modeladapter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: model.aibrix.ai/v1alpha1 2 | kind: ModelAdapter 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: modeladapter-sample 8 | spec: 9 | # TODO(user): Add fields here 10 | -------------------------------------------------------------------------------- /config/samples/orchestration_v1alpha1_kvcache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: KVCache 3 | metadata: 4 | name: aibrix-deepseek-coder-33b-kvcache 5 | namespace: aibrix-system 6 | spec: 7 | mode: centralized 8 | service: 9 | type: ClusterIP 10 | ports: 11 | - name: service 12 | port: 9600 13 | targetPort: 9600 14 | protocol: TCP 15 | cache: 16 | image: aibrix/vineyardd:20241120 17 | imagePullPolicy: IfNotPresent 18 | resources: 19 | requests: 20 | cpu: "2000m" 21 | memory: "4Gi" 22 | limits: 23 | cpu: "2000m" 24 | memory: "4Gi" 25 | -------------------------------------------------------------------------------- /config/samples/orchestration_v1alpha1_roleset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: RoleSet 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: roleset-sample 8 | spec: 9 | # TODO(user): Add fields here 10 | -------------------------------------------------------------------------------- /config/samples/orchestration_v1alpha1_stormservice.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: StormService 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: aibrix 6 | app.kubernetes.io/managed-by: kustomize 7 | name: stormservice-sample 
8 | spec: 9 | # TODO(user): Add fields here 10 | -------------------------------------------------------------------------------- /config/standalone/autoscaler-controller/patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - --leader-elect 13 | - --leader-election-id=aibrix-pod-autoscaler-controller 14 | - --health-probe-bind-address=:8081 15 | - --metrics-bind-address=0 16 | - --controllers=pod-autoscaler-controller 17 | - --disable-webhook 18 | -------------------------------------------------------------------------------- /config/standalone/distributed-inference-controller/patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - --leader-elect 13 | - --leader-election-id=aibrix-distributed-inference-controller 14 | - --health-probe-bind-address=:8081 15 | - --metrics-bind-address=0 16 | - --controllers=distributed-inference-controller 17 | - --disable-webhook 18 | -------------------------------------------------------------------------------- /config/standalone/kv-cache-controller/patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - --leader-elect 13 | - --leader-election-id=aibrix-kv-cache-controller 14 | - --health-probe-bind-address=:8081 15 | - --metrics-bind-address=:8080 16 | - --controllers=kv-cache-controller 17 | - --disable-webhook 18 | 
-------------------------------------------------------------------------------- /config/standalone/model-adapter-controller/patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - --leader-elect 13 | - --leader-election-id=aibrix-model-adapter-controller 14 | - --health-probe-bind-address=:8081 15 | - --metrics-bind-address=0 16 | - --controllers=model-adapter-controller 17 | # following patch is from config/default/manager_webhook_patch.yaml 18 | ports: 19 | - containerPort: 9443 20 | name: webhook-server 21 | protocol: TCP 22 | volumeMounts: 23 | - mountPath: /tmp/k8s-webhook-server/serving-certs 24 | name: cert 25 | readOnly: true 26 | volumes: 27 | - name: cert 28 | secret: 29 | defaultMode: 420 30 | secretName: webhook-server-cert 31 | -------------------------------------------------------------------------------- /config/standalone/stormservice-controller/patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | args: 12 | - --leader-elect 13 | - --leader-election-id=aibrix-stormservice-controller # dedicated lease name, matching the per-controller IDs used by the other standalone patches 14 | - --health-probe-bind-address=:8081 15 | - --metrics-bind-address=0 16 | - --controllers=stormservice-controller 17 | - --disable-webhook 18 | -------------------------------------------------------------------------------- /config/test/README.md: -------------------------------------------------------------------------------- 1 | ### E2E Test Configurations 2 | 3 | To extend and override the default configurations for e2e tests, add the necessary configurations here. 
-------------------------------------------------------------------------------- /config/test/gateway/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1alpha1 2 | kind: Component 3 | 4 | patches: 5 | - path: vtc-test-env-patch.yaml 6 | target: 7 | kind: Deployment 8 | name: aibrix-gateway-plugins 9 | -------------------------------------------------------------------------------- /config/test/gateway/vtc-test-env-patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: aibrix-gateway-plugins 5 | namespace: aibrix-system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: gateway-plugin 11 | env: 12 | - name: AIBRIX_ROUTER_VTC_TOKEN_TRACKER_WINDOW_SIZE 13 | value: "2" 14 | - name: AIBRIX_ROUTER_VTC_TOKEN_TRACKER_TIME_UNIT 15 | value: "seconds" 16 | - name: AIBRIX_ROUTER_VTC_TOKEN_TRACKER_MIN_TOKENS 17 | value: "100" 18 | - name: AIBRIX_ROUTER_VTC_TOKEN_TRACKER_MAX_TOKENS 19 | value: "800" 20 | -------------------------------------------------------------------------------- /config/test/kustomization.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: kustomize.config.k8s.io/v1beta1 2 | kind: Kustomization 3 | 4 | resources: 5 | - ../default 6 | 7 | patches: 8 | - path: gateway/vtc-test-env-patch.yaml 9 | target: 10 | kind: Deployment 11 | name: aibrix-gateway-plugins 12 | namespace: aibrix-system 13 | -------------------------------------------------------------------------------- /config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- 
/config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look at when substituting nameReference. 2 | # It requires kustomize v2.1.0 or newer to work properly. 3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | -------------------------------------------------------------------------------- /config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | app.kubernetes.io/name: service 6 | app.kubernetes.io/instance: webhook-service 7 | app.kubernetes.io/component: webhook 8 | app.kubernetes.io/created-by: aibrix 9 | app.kubernetes.io/part-of: aibrix 10 | app.kubernetes.io/managed-by: kustomize 11 | name: webhook-service 12 | namespace: system 13 | spec: 14 | ports: 15 | - port: 443 16 | protocol: TCP 17 | targetPort: 9443 18 | selector: 19 | control-plane: controller-manager 20 | -------------------------------------------------------------------------------- /deployment/terraform/gcp/cluster/data.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | data "google_service_account" "node_pool" { 18 | account_id = var.node_pool_service_account_id 19 | } 20 | -------------------------------------------------------------------------------- /deployment/terraform/gcp/cluster/outputs.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | output "name" { 18 | value = google_container_cluster.main.name 19 | description = "The name of the created cluster" 20 | } 21 | -------------------------------------------------------------------------------- /deployment/terraform/gcp/cluster/terraform.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | google = { 20 | source = "hashicorp/google" 21 | version = "6.22.0" 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /deployment/terraform/gcp/terraform.tfvars.example: -------------------------------------------------------------------------------- 1 | project_id = "" 2 | default_region = "" -------------------------------------------------------------------------------- /deployment/terraform/kubernetes/README.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | Creates an AIBrix stack using the Terraform Kubernetes provider 3 | -------------------------------------------------------------------------------- /deployment/terraform/kubernetes/outputs.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | output "aibrix_service" { 18 | description = "The service definition for the model" 19 | value = data.kubernetes_service.aibrix_service 20 | } 21 | -------------------------------------------------------------------------------- /deployment/terraform/kubernetes/terraform.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | terraform { 18 | required_providers { 19 | kubernetes = { 20 | source = "hashicorp/kubernetes" 21 | version = "2.36.0" 22 | } 23 | kubectl = { 24 | source = "gavinbunney/kubectl" 25 | version = "1.19.0" 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /deployment/terraform/kubernetes/variables.tf: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | variable "aibrix_release_version" { 18 | description = "Release version of AIBrix to deploy." 19 | type = string 20 | default = "v0.2.0" 21 | } 22 | 23 | variable "deploy_example_model" { 24 | description = "Whether to deploy example model." 25 | type = bool 26 | default = true 27 | } 28 | -------------------------------------------------------------------------------- /development/app/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "huggingface_token": "your huggingface token" 3 | } -------------------------------------------------------------------------------- /development/app/config/heterogeneous/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - ../simulator 5 | - simulator_a40 6 | 7 | apiVersion: kustomize.config.k8s.io/v1beta1 8 | -------------------------------------------------------------------------------- /development/app/config/heterogeneous/simulator_a40/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - ../../templates/deployment 5 | - ../../templates/podautoscaler 6 | 7 | patches: 8 | - path: rename_deployment_a40.json 9 | target: 10 | kind: Deployment 11 | name: mock-llama2-7b 12 | - path: rename_podautoscaler_a40.json 13 | target: 14 | kind: PodAutoscaler 15 | name: podautoscaler-mock-llama2-7b 16 | - path: patch_deployment_a40.yaml 17 | target: 18 | kind: 
Deployment 19 | name: mock-llama2-7b 20 | - path: patch_podautoscaler_a40.yaml 21 | target: 22 | kind: PodAutoscaler 23 | name: podautoscaler-simulator-llama2-7b-a40 24 | 25 | apiVersion: kustomize.config.k8s.io/v1beta1 26 | -------------------------------------------------------------------------------- /development/app/config/heterogeneous/simulator_a40/patch_deployment_a40.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: simulator-llama2-7b-a40 5 | labels: 6 | model.aibrix.ai/name: "llama2-7b" 7 | spec: 8 | replicas: 0 9 | selector: 10 | matchLabels: 11 | model.aibrix.ai/name: "llama2-7b" 12 | app: "simulator-llama2-7b-a40" 13 | template: 14 | metadata: 15 | labels: 16 | model.aibrix.ai/name: "llama2-7b" 17 | app: "simulator-llama2-7b-a40" 18 | spec: 19 | containers: 20 | - name: llm-engine 21 | image: aibrix/vllm-simulator-a40:nightly 22 | env: 23 | - name: MODEL_NAME 24 | valueFrom: 25 | fieldRef: 26 | fieldPath: metadata.labels['model.aibrix.ai/name'] -------------------------------------------------------------------------------- /development/app/config/heterogeneous/simulator_a40/patch_podautoscaler_a40.yaml: -------------------------------------------------------------------------------- 1 | # Pod autoscaler works with gpu-optimizer 2 | apiVersion: autoscaling.aibrix.ai/v1alpha1 3 | kind: PodAutoscaler 4 | metadata: 5 | name: podautoscaler-simulator-llama2-7b-a40 6 | annotations: 7 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 8 | spec: 9 | scaleTargetRef: 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | name: simulator-llama2-7b-a40 13 | metricsSources: 14 | - metricSourceType: domain 15 | protocolType: http 16 | endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 17 | path: /metrics/default/simulator-llama2-7b-a40 18 | targetMetric: "vllm:deployment_replicas" 19 | targetValue: "100" # For stable workloads. 
Set to a fraction to tolerate bursts. -------------------------------------------------------------------------------- /development/app/config/heterogeneous/simulator_a40/rename_deployment_a40.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "op": "replace", 4 | "path": "/metadata/name", 5 | "value": "simulator-llama2-7b-a40" 6 | } 7 | ] -------------------------------------------------------------------------------- /development/app/config/heterogeneous/simulator_a40/rename_podautoscaler_a40.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "op": "replace", 4 | "path": "/metadata/name", 5 | "value": "podautoscaler-simulator-llama2-7b-a40" 6 | } 7 | ] -------------------------------------------------------------------------------- /development/app/config/mock/api-key-patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: mock-llama2-7b 5 | labels: 6 | model.aibrix.ai/name: "llama2-7b" 7 | model.aibrix.ai/port: "8000" 8 | adapter.model.aibrix.ai/enabled: "true" 9 | spec: 10 | replicas: 3 11 | selector: 12 | matchLabels: 13 | adapter.model.aibrix.ai/enabled: "true" 14 | model.aibrix.ai/name: "llama2-7b" 15 | app: "mock-llama2-7b" 16 | template: 17 | metadata: 18 | labels: 19 | adapter.model.aibrix.ai/enabled: "true" 20 | model.aibrix.ai/name: "llama2-7b" 21 | app: "mock-llama2-7b" 22 | spec: 23 | serviceAccountName: mocked-app-sa 24 | containers: 25 | - name: llm-engine 26 | image: aibrix/vllm-mock:nightly 27 | command: 28 | - python3 29 | - app.py 30 | - --api_key 31 | - test-key-1234567890 -------------------------------------------------------------------------------- /development/app/config/mock/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../templates/deployment 3 | - 
components.yaml 4 | 5 | # enable following patch when we test lora + api-key 6 | patches: 7 | - path: api-key-patch.yaml -------------------------------------------------------------------------------- /development/app/config/simulator/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - ../mock 5 | - ../templates/podautoscaler 6 | 7 | patches: 8 | - path: rename_deployment_a100.json 9 | target: 10 | kind: Deployment 11 | name: mock-llama2-7b 12 | - path: rename_podautoscaler_a100.json 13 | target: 14 | kind: PodAutoscaler 15 | name: podautoscaler-mock-llama2-7b 16 | - path: patch_deployment_a100.yaml 17 | target: 18 | kind: Deployment 19 | name: simulator-llama2-7b-a100 20 | - path: patch_podautoscaler_a100.yaml 21 | target: 22 | kind: PodAutoscaler 23 | name: podautoscaler-simulator-llama2-7b-a100 24 | 25 | apiVersion: kustomize.config.k8s.io/v1beta1 26 | -------------------------------------------------------------------------------- /development/app/config/simulator/patch_deployment_a100.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: simulator-llama2-7b-a100 5 | labels: 6 | model.aibrix.ai/name: "llama2-7b" 7 | model.aibrix.ai/min_replicas: "1" # min replica for gpu optimizer when no workloads. 
8 | spec: 9 | replicas: 1 10 | selector: 11 | matchLabels: 12 | model.aibrix.ai/name: "llama2-7b" 13 | app: "simulator-llama2-7b-a100" 14 | template: 15 | metadata: 16 | labels: 17 | model.aibrix.ai/name: "llama2-7b" 18 | app: "simulator-llama2-7b-a100" 19 | spec: 20 | containers: 21 | - name: llm-engine 22 | image: aibrix/vllm-simulator:nightly 23 | env: 24 | - name: MODEL_NAME 25 | valueFrom: 26 | fieldRef: 27 | fieldPath: metadata.labels['model.aibrix.ai/name'] -------------------------------------------------------------------------------- /development/app/config/simulator/patch_podautoscaler_a100.yaml: -------------------------------------------------------------------------------- 1 | # Pod autoscaler works with gpu-optimizer 2 | apiVersion: autoscaling.aibrix.ai/v1alpha1 3 | kind: PodAutoscaler 4 | metadata: 5 | name: podautoscaler-simulator-llama2-7b-a100 6 | annotations: 7 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 8 | spec: 9 | scaleTargetRef: 10 | apiVersion: apps/v1 11 | kind: Deployment 12 | name: simulator-llama2-7b-a100 13 | metricsSources: 14 | - metricSourceType: domain 15 | protocolType: http 16 | endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 17 | path: /metrics/default/simulator-llama2-7b-a100 18 | targetMetric: "vllm:deployment_replicas" 19 | targetValue: "100" # For stable workloads. Set to a fraction to tolerate bursts. 
-------------------------------------------------------------------------------- /development/app/config/simulator/rename_deployment_a100.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "op": "replace", 4 | "path": "/metadata/name", 5 | "value": "simulator-llama2-7b-a100" 6 | } 7 | ] -------------------------------------------------------------------------------- /development/app/config/simulator/rename_podautoscaler_a100.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "op": "replace", 4 | "path": "/metadata/name", 5 | "value": "podautoscaler-simulator-llama2-7b-a100" 6 | } 7 | ] -------------------------------------------------------------------------------- /development/app/config/templates/deployment/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - deployment.yaml 3 | -------------------------------------------------------------------------------- /development/app/config/templates/podautoscaler/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - podautoscaler_kpa.yaml 3 | -------------------------------------------------------------------------------- /development/app/config/templates/podautoscaler/podautoscaler_apa.yaml: -------------------------------------------------------------------------------- 1 | # Pod autoscaler works with gpu-optimizer 2 | apiVersion: autoscaling.aibrix.ai/v1alpha1 3 | kind: PodAutoscaler 4 | metadata: 5 | name: podautoscaler-mock-llama2-7b 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | autoscaling.aibrix.ai/up-fluctuation-tolerance: "0.1" 11 | autoscaling.aibrix.ai/down-fluctuation-tolerance: "0.2" 12 | apa.autoscaling.aibrix.ai/window: "30s" 13 | namespace: default 14 | spec: 15 | scaleTargetRef: 16 | apiVersion: 
apps/v1 17 | kind: Deployment 18 | name: mock-llama2-7b 19 | minReplicas: 0 20 | maxReplicas: 10 21 | metricsSources: 22 | - metricSourceType: pod 23 | protocolType: http 24 | port: "8000" 25 | path: metrics 26 | targetMetric: "avg_prompt_throughput_toks_per_s" 27 | targetValue: "60" 28 | scalingStrategy: "APA" -------------------------------------------------------------------------------- /development/app/config/templates/podautoscaler/podautoscaler_hpa.yaml: -------------------------------------------------------------------------------- 1 | # Pod autoscaler works with gpu-optimizer 2 | apiVersion: autoscaling.aibrix.ai/v1alpha1 3 | kind: PodAutoscaler 4 | metadata: 5 | name: metric-server-autoscaler 6 | namespace: kube-system 7 | spec: 8 | scaleTargetRef: 9 | apiVersion: apps/v1 10 | kind: Deployment 11 | name: metrics-server 12 | minReplicas: 1 13 | maxReplicas: 4 14 | metricsSources: 15 | - metricSourceType: "pod" 16 | protocolType: "https" 17 | port: "4443" 18 | path: "/metrics" 19 | targetMetric: "go_threads" 20 | targetValue: "20" 21 | scalingStrategy: "HPA" -------------------------------------------------------------------------------- /development/app/config/templates/podautoscaler/podautoscaler_kpa.yaml: -------------------------------------------------------------------------------- 1 | # Pod autoscaler works with gpu-optimizer 2 | apiVersion: autoscaling.aibrix.ai/v1alpha1 3 | kind: PodAutoscaler 4 | metadata: 5 | name: podautoscaler-mock-llama2-7b 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 30s 11 | namespace: default 12 | spec: 13 | scaleTargetRef: 14 | apiVersion: apps/v1 15 | kind: Deployment 16 | name: mock-llama2-7b 17 | minReplicas: 0 18 | maxReplicas: 10 19 | metricsSources: 20 | - metricSourceType: pod 21 | protocolType: http 22 | port: "8000" 23 | path: metrics 24 | targetMetric: "avg_prompt_throughput_toks_per_s" 25 | 
targetValue: "60" 26 | scalingStrategy: "KPA" -------------------------------------------------------------------------------- /development/app/config/vke/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../mock 3 | - ../templates/podautoscaler 4 | 5 | images: 6 | - name: aibrix/vllm-mock 7 | newName: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/vllm-mock 8 | newTag: nightly 9 | -------------------------------------------------------------------------------- /development/app/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python app.py "$@" # Pass all arguments to app.py -------------------------------------------------------------------------------- /development/app/requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | Flask-HTTPAuth 3 | kubernetes 4 | pandas 5 | scikit-learn 6 | wandb 7 | ddsketch 8 | plotly_express 9 | fasteners 10 | transformers 11 | git+https://github.com/zhangjyr/vidur.git 12 | ray[default] -------------------------------------------------------------------------------- /development/tutorials/distributed/fleet-autoscaling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: facebook-opt-13b-autoscaling 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | annotations: 9 | kpa.autoscaling.aibrix.ai/scale-down-delay: 1m 10 | spec: 11 | scalingStrategy: KPA 12 | minReplicas: 1 13 | maxReplicas: 4 14 | metricsSources: 15 | - metricSourceType: pod 16 | protocolType: http 17 | port: '8000' 18 | path: metrics 19 | targetMetric: gpu_cache_usage_perc 20 | targetValue: '70' 21 | scaleTargetRef: 22 | apiVersion: orchestration.aibrix.ai/v1alpha1 23 | kind: RayClusterFleet 24 | name: 
facebook-opt-13b 25 | -------------------------------------------------------------------------------- /development/tutorials/kvcache/kvcache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: KVCache 3 | metadata: 4 | name: aibrix-deepseek-33b-kvcache 5 | namespace: aibrix-system 6 | annotations: 7 | kvcache.orchestration.aibrix.ai/node-affinity-gpu-type: NVIDIA-L20 8 | kvcache.orchestration.aibrix.ai/pod-affinity-workload: aibrix-deepseek-33b 9 | spec: 10 | mode: centralized 11 | service: 12 | type: ClusterIP 13 | ports: 14 | - name: service 15 | port: 9600 16 | targetPort: 9600 17 | protocol: TCP 18 | cache: 19 | image: aibrix/vineyardd:20241120 20 | imagePullPolicy: IfNotPresent 21 | resources: 22 | requests: 23 | cpu: "2000m" 24 | memory: "4Gi" 25 | limits: 26 | cpu: "2000m" 27 | memory: "4Gi" 28 | 29 | 30 | -------------------------------------------------------------------------------- /development/tutorials/lora/model_adapter.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: model.aibrix.ai/v1alpha1 2 | kind: ModelAdapter 3 | metadata: 4 | name: text2sql-lora-1 5 | namespace: default 6 | labels: 7 | model.aibrix.ai/name: "text2sql-lora-1" 8 | model.aibrix.ai/port: "8000" 9 | spec: 10 | baseModel: llama2-7b 11 | podSelector: 12 | matchLabels: 13 | model.aibrix.ai/name: llama2-7b 14 | artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test 15 | schedulerName: default 16 | -------------------------------------------------------------------------------- /development/tutorials/lora/model_adapter_api_key.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: model.aibrix.ai/v1alpha1 2 | kind: ModelAdapter 3 | metadata: 4 | name: text2sql-lora-2 5 | namespace: default 6 | labels: 7 | model.aibrix.ai/name: "text2sql-lora-2" 8 | model.aibrix.ai/port: "8000" 9 | spec: 10 | 
replicas: 1 11 | baseModel: llama2-7b 12 | podSelector: 13 | matchLabels: 14 | model.aibrix.ai/name: llama2-7b 15 | artifactURL: huggingface://yard1/llama-2-7b-sql-lora-test 16 | additionalConfig: 17 | api-key: test-key-1234567890 18 | schedulerName: default 19 | -------------------------------------------------------------------------------- /development/tutorials/metrics/service-monitor.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: metric-exporter-testing 5 | namespace: default 6 | spec: 7 | endpoints: 8 | - interval: 15s 9 | path: /metrics 10 | port: metrics 11 | namespaceSelector: 12 | matchNames: 13 | - default 14 | selector: 15 | matchLabels: 16 | prometheus-discovery: "true" 17 | 18 | -------------------------------------------------------------------------------- /development/tutorials/podautoscaler/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v2 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | labels: 5 | app.kubernetes.io/managed-by: kustomize 6 | app.kubernetes.io/name: aibrix 7 | name: mock-llama2-7b-hpa 8 | namespace: default 9 | spec: 10 | maxReplicas: 10 11 | metrics: 12 | - pods: 13 | metric: 14 | name: gpu_cache_usage_perc 15 | target: 16 | averageValue: "40" 17 | type: AverageValue 18 | type: Pods 19 | minReplicas: 1 20 | scaleTargetRef: 21 | apiVersion: apps/v1 22 | kind: Deployment 23 | name: mock-llama2-7b 24 | -------------------------------------------------------------------------------- /development/tutorials/podautoscaler/pa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: llama2-70b-pa 5 | labels: 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/managed-by: kustomize 8 | namespace: default 9 | 
spec: 10 | scaleTargetRef: 11 | apiVersion: apps/v1 12 | kind: Deployment 13 | name: mock-llama2-7b 14 | minReplicas: 1 15 | maxReplicas: 10 16 | metricsSources: 17 | - metricSourceType: "pod" 18 | protocolType: "http" 19 | port: "8000" 20 | path: "/metrics" 21 | targetMetric: "gpu_cache_usage_perc" 22 | targetValue: "40" 23 | scalingStrategy: "HPA" 24 | -------------------------------------------------------------------------------- /development/tutorials/runtime/README.md: -------------------------------------------------------------------------------- 1 | # AIBrix Runtime Demo 2 | 3 | ## Model Download 4 | The AIBrix runtime supports downloading models from different sources. 5 | 6 | - Download model from HuggingFace 7 | ```shell 8 | kubectl apply -f runtime-hf-download.yaml 9 | ``` 10 | 11 | - Download model from AWS S3 12 | ```shell 13 | kubectl apply -f runtime-s3-download.yaml 14 | ``` 15 | 16 | - Download model from TOS 17 | ```shell 18 | kubectl apply -f runtime-tos-download.yaml 19 | ``` 20 | 21 | ## Metrics Merge 22 | -------------------------------------------------------------------------------- /development/vllm/kind-config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | nodes: 4 | - role: control-plane 5 | - role: worker 6 | extraMounts: 7 | - hostPath: //.cache/huggingface 8 | containerPath: /root/.cache/huggingface 9 | -------------------------------------------------------------------------------- /development/vllm/linux/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - ../macos 5 | 6 | images: 7 | - name: aibrix/vllm-cpu-env 8 | newTag: linux-amd64 9 | -------------------------------------------------------------------------------- /development/vllm/macos/kustomization.yaml: -------------------------------------------------------------------------------- 1
| kind: Kustomization 2 | 3 | resources: 4 | - deployment.yaml 5 | - components.yaml -------------------------------------------------------------------------------- /dist/chart/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building Helm packages. 2 | # Operating system files 3 | .DS_Store 4 | 5 | # Version control directories 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .hg/ 10 | .hgignore 11 | .svn/ 12 | 13 | # Backup and temporary files 14 | *.swp 15 | *.tmp 16 | *.bak 17 | *.orig 18 | *~ 19 | 20 | # IDE and editor-related files 21 | .idea/ 22 | .vscode/ 23 | 24 | # Helm chart artifacts 25 | dist/chart/*.tgz 26 | -------------------------------------------------------------------------------- /dist/chart/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: aibrix 3 | description: Cloud-native LLM inference infrastructure with distributed inference, KV caching, intelligent routing, and autoscaling capabilities 4 | type: application 5 | version: 0.4.1 6 | appVersion: "0.4.1" 7 | home: https://github.com/vllm-project/aibrix 8 | sources: 9 | - https://github.com/vllm-project/aibrix 10 | keywords: 11 | - ai 12 | - ai-gateway 13 | - llm 14 | - inference 15 | - kubernetes 16 | - autoscaling 17 | - distributed-kv-cache 18 | - lora 19 | - distributed-inference 20 | - disaggregation 21 | maintainers: 22 | - name: AIBrix Team 23 | url: https://github.com/vllm-project/aibrix 24 | annotations: 25 | category: AI Infra 26 | -------------------------------------------------------------------------------- /dist/chart/templates/controller-manager/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: aibrix-controller-manager-metrics-service 5 | namespace: {{ .Release.Namespace }} 6 | annotations: 7 | prometheus.io/path: /metrics 8 | 
prometheus.io/port: "8080" 9 | prometheus.io/scrape: "true" 10 | labels: 11 | {{- include "chart.labels" . | nindent 4 }} 12 | app.kubernetes.io/component: aibrix-controller-manager 13 | spec: 14 | ports: 15 | - name: http 16 | port: 8080 17 | protocol: TCP 18 | targetPort: 8080 19 | selector: 20 | {{- include "chart.selectorLabels" . | nindent 4 }} 21 | app.kubernetes.io/component: aibrix-controller-manager -------------------------------------------------------------------------------- /dist/chart/templates/gateway-plugin/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: aibrix-gateway-plugins 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "chart.labels" . | nindent 4 }} 8 | app.kubernetes.io/component: aibrix-gateway-plugin 9 | annotations: 10 | prometheus.io/scrape: "true" 11 | prometheus.io/port: "8080" 12 | prometheus.io/path: "/metrics" 13 | spec: 14 | selector: 15 | {{- include "chart.selectorLabels" . | nindent 4 }} 16 | app.kubernetes.io/component: aibrix-gateway-plugin 17 | ports: 18 | - name: gateway 19 | port: 50052 20 | targetPort: 50052 21 | - name: profiling 22 | port: 6060 23 | targetPort: 6060 24 | - name: metrics 25 | port: 8080 26 | targetPort: 8080 27 | -------------------------------------------------------------------------------- /dist/chart/templates/gpu-optimizer/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: aibrix-gpu-optimizer 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "chart.labels" . 
| nindent 4 }} 8 | app.kubernetes.io/component: aibrix-gpu-optimizer 9 | spec: 10 | selector: 11 | app.kubernetes.io/component: aibrix-gpu-optimizer 12 | ports: 13 | - protocol: TCP 14 | port: 8080 15 | targetPort: 8080 16 | type: ClusterIP 17 | -------------------------------------------------------------------------------- /dist/chart/templates/metadata-service/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: aibrix-metadata-service 5 | namespace: {{ .Release.Namespace }} 6 | labels: 7 | {{- include "chart.labels" . | nindent 4 }} 8 | app.kubernetes.io/component: aibrix-metadata-service 9 | spec: 10 | selector: 11 | {{- include "chart.selectorLabels" . | nindent 4 }} 12 | app.kubernetes.io/component: aibrix-metadata-service 13 | ports: 14 | - name: http 15 | protocol: TCP 16 | port: 8090 17 | targetPort: 8090 -------------------------------------------------------------------------------- /dist/chart/templates/webhook/secret.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.webhook.enable }} 2 | apiVersion: v1 3 | kind: Secret 4 | metadata: 5 | labels: 6 | {{- include "chart.labels" . | nindent 4 }} 7 | app.kubernetes.io/component: aibrix-controller-manager 8 | name: aibrix-webhook-server-cert 9 | namespace: {{ .Release.Namespace }} 10 | {{- end }} -------------------------------------------------------------------------------- /dist/chart/templates/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | {{- if .Values.webhook.enable }} 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: aibrix-webhook-service 6 | namespace: {{ .Release.Namespace }} 7 | labels: 8 | {{- include "chart.labels" . 
| nindent 4 }} 9 | app.kubernetes.io/component: aibrix-controller-manager 10 | spec: 11 | ports: 12 | - name: webhook 13 | port: 443 14 | protocol: TCP 15 | targetPort: 9443 16 | selector: 17 | {{- include "chart.selectorLabels" . | nindent 4 }} 18 | app.kubernetes.io/component: aibrix-controller-manager 19 | {{- end }} 20 | -------------------------------------------------------------------------------- /docs/.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Using Sphinx to build html web pages for AIBrix 2 | 3 | ## Environment setup 4 | Make sure that your Python conda environment is set up correctly. The following command installs the Sphinx package and the necessary templates. 
5 | 6 | ```bash 7 | pip install -r requirements-docs.txt 8 | ``` 9 | 10 | ## Compile html pages 11 | 12 | ``` 13 | make html 14 | ``` 15 | 16 | Now the html pages should be generated at "docs/build/html/index.html". You can open this html page in your web browser; it is the front page of the project documentation. 17 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/paper/AIBrix_White_Paper_0219_2025.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/paper/AIBrix_White_Paper_0219_2025.pdf -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- 1 | sphinx==8.0.2 2 | sphinx-book-theme==1.1.3 3 | sphinx-click==6.0.0 4 | sphinx-copybutton==0.5.2 5 | sphinx-togglebutton==0.3.2 6 | sphinxemoji==0.3.1 7 | sphinx-autodoc-typehints==2.4.1 8 | sphinx_design==0.6.1 9 | sphinxcontrib-mermaid==1.0.0 10 | -------------------------------------------------------------------------------- /docs/source/assets/images/ai-engine-runtime-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/ai-engine-runtime-overview.png -------------------------------------------------------------------------------- /docs/source/assets/images/aibrix-architecture-v1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-architecture-v1.jpeg -------------------------------------------------------------------------------- 
/docs/source/assets/images/aibrix-dist-kv-cache-arch-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-dist-kv-cache-arch-overview.png -------------------------------------------------------------------------------- /docs/source/assets/images/aibrix-dist-kv-cache-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-dist-kv-cache-dashboard.png -------------------------------------------------------------------------------- /docs/source/assets/images/aibrix-infinistore-arch-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-infinistore-arch-overview.png -------------------------------------------------------------------------------- /docs/source/assets/images/aibrix-kvcache-offloading-arch-overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-kvcache-offloading-arch-overview.png -------------------------------------------------------------------------------- /docs/source/assets/images/aibrix-kvcache-profiling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/aibrix-kvcache-profiling.png -------------------------------------------------------------------------------- /docs/source/assets/images/autoscaler/aibrix-controller-manager-output.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/autoscaler/aibrix-controller-manager-output.png -------------------------------------------------------------------------------- /docs/source/assets/images/autoscaler/autoscaling_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/autoscaler/autoscaling_result.png -------------------------------------------------------------------------------- /docs/source/assets/images/autoscaler/optimizer-based-autoscaling-70-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/autoscaler/optimizer-based-autoscaling-70-results.png -------------------------------------------------------------------------------- /docs/source/assets/images/autoscaler/optimizer-based-podautoscaler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/autoscaler/optimizer-based-podautoscaler.png -------------------------------------------------------------------------------- /docs/source/assets/images/autoscaler/podautoscaler-describe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/autoscaler/podautoscaler-describe.png -------------------------------------------------------------------------------- /docs/source/assets/images/benchmark/aibrix-benchmark-component-doc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/benchmark/aibrix-benchmark-component-doc.png -------------------------------------------------------------------------------- /docs/source/assets/images/cloud/lambda-cloud-installation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/cloud/lambda-cloud-installation.png -------------------------------------------------------------------------------- /docs/source/assets/images/cloud/lambda-cloud-instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/cloud/lambda-cloud-instance.png -------------------------------------------------------------------------------- /docs/source/assets/images/cloud/lambda-cloud-ssh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/cloud/lambda-cloud-ssh.png -------------------------------------------------------------------------------- /docs/source/assets/images/cloud/lambda-cloud-verify-installation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/cloud/lambda-cloud-verify-installation.png -------------------------------------------------------------------------------- /docs/source/assets/images/delete-namespace-stuck-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/delete-namespace-stuck-1.png -------------------------------------------------------------------------------- /docs/source/assets/images/delete-namespace-stuck-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/delete-namespace-stuck-2.png -------------------------------------------------------------------------------- /docs/source/assets/images/draft-release.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/draft-release.png -------------------------------------------------------------------------------- /docs/source/assets/images/gateway-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/gateway-design.png -------------------------------------------------------------------------------- /docs/source/assets/images/heterogeneous-gpu-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/heterogeneous-gpu-diagram.png -------------------------------------------------------------------------------- /docs/source/assets/images/lora-controller-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/lora-controller-workflow.png 
-------------------------------------------------------------------------------- /docs/source/assets/images/lora-sequence-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/lora-sequence-diagram.png -------------------------------------------------------------------------------- /docs/source/assets/images/lora-service-discovery-resources.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/lora-service-discovery-resources.png -------------------------------------------------------------------------------- /docs/source/assets/images/mix-grain-orchestration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/mix-grain-orchestration.png -------------------------------------------------------------------------------- /docs/source/assets/images/model-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/model-error.png -------------------------------------------------------------------------------- /docs/source/assets/images/release-pipeline-manifests.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/release-pipeline-manifests.png -------------------------------------------------------------------------------- /docs/source/assets/images/release-pipeline-python-package.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/release-pipeline-python-package.png -------------------------------------------------------------------------------- /docs/source/assets/images/slo_routing/evaluation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/slo_routing/evaluation.png -------------------------------------------------------------------------------- /docs/source/assets/images/slo_routing/motivation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/slo_routing/motivation.png -------------------------------------------------------------------------------- /docs/source/assets/images/slo_routing/variation_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/slo_routing/variation_comparison.png -------------------------------------------------------------------------------- /docs/source/assets/images/stormservice/aibrix-stormservice-illustration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/images/stormservice/aibrix-stormservice-illustration.png -------------------------------------------------------------------------------- /docs/source/assets/logos/aibrix-logo.jpeg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/docs/source/assets/logos/aibrix-logo.jpeg -------------------------------------------------------------------------------- /docs/source/features/autoscaling/autoscaling.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Autoscaling 3 | =========== 4 | 5 | Overview of AIBrix Autoscaler 6 | ----------------------------- 7 | 8 | Autoscaling is crucial for deploying Large Language Model (LLM) services on Kubernetes (K8s), as timely scaling up handles peaks in request traffic, and scaling down conserves resources when demand wanes. 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | metric-based-autoscaling 14 | optimizer-based-autoscaling -------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ -------------------------------------------------------------------------------- /hack/ci/kind-config.yaml: -------------------------------------------------------------------------------- 1 | kind: Cluster 2 | apiVersion: kind.x-k8s.io/v1alpha4 3 | nodes: 4 | - role: control-plane 5 | - role: worker 6 | -------------------------------------------------------------------------------- /hack/lambda-cloud/README.md: -------------------------------------------------------------------------------- 1 | # AIBrix Single-Node Deployment on Lambda Instances 2 | 3 | Please refer to the Lambda Cloud installation guide in the AIBrix documentation for more details. -------------------------------------------------------------------------------- /hack/lambda-cloud/verify.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Run nvidia-smi to list GPU devices 4 | nvidia-smi -L 5 | if [ $? -ne 0 ]; then 6 | echo "nvidia-smi failed to execute." 7 | exit 1 8 | fi 9 | 10 | # Run a Docker container with NVIDIA runtime to list GPU devices 11 | docker run --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all ubuntu:20.04 nvidia-smi -L 12 | if [ $? -ne 0 ]; then 13 | echo "Docker command with NVIDIA runtime failed to execute." 14 | exit 1 15 | fi 16 | 17 | # Run a Docker container with mounted /dev/null to check GPU accessibility 18 | docker run -v /dev/null:/var/run/nvidia-container-devices/all ubuntu:20.04 nvidia-smi -L 19 | if [ $? -ne 0 ]; then 20 | echo "Docker command with mounted /dev/null failed to execute." 21 | exit 1 22 | fi 23 | 24 | echo "All verification checks passed successfully." 25 | -------------------------------------------------------------------------------- /hack/release/sync-images.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # check the required parameters 4 | if [ -z "$1" ] || [ -z "$2" ]; then 5 | echo "Error: Missing required parameters." 
6 | echo "Usage: $0 <version> <registry>" 7 | echo "Example: $0 v0.2.0-rc.2 aibrix-container-registry-cn-beijing.cr.volces.com" 8 | exit 1 9 | fi 10 | 11 | # aibrix tag, e.g. v0.2.0-rc.2 12 | # registry, e.g. aibrix-container-registry-cn-beijing.cr.volces.com 13 | VERSION=$1 14 | REGISTRY=$2 15 | 16 | # image list 17 | IMAGES=("runtime" "metadata-service" "gateway-plugins" "controller-manager" "kvcache-watcher") 18 | 19 | # pull, retag and push images 20 | for IMAGE in "${IMAGES[@]}"; do 21 | docker pull aibrix/${IMAGE}:${VERSION} 22 | docker tag aibrix/${IMAGE}:${VERSION} ${REGISTRY}/aibrix/${IMAGE}:${VERSION} 23 | docker push ${REGISTRY}/aibrix/${IMAGE}:${VERSION} 24 | done 25 | -------------------------------------------------------------------------------- /hack/tools.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | /* 5 | Copyright 2024 The Aibrix Team. 6 | 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 
18 | */ 19 | 20 | package hack 21 | 22 | import ( 23 | _ "k8s.io/code-generator" 24 | _ "k8s.io/kube-openapi/cmd/openapi-gen" 25 | ) 26 | -------------------------------------------------------------------------------- /observability/monitor/envoy_metrics_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: envoy-admin-metrics 5 | namespace: envoy-gateway-system 6 | labels: 7 | app.kubernetes.io/name: envoy 8 | app.kubernetes.io/component: proxy 9 | app.kubernetes.io/managed-by: envoy-gateway 10 | spec: 11 | ports: 12 | - name: metrics 13 | port: 19001 14 | targetPort: 19001 15 | protocol: TCP 16 | selector: 17 | app.kubernetes.io/name: envoy 18 | app.kubernetes.io/component: proxy 19 | app.kubernetes.io/managed-by: envoy-gateway 20 | type: ClusterIP 21 | -------------------------------------------------------------------------------- /observability/monitor/service_monitor_controller_manager.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | app.kubernetes.io/managed-by: kubectl 6 | app.kubernetes.io/name: aibrix 7 | app.kubernetes.io/version: nightly 8 | control-plane: controller-manager 9 | name: aibrix-controller-manager-metrics-monitor 10 | namespace: aibrix-system 11 | spec: 12 | endpoints: 13 | - path: /metrics 14 | port: http 15 | scheme: http 16 | selector: 17 | matchLabels: 18 | control-plane: controller-manager 19 | -------------------------------------------------------------------------------- /observability/monitor/service_monitor_gateway.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: envoy-metrics-monitor 5 | namespace: envoy-gateway-system 6 | labels: 7 | release: prometheus 8 | spec: 9 | 
selector: 10 | matchLabels: 11 | app.kubernetes.io/name: envoy 12 | namespaceSelector: 13 | matchNames: 14 | - envoy-gateway-system 15 | endpoints: 16 | - port: metrics 17 | path: /stats/prometheus 18 | scheme: http 19 | interval: 30s 20 | -------------------------------------------------------------------------------- /observability/monitor/service_monitor_gateway_plugin.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | name: gateway-plugin-metrics-monitor 5 | namespace: aibrix-system 6 | labels: 7 | release: prometheus 8 | spec: 9 | selector: 10 | matchLabels: 11 | app: gateway-plugins 12 | endpoints: 13 | - port: metrics 14 | path: /metrics 15 | interval: 15s 16 | namespaceSelector: 17 | matchNames: 18 | - aibrix-system 19 | -------------------------------------------------------------------------------- /observability/monitor/service_monitor_vllm.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: monitoring.coreos.com/v1 2 | kind: ServiceMonitor 3 | metadata: 4 | labels: 5 | release: prometheus 6 | name: test-service-monitor 7 | namespace: default 8 | spec: 9 | endpoints: 10 | - interval: 15s 11 | path: /metrics 12 | port: metrics 13 | namespaceSelector: 14 | matchNames: 15 | - default 16 | selector: 17 | matchLabels: 18 | prometheus-discovery: "true" 19 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // This package has the automatically generated fake clientset. 19 | package fake 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/scheme/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // This package contains the scheme of the automatically generated clientset. 19 | package scheme 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/autoscaling/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 
6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // This package has the automatically generated typed clients. 19 | package v1alpha1 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/autoscaling/v1alpha1/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // Package fake has the automatically generated clients. 19 | package fake 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/autoscaling/v1alpha1/generated_expansion.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | package v1alpha1 19 | 20 | type PodAutoscalerExpansion interface{} 21 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/model/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // This package has the automatically generated typed clients. 19 | package v1alpha1 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/model/v1alpha1/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // Package fake has the automatically generated clients. 19 | package fake 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/model/v1alpha1/generated_expansion.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | package v1alpha1 19 | 20 | type ModelAdapterExpansion interface{} 21 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/orchestration/v1alpha1/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 
3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // This package has the automatically generated typed clients. 19 | package v1alpha1 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/orchestration/v1alpha1/fake/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | // Package fake has the automatically generated clients. 
19 | package fake 20 | -------------------------------------------------------------------------------- /pkg/client/clientset/versioned/typed/orchestration/v1alpha1/generated_expansion.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | // Code generated by client-gen. DO NOT EDIT. 17 | 18 | package v1alpha1 19 | 20 | type RayClusterFleetExpansion interface{} 21 | 22 | type RayClusterReplicaSetExpansion interface{} 23 | 24 | type StormServiceExpansion interface{} 25 | -------------------------------------------------------------------------------- /pkg/metrics/common.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | package metrics 18 | 19 | type MetricSubscriber interface { 20 | SubscribedMetrics() []string 21 | } 22 | -------------------------------------------------------------------------------- /pkg/plugins/gateway/algorithms/algorithms_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package routingalgorithms 17 | 18 | import ( 19 | "testing" 20 | 21 | . "github.com/onsi/ginkgo" 22 | . "github.com/onsi/gomega" 23 | ) 24 | 25 | func TestRoutingAlgorithms(t *testing.T) { 26 | RegisterFailHandler(Fail) 27 | RunSpecs(t, "RoutingAlgorithms Suite") 28 | } 29 | -------------------------------------------------------------------------------- /pkg/plugins/gateway/algorithms/model_router_factory.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package routingalgorithms 17 | 18 | var ModelRouterFactory = NewSLORouter 19 | -------------------------------------------------------------------------------- /pkg/plugins/gateway/algorithms/vtc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package routingalgorithms 18 | 19 | import ( 20 | "github.com/vllm-project/aibrix/pkg/plugins/gateway/algorithms/vtc" 21 | ) 22 | 23 | func init() { 24 | // Register the VTC Basic router 25 | Register(vtc.RouterVTCBasic, vtc.NewVTCBasicRouter) 26 | } 27 | -------------------------------------------------------------------------------- /pkg/plugins/gateway/queue/queue_test.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package queue 17 | 18 | import ( 19 | "testing" 20 | 21 | . "github.com/onsi/ginkgo" 22 | . "github.com/onsi/gomega" 23 | ) 24 | 25 | func TestQueue(t *testing.T) { 26 | RegisterFailHandler(Fail) 27 | RunSpecs(t, "Queue Suite") 28 | } 29 | -------------------------------------------------------------------------------- /pkg/types/router_queue.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2024 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | 17 | package types 18 | 19 | import ( 20 | "errors" 21 | "time" 22 | ) 23 | 24 | const DefaultQueueCapacity = 1024 25 | 26 | var ( 27 | ErrQueueEmpty = errors.New("queue is empty") 28 | ) 29 | 30 | type RouterQueue[V comparable] interface { 31 | Enqueue(V, time.Time) error 32 | Peek(time.Time, PodList) (V, error) 33 | Dequeue(time.Time) (V, error) 34 | Len() int 35 | } 36 | -------------------------------------------------------------------------------- /pkg/types/types_test.go: -------------------------------------------------------------------------------- 1 | //go:build !race 2 | 3 | /* 4 | Copyright 2024 The Aibrix Team. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 
8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | package types 19 | 20 | import ( 21 | "testing" 22 | 23 | . "github.com/onsi/ginkgo" 24 | . "github.com/onsi/gomega" 25 | ) 26 | 27 | func TestTypes(t *testing.T) { 28 | RegisterFailHandler(Fail) 29 | RunSpecs(t, "Types Suite") 30 | } 31 | -------------------------------------------------------------------------------- /pkg/utils/lrustore/store.go: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2025 The Aibrix Team. 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | */ 16 | package cache 17 | 18 | type Store[K comparable, V any] interface { 19 | Put(key K, value V) bool 20 | Get(key K) (V, bool) 21 | Len() int 22 | } 23 | -------------------------------------------------------------------------------- /pkg/utils/utils_test.go: -------------------------------------------------------------------------------- 1 | //go:build !race 2 | 3 | /* 4 | Copyright 2024 The Aibrix Team. 
5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | package utils 20 | 21 | import ( 22 | "testing" 23 | 24 | . "github.com/onsi/ginkgo" 25 | . "github.com/onsi/gomega" 26 | ) 27 | 28 | func TestCache(t *testing.T) { 29 | RegisterFailHandler(Fail) 30 | RunSpecs(t, "Utils Suite") 31 | } 32 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/__version__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | VERSION = (0, 0, 0) # Placeholder (will be replaced dynamically) 16 | 17 | __version__ = "0.0.0" # Placeholder (will be replaced dynamically) 18 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/batch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .driver import BatchDriver 15 | 16 | __all__ = ["BatchDriver"] 17 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/batch/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | # The following are all constants. 17 | # This is the time interval for the sliding window to check. 18 | EXPIRE_INTERVAL: float = 1 19 | # This is the job pool size in the job scheduler. 20 | # It should be proportional to the resource size in the backend. 21 | DEFAULT_JOB_POOL_SIZE = 1 22 | 23 | # Job opts are for testing purposes. 24 | BATCH_OPTS_FAIL_AFTER_N_REQUESTS = "fail_after_n_requests" 25 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/load_monitor/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .optimizer import Optimizer as Optimizer 15 | from .types import GPUProfile as GPUProfile 16 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a100_obsoleted_v1.json: -------------------------------------------------------------------------------- 1 | {"gpu": "simulator-llama2-7b-a100", "cost": 1.0, "tputs": [[62.4770592037908, 54.41609758859857, 27.479217348298977, 13.233761409619609, 6.364180103253477], [60.316466916176516, 50.191932920882834, 26.125096038397714, 12.725834514073275, 6.189499703439544], [56.75260595422426, 47.022895270218406, 24.2563402721543, 11.908798077103592, 5.906818680155886], [50.55276394015143, 40.282564741237664, 21.527138429115244, 11.091621232153178, 5.340385383428255], [39.156908682972, 31.064483545526286, 17.981327336506435, 9.485761082753623, 4.766802405552006], [25.48358337772967, 20.84102605997694, 13.326249617862985, 7.462876735738044, 3.8320375960764395], [14.400357243268942, 12.221727123400482, 8.517260437465913, 5.099627101138905, 2.732587694175802], [7.062882339740163, 6.174371506110377, 4.67281574035367, 2.662288521332232, 1.6317610529300335]], "indexes": [[4, 8, 16, 32, 64, 128, 256, 512], [128, 256, 512, 1024, 2048]]} -------------------------------------------------------------------------------- 
/python/aibrix/aibrix/gpu_optimizer/optimizer/profiling/result/simulator-llama2-7b-a40.json: -------------------------------------------------------------------------------- 1 | {"gpu": "simulator-llama2-7b-a40", "cost": 0.3, "tputs": [[53.46768813782049, 26.94702168379827, 13.351384925561684, 6.449112831829054, 3.0687269478197807], [49.18066045134238, 25.769051024042447, 12.750500448630419, 6.217046222817317, 3.908221660884725], [41.108372151595, 22.75667452657716, 11.871417823932267, 5.829879694384015, 2.8224716954769615], [26.893000851172808, 19.116530652345308, 10.259072704641495, 5.2228653781964365, 2.543157981660752], [22.158835440491103, 14.4763730301464, 8.033579290082939, 4.2722041267679, 2.164619049620232], [12.63449771529047, 9.105625644461483, 5.341921516909921, 3.0724262148293473, 1.605589539944446], [6.44070093597801, 4.908506391879229, 2.825722541489097, 1.7049267539064084, 0.975843626543094], [2.6864554196635524, 2.0460122075598965, 1.365640744818924, 1.2238675036166577, 0.5042209229511221]], "indexes": [[4, 8, 16, 32, 64, 128, 256, 512], [128, 256, 512, 1024, 2048]], "created": 1732598726.235767} -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import Config as Config 2 | from .runner import SolverRunner as SolverRunner 3 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/config_example.json: -------------------------------------------------------------------------------- 1 | { 2 | "gpu_info": { 3 | "A10G": { 4 | "cost": 1.01, 5 | "tputs": [[2, 1], [5, 2]] 6 | }, 7 | "A100-80GB": { 8 | "cost": 3.67, 9 | "tputs": [[20, 20], [40, 20]] 10 | } 11 | }, 12 | "workload_distribution": [[0.2, 0.1], [0.5, 0.2]], 13 | "total_request_rate": 30.0, 14 | "slice_factor": 1 15 | } -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/example.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from .runner import SolverRunner 4 | 5 | 6 | def main(config_path: str): 7 | runner = SolverRunner(config_path) 8 | print(runner.run()) 9 | 10 | 11 | if __name__ == "__main__": 12 | parser = argparse.ArgumentParser() 13 | # Input arguments 14 | parser.add_argument( 15 | "--config", 16 | "-c", 17 | type=str, 18 | default="melange/config/example.json", 19 | help="Path to the input configuration file, in json", 20 | ) 21 | args = parser.parse_args() 22 | 23 | main(args.config) 24 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/requirements.txt: -------------------------------------------------------------------------------- 1 | # used for the solver 2 | numpy 3 | pulp==2.8.0 4 | pandas 5 | ruamel.yaml==0.18.6 
-------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/optimizer/solver/melange/util.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | # Convert max throughput profiling to a mapping from request size to load 5 | def tputs_to_loads_2d(max_tputs: List[List[float]]): 6 | loads: List[List[float]] = [] 7 | for i in range(len(max_tputs)): 8 | loads.append([]) 9 | for j in range(len(max_tputs[0])): 10 | load = 1000000.0 # inf 11 | if max_tputs[i][j] > 0: 12 | load = 1.0 / max_tputs[i][j] 13 | loads[-1].append(load) 14 | return loads 15 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/gpu_optimizer/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .logging import DelayedLog as DelayedLog 15 | from .logging import ExcludePathsFilter as ExcludePathsFilter 16 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The Aibrix Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/api/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/api/v1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from . import batch as batch 15 | from . import files as files 16 | 17 | __all__ = ["batch", "files"] 18 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/cache/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .job import JobCache 16 | 17 | __all__ = [ 18 | "JobCache", 19 | ] 20 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .asyncio_thread import AsyncLoopThread, T 15 | from .httpx_client import HTTPXClientWrapper 16 | from .kopf_operator import KopfOperatorWrapper 17 | 18 | __all__ = ["AsyncLoopThread", "HTTPXClientWrapper", "KopfOperatorWrapper", "T"] 19 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/setting/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | from .config import settings 15 | 16 | __all__ = ["settings"] 17 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/setting/k8s_job_redis_patch.yaml: -------------------------------------------------------------------------------- 1 | # Patch for k8s_job_template.yaml to set the Redis connection (host, port, DB) for the batch worker 2 | apiVersion: batch/v1 3 | kind: Job 4 | spec: 5 | template: 6 | spec: 7 | containers: 8 | - name: batch-worker 9 | env: 10 | - name: REDIS_HOST 11 | value: "aibrix-redis-master.aibrix-system.svc.cluster.local" 12 | - name: REDIS_PORT 13 | value: "6379" 14 | - name: REDIS_DB 15 | value: "0" -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/setting/k8s_job_s3_patch.yaml: -------------------------------------------------------------------------------- 1 | # Patch for k8s_job_template.yaml to enable S3 testing with Kubernetes secrets 2 | apiVersion: batch/v1 3 | kind: Job 4 | spec: 5 | template: 6 | spec: 7 | containers: 8 | - name: batch-worker 9 | env: 10 | - name: STORAGE_AWS_ACCESS_KEY_ID 11 | valueFrom: 12 | secretKeyRef: 13 | name: aibrix-s3-credentials 14 | key: access-key-id 15 | - name: STORAGE_AWS_SECRET_ACCESS_KEY 16 | valueFrom: 17 | secretKeyRef: 18 | name: aibrix-s3-credentials 19 | key: secret-access-key 20 | - name: STORAGE_AWS_REGION 21 | valueFrom: 22 | secretKeyRef: 23 | name: aibrix-s3-credentials 24 | key: region 25 | - name: STORAGE_AWS_BUCKET 26 | valueFrom: 27 | secretKeyRef: 28 | name: aibrix-s3-credentials 29 | key: bucket-name -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/setting/s3_secret_template.yaml: -------------------------------------------------------------------------------- 1 | # Kubernetes Secret template for S3 credentials 2 | # This is a template that can be populated using the secret_gen utility 3 | apiVersion: v1 4 | kind: 
Secret 5 | metadata: 6 | name: aibrix-s3-credentials 7 | namespace: default 8 | type: Opaque 9 | data: 10 | # Base64 encoded values will be populated by the test 11 | access-key-id: "" 12 | secret-access-key: "" 13 | region: "" 14 | bucket-name: "" -------------------------------------------------------------------------------- /python/aibrix/aibrix/metadata/setting/tos_secret_template.yaml: -------------------------------------------------------------------------------- 1 | # Kubernetes Secret template for TOS credentials 2 | # This is a template that can be populated using secret_gen utility 3 | apiVersion: v1 4 | kind: Secret 5 | metadata: 6 | name: aibrix-tos-credentials 7 | namespace: default 8 | type: Opaque 9 | data: 10 | # Base64 encoded values will be populated by the test 11 | access-key: "" 12 | secret-key: "" 13 | endpoint: "" 14 | region: "" 15 | bucket-name: "" -------------------------------------------------------------------------------- /python/aibrix/aibrix/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/openapi/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/openapi/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/protos/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/runtime/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """AIBrix Runtime - Artifact delegation and sidecar services.""" 16 | -------------------------------------------------------------------------------- /python/aibrix/aibrix/storage/types.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from enum import Enum 16 | 17 | 18 | class StorageType(Enum): 19 | """Supported storage types.""" 20 | 21 | LOCAL = "local" 22 | S3 = "s3" 23 | TOS = "tos" 24 | REDIS = "redis" 25 | AUTO = "auto" 26 | -------------------------------------------------------------------------------- /python/aibrix/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix/tests/batch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/python/aibrix/tests/batch/__init__.py -------------------------------------------------------------------------------- /python/aibrix/tests/batch/testdata/k8s_job_patch_unittest.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: batch/v1 2 | kind: Job 3 | metadata: 4 | name: batch-job-template 5 | namespace: default 6 | spec: 7 | template: 8 | spec: 9 | serviceAccountName: unittest-job-reader-sa # Use unittest-specific service account 10 | containers: 11 | - name: batch-worker 12 | image: aibrix/runtime:nightly 13 | - name: llm-engine 14 | image: aibrix/vllm-mock:nightly -------------------------------------------------------------------------------- /python/aibrix/tests/batch/testdata/s3_secret.yaml: -------------------------------------------------------------------------------- 1 | # Kubernetes Secret template for S3 credentials 2 | # This is a template that will be populated by the test with actual values 3 | apiVersion: v1 4 | kind: Secret 5 | metadata: 6 | name: aibrix-s3-credentials 7 | namespace: default 8 | type: Opaque 9 | data: 10 | # Base64 encoded values will be populated by the test 11 | access-key-id: "" 12 | secret-access-key: "" 13 | region: "" 14 | bucket-name: 
"" -------------------------------------------------------------------------------- /python/aibrix/tests/downloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/python/aibrix/tests/downloader/__init__.py -------------------------------------------------------------------------------- /python/aibrix/tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | End-to-end tests for Aibrix services. 17 | 18 | This package contains tests that validate complete workflows against real 19 | running services, as opposed to unit tests that test individual components 20 | in isolation. 
21 | """ 22 | -------------------------------------------------------------------------------- /python/aibrix/tests/gpu_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /python/aibrix/tests/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/python/aibrix/tests/metrics/__init__.py -------------------------------------------------------------------------------- /python/aibrix/tests/openapi/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/python/aibrix/tests/openapi/__init__.py -------------------------------------------------------------------------------- /python/aibrix/tests/openapi/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/python/aibrix/tests/openapi/engine/__init__.py -------------------------------------------------------------------------------- /python/aibrix/tests/storage/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/.gitignore: -------------------------------------------------------------------------------- 1 | # python 2 | __pycache__ 3 | 4 | # pytest 5 | .benchmarks 6 | .pytest_cache 7 | 8 | # ruff 9 | .ruff_cache 10 | 11 | # mypy 12 | .mypy_cache 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | cmake-build-*/ 18 | CMakeUserPresets.json 19 | develop-eggs/ 20 | dist/ 21 | downloads/ 22 | eggs/ 23 | .eggs/ 24 | lib/ 25 | lib64/ 26 | parts/ 27 | sdist/ 28 | var/ 29 | wheels/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | /.deps/ 36 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/common/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .async_base import AsyncBase 16 | from .cached_pyobject import CachedPyObjectBase 17 | from .object_pool import ObjectPool 18 | from .threading import ConditionalLock 19 | 20 | __all__ = ["AsyncBase", "ObjectPool", "CachedPyObjectBase", "ConditionalLock"] 21 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l1/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .l1_cache import L1Cache 16 | 17 | __all__ = ["L1Cache"] 18 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l1/eviction_policy/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base_eviction_policy import BaseEvictionPolicy, Functor 16 | from .fifo import FIFO 17 | from .lru import LRU 18 | from .s3fifo import S3FIFO 19 | 20 | __all__ = ["BaseEvictionPolicy", "Functor", "FIFO", "LRU", "S3FIFO"] 21 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l2/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .key_builders import KeyBuilder 16 | from .l2_cache import L2Cache 17 | from .marshallers import StringSerializer, TensorSerializer, ZstdCompressor 18 | 19 | __all__ = [ 20 | "KeyBuilder", 21 | "L2Cache", 22 | "StringSerializer", 23 | "TensorSerializer", 24 | "ZstdCompressor", 25 | ] 26 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l2/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .connector import ( 16 | Connector, 17 | ConnectorConfig, 18 | ConnectorFeature, 19 | ConnectorRegisterDescriptor, 20 | ) 21 | 22 | __all__ = [ 23 | "Connector", 24 | "ConnectorConfig", 25 | "ConnectorFeature", 26 | "ConnectorRegisterDescriptor", 27 | ] 28 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l2/marshallers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from .marshaller import BaseMarshaller, Marshaller 16 | from .string_serializer import StringSerializer 17 | from .tensor_serializer import TensorSerializer 18 | from .zstd_compressor import ZstdCompressor 19 | 20 | __all__ = [ 21 | "BaseMarshaller", 22 | "Marshaller", 23 | "StringSerializer", 24 | "TensorSerializer", 25 | "ZstdCompressor", 26 | ] 27 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/l2/placement/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .placement import BasePlacement, Member, Placement, PlacementConfig 16 | from .simple_placement import SimplePlacement 17 | 18 | __all__ = [ 19 | "BasePlacement", 20 | "SimplePlacement", 21 | "Member", 22 | "Placement", 23 | "PlacementConfig", 24 | ] 25 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/aibrix_kvcache/meta_service/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .meta_service import MetaService, MetaServiceConfig 16 | from .redis_meta_service import RedisMetaService 17 | 18 | __all__ = [ 19 | "MetaService", 20 | "MetaServiceConfig", 21 | "RedisMetaService", 22 | ] 23 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/csrc/attention/attention_dtypes.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | #include "dtype_bfloat16.cuh" 5 | #include "dtype_float16.cuh" 6 | #include "dtype_float32.cuh" 7 | #include "dtype_fp8.cuh" 8 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/csrc/attention/dtype_fp8.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "attention_generic.cuh" 4 | 5 | #include <stdint.h> 6 | #ifdef ENABLE_FP8 7 | #ifndef USE_ROCM 8 | #include <cuda_fp8.h> 9 | #endif // USE_ROCM 10 | #endif // ENABLE_FP8 11 | 12 | namespace vllm { 13 | 14 | enum class Fp8KVCacheDataType { 15 | kAuto = 0, 16 | kFp8E4M3 = 1, 17 | kFp8E5M2 = 2, 18 | }; 19 | 20 | // fp8 vector types for quantization of kv cache 21 | template <> struct Vec<uint8_t, 1> { 22 | using Type = uint8_t; 23 | }; 24 | 25 | template <> struct Vec<uint8_t, 2> { 26 | using Type = uint16_t; 27 | }; 28 | 29 | template <> struct Vec<uint8_t, 4> { 30 | using Type = uint32_t; 31 | }; 32 | 33 | template <> struct Vec<uint8_t, 8> { 34 | using Type = uint2; 35 | }; 36 | 37 | } // namespace vllm 38 |
-------------------------------------------------------------------------------- /python/aibrix_kvcache/csrc/cache.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <torch/all.h> 4 | 5 | #include <map> 6 | #include <vector> 7 | 8 | void reshape_and_cache_multi_layer( 9 | const std::vector<torch::Tensor> &offload_kv_cache_blocks, 10 | const std::vector<torch::Tensor> &kv_caches, torch::Tensor &slot_mapping, 11 | const int64_t block_size, const std::string &kv_cache_dtype, 12 | const std::vector<torch::Tensor> &k_scales, 13 | const std::vector<torch::Tensor> &v_scales, const std::string &layout_str); 14 | 15 | void reshape_and_offload_multi_layer( 16 | const std::vector<torch::Tensor> &offload_kv_cache_blocks, 17 | const std::vector<torch::Tensor> &kv_caches, torch::Tensor &slot_mapping, 18 | const int64_t block_size, const std::string &kv_cache_dtype, 19 | const std::vector<torch::Tensor> &k_scales, 20 | const std::vector<torch::Tensor> &v_scales, const std::string &layout_str); 21 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/requirements/build.txt: -------------------------------------------------------------------------------- 1 | # Should be mirrored in pyproject.toml 2 | build 3 | cmake>=3.26.1 4 | ninja 5 | packaging>=24.2 6 | setuptools>=77.0.3,<80.0.0 7 | setuptools-scm>=8 8 | torch>=2.7.0 9 | wheel 10 | jinja2>=3.1.6 11 | regex 12 | numpy >= 1.26.4 13 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/requirements/core.txt: -------------------------------------------------------------------------------- 1 | asyncio >= 3.4.3 2 | cityhash >= 0.4.8 3 | more-itertools >= 10.7.0 4 | msgspec >= 0.19.0 5 | netifaces 6 | numpy >= 1.26.4 7 | requests >= 2.26.0 8 | tqdm >= 4.67.1 9 | pydantic >= 2.10 10 | pyyaml 11 | setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 12 |
sortedcontainers>=2.4.0 13 | uvloop 14 | validators >= 0.35.0 15 | zstandard 16 | redis >= 6.0.0 17 | 18 | nvtx 19 | torch>=2.7.0 20 | 21 | # profiling 22 | pyroscope-io 23 | 24 | # vendor 25 | rocksdict 26 | 27 | # optional 28 | # infinistore >= 0.2.35 29 | # --extra-index-url https://scqq9isgq31i0fb8nt4eg.apigateway-cn-beijing.volceapi.com/simple/ 30 | # hpkv >= 0.0.1 31 | # pris >= 0.0.4 32 | # pyverbs 33 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/requirements/dev.txt: -------------------------------------------------------------------------------- 1 | -r lint.txt 2 | -r test.txt 3 | 4 | # Avoid adding requirements directly to this file. 5 | # Instead, modify the two files referenced above. 6 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/requirements/lint.txt: -------------------------------------------------------------------------------- 1 | # formatting 2 | pre-commit==4.0.1 3 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/requirements/test.txt: -------------------------------------------------------------------------------- 1 | # Common dependencies 2 | -r core.txt 3 | 4 | pytest 5 | pytest-asyncio 6 | pytest-mock 7 | pytest-timeout 8 | pytest-rerunfailures 9 | pytest-benchmark 10 | pytest-forked 11 | 12 | fakeredis==2.30.1 13 | 14 | vllm -------------------------------------------------------------------------------- /python/aibrix_kvcache/scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -eo pipefail 3 | 4 | # this stops git rev-parse from failing if we run this from the .git directory 5 | SUBDIR="python/aibrix_kvcache/" 6 | ROOT="$(git rev-parse --show-toplevel)/$SUBDIR" 7 | builtin cd "$ROOT" || exit 1 8 | 9 | check_command() { 10 | if ! 
command -v "$1" &> /dev/null; then 11 | echo "$1 is not installed, please run \`poetry install --no-root --with dev\`" 12 | exit 1 13 | fi 14 | } 15 | 16 | check_command pre-commit 17 | 18 | export SKIP="suggestion" 19 | pre-commit run --all-files 20 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2024 The Aibrix Team. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | -------------------------------------------------------------------------------- /python/aibrix_kvcache/tests/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | log_cli = true 3 | log_cli_level = INFO 4 | -------------------------------------------------------------------------------- /samples/adapter/adapter-api-key.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: model.aibrix.ai/v1alpha1 2 | kind: ModelAdapter 3 | metadata: 4 | name: qwen-code-lora-with-key 5 | namespace: default 6 | labels: 7 | model.aibrix.ai/name: "qwen-code-lora-with-key" 8 | model.aibrix.ai/port: "8000" 9 | spec: 10 | baseModel: qwen-coder-1-5b-instruct 11 | podSelector: 12 | matchLabels: 13 | model.aibrix.ai/name: qwen-coder-1-5b-instruct 14 | adapter.model.aibrix.ai/enabled: "true" 15 | artifactURL: huggingface://ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora 16 | additionalConfig: 17 | api-key: sk-kFJ12nKsFakefVmGpj3QzX65s4RbN2xJqWzPYCjYu7wT3BFake 18 | schedulerName: default 19 | -------------------------------------------------------------------------------- /samples/autoscaling/apa-resource.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: mock-llama2-7b-hpa1 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1' 11 | autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2' 12 | apa.autoscaling.aibrix.ai/window: 30s 13 | spec: 14 | scalingStrategy: APA 15 | minReplicas: 1 16 | maxReplicas: 8 17 | metricsSources: 18 | - metricSourceType: resource 19 | targetMetric: cpu 20 | targetValue: "50" 21 | scaleTargetRef: 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | name: mock-llama2-7b 25 | 
-------------------------------------------------------------------------------- /samples/autoscaling/apa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-distill-llama-8b-apa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1' 11 | autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2' 12 | apa.autoscaling.aibrix.ai/window: 30s 13 | spec: 14 | scalingStrategy: APA 15 | minReplicas: 1 16 | maxReplicas: 8 17 | metricsSources: 18 | - metricSourceType: pod 19 | protocolType: http 20 | port: '8000' 21 | path: metrics 22 | targetMetric: gpu_cache_usage_perc 23 | targetValue: '0.5' 24 | scaleTargetRef: 25 | apiVersion: apps/v1 26 | kind: Deployment 27 | name: deepseek-r1-distill-llama-8b 28 | -------------------------------------------------------------------------------- /samples/autoscaling/hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-distill-llama-8b-hpa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | spec: 10 | scalingStrategy: HPA 11 | minReplicas: 1 12 | maxReplicas: 10 13 | metricsSources: 14 | - metricSourceType: pod 15 | protocolType: http 16 | port: '8000' 17 | path: /metrics 18 | targetMetric: gpu_cache_usage_perc 19 | targetValue: '50' 20 | scaleTargetRef: 21 | apiVersion: apps/v1 22 | kind: Deployment 23 | name: deepseek-r1-distill-llama-8b 24 | -------------------------------------------------------------------------------- /samples/autoscaling/kpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: 
autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-distill-llama-8b-kpa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 3m 11 | spec: 12 | scalingStrategy: KPA 13 | minReplicas: 1 14 | maxReplicas: 8 15 | metricsSources: 16 | - metricSourceType: pod 17 | protocolType: http 18 | port: '8000' 19 | path: metrics 20 | targetMetric: gpu_cache_usage_perc 21 | targetValue: '0.5' 22 | scaleTargetRef: 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | name: deepseek-r1-distill-llama-8b 26 | -------------------------------------------------------------------------------- /samples/autoscaling/optimizer-kpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-distill-llama-8b-optimizer-scaling 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 11 | spec: 12 | scalingStrategy: KPA 13 | minReplicas: 1 14 | maxReplicas: 8 15 | metricsSources: 16 | - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 17 | metricSourceType: domain 18 | path: /metrics/default/deepseek-r1-distill-llama-8b 19 | protocolType: http 20 | targetMetric: vllm:deployment_replicas 21 | targetValue: "100" 22 | scaleTargetRef: 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | name: deepseek-r1-distill-llama-8b 26 | -------------------------------------------------------------------------------- /samples/deepseek-r1/deepseek-r1-autoscaling.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-671b-autoscaling 5 | 
namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | annotations: 9 | kpa.autoscaling.aibrix.ai/scale-down-delay: 2m 10 | spec: 11 | scalingStrategy: KPA 12 | minReplicas: 1 13 | maxReplicas: 4 14 | metricsSources: 15 | - metricSourceType: pod 16 | protocolType: http 17 | port: '8000' 18 | path: metrics 19 | targetMetric: gpu_cache_usage_perc 20 | targetValue: '50' 21 | scaleTargetRef: 22 | apiVersion: orchestration.aibrix.ai/v1alpha1 23 | kind: RayClusterFleet 24 | name: deepseek-r1-671b 25 | -------------------------------------------------------------------------------- /samples/deepseek-r1/deepseek-r1-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | model.aibrix.ai/name: deepseek-r1-671b 6 | prometheus-discovery: "true" 7 | annotations: 8 | prometheus.io/scrape: "true" 9 | prometheus.io/port: "8080" 10 | name: deepseek-r1-671b # Note: The Service name must match the label value `model.aibrix.ai/name` in the Deployment 11 | namespace: default 12 | spec: 13 | ports: 14 | - name: serve 15 | port: 8000 16 | protocol: TCP 17 | targetPort: 8000 18 | - name: http 19 | port: 8080 20 | protocol: TCP 21 | targetPort: 8080 22 | selector: 23 | model.aibrix.ai/name: deepseek-r1-671b 24 | ray.io/node-type: head 25 | type: ClusterIP -------------------------------------------------------------------------------- /samples/deepseek-r1/static/deepseek-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/samples/deepseek-r1/static/deepseek-dashboard.png -------------------------------------------------------------------------------- /samples/deepseek-r1/static/deepseek-deployment.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/samples/deepseek-r1/static/deepseek-deployment.png -------------------------------------------------------------------------------- /samples/disaggregation/vllm/router.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: disagg-proxy-router 5 | spec: 6 | containers: 7 | - name: disagg-proxy 8 | image: kvcache-container-image-hb2-cn-beijing.cr.volces.com/aibrix/vllm-openai:v0.10.0-cu128-nixl-v0.4.1-lmcache-0.3.2 9 | command: ["sh", "-c"] 10 | args: 11 | - | 12 | sleep 6000 13 | -------------------------------------------------------------------------------- /samples/heterogeneous/deepseek-coder-7b-l20-podautoscaler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | labels: 5 | app.kubernetes.io/managed-by: kustomize 6 | app.kubernetes.io/name: aibrix 7 | annotations: 8 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 9 | name: podautoscaler-deepseek-coder-7b-l20 10 | namespace: default 11 | spec: 12 | maxReplicas: 10 13 | metricsSources: 14 | - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 15 | metricSourceType: domain 16 | path: /metrics/default/deepseek-coder-7b-l20 17 | protocolType: http 18 | targetMetric: vllm:deployment_replicas 19 | targetValue: "100" # For stable workloads. Set to a fraction to tolerate bursts. 
20 | minReplicas: 0 21 | scaleTargetRef: 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | name: deepseek-coder-7b-l20 25 | scalingStrategy: KPA -------------------------------------------------------------------------------- /samples/heterogeneous/deepseek-coder-7b-v100-podautoscaler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | labels: 5 | app.kubernetes.io/managed-by: kustomize 6 | app.kubernetes.io/name: aibrix 7 | annotations: 8 | kpa.autoscaling.aibrix.ai/scale-down-delay: 0s 9 | name: podautoscaler-deepseek-coder-7b-v100 10 | namespace: default 11 | spec: 12 | maxReplicas: 10 13 | metricsSources: 14 | - endpoint: aibrix-gpu-optimizer.aibrix-system.svc.cluster.local:8080 15 | metricSourceType: domain 16 | path: /metrics/default/deepseek-coder-7b-v100 17 | protocolType: http 18 | targetMetric: vllm:deployment_replicas 19 | targetValue: "100" # For stable workloads. Set to a fraction to tolerate bursts. 
20 | minReplicas: 0 21 | scaleTargetRef: 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | name: deepseek-coder-7b-v100 25 | scalingStrategy: KPA 26 | -------------------------------------------------------------------------------- /samples/heterogeneous/kustomization.yaml: -------------------------------------------------------------------------------- 1 | kind: Kustomization 2 | 3 | resources: 4 | - deepseek-coder-7b-service.yaml 5 | - deepseek-coder-7b-l20-deployment.yaml 6 | - deepseek-coder-7b-l20-podautoscaler.yaml 7 | - deepseek-coder-7b-v100-deployment.yaml 8 | - deepseek-coder-7b-v100-podautoscaler.yaml 9 | 10 | patches: 11 | - patch: |- # Inline patch ('|-' block scalar): sets the model.aibrix.ai/min_replicas label to "1" on the v100 Deployment 12 | apiVersion: apps/v1 13 | kind: Deployment 14 | metadata: 15 | name: deepseek-coder-7b-v100 16 | labels: 17 | model.aibrix.ai/min_replicas: "1" 18 | target: 19 | kind: Deployment 20 | name: deepseek-coder-7b-v100 21 | - patch: |- # Inline patch ('|-' block scalar): sets the model.aibrix.ai/min_replicas label to "0" on the l20 Deployment 22 | apiVersion: apps/v1 23 | kind: Deployment 24 | metadata: 25 | name: deepseek-coder-7b-l20 26 | labels: 27 | model.aibrix.ai/min_replicas: "0" 28 | target: 29 | kind: Deployment 30 | name: deepseek-coder-7b-l20 31 | 32 | apiVersion: kustomize.config.k8s.io/v1beta1 -------------------------------------------------------------------------------- /samples/kvcache/vineyard/kvcache-tp.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: KVCache 3 | metadata: 4 | name: deepseek-coder-33b-kvcache 5 | namespace: default 6 | annotations: 7 | # kvcache.orchestration.aibrix.ai/node-affinity-gpu-type: NVIDIA-L20 8 | kvcache.orchestration.aibrix.ai/pod-affinity-workload: aibrix-model-deepseek-coder-33b-instruct 9 | spec: 10 | mode: centralized 11 | service: 12 | type: ClusterIP 13 | ports: 14 | - name: service 15 | port: 9600 16 | targetPort: 9600 17 | 
protocol: TCP 18 | cache: 19 | image: aibrix/vineyardd:20241120 20 | imagePullPolicy: IfNotPresent 21 | resources: 22 | requests: 23 | cpu: "2000m" 24 | memory: "4Gi" 25 | limits: 26 | cpu: "2000m" 27 | memory: "4Gi" -------------------------------------------------------------------------------- /samples/kvcache/vineyard/kvcache.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: orchestration.aibrix.ai/v1alpha1 2 | kind: KVCache 3 | metadata: 4 | name: deepseek-coder-7b-kvcache 5 | namespace: default 6 | annotations: 7 | kvcache.orchestration.aibrix.ai/pod-affinity-workload: deepseek-coder-7b-instruct 8 | spec: 9 | mode: centralized 10 | service: 11 | type: ClusterIP 12 | ports: 13 | - name: service 14 | port: 9600 15 | targetPort: 9600 16 | protocol: TCP 17 | cache: 18 | image: aibrix/vineyardd:20241120 19 | imagePullPolicy: IfNotPresent 20 | resources: 21 | requests: 22 | cpu: "2000m" 23 | memory: "4Gi" 24 | limits: 25 | cpu: "2000m" 26 | memory: "4Gi" 27 | -------------------------------------------------------------------------------- /samples/volcano-engine/autoscaler.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-distill-llama-8b-kpa 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | app.kubernetes.io/managed-by: kustomize 9 | annotations: 10 | kpa.autoscaling.aibrix.ai/scale-down-delay: 5m 11 | spec: 12 | scalingStrategy: KPA 13 | minReplicas: 1 14 | maxReplicas: 8 15 | metricsSources: 16 | - metricSourceType: pod 17 | protocolType: http 18 | port: '8000' 19 | path: metrics 20 | targetMetric: gpu_cache_usage_perc 21 | targetValue: '0.3' 22 | scaleTargetRef: 23 | apiVersion: apps/v1 24 | kind: Deployment 25 | name: deepseek-r1-distill-llama-8b 26 | -------------------------------------------------------------------------------- 
/samples/volcano-engine/hpa-r1.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling.aibrix.ai/v1alpha1 2 | kind: PodAutoscaler 3 | metadata: 4 | name: deepseek-r1-671b-autoscaling 5 | namespace: default 6 | labels: 7 | app.kubernetes.io/name: aibrix 8 | spec: 9 | scalingStrategy: HPA 10 | minReplicas: 1 11 | maxReplicas: 4 12 | metricsSources: 13 | - metricSourceType: pod 14 | protocolType: http 15 | port: '8000' 16 | path: metrics 17 | targetMetric: gpu_cache_usage_perc 18 | targetValue: '50' # NOTE(review): the KPA samples target this metric as a 0-1 fraction ('0.5', '0.3'); confirm '50' is the intended scale for HPA 19 | scaleTargetRef: 20 | apiVersion: orchestration.aibrix.ai/v1alpha1 21 | kind: RayClusterFleet 22 | name: deepseek-r1-671b 23 | -------------------------------------------------------------------------------- /test/regression/v0.2.1/benchmark_output_20250323.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/test/regression/v0.2.1/benchmark_output_20250323.zip -------------------------------------------------------------------------------- /test/regression/v0.2.1/client.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: benchmark-client 5 | namespace: default 6 | spec: 7 | containers: 8 | - name: debug-container 9 | image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/ray:2.10.0 10 | command: ["sh", "-c", "sleep infinity"] -------------------------------------------------------------------------------- /test/regression/v0.3.0/benchmark_output_20250519.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/test/regression/v0.3.0/benchmark_output_20250519.zip -------------------------------------------------------------------------------- 
/test/regression/v0.3.0/client.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Pod 3 | metadata: 4 | name: benchmark-client 5 | namespace: default 6 | spec: 7 | containers: 8 | - name: debug-container 9 | image: aibrix-container-registry-cn-beijing.cr.volces.com/aibrix/ray:2.10.0 10 | command: ["sh", "-c", "sleep infinity"] -------------------------------------------------------------------------------- /test/regression/v0.3.0/figure_ttft_generation_time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/aibrix/bda162d814f96b69ec08f6dafe428f1d97cabbf2/test/regression/v0.3.0/figure_ttft_generation_time.png -------------------------------------------------------------------------------- /test/regression/v0.3.0/lmcache_helm_naive.yaml: -------------------------------------------------------------------------------- 1 | servingEngineSpec: 2 | runtimeClassName: "" 3 | modelSpec: 4 | - name: "llama3" 5 | repository: "vllm/vllm-openai" 6 | tag: "latest" 7 | modelURL: "meta-llama/Llama-3.1-8B-Instruct" 8 | replicaCount: 8 9 | requestCPU: 10 10 | requestMemory: "150Gi" 11 | requestGPU: 1 12 | pvcStorage: "50Gi" 13 | pvcMatchLabels: 14 | model: "llama3" 15 | pvcAccessMode: 16 | - ReadWriteOnce 17 | vllmConfig: 18 | enableChunkedPrefill: false 19 | enablePrefixCaching: true 20 | maxModelLen: 32000 21 | extraArgs: ["--disable-log-requests", "--swap-space", "0"] # container args must be strings, so the numeric value is quoted 22 | 23 | lmcacheConfig: 24 | enabled: false 25 | 26 | hf_token: 27 | 28 | routerSpec: 29 | resources: 30 | requests: 31 | cpu: "2" 32 | memory: "8G" 33 | limits: 34 | cpu: "2" 35 | memory: "8G" 36 | routingLogic: "session" 37 | sessionKey: "x-user-id" 38 | -------------------------------------------------------------------------------- /test/regression/v0.4.0/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | 
name: aibrix-regression-test 3 | description: Helm chart for AIBrix regression testing experiments 4 | type: application 5 | version: 0.1.0 6 | appVersion: "0.4.0" 7 | maintainers: 8 | - name: AIBrix Team 9 | keywords: 10 | - aibrix 11 | - llm 12 | - inference 13 | - testing 14 | home: https://github.com/vllm-project/aibrix 15 | --------------------------------------------------------------------------------