├── .codespell-ignore ├── .github ├── ISSUE_TEMPLATE │ ├── 001_feature_request.yaml │ ├── 002_bug_report.yaml │ └── config.yml ├── PULL_REQUEST_TEMPLATE.md ├── curl-02-two-pods.sh ├── curl-04-multiple-models.sh ├── curl-05-secure-vllm.sh ├── port-forward.sh ├── template-chatml.jinja ├── values-01-2pods-minimal-example.yaml ├── values-04-multiple-models.yaml ├── values-05-secure-vllm.yaml ├── values-06-session-routing.yaml ├── values-07-prefix-routing.yaml ├── values-08-roundrobin-routing.yaml ├── values-09-kvaware-routing.yaml ├── values-10-disagg-prefill.yaml └── workflows │ ├── ci.yml │ ├── functionality-helm-chart.yml │ ├── helm-release.yml │ ├── matchers │ └── actionlint.json │ ├── operator-test.yml │ ├── pre-commit.yml │ ├── router-docker-release.yml │ ├── router-e2e-test.yml │ └── scheduled-release.yml ├── .gitignore ├── .hadolint.yaml ├── .markdownlint.yaml ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── artifacthub-repo.yml ├── benchmarks └── multi-round-qa │ ├── README.md │ ├── data_preprocessing.py │ ├── model.yaml │ ├── multi-round-qa.py │ ├── multi-round.png │ ├── plot.py │ ├── prepare_sharegpt_data.sh │ ├── requirements.txt │ ├── run.sh │ ├── run_aws.sh │ ├── run_single.sh │ ├── utils.py │ └── warmup_single.sh ├── community └── community-event.md ├── deployment_on_cloud ├── aws │ ├── Readme.md │ ├── clean_up.sh │ ├── entry_point.sh │ ├── production_stack_specification.yaml │ └── set_up_efs.sh ├── azure │ ├── README.md │ ├── entry_point.sh │ └── nvidia-device-plugin-ds.yaml └── gcp │ ├── OPT125_CPU │ ├── README.md │ ├── cleanup_ql.sh │ ├── entrypoint_ql.sh │ └── production_stack_specification_ql.yaml │ ├── README.md │ ├── clean_up_basic.sh │ ├── entry_point_basic.sh │ └── production_stack_specification_basic.yaml ├── docker ├── Dockerfile ├── Dockerfile.kvaware └── Dockerfile.sidecar ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements-docs.txt └── source │ ├── assets │ ├── graf.png │ ├── multi-round.png │ ├── output.ico │ ├── prodarch.png │ ├── prodstack.png │ ├── prodstack_icon.png │ ├── vllm-logo-only-light.ico │ ├── vllm-logo-only-light.png │ └── vllm-logo-text-light.png │ ├── community │ └── meetings.rst │ ├── conf.py │ ├── deployment │ ├── crd.rst │ ├── gateway-inference-extension.rst │ ├── helm.rst │ └── index.rst │ ├── developer_guide │ ├── contributing.rst │ └── docker.rst │ ├── getting_started │ ├── faq.rst │ ├── prerequisite.rst │ └── quickstart.rst │ ├── index.rst │ └── use_cases │ ├── autoscaling-keda.rst │ ├── benchmarking.rst │ ├── disaggregated-prefill.rst │ ├── distributed-tracing.rst │ ├── kv-cache-aware-routing.rst │ ├── pipeline-parallelism-kuberay.rst │ ├── prefix-aware-routing.rst │ ├── semantic-router-integration.rst │ ├── sharing-kv-cache.rst │ ├── sleep-wakeup-mode.rst │ └── tool-enabled-installation.rst ├── examples ├── batch.jsonl ├── disaggregated_prefill │ ├── pd.yaml │ ├── router.sh │ ├── start_decode.sh │ └── start_prefill.sh └── openai_api_client_batch.py ├── helm ├── .helmignore ├── Chart.yaml ├── README.md ├── crds │ └── crd-lora-adapter.yaml ├── ct.yaml ├── lintconf.yaml ├── templates │ ├── _helpers.tpl │ ├── configmap.yaml │ ├── deployment-cache-server.yaml │ ├── deployment-lora-adapter.yaml │ ├── deployment-lora-controller.yaml │ ├── deployment-router.yaml │ ├── deployment-vllm-multi.yaml │ ├── hpa-router.yaml │ ├── ingress.yaml │ ├── poddisruptionbudget.yaml │ ├── pvc.yaml │ ├── ray-cluster.yaml │ ├── role.yaml │ ├── rolebinding.yaml │ ├── route.yaml │ ├── secrets.yaml │ ├── service-cache-server.yaml │ ├── service-router.yaml │ ├── service-vllm.yaml │ ├── serviceaccount.yaml │ └── shared-storage.yaml ├── test.sh ├── tests │ ├── imagePullSecrets_test.yaml │ └── runtimeClassName_test.yaml ├── values-example.yaml ├── values.schema.json └── values.yaml ├── observability ├── README.md ├── install.sh ├── kube-prom-stack.yaml ├── lmcache-dashboard-cm.yaml ├── prom-adapter.yaml ├── uninstall.sh └── vllm-dashboard.json ├── operator ├── .dockerignore ├── Dockerfile ├── Makefile ├── PROJECT ├── api │ └── v1alpha1 │ │ ├── cacheserver_types.go │ │ ├── common.go │ │ ├── groupversion_info.go │ │ ├── loraadapter_types.go │ │ ├── vllmrouter_types.go │ │ ├── vllmruntime_types.go │ │ └── zz_generated.deepcopy.go ├── cmd │ └── main.go ├── config │ ├── crd │ │ ├── bases │ │ │ ├── production-stack.vllm.ai_cacheservers.yaml │ │ │ ├── production-stack.vllm.ai_loraadapters.yaml │ │ │ ├── production-stack.vllm.ai_vllmrouters.yaml │ │ │ └── production-stack.vllm.ai_vllmruntimes.yaml │ │ ├── kustomization.yaml │ │ └── kustomizeconfig.yaml │ ├── default.yaml │ ├── default │ │ ├── cert_metrics_manager_patch.yaml │ │ ├── kustomization.yaml │ │ ├── manager_image_patch.yaml │ │ ├── manager_metrics_patch.yaml │ │ └── metrics_service.yaml │ ├── manager │ │ ├── deployment.yaml │ │ ├── kustomization.yaml │ │ └── namespace.yaml │ ├── network-policy │ │ ├── allow-metrics-traffic.yaml │ │ └── kustomization.yaml │ ├── prometheus │ │ ├── kustomization.yaml │ │ ├── monitor.yaml │ │ └── monitor_tls_patch.yaml │ ├── rbac │ │ ├── cacheserver_admin_role.yaml │ │ ├── cacheserver_editor_role.yaml │ │ ├── cacheserver_viewer_role.yaml │ │ ├── kustomization.yaml │ │ ├── leader_election_role.yaml │ │ ├── leader_election_role_binding.yaml │ │ ├── loraadapter_admin_role.yaml │ │ ├── loraadapter_editor_role.yaml │ │ ├── loraadapter_viewer_role.yaml │ │ ├── metrics_auth_role.yaml │ │ ├── metrics_auth_role_binding.yaml │ │ ├── metrics_reader_role.yaml │ │ ├── role.yaml │ │ ├── role_binding.yaml │ │ ├── service_account.yaml │ │ ├── vllmrouter_admin_role.yaml │ │ ├── vllmrouter_editor_role.yaml │ │ ├── vllmrouter_viewer_role.yaml │ │ ├── vllmruntime_admin_role.yaml │ │ ├── vllmruntime_editor_role.yaml │ │ └── vllmruntime_viewer_role.yaml │ └── samples │ │ ├── kustomization.yaml │ │ ├── production-stack_v1alpha1_cacheserver.yaml │ │ ├── production-stack_v1alpha1_loraadapter.yaml │ │ ├── production-stack_v1alpha1_vllmrouter.yaml │ │ └── production-stack_v1alpha1_vllmruntime.yaml ├── go.mod ├── go.sum ├── hack │ └── boilerplate.go.txt ├── internal │ └── controller │ │ ├── cacheserver_controller.go │ │ ├── cacheserver_controller_test.go │ │ ├── loraadapter_controller.go │ │ ├── loraadapter_controller_test.go │ │ ├── suite_test.go │ │ ├── vllmrouter_controller.go │ │ ├── vllmrouter_controller_test.go │ │ ├── vllmruntime_controller.go │ │ └── vllmruntime_controller_test.go └── test │ ├── e2e │ ├── e2e_suite_test.go │ └── e2e_test.go │ └── utils │ └── utils.go ├── proposals ├── TEMPLATE.md ├── imgs │ └── lora-k8s-arch.png └── lora-k8s-support.md ├── pyproject.toml ├── requirements-test.txt ├── scripts ├── huggingface_downloader.py └── setup_vllm_templates.sh ├── src ├── examples │ ├── example_file_upload.py │ └── tool_calling_example.py ├── gateway_inference_extension │ ├── Dockerfile │ ├── README.md │ ├── configs │ │ ├── gateway │ │ │ ├── gke │ │ │ │ ├── gateway.yaml │ │ │ │ ├── gcp-backend-policy.yaml │ │ │ │ ├── healthcheck.yaml │ │ │ │ └── httproute.yaml │ │ │ ├── istio │ │ │ │ ├── destination-rule.yaml │ │ │ │ ├── gateway.yaml │ │ │ │ └── httproute.yaml │ │ │ └── kgateway │ │ │ │ ├── gateway.yaml │ │ │ │ └── httproute.yaml │ │ ├── httproute.yaml │ │ ├── inferencemodel.yaml │ │ ├── inferencepool-resources.yaml │ │ └── vllm │ │ │ ├── gpu-deployment.yaml │ │ │ └── vllm-runtime.yaml │ ├── delete.sh │ ├── install.sh │ ├── kv_aware_picker.go │ ├── prefix_aware_picker.go │ ├── roundrobin_picker.go │ └── scheduler.patch ├── tests │ ├── README.md │ ├── __init__.py │ ├── perftest │ │ ├── clean-up.sh │ │ ├── fake-openai-server.py │ │ ├── request_generator.py │ │ ├── run-multi-server.sh │ │ └── run-server.sh │ ├── requirements.txt │ ├── test-openai.py │ ├── test_file_storage.py │ ├── test_parser.py │ ├── test_roundrobin_router.py │ ├── test_session_router.py │ ├── test_singleton.py │ ├── test_static_service_discovery.py │ └── test_utils.py └── vllm_router │ ├── README.md │ ├── __init__.py │ ├── aiohttp_client.py │ ├── app.py │ ├── dynamic_config.py │ ├── experimental │ ├── README.md │ ├── __init__.py │ ├── feature_gates.py │ ├── pii │ │ ├── __init__.py │ │ ├── analyzers │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── factory.py │ │ │ ├── presidio.py │ │ │ └── regex.py │ │ ├── config.py │ │ ├── middleware.py │ │ ├── requirements.txt │ │ ├── test_pii.sh │ │ └── types.py │ ├── semantic_cache.py │ ├── semantic_cache │ │ ├── README.md │ │ ├── __init__.py │ │ ├── db_adapters │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── faiss_adapter.py │ │ ├── requirements.txt │ │ ├── semantic_cache.py │ │ └── test_cache.sh │ └── semantic_cache_integration.py │ ├── log.py │ ├── parsers │ ├── __init__.py │ ├── parser.py │ └── yaml_utils.py │ ├── perf-test.sh │ ├── prefix │ └── hashtrie.py │ ├── protocols.py │ ├── requirements.txt │ ├── routers │ ├── __init__.py │ ├── batches_router.py │ ├── files_router.py │ ├── main_router.py │ ├── metrics_router.py │ └── routing_logic.py │ ├── run-router.sh │ ├── service_discovery.py │ ├── services │ ├── __init__.py │ ├── batch_service │ │ ├── __init__.py │ │ ├── batch.py │ │ ├── local_processor.py │ │ └── processor.py │ ├── callbacks_service │ │ ├── __init__.py │ │ ├── callbacks.py │ │ └── custom_callbacks.py │ ├── files_service │ │ ├── __init__.py │ │ ├── file_storage.py │ │ ├── openai_files.py │ │ └── storage.py │ ├── metrics_service │ │ ├── __init__.py │ │ └── prometheus_gauge.py │ └── request_service │ │ ├── __init__.py │ │ ├── request.py │ │ └── rewriter.py │ ├── stats │ ├── __init__.py │ ├── engine_stats.py │ ├── log_stats.py │ └── request_stats.py │ ├── utils.py │ └── version.py ├── tests └── e2e │ ├── run-k8s-routing-test.sh │ ├── run-static-discovery-routing-test.sh │ ├── stress-test.sh │ ├── test-crds.sh │ ├── test-routing.py │ ├── test-sticky-routing.sh │ ├── wait-for-backends.sh │ └── wait-for-pods.sh ├── tutorials ├── 00-a-install-multinode-kubernetes-env.md ├── 00-b-install-kuberay-operator.md ├── 00-c-install-kind-kubernetes-env.md ├── 00-install-kubernetes-env.md ├── 01-b-minimal-helm-installation-amd.md ├── 01-minimal-helm-installation.md ├── 02-basic-vllm-config.md ├── 03-load-model-from-pv.md ├── 04-launch-multiple-model.md ├── 05-offload-kv-cache.md ├── 06-remote-shared-kv-cache.md ├── 07-benchmark-multi-round-qa-single-gpu.md ├── 08-benchmark-multi-round-qa-multi-gpu.md ├── 09-lora-enabled-installation.md ├── 10-horizontal-autoscaling.md ├── 11-secure-vllm-serve.md ├── 12-distributed-tracing.md ├── 13-tool-enabled-installation.md ├── 14-vllm-v1.md ├── 15-basic-pipeline-parallel.md ├── 16-disagg-prefill.md ├── 17-kv-aware-routing.md ├── 18-prefix-aware-routing.md ├── 19-engine-sleep-wakeup-mode.md ├── 20-keda-autoscaling.md ├── 21-gateway-inference-extension.md ├── 22-gateway-api-for-ingress.md ├── 23-whisper-api-transcription.md ├── 24-semantic-router-integration.md ├── README.md ├── assets │ ├── example-04-openai.py │ ├── example-10-load-generator.py │ ├── gke-example │ │ └── values-01-offload-kv-cache-local-disk.yaml │ ├── hpa-10.yaml │ ├── otel-example │ │ ├── jaeger-collector.yaml │ │ ├── jaeger-query.yaml │ │ ├── jaeger.yaml │ │ ├── otel-collector-config.yaml │ │ └── otel-collector.yaml │ ├── pv-03.yaml │ ├── values-01-2pods-minimal-example.yaml │ ├── values-01-minimal-amd-example.yaml │ ├── values-01-minimal-example.yaml │ ├── values-02-basic-config.yaml │ ├── values-03-match-pv.yaml │ ├── values-04-multiple-models.yaml │ ├── values-05-cpu-offloading.yaml │ ├── values-06-shared-storage.yaml │ ├── values-09-lora-helm.yaml │ ├── values-09-lora-operator.yaml │ ├── values-11-secure-vllm.yaml │ ├── values-12-otel-vllm.yaml │ ├── values-13-tool-enabled.yaml │ ├── values-14-vllm-v1.yaml │ ├── values-15-a-minimal-pipeline-parallel-example-raycluster.yaml │ ├── values-15-b-minimal-pipeline-parallel-example-multiple-modelspec.yaml │ ├── values-16-disagg-prefill.yaml │ ├── values-17-kv-aware.yaml │ ├── values-18-prefix-aware.yaml │ ├── values-19-sleep-mode-aware.yaml │ ├── values-20-keda.yaml │ ├── values-22-gateway-api.yaml │ └── values-23-SR.yaml ├── cloud_deployments │ ├── 01-AWS-EKS-deployment.md │ ├── 02-GCP-GKE-deployment.md │ ├── 03-Azure-AKS-deployment.md │ └── 04-GCP-GKE-lmcache-local-disk.md └── terraform │ ├── aks │ ├── Makefile │ ├── README.md │ ├── azure-infrastructure │ │ ├── .terraform.lock.hcl │ │ ├── cluster.tf │ │ ├── create-config.tf │ │ ├── outputs.tf │ │ ├── provider.tf │ │ ├── ssh.tf │ │ └── variables.tf │ ├── production-stack │ │ ├── .terraform.lock.hcl │ │ ├── helm.tf │ │ ├── provider.tf │ │ └── variables.tf │ └── production_stack_specification.yaml │ ├── eks │ ├── .gitignore │ ├── README.md │ ├── cluster-tools.tf │ ├── config │ │ ├── calico-values.tpl │ │ ├── efs_pvct.yaml │ │ ├── kubeconfig.tpl │ │ ├── vllm-dashboard.json │ │ └── vllm-svc-monitor.yml │ ├── datasources.tf │ ├── env-vars.template │ ├── iam_role.tf │ ├── main.tf │ ├── modules │ │ └── llm-stack │ │ │ └── helm │ │ │ ├── cpu │ │ │ ├── cpu-tinyllama-light-ingress.tpl │ │ │ ├── cpu-tinyllama-light-local.tpl │ │ │ └── cpu-tinyllama-light.tpl │ │ │ └── gpu │ │ │ ├── gpu-operator-values.yaml │ │ │ └── gpu-tinyllama-light-ingress.tpl │ ├── network.tf │ ├── output.tf │ ├── provider.tf │ ├── storage.tf │ ├── terraform.tfvars.template │ ├── variables.tf │ └── vllm-production-stack.tf │ ├── gke │ ├── Makefile │ ├── README.md │ ├── gke-infrastructure │ │ ├── .terraform.lock.hcl │ │ ├── backend.tf │ │ ├── cluster.tf │ │ ├── node_pools.tf │ │ ├── outputs.tf │ │ ├── providers.tf │ │ ├── services.tf │ │ └── variables.tf │ ├── production-stack │ │ ├── .terraform.lock.hcl │ │ ├── backend.tf │ │ ├── helm.tf │ │ ├── providers.tf │ │ └── variables.tf │ └── production_stack_specification.yaml │ └── nebius │ ├── .gitignore │ ├── README.md │ ├── cluster-tools.tf │ ├── config │ ├── helm │ │ └── kube-prome-stack.yaml │ ├── kubeconfig.tpl │ ├── llm-stack │ │ └── helm │ │ │ ├── cpu │ │ │ └── cpu-tinyllama-light-ingress-nebius.tpl │ │ │ └── gpu │ │ │ ├── gpu-operator-values.yaml │ │ │ └── gpu-tinyllama-light-ingress-nebius.tpl │ ├── manifests │ │ └── letsencrypt-issuer.yaml │ └── vllm-dashboard.json │ ├── data_sources.tf │ ├── env-vars.template │ ├── main.tf │ ├── network.tf │ ├── output.tf │ ├── provider.tf │ ├── terraform.tfvars.template │ ├── variables.tf │ └── vllm-production-stack.tf ├── utils ├── helm-chart-test-values.sh ├── init-nvidia-gpu-setup-k8s.sh ├── install-calico.sh ├── install-cri-o.sh ├── install-helm.sh ├── install-kind-cluster.sh ├── install-kind.sh ├── install-kubeadm.sh ├── install-kubectl.sh ├── install-kuberay.sh └── install-minikube-cluster.sh └── uv.lock /.codespell-ignore: -------------------------------------------------------------------------------- 1 | AKS 2 | aks 3 | NotIn 4 | AfterAll 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/001_feature_request.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/ISSUE_TEMPLATE/001_feature_request.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/002_bug_report.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/ISSUE_TEMPLATE/002_bug_report.yaml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/ISSUE_TEMPLATE/config.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/curl-02-two-pods.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/curl-02-two-pods.sh -------------------------------------------------------------------------------- /.github/curl-04-multiple-models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/curl-04-multiple-models.sh -------------------------------------------------------------------------------- /.github/curl-05-secure-vllm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/curl-05-secure-vllm.sh -------------------------------------------------------------------------------- /.github/port-forward.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/port-forward.sh -------------------------------------------------------------------------------- /.github/template-chatml.jinja: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/template-chatml.jinja -------------------------------------------------------------------------------- /.github/values-01-2pods-minimal-example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-01-2pods-minimal-example.yaml -------------------------------------------------------------------------------- /.github/values-04-multiple-models.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-04-multiple-models.yaml -------------------------------------------------------------------------------- /.github/values-05-secure-vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-05-secure-vllm.yaml -------------------------------------------------------------------------------- /.github/values-06-session-routing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-06-session-routing.yaml -------------------------------------------------------------------------------- /.github/values-07-prefix-routing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-07-prefix-routing.yaml -------------------------------------------------------------------------------- /.github/values-08-roundrobin-routing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-08-roundrobin-routing.yaml -------------------------------------------------------------------------------- /.github/values-09-kvaware-routing.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-09-kvaware-routing.yaml -------------------------------------------------------------------------------- /.github/values-10-disagg-prefill.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/values-10-disagg-prefill.yaml -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/ci.yml -------------------------------------------------------------------------------- /.github/workflows/functionality-helm-chart.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/functionality-helm-chart.yml -------------------------------------------------------------------------------- /.github/workflows/helm-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/helm-release.yml -------------------------------------------------------------------------------- /.github/workflows/matchers/actionlint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/matchers/actionlint.json -------------------------------------------------------------------------------- /.github/workflows/operator-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/operator-test.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/pre-commit.yml -------------------------------------------------------------------------------- /.github/workflows/router-docker-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/router-docker-release.yml -------------------------------------------------------------------------------- /.github/workflows/router-e2e-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/router-e2e-test.yml -------------------------------------------------------------------------------- /.github/workflows/scheduled-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.github/workflows/scheduled-release.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.gitignore -------------------------------------------------------------------------------- /.hadolint.yaml: -------------------------------------------------------------------------------- 1 | ignored: 2 | - DL3013 # Pin versions in pip. 3 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.markdownlint.yaml -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/README.md -------------------------------------------------------------------------------- /artifacthub-repo.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/artifacthub-repo.yml -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/README.md -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/data_preprocessing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/data_preprocessing.py -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/model.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/model.yaml -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/multi-round-qa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/multi-round-qa.py -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/multi-round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/multi-round.png -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/plot.py -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/prepare_sharegpt_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/prepare_sharegpt_data.sh -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | pandas 3 | tqdm 4 | -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/run.sh -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/run_aws.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/run_aws.sh -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/run_single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/run_single.sh -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/utils.py -------------------------------------------------------------------------------- /benchmarks/multi-round-qa/warmup_single.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/benchmarks/multi-round-qa/warmup_single.sh -------------------------------------------------------------------------------- /community/community-event.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/community/community-event.md -------------------------------------------------------------------------------- /deployment_on_cloud/aws/Readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/aws/Readme.md -------------------------------------------------------------------------------- /deployment_on_cloud/aws/clean_up.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/aws/clean_up.sh -------------------------------------------------------------------------------- /deployment_on_cloud/aws/entry_point.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/aws/entry_point.sh -------------------------------------------------------------------------------- /deployment_on_cloud/aws/production_stack_specification.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/aws/production_stack_specification.yaml -------------------------------------------------------------------------------- /deployment_on_cloud/aws/set_up_efs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/aws/set_up_efs.sh -------------------------------------------------------------------------------- /deployment_on_cloud/azure/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/azure/README.md -------------------------------------------------------------------------------- /deployment_on_cloud/azure/entry_point.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/azure/entry_point.sh -------------------------------------------------------------------------------- /deployment_on_cloud/azure/nvidia-device-plugin-ds.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/azure/nvidia-device-plugin-ds.yaml -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/OPT125_CPU/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/OPT125_CPU/README.md -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/OPT125_CPU/cleanup_ql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/OPT125_CPU/cleanup_ql.sh -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/OPT125_CPU/entrypoint_ql.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/OPT125_CPU/entrypoint_ql.sh -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/OPT125_CPU/production_stack_specification_ql.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/OPT125_CPU/production_stack_specification_ql.yaml -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/README.md -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/clean_up_basic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/clean_up_basic.sh -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/entry_point_basic.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/entry_point_basic.sh -------------------------------------------------------------------------------- /deployment_on_cloud/gcp/production_stack_specification_basic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/deployment_on_cloud/gcp/production_stack_specification_basic.yaml -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/Dockerfile.kvaware: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docker/Dockerfile.kvaware -------------------------------------------------------------------------------- /docker/Dockerfile.sidecar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docker/Dockerfile.sidecar -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/source/assets/graf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/graf.png -------------------------------------------------------------------------------- /docs/source/assets/multi-round.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/multi-round.png -------------------------------------------------------------------------------- /docs/source/assets/output.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/output.ico -------------------------------------------------------------------------------- /docs/source/assets/prodarch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/prodarch.png -------------------------------------------------------------------------------- /docs/source/assets/prodstack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/prodstack.png -------------------------------------------------------------------------------- /docs/source/assets/prodstack_icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/prodstack_icon.png -------------------------------------------------------------------------------- /docs/source/assets/vllm-logo-only-light.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/vllm-logo-only-light.ico -------------------------------------------------------------------------------- /docs/source/assets/vllm-logo-only-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/vllm-logo-only-light.png -------------------------------------------------------------------------------- /docs/source/assets/vllm-logo-text-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/assets/vllm-logo-text-light.png -------------------------------------------------------------------------------- /docs/source/community/meetings.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/community/meetings.rst -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/deployment/crd.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/deployment/crd.rst -------------------------------------------------------------------------------- /docs/source/deployment/gateway-inference-extension.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/deployment/gateway-inference-extension.rst -------------------------------------------------------------------------------- /docs/source/deployment/helm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/deployment/helm.rst -------------------------------------------------------------------------------- /docs/source/deployment/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/deployment/index.rst -------------------------------------------------------------------------------- /docs/source/developer_guide/contributing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/developer_guide/contributing.rst -------------------------------------------------------------------------------- /docs/source/developer_guide/docker.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/developer_guide/docker.rst -------------------------------------------------------------------------------- /docs/source/getting_started/faq.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/getting_started/faq.rst -------------------------------------------------------------------------------- /docs/source/getting_started/prerequisite.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/getting_started/prerequisite.rst -------------------------------------------------------------------------------- /docs/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/getting_started/quickstart.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/use_cases/autoscaling-keda.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/autoscaling-keda.rst -------------------------------------------------------------------------------- /docs/source/use_cases/benchmarking.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/benchmarking.rst -------------------------------------------------------------------------------- /docs/source/use_cases/disaggregated-prefill.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/disaggregated-prefill.rst -------------------------------------------------------------------------------- /docs/source/use_cases/distributed-tracing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/distributed-tracing.rst -------------------------------------------------------------------------------- /docs/source/use_cases/kv-cache-aware-routing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/kv-cache-aware-routing.rst -------------------------------------------------------------------------------- /docs/source/use_cases/pipeline-parallelism-kuberay.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/pipeline-parallelism-kuberay.rst -------------------------------------------------------------------------------- /docs/source/use_cases/prefix-aware-routing.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/prefix-aware-routing.rst -------------------------------------------------------------------------------- /docs/source/use_cases/semantic-router-integration.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/semantic-router-integration.rst -------------------------------------------------------------------------------- /docs/source/use_cases/sharing-kv-cache.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/sharing-kv-cache.rst -------------------------------------------------------------------------------- /docs/source/use_cases/sleep-wakeup-mode.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/sleep-wakeup-mode.rst -------------------------------------------------------------------------------- /docs/source/use_cases/tool-enabled-installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/docs/source/use_cases/tool-enabled-installation.rst -------------------------------------------------------------------------------- /examples/batch.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/batch.jsonl -------------------------------------------------------------------------------- /examples/disaggregated_prefill/pd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/disaggregated_prefill/pd.yaml -------------------------------------------------------------------------------- /examples/disaggregated_prefill/router.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/disaggregated_prefill/router.sh -------------------------------------------------------------------------------- /examples/disaggregated_prefill/start_decode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/disaggregated_prefill/start_decode.sh -------------------------------------------------------------------------------- /examples/disaggregated_prefill/start_prefill.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/disaggregated_prefill/start_prefill.sh -------------------------------------------------------------------------------- /examples/openai_api_client_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/examples/openai_api_client_batch.py -------------------------------------------------------------------------------- /helm/.helmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/.helmignore -------------------------------------------------------------------------------- /helm/Chart.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/Chart.yaml -------------------------------------------------------------------------------- /helm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/README.md -------------------------------------------------------------------------------- /helm/crds/crd-lora-adapter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/crds/crd-lora-adapter.yaml -------------------------------------------------------------------------------- /helm/ct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/ct.yaml -------------------------------------------------------------------------------- /helm/lintconf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/lintconf.yaml -------------------------------------------------------------------------------- /helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/_helpers.tpl -------------------------------------------------------------------------------- /helm/templates/configmap.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/configmap.yaml -------------------------------------------------------------------------------- /helm/templates/deployment-cache-server.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/deployment-cache-server.yaml -------------------------------------------------------------------------------- /helm/templates/deployment-lora-adapter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/deployment-lora-adapter.yaml -------------------------------------------------------------------------------- /helm/templates/deployment-lora-controller.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/deployment-lora-controller.yaml -------------------------------------------------------------------------------- /helm/templates/deployment-router.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/deployment-router.yaml -------------------------------------------------------------------------------- /helm/templates/deployment-vllm-multi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/deployment-vllm-multi.yaml -------------------------------------------------------------------------------- /helm/templates/hpa-router.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/hpa-router.yaml -------------------------------------------------------------------------------- /helm/templates/ingress.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/ingress.yaml -------------------------------------------------------------------------------- /helm/templates/poddisruptionbudget.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/poddisruptionbudget.yaml -------------------------------------------------------------------------------- /helm/templates/pvc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/pvc.yaml -------------------------------------------------------------------------------- /helm/templates/ray-cluster.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/ray-cluster.yaml -------------------------------------------------------------------------------- /helm/templates/role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/role.yaml -------------------------------------------------------------------------------- /helm/templates/rolebinding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/rolebinding.yaml -------------------------------------------------------------------------------- /helm/templates/route.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/route.yaml -------------------------------------------------------------------------------- /helm/templates/secrets.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/secrets.yaml -------------------------------------------------------------------------------- /helm/templates/service-cache-server.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/service-cache-server.yaml -------------------------------------------------------------------------------- /helm/templates/service-router.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/service-router.yaml -------------------------------------------------------------------------------- /helm/templates/service-vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/service-vllm.yaml -------------------------------------------------------------------------------- /helm/templates/serviceaccount.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/serviceaccount.yaml -------------------------------------------------------------------------------- /helm/templates/shared-storage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/templates/shared-storage.yaml -------------------------------------------------------------------------------- /helm/test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/test.sh -------------------------------------------------------------------------------- /helm/tests/imagePullSecrets_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/tests/imagePullSecrets_test.yaml -------------------------------------------------------------------------------- /helm/tests/runtimeClassName_test.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/tests/runtimeClassName_test.yaml -------------------------------------------------------------------------------- /helm/values-example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/values-example.yaml -------------------------------------------------------------------------------- /helm/values.schema.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/values.schema.json -------------------------------------------------------------------------------- /helm/values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/helm/values.yaml -------------------------------------------------------------------------------- /observability/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/README.md -------------------------------------------------------------------------------- /observability/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/install.sh -------------------------------------------------------------------------------- /observability/kube-prom-stack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/kube-prom-stack.yaml -------------------------------------------------------------------------------- /observability/lmcache-dashboard-cm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/lmcache-dashboard-cm.yaml -------------------------------------------------------------------------------- /observability/prom-adapter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/prom-adapter.yaml -------------------------------------------------------------------------------- /observability/uninstall.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/uninstall.sh -------------------------------------------------------------------------------- /observability/vllm-dashboard.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/observability/vllm-dashboard.json -------------------------------------------------------------------------------- /operator/.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/.dockerignore -------------------------------------------------------------------------------- /operator/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/Dockerfile -------------------------------------------------------------------------------- /operator/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/Makefile -------------------------------------------------------------------------------- /operator/PROJECT: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/PROJECT -------------------------------------------------------------------------------- /operator/api/v1alpha1/cacheserver_types.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/cacheserver_types.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/common.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/common.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/groupversion_info.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/groupversion_info.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/loraadapter_types.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/loraadapter_types.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/vllmrouter_types.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/vllmrouter_types.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/vllmruntime_types.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/vllmruntime_types.go -------------------------------------------------------------------------------- /operator/api/v1alpha1/zz_generated.deepcopy.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/api/v1alpha1/zz_generated.deepcopy.go -------------------------------------------------------------------------------- /operator/cmd/main.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/cmd/main.go -------------------------------------------------------------------------------- /operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/bases/production-stack.vllm.ai_cacheservers.yaml -------------------------------------------------------------------------------- /operator/config/crd/bases/production-stack.vllm.ai_loraadapters.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/bases/production-stack.vllm.ai_loraadapters.yaml -------------------------------------------------------------------------------- /operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/bases/production-stack.vllm.ai_vllmrouters.yaml -------------------------------------------------------------------------------- /operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/bases/production-stack.vllm.ai_vllmruntimes.yaml -------------------------------------------------------------------------------- /operator/config/crd/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/crd/kustomizeconfig.yaml -------------------------------------------------------------------------------- /operator/config/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default.yaml -------------------------------------------------------------------------------- /operator/config/default/cert_metrics_manager_patch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default/cert_metrics_manager_patch.yaml -------------------------------------------------------------------------------- /operator/config/default/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/default/manager_image_patch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default/manager_image_patch.yaml -------------------------------------------------------------------------------- /operator/config/default/manager_metrics_patch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default/manager_metrics_patch.yaml -------------------------------------------------------------------------------- /operator/config/default/metrics_service.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/default/metrics_service.yaml -------------------------------------------------------------------------------- /operator/config/manager/deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/manager/deployment.yaml -------------------------------------------------------------------------------- /operator/config/manager/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/manager/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/manager/namespace.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/manager/namespace.yaml -------------------------------------------------------------------------------- /operator/config/network-policy/allow-metrics-traffic.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/network-policy/allow-metrics-traffic.yaml -------------------------------------------------------------------------------- /operator/config/network-policy/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - allow-metrics-traffic.yaml 3 | -------------------------------------------------------------------------------- /operator/config/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/prometheus/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/prometheus/monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/prometheus/monitor.yaml -------------------------------------------------------------------------------- /operator/config/prometheus/monitor_tls_patch.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/prometheus/monitor_tls_patch.yaml -------------------------------------------------------------------------------- /operator/config/rbac/cacheserver_admin_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/cacheserver_admin_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/cacheserver_editor_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/cacheserver_editor_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/cacheserver_viewer_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/cacheserver_viewer_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/leader_election_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/leader_election_role_binding.yaml -------------------------------------------------------------------------------- /operator/config/rbac/loraadapter_admin_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/loraadapter_admin_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/loraadapter_editor_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/loraadapter_editor_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/loraadapter_viewer_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/loraadapter_viewer_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/metrics_auth_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/metrics_auth_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/metrics_auth_role_binding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/metrics_auth_role_binding.yaml -------------------------------------------------------------------------------- /operator/config/rbac/metrics_reader_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/metrics_reader_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/role_binding.yaml -------------------------------------------------------------------------------- /operator/config/rbac/service_account.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/service_account.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmrouter_admin_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmrouter_admin_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmrouter_editor_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmrouter_editor_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmrouter_viewer_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmrouter_viewer_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmruntime_admin_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmruntime_admin_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmruntime_editor_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmruntime_editor_role.yaml -------------------------------------------------------------------------------- /operator/config/rbac/vllmruntime_viewer_role.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/rbac/vllmruntime_viewer_role.yaml -------------------------------------------------------------------------------- /operator/config/samples/kustomization.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/samples/kustomization.yaml -------------------------------------------------------------------------------- /operator/config/samples/production-stack_v1alpha1_cacheserver.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/samples/production-stack_v1alpha1_cacheserver.yaml -------------------------------------------------------------------------------- /operator/config/samples/production-stack_v1alpha1_loraadapter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/samples/production-stack_v1alpha1_loraadapter.yaml -------------------------------------------------------------------------------- /operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/samples/production-stack_v1alpha1_vllmrouter.yaml -------------------------------------------------------------------------------- /operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/config/samples/production-stack_v1alpha1_vllmruntime.yaml -------------------------------------------------------------------------------- /operator/go.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/go.mod -------------------------------------------------------------------------------- /operator/go.sum: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/go.sum -------------------------------------------------------------------------------- /operator/hack/boilerplate.go.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/hack/boilerplate.go.txt -------------------------------------------------------------------------------- /operator/internal/controller/cacheserver_controller.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/cacheserver_controller.go -------------------------------------------------------------------------------- /operator/internal/controller/cacheserver_controller_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/cacheserver_controller_test.go -------------------------------------------------------------------------------- /operator/internal/controller/loraadapter_controller.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/loraadapter_controller.go -------------------------------------------------------------------------------- /operator/internal/controller/loraadapter_controller_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/loraadapter_controller_test.go -------------------------------------------------------------------------------- /operator/internal/controller/suite_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/suite_test.go -------------------------------------------------------------------------------- /operator/internal/controller/vllmrouter_controller.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/vllmrouter_controller.go -------------------------------------------------------------------------------- /operator/internal/controller/vllmrouter_controller_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/vllmrouter_controller_test.go -------------------------------------------------------------------------------- /operator/internal/controller/vllmruntime_controller.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/vllmruntime_controller.go -------------------------------------------------------------------------------- /operator/internal/controller/vllmruntime_controller_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/internal/controller/vllmruntime_controller_test.go -------------------------------------------------------------------------------- /operator/test/e2e/e2e_suite_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/test/e2e/e2e_suite_test.go -------------------------------------------------------------------------------- /operator/test/e2e/e2e_test.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/test/e2e/e2e_test.go -------------------------------------------------------------------------------- /operator/test/utils/utils.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/operator/test/utils/utils.go -------------------------------------------------------------------------------- /proposals/TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/proposals/TEMPLATE.md -------------------------------------------------------------------------------- /proposals/imgs/lora-k8s-arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/proposals/imgs/lora-k8s-arch.png -------------------------------------------------------------------------------- /proposals/lora-k8s-support.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/proposals/lora-k8s-support.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/requirements-test.txt -------------------------------------------------------------------------------- /scripts/huggingface_downloader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/scripts/huggingface_downloader.py -------------------------------------------------------------------------------- /scripts/setup_vllm_templates.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/scripts/setup_vllm_templates.sh -------------------------------------------------------------------------------- /src/examples/example_file_upload.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/examples/example_file_upload.py -------------------------------------------------------------------------------- /src/examples/tool_calling_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/examples/tool_calling_example.py -------------------------------------------------------------------------------- /src/gateway_inference_extension/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/Dockerfile -------------------------------------------------------------------------------- /src/gateway_inference_extension/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/README.md -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/gke/gateway.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/gke/gateway.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/gke/gcp-backend-policy.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/gke/gcp-backend-policy.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/gke/healthcheck.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/gke/healthcheck.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/gke/httproute.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/gke/httproute.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/istio/destination-rule.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/istio/destination-rule.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/istio/gateway.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/istio/gateway.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/istio/httproute.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/istio/httproute.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/kgateway/gateway.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/kgateway/gateway.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/gateway/kgateway/httproute.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/gateway/kgateway/httproute.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/httproute.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/httproute.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/inferencemodel.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/inferencemodel.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/inferencepool-resources.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/inferencepool-resources.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/vllm/gpu-deployment.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/vllm/gpu-deployment.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/configs/vllm/vllm-runtime.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/configs/vllm/vllm-runtime.yaml -------------------------------------------------------------------------------- /src/gateway_inference_extension/delete.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/delete.sh -------------------------------------------------------------------------------- /src/gateway_inference_extension/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/install.sh -------------------------------------------------------------------------------- /src/gateway_inference_extension/kv_aware_picker.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/kv_aware_picker.go -------------------------------------------------------------------------------- /src/gateway_inference_extension/prefix_aware_picker.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/prefix_aware_picker.go -------------------------------------------------------------------------------- /src/gateway_inference_extension/roundrobin_picker.go: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/roundrobin_picker.go -------------------------------------------------------------------------------- /src/gateway_inference_extension/scheduler.patch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/gateway_inference_extension/scheduler.patch -------------------------------------------------------------------------------- /src/tests/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/README.md -------------------------------------------------------------------------------- /src/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/tests/perftest/clean-up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pkill -9 -f "python3 ./fake-openai" 4 | -------------------------------------------------------------------------------- /src/tests/perftest/fake-openai-server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/perftest/fake-openai-server.py -------------------------------------------------------------------------------- /src/tests/perftest/request_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/perftest/request_generator.py -------------------------------------------------------------------------------- /src/tests/perftest/run-multi-server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/perftest/run-multi-server.sh -------------------------------------------------------------------------------- /src/tests/perftest/run-server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/perftest/run-server.sh -------------------------------------------------------------------------------- /src/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | fastapi 3 | openai 4 | uvicorn 5 | vllm 6 | -------------------------------------------------------------------------------- /src/tests/test-openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test-openai.py -------------------------------------------------------------------------------- /src/tests/test_file_storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_file_storage.py -------------------------------------------------------------------------------- /src/tests/test_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_parser.py -------------------------------------------------------------------------------- /src/tests/test_roundrobin_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_roundrobin_router.py -------------------------------------------------------------------------------- /src/tests/test_session_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_session_router.py -------------------------------------------------------------------------------- /src/tests/test_singleton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_singleton.py -------------------------------------------------------------------------------- /src/tests/test_static_service_discovery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_static_service_discovery.py -------------------------------------------------------------------------------- /src/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/tests/test_utils.py -------------------------------------------------------------------------------- /src/vllm_router/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/README.md -------------------------------------------------------------------------------- /src/vllm_router/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/aiohttp_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/aiohttp_client.py -------------------------------------------------------------------------------- /src/vllm_router/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/app.py -------------------------------------------------------------------------------- /src/vllm_router/dynamic_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/dynamic_config.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/README.md -------------------------------------------------------------------------------- /src/vllm_router/experimental/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/feature_gates.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/feature_gates.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/analyzers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/analyzers/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/analyzers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/analyzers/base.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/analyzers/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/analyzers/factory.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/analyzers/presidio.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/analyzers/presidio.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/analyzers/regex.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/analyzers/regex.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/config.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/middleware.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/middleware.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/requirements.txt -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/test_pii.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/test_pii.sh -------------------------------------------------------------------------------- /src/vllm_router/experimental/pii/types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/pii/types.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/README.md -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/db_adapters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/db_adapters/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/db_adapters/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/db_adapters/base.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/db_adapters/faiss_adapter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/db_adapters/faiss_adapter.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/requirements.txt -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/semantic_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/semantic_cache.py -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache/test_cache.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache/test_cache.sh -------------------------------------------------------------------------------- /src/vllm_router/experimental/semantic_cache_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/experimental/semantic_cache_integration.py -------------------------------------------------------------------------------- /src/vllm_router/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/log.py -------------------------------------------------------------------------------- /src/vllm_router/parsers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/parsers/parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/parsers/parser.py -------------------------------------------------------------------------------- /src/vllm_router/parsers/yaml_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/parsers/yaml_utils.py -------------------------------------------------------------------------------- /src/vllm_router/perf-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/perf-test.sh -------------------------------------------------------------------------------- /src/vllm_router/prefix/hashtrie.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/prefix/hashtrie.py -------------------------------------------------------------------------------- /src/vllm_router/protocols.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/protocols.py -------------------------------------------------------------------------------- /src/vllm_router/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/requirements.txt -------------------------------------------------------------------------------- /src/vllm_router/routers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/routers/batches_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/routers/batches_router.py -------------------------------------------------------------------------------- /src/vllm_router/routers/files_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/routers/files_router.py -------------------------------------------------------------------------------- /src/vllm_router/routers/main_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/routers/main_router.py -------------------------------------------------------------------------------- /src/vllm_router/routers/metrics_router.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/routers/metrics_router.py -------------------------------------------------------------------------------- /src/vllm_router/routers/routing_logic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/routers/routing_logic.py -------------------------------------------------------------------------------- /src/vllm_router/run-router.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/run-router.sh -------------------------------------------------------------------------------- /src/vllm_router/service_discovery.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/service_discovery.py -------------------------------------------------------------------------------- /src/vllm_router/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/services/batch_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/batch_service/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/services/batch_service/batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/batch_service/batch.py -------------------------------------------------------------------------------- /src/vllm_router/services/batch_service/local_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/batch_service/local_processor.py -------------------------------------------------------------------------------- /src/vllm_router/services/batch_service/processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/batch_service/processor.py -------------------------------------------------------------------------------- /src/vllm_router/services/callbacks_service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/services/callbacks_service/callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/callbacks_service/callbacks.py -------------------------------------------------------------------------------- /src/vllm_router/services/callbacks_service/custom_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/callbacks_service/custom_callbacks.py -------------------------------------------------------------------------------- /src/vllm_router/services/files_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/files_service/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/services/files_service/file_storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/files_service/file_storage.py -------------------------------------------------------------------------------- /src/vllm_router/services/files_service/openai_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/files_service/openai_files.py -------------------------------------------------------------------------------- /src/vllm_router/services/files_service/storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/files_service/storage.py -------------------------------------------------------------------------------- /src/vllm_router/services/metrics_service/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/metrics_service/__init__.py -------------------------------------------------------------------------------- /src/vllm_router/services/metrics_service/prometheus_gauge.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/services/request_service/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/services/request_service/request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/request_service/request.py -------------------------------------------------------------------------------- /src/vllm_router/services/request_service/rewriter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/services/request_service/rewriter.py -------------------------------------------------------------------------------- /src/vllm_router/stats/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/vllm_router/stats/engine_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/stats/engine_stats.py -------------------------------------------------------------------------------- /src/vllm_router/stats/log_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/stats/log_stats.py -------------------------------------------------------------------------------- /src/vllm_router/stats/request_stats.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/stats/request_stats.py -------------------------------------------------------------------------------- /src/vllm_router/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/utils.py -------------------------------------------------------------------------------- /src/vllm_router/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/src/vllm_router/version.py -------------------------------------------------------------------------------- /tests/e2e/run-k8s-routing-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/run-k8s-routing-test.sh -------------------------------------------------------------------------------- /tests/e2e/run-static-discovery-routing-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/run-static-discovery-routing-test.sh -------------------------------------------------------------------------------- /tests/e2e/stress-test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/stress-test.sh -------------------------------------------------------------------------------- /tests/e2e/test-crds.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/test-crds.sh -------------------------------------------------------------------------------- /tests/e2e/test-routing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/test-routing.py -------------------------------------------------------------------------------- /tests/e2e/test-sticky-routing.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/test-sticky-routing.sh -------------------------------------------------------------------------------- /tests/e2e/wait-for-backends.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/wait-for-backends.sh -------------------------------------------------------------------------------- /tests/e2e/wait-for-pods.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tests/e2e/wait-for-pods.sh -------------------------------------------------------------------------------- /tutorials/00-a-install-multinode-kubernetes-env.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/00-a-install-multinode-kubernetes-env.md -------------------------------------------------------------------------------- /tutorials/00-b-install-kuberay-operator.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/00-b-install-kuberay-operator.md -------------------------------------------------------------------------------- /tutorials/00-c-install-kind-kubernetes-env.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/00-c-install-kind-kubernetes-env.md -------------------------------------------------------------------------------- /tutorials/00-install-kubernetes-env.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/00-install-kubernetes-env.md -------------------------------------------------------------------------------- /tutorials/01-b-minimal-helm-installation-amd.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/01-b-minimal-helm-installation-amd.md -------------------------------------------------------------------------------- /tutorials/01-minimal-helm-installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/01-minimal-helm-installation.md -------------------------------------------------------------------------------- /tutorials/02-basic-vllm-config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/02-basic-vllm-config.md -------------------------------------------------------------------------------- /tutorials/03-load-model-from-pv.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/03-load-model-from-pv.md -------------------------------------------------------------------------------- /tutorials/04-launch-multiple-model.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/04-launch-multiple-model.md -------------------------------------------------------------------------------- /tutorials/05-offload-kv-cache.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/05-offload-kv-cache.md -------------------------------------------------------------------------------- /tutorials/06-remote-shared-kv-cache.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/06-remote-shared-kv-cache.md -------------------------------------------------------------------------------- /tutorials/07-benchmark-multi-round-qa-single-gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/07-benchmark-multi-round-qa-single-gpu.md -------------------------------------------------------------------------------- /tutorials/08-benchmark-multi-round-qa-multi-gpu.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/08-benchmark-multi-round-qa-multi-gpu.md -------------------------------------------------------------------------------- /tutorials/09-lora-enabled-installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/09-lora-enabled-installation.md -------------------------------------------------------------------------------- /tutorials/10-horizontal-autoscaling.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/10-horizontal-autoscaling.md -------------------------------------------------------------------------------- /tutorials/11-secure-vllm-serve.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/11-secure-vllm-serve.md -------------------------------------------------------------------------------- /tutorials/12-distributed-tracing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/12-distributed-tracing.md -------------------------------------------------------------------------------- /tutorials/13-tool-enabled-installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/13-tool-enabled-installation.md -------------------------------------------------------------------------------- /tutorials/14-vllm-v1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/14-vllm-v1.md -------------------------------------------------------------------------------- /tutorials/15-basic-pipeline-parallel.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/15-basic-pipeline-parallel.md -------------------------------------------------------------------------------- /tutorials/16-disagg-prefill.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/16-disagg-prefill.md -------------------------------------------------------------------------------- /tutorials/17-kv-aware-routing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/17-kv-aware-routing.md -------------------------------------------------------------------------------- /tutorials/18-prefix-aware-routing.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/18-prefix-aware-routing.md -------------------------------------------------------------------------------- /tutorials/19-engine-sleep-wakeup-mode.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/19-engine-sleep-wakeup-mode.md -------------------------------------------------------------------------------- /tutorials/20-keda-autoscaling.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/20-keda-autoscaling.md -------------------------------------------------------------------------------- /tutorials/21-gateway-inference-extension.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/21-gateway-inference-extension.md -------------------------------------------------------------------------------- /tutorials/22-gateway-api-for-ingress.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/22-gateway-api-for-ingress.md -------------------------------------------------------------------------------- /tutorials/23-whisper-api-transcription.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/23-whisper-api-transcription.md -------------------------------------------------------------------------------- /tutorials/24-semantic-router-integration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/24-semantic-router-integration.md -------------------------------------------------------------------------------- /tutorials/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/README.md -------------------------------------------------------------------------------- /tutorials/assets/example-04-openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/example-04-openai.py -------------------------------------------------------------------------------- /tutorials/assets/example-10-load-generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/example-10-load-generator.py -------------------------------------------------------------------------------- /tutorials/assets/gke-example/values-01-offload-kv-cache-local-disk.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/gke-example/values-01-offload-kv-cache-local-disk.yaml -------------------------------------------------------------------------------- /tutorials/assets/hpa-10.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/hpa-10.yaml -------------------------------------------------------------------------------- /tutorials/assets/otel-example/jaeger-collector.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/otel-example/jaeger-collector.yaml -------------------------------------------------------------------------------- /tutorials/assets/otel-example/jaeger-query.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/otel-example/jaeger-query.yaml -------------------------------------------------------------------------------- /tutorials/assets/otel-example/jaeger.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/otel-example/jaeger.yaml -------------------------------------------------------------------------------- /tutorials/assets/otel-example/otel-collector-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/otel-example/otel-collector-config.yaml -------------------------------------------------------------------------------- /tutorials/assets/otel-example/otel-collector.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/otel-example/otel-collector.yaml -------------------------------------------------------------------------------- /tutorials/assets/pv-03.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/pv-03.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-01-2pods-minimal-example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-01-2pods-minimal-example.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-01-minimal-amd-example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-01-minimal-amd-example.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-01-minimal-example.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-01-minimal-example.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-02-basic-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-02-basic-config.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-03-match-pv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-03-match-pv.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-04-multiple-models.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-04-multiple-models.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-05-cpu-offloading.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-05-cpu-offloading.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-06-shared-storage.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-06-shared-storage.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-09-lora-helm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-09-lora-helm.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-09-lora-operator.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-09-lora-operator.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-11-secure-vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-11-secure-vllm.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-12-otel-vllm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-12-otel-vllm.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-13-tool-enabled.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-13-tool-enabled.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-14-vllm-v1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-14-vllm-v1.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-15-a-minimal-pipeline-parallel-example-raycluster.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-15-a-minimal-pipeline-parallel-example-raycluster.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-15-b-minimal-pipeline-parallel-example-multiple-modelspec.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-15-b-minimal-pipeline-parallel-example-multiple-modelspec.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-16-disagg-prefill.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-16-disagg-prefill.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-17-kv-aware.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-17-kv-aware.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-18-prefix-aware.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-18-prefix-aware.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-19-sleep-mode-aware.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-19-sleep-mode-aware.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-20-keda.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-20-keda.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-22-gateway-api.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-22-gateway-api.yaml -------------------------------------------------------------------------------- /tutorials/assets/values-23-SR.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/assets/values-23-SR.yaml -------------------------------------------------------------------------------- /tutorials/cloud_deployments/01-AWS-EKS-deployment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/cloud_deployments/01-AWS-EKS-deployment.md -------------------------------------------------------------------------------- /tutorials/cloud_deployments/02-GCP-GKE-deployment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/cloud_deployments/02-GCP-GKE-deployment.md -------------------------------------------------------------------------------- /tutorials/cloud_deployments/03-Azure-AKS-deployment.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/cloud_deployments/03-Azure-AKS-deployment.md -------------------------------------------------------------------------------- /tutorials/cloud_deployments/04-GCP-GKE-lmcache-local-disk.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/cloud_deployments/04-GCP-GKE-lmcache-local-disk.md -------------------------------------------------------------------------------- /tutorials/terraform/aks/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/Makefile -------------------------------------------------------------------------------- /tutorials/terraform/aks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/README.md -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/.terraform.lock.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/.terraform.lock.hcl -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/cluster.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/cluster.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/create-config.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/create-config.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/outputs.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/provider.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/ssh.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/ssh.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/azure-infrastructure/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/azure-infrastructure/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/production-stack/.terraform.lock.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/production-stack/.terraform.lock.hcl -------------------------------------------------------------------------------- /tutorials/terraform/aks/production-stack/helm.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/production-stack/helm.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/production-stack/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/production-stack/provider.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/production-stack/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/production-stack/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/aks/production_stack_specification.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/aks/production_stack_specification.yaml -------------------------------------------------------------------------------- /tutorials/terraform/eks/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/.gitignore -------------------------------------------------------------------------------- /tutorials/terraform/eks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/README.md -------------------------------------------------------------------------------- /tutorials/terraform/eks/cluster-tools.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/cluster-tools.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/config/calico-values.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/config/calico-values.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/config/efs_pvct.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/config/efs_pvct.yaml -------------------------------------------------------------------------------- /tutorials/terraform/eks/config/kubeconfig.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/config/kubeconfig.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/config/vllm-dashboard.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/config/vllm-dashboard.json -------------------------------------------------------------------------------- /tutorials/terraform/eks/config/vllm-svc-monitor.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/config/vllm-svc-monitor.yml -------------------------------------------------------------------------------- /tutorials/terraform/eks/datasources.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/datasources.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/env-vars.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/env-vars.template -------------------------------------------------------------------------------- /tutorials/terraform/eks/iam_role.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/iam_role.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/main.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/main.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light-ingress.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light-ingress.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light-local.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light-local.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/modules/llm-stack/helm/cpu/cpu-tinyllama-light.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/modules/llm-stack/helm/gpu/gpu-operator-values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/modules/llm-stack/helm/gpu/gpu-operator-values.yaml -------------------------------------------------------------------------------- /tutorials/terraform/eks/modules/llm-stack/helm/gpu/gpu-tinyllama-light-ingress.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/modules/llm-stack/helm/gpu/gpu-tinyllama-light-ingress.tpl -------------------------------------------------------------------------------- /tutorials/terraform/eks/network.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/network.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/output.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/output.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/provider.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/storage.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/storage.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/terraform.tfvars.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/terraform.tfvars.template -------------------------------------------------------------------------------- /tutorials/terraform/eks/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/eks/vllm-production-stack.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/eks/vllm-production-stack.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/Makefile -------------------------------------------------------------------------------- /tutorials/terraform/gke/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/README.md -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/.terraform.lock.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/.terraform.lock.hcl -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/backend.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/backend.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/cluster.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/cluster.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/node_pools.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/node_pools.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/outputs.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/outputs.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/providers.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/providers.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/services.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/services.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/gke-infrastructure/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/gke-infrastructure/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/production-stack/.terraform.lock.hcl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production-stack/.terraform.lock.hcl -------------------------------------------------------------------------------- /tutorials/terraform/gke/production-stack/backend.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production-stack/backend.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/production-stack/helm.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production-stack/helm.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/production-stack/providers.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production-stack/providers.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/production-stack/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production-stack/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/gke/production_stack_specification.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/gke/production_stack_specification.yaml -------------------------------------------------------------------------------- /tutorials/terraform/nebius/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/.gitignore -------------------------------------------------------------------------------- /tutorials/terraform/nebius/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/README.md -------------------------------------------------------------------------------- /tutorials/terraform/nebius/cluster-tools.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/cluster-tools.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/helm/kube-prome-stack.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/helm/kube-prome-stack.yaml -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/kubeconfig.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/kubeconfig.tpl -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/llm-stack/helm/cpu/cpu-tinyllama-light-ingress-nebius.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/llm-stack/helm/cpu/cpu-tinyllama-light-ingress-nebius.tpl -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/llm-stack/helm/gpu/gpu-operator-values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/llm-stack/helm/gpu/gpu-operator-values.yaml -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/llm-stack/helm/gpu/gpu-tinyllama-light-ingress-nebius.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/llm-stack/helm/gpu/gpu-tinyllama-light-ingress-nebius.tpl -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/manifests/letsencrypt-issuer.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/manifests/letsencrypt-issuer.yaml -------------------------------------------------------------------------------- /tutorials/terraform/nebius/config/vllm-dashboard.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/config/vllm-dashboard.json -------------------------------------------------------------------------------- /tutorials/terraform/nebius/data_sources.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/data_sources.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/env-vars.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/env-vars.template -------------------------------------------------------------------------------- /tutorials/terraform/nebius/main.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/main.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/network.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/network.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/output.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/output.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/provider.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/provider.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/terraform.tfvars.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/terraform.tfvars.template -------------------------------------------------------------------------------- /tutorials/terraform/nebius/variables.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/variables.tf -------------------------------------------------------------------------------- /tutorials/terraform/nebius/vllm-production-stack.tf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/tutorials/terraform/nebius/vllm-production-stack.tf -------------------------------------------------------------------------------- /utils/helm-chart-test-values.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/helm-chart-test-values.sh -------------------------------------------------------------------------------- /utils/init-nvidia-gpu-setup-k8s.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/init-nvidia-gpu-setup-k8s.sh -------------------------------------------------------------------------------- /utils/install-calico.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-calico.sh -------------------------------------------------------------------------------- /utils/install-cri-o.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-cri-o.sh -------------------------------------------------------------------------------- /utils/install-helm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-helm.sh -------------------------------------------------------------------------------- /utils/install-kind-cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-kind-cluster.sh -------------------------------------------------------------------------------- /utils/install-kind.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-kind.sh -------------------------------------------------------------------------------- /utils/install-kubeadm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-kubeadm.sh -------------------------------------------------------------------------------- /utils/install-kubectl.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-kubectl.sh -------------------------------------------------------------------------------- /utils/install-kuberay.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-kuberay.sh -------------------------------------------------------------------------------- /utils/install-minikube-cluster.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/utils/install-minikube-cluster.sh -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/production-stack/HEAD/uv.lock --------------------------------------------------------------------------------