├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── blank_issue.md │ ├── bug_request.md │ ├── config.yml │ └── feature_request.md ├── changelog-config.json └── workflows │ ├── e2e_test-on-change.yml │ ├── format.yml │ ├── publish-on-change.yml │ ├── publish-on-release.yml │ ├── test-release.yml │ └── unit_test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── OWNERS ├── OWNERS_ALIASES ├── README.md ├── RELEASE.md ├── SECURITY.md ├── SECURITY_CONTACTS ├── code-of-conduct.md ├── config.yml ├── demos └── kubecon-na-2025 │ ├── README.md │ ├── config.yaml │ ├── vllm+batch_size │ ├── manifests.yaml │ └── results │ │ ├── latency_vs_qps.png │ │ ├── stage_0_lifecycle_metrics.json │ │ ├── stage_0_prometheus_metrics.json │ │ ├── stage_1_lifecycle_metrics.json │ │ ├── stage_1_prometheus_metrics.json │ │ ├── stage_2_lifecycle_metrics.json │ │ ├── stage_2_prometheus_metrics.json │ │ ├── stage_3_lifecycle_metrics.json │ │ ├── stage_3_prometheus_metrics.json │ │ ├── stage_4_lifecycle_metrics.json │ │ ├── stage_4_prometheus_metrics.json │ │ ├── stage_5_lifecycle_metrics.json │ │ ├── stage_5_prometheus_metrics.json │ │ ├── stage_6_lifecycle_metrics.json │ │ ├── stage_6_prometheus_metrics.json │ │ ├── summary_lifecycle_metrics.json │ │ ├── summary_prometheus_metrics.json │ │ ├── throughput_vs_latency.png │ │ └── throughput_vs_qps.png │ ├── vllm+chunking_and_prefix_caching │ ├── manifests.yaml │ └── results │ │ ├── latency_vs_qps.png │ │ ├── stage_0_lifecycle_metrics.json │ │ ├── stage_0_prometheus_metrics.json │ │ ├── stage_1_lifecycle_metrics.json │ │ ├── stage_1_prometheus_metrics.json │ │ ├── stage_2_lifecycle_metrics.json │ │ ├── stage_2_prometheus_metrics.json │ │ ├── stage_3_lifecycle_metrics.json │ │ ├── stage_3_prometheus_metrics.json │ │ ├── stage_4_lifecycle_metrics.json │ │ ├── stage_4_prometheus_metrics.json │ │ ├── stage_5_lifecycle_metrics.json │ │ ├── stage_5_prometheus_metrics.json │ │ ├── stage_6_lifecycle_metrics.json │ │ ├── stage_6_prometheus_metrics.json │ │ ├── summary_lifecycle_metrics.json │ │ ├── summary_prometheus_metrics.json │ │ ├── throughput_vs_latency.png │ │ └── throughput_vs_qps.png │ ├── vllm+eagle │ ├── manifests.yaml │ └── results │ │ ├── latency_vs_qps.png │ │ ├── stage_0_lifecycle_metrics.json │ │ ├── stage_0_prometheus_metrics.json │ │ ├── stage_1_lifecycle_metrics.json │ │ ├── stage_1_prometheus_metrics.json │ │ ├── stage_2_lifecycle_metrics.json │ │ ├── stage_2_prometheus_metrics.json │ │ ├── stage_3_lifecycle_metrics.json │ │ ├── stage_3_prometheus_metrics.json │ │ ├── stage_4_lifecycle_metrics.json │ │ ├── stage_4_prometheus_metrics.json │ │ ├── stage_5_lifecycle_metrics.json │ │ ├── stage_5_prometheus_metrics.json │ │ ├── stage_6_lifecycle_metrics.json │ │ ├── stage_6_prometheus_metrics.json │ │ ├── summary_lifecycle_metrics.json │ │ ├── summary_prometheus_metrics.json │ │ ├── throughput_vs_latency.png │ │ └── throughput_vs_qps.png │ ├── vllm+quantization │ ├── manifests.yaml │ └── results │ │ ├── latency_vs_qps.png │ │ ├── stage_0_lifecycle_metrics.json │ │ ├── stage_0_prometheus_metrics.json │ │ ├── stage_1_lifecycle_metrics.json │ │ ├── stage_1_prometheus_metrics.json │ │ ├── stage_2_lifecycle_metrics.json │ │ ├── stage_2_prometheus_metrics.json │ │ ├── stage_3_lifecycle_metrics.json │ │ ├── stage_3_prometheus_metrics.json │ │ ├── stage_4_lifecycle_metrics.json │ │ ├── stage_4_prometheus_metrics.json │ │ ├── stage_5_lifecycle_metrics.json │ │ ├── stage_5_prometheus_metrics.json │ │ ├── stage_6_lifecycle_metrics.json │ │ ├── stage_6_prometheus_metrics.json │ │ ├── summary_lifecycle_metrics.json │ │ ├── summary_prometheus_metrics.json │ │ ├── throughput_vs_latency.png │ │ └── throughput_vs_qps.png │ ├── vllm+v0_engine │ ├── manifests.yaml │ └── results │ │ ├── latency_vs_qps.png │ │ ├── stage_0_lifecycle_metrics.json │ │ ├── stage_0_prometheus_metrics.json │ │ ├── stage_1_lifecycle_metrics.json │ │ ├── stage_1_prometheus_metrics.json │ │ ├── stage_2_lifecycle_metrics.json │ │ ├── stage_2_prometheus_metrics.json │ │ ├── stage_3_lifecycle_metrics.json │ │ ├── stage_3_prometheus_metrics.json │ │ ├── stage_4_lifecycle_metrics.json │ │ ├── stage_4_prometheus_metrics.json │ │ ├── stage_5_lifecycle_metrics.json │ │ ├── stage_5_prometheus_metrics.json │ │ ├── stage_6_lifecycle_metrics.json │ │ ├── stage_6_prometheus_metrics.json │ │ ├── summary_lifecycle_metrics.json │ │ ├── summary_prometheus_metrics.json │ │ ├── throughput_vs_latency.png │ │ └── throughput_vs_qps.png │ └── vllm │ ├── manifests.yaml │ └── results │ ├── latency_vs_qps.png │ ├── stage_0_lifecycle_metrics.json │ ├── stage_0_prometheus_metrics.json │ ├── stage_1_lifecycle_metrics.json │ ├── stage_1_prometheus_metrics.json │ ├── stage_2_lifecycle_metrics.json │ ├── stage_2_prometheus_metrics.json │ ├── stage_3_lifecycle_metrics.json │ ├── stage_3_prometheus_metrics.json │ ├── stage_4_lifecycle_metrics.json │ ├── stage_4_prometheus_metrics.json │ ├── stage_5_lifecycle_metrics.json │ ├── stage_5_prometheus_metrics.json │ ├── stage_6_lifecycle_metrics.json │ ├── stage_6_prometheus_metrics.json │ ├── summary_lifecycle_metrics.json │ ├── summary_prometheus_metrics.json │ ├── throughput_vs_latency.png │ └── throughput_vs_qps.png ├── deploy ├── README.md ├── inference-perf │ ├── .helmignore │ ├── Chart.yaml │ ├── README.md │ ├── templates │ │ ├── _helpers.tpl │ │ ├── configmap.yaml │ │ ├── job.yaml │ │ └── secret.yaml │ └── values.yaml └── manifests.yaml ├── docs ├── config.md ├── design.md ├── images │ ├── architecture.png │ ├── design.png │ ├── latency_vs_qps.png │ ├── throughput_vs_latency.png │ └── throughput_vs_qps.png ├── loadgen.md └── metrics.md ├── e2e ├── configs │ └── e2e_simple_mock_client.yaml ├── conftest.py ├── tests │ └── test_mock_client.py └── utils │ └── benchmark.py ├── examples ├── sglang │ ├── .gitignore │ ├── config-random.yml │ ├── config-shared-prefix.yml │ ├── config-synthetic.yml │ ├── config.yml │ ├── docker-compose.yml │ ├── main.ipynb │ └── prometheus.yml ├── tgi │ ├── .gitignore │ ├── config-random.yml │ ├── config-shared-prefix.yml │ ├── config-synthetic.yml │ ├── config.yml │ ├── docker-compose.yml │ ├── main.ipynb │ └── prometheus.yml ├── trace-replay │ └── trace.csv └── vllm │ ├── .gitignore │ ├── config-circuitbreaker.yml │ ├── config-random.yml │ ├── config-shared-prefix-multi-turn.yml │ ├── config-shared-prefix.yml │ ├── config-sweep.yml │ ├── config-synthetic.yml │ ├── config-trace-replay.yml │ ├── config.yml │ ├── docker-compose.yml │ ├── main.ipynb │ └── prometheus.yml ├── inference_perf ├── __init__.py ├── analysis │ ├── __init__.py │ └── analyze.py ├── apis │ ├── __init__.py │ ├── base.py │ ├── chat.py │ ├── completion.py │ └── user_session.py ├── circuit_breaker │ ├── __init__.py │ ├── base.py │ ├── config.py │ ├── simple_breaker.py │ └── triggers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── config.py │ │ ├── consecutive.py │ │ └── rate_over_window.py ├── client │ ├── filestorage │ │ ├── __init__.py │ │ ├── base.py │ │ ├── gcs.py │ │ ├── local.py │ │ └── s3.py │ ├── metricsclient │ │ ├── README.md │ │ ├── __init__.py │ │ ├── base.py │ │ ├── mock_client.py │ │ └── prometheus_client │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ └── google_managed_prometheus_client.py │ ├── modelserver │ │ ├── __init__.py │ │ ├── base.py │ │ ├── mock_client.py │ │ ├── openai_client.py │ │ ├── sglang_client.py │ │ ├── tgi_client.py │ │ └── vllm_client.py │ └── requestdatacollector │ │ ├── __init__.py │ │ ├── base.py │ │ ├── local.py │ │ └── multiprocess.py ├── config.py ├── datagen │ ├── __init__.py │ ├── base.py │ ├── cnn_dailymail_datagen.py │ ├── hf_billsum_datagen.py │ ├── hf_sharegpt_datagen.py │ ├── infinity_instruct_datagen.py │ ├── mock_datagen.py │ ├── random_datagen.py │ ├── shared_prefix_datagen.py │ └── synthetic_datagen.py ├── loadgen │ ├── __init__.py │ ├── load_generator.py │ └── load_timer.py ├── logger.py ├── main.py ├── reportgen │ ├── __init__.py │ └── base.py └── utils │ ├── __init__.py │ ├── custom_tokenizer.py │ ├── distribution.py │ ├── report_file.py │ ├── request_queue.py │ └── trace_reader.py ├── pdm.lock ├── pyproject.toml ├── requirements.txt └── tests ├── apis ├── test_chat.py └── test_completion.py ├── test_config.py ├── test_logger.py └── test_trace_replay.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.dockerignore -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/blank_issue.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/ISSUE_TEMPLATE/blank_issue.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/ISSUE_TEMPLATE/bug_request.md -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/ISSUE_TEMPLATE/feature_request.md -------------------------------------------------------------------------------- /.github/changelog-config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/changelog-config.json -------------------------------------------------------------------------------- /.github/workflows/e2e_test-on-change.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/e2e_test-on-change.yml -------------------------------------------------------------------------------- /.github/workflows/format.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/format.yml -------------------------------------------------------------------------------- /.github/workflows/publish-on-change.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/publish-on-change.yml -------------------------------------------------------------------------------- /.github/workflows/publish-on-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/publish-on-release.yml -------------------------------------------------------------------------------- /.github/workflows/test-release.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/test-release.yml -------------------------------------------------------------------------------- /.github/workflows/unit_test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.github/workflows/unit_test.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.gitignore -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/LICENSE -------------------------------------------------------------------------------- /OWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/OWNERS -------------------------------------------------------------------------------- /OWNERS_ALIASES: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/OWNERS_ALIASES -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/README.md -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/RELEASE.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/SECURITY.md -------------------------------------------------------------------------------- /SECURITY_CONTACTS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/SECURITY_CONTACTS -------------------------------------------------------------------------------- /code-of-conduct.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/code-of-conduct.md -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/config.yml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/README.md -------------------------------------------------------------------------------- /demos/kubecon-na-2025/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/config.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+batch_size/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+batch_size/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+chunking_and_prefix_caching/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+eagle/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+eagle/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+quantization/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+quantization/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm+v0_engine/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm+v0_engine/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/manifests.yaml -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/latency_vs_qps.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_0_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_0_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_0_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_0_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_1_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_1_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_1_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_1_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_2_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_2_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_2_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_2_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_3_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_3_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_3_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_3_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_4_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_4_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_4_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_4_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_5_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_5_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_5_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_5_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_6_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_6_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/stage_6_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/stage_6_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/summary_lifecycle_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/summary_lifecycle_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/summary_prometheus_metrics.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/summary_prometheus_metrics.json -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/throughput_vs_latency.png -------------------------------------------------------------------------------- /demos/kubecon-na-2025/vllm/results/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/demos/kubecon-na-2025/vllm/results/throughput_vs_qps.png -------------------------------------------------------------------------------- /deploy/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/README.md -------------------------------------------------------------------------------- /deploy/inference-perf/.helmignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/.helmignore -------------------------------------------------------------------------------- /deploy/inference-perf/Chart.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/Chart.yaml -------------------------------------------------------------------------------- /deploy/inference-perf/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/README.md -------------------------------------------------------------------------------- /deploy/inference-perf/templates/_helpers.tpl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/templates/_helpers.tpl -------------------------------------------------------------------------------- /deploy/inference-perf/templates/configmap.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/templates/configmap.yaml -------------------------------------------------------------------------------- /deploy/inference-perf/templates/job.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/templates/job.yaml -------------------------------------------------------------------------------- /deploy/inference-perf/templates/secret.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/templates/secret.yaml -------------------------------------------------------------------------------- /deploy/inference-perf/values.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/inference-perf/values.yaml -------------------------------------------------------------------------------- /deploy/manifests.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/deploy/manifests.yaml -------------------------------------------------------------------------------- /docs/config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/config.md -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/design.md -------------------------------------------------------------------------------- /docs/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/images/architecture.png -------------------------------------------------------------------------------- /docs/images/design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/images/design.png -------------------------------------------------------------------------------- /docs/images/latency_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/images/latency_vs_qps.png -------------------------------------------------------------------------------- /docs/images/throughput_vs_latency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/images/throughput_vs_latency.png -------------------------------------------------------------------------------- /docs/images/throughput_vs_qps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/images/throughput_vs_qps.png -------------------------------------------------------------------------------- /docs/loadgen.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/loadgen.md -------------------------------------------------------------------------------- /docs/metrics.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/docs/metrics.md -------------------------------------------------------------------------------- /e2e/configs/e2e_simple_mock_client.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/e2e/configs/e2e_simple_mock_client.yaml -------------------------------------------------------------------------------- /e2e/conftest.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /e2e/tests/test_mock_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/e2e/tests/test_mock_client.py -------------------------------------------------------------------------------- /e2e/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/e2e/utils/benchmark.py -------------------------------------------------------------------------------- /examples/sglang/.gitignore: -------------------------------------------------------------------------------- 1 | reports-* -------------------------------------------------------------------------------- /examples/sglang/config-random.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/config-random.yml -------------------------------------------------------------------------------- /examples/sglang/config-shared-prefix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/config-shared-prefix.yml -------------------------------------------------------------------------------- /examples/sglang/config-synthetic.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/config-synthetic.yml -------------------------------------------------------------------------------- /examples/sglang/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/config.yml -------------------------------------------------------------------------------- /examples/sglang/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/docker-compose.yml -------------------------------------------------------------------------------- /examples/sglang/main.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/main.ipynb -------------------------------------------------------------------------------- /examples/sglang/prometheus.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/sglang/prometheus.yml -------------------------------------------------------------------------------- /examples/tgi/.gitignore: -------------------------------------------------------------------------------- 1 | reports-* -------------------------------------------------------------------------------- /examples/tgi/config-random.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/config-random.yml -------------------------------------------------------------------------------- /examples/tgi/config-shared-prefix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/config-shared-prefix.yml -------------------------------------------------------------------------------- /examples/tgi/config-synthetic.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/config-synthetic.yml -------------------------------------------------------------------------------- /examples/tgi/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/config.yml -------------------------------------------------------------------------------- /examples/tgi/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/docker-compose.yml -------------------------------------------------------------------------------- /examples/tgi/main.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/main.ipynb -------------------------------------------------------------------------------- /examples/tgi/prometheus.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/tgi/prometheus.yml -------------------------------------------------------------------------------- /examples/trace-replay/trace.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/trace-replay/trace.csv -------------------------------------------------------------------------------- /examples/vllm/.gitignore: -------------------------------------------------------------------------------- 1 | reports-* -------------------------------------------------------------------------------- /examples/vllm/config-circuitbreaker.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-circuitbreaker.yml -------------------------------------------------------------------------------- /examples/vllm/config-random.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-random.yml -------------------------------------------------------------------------------- /examples/vllm/config-shared-prefix-multi-turn.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-shared-prefix-multi-turn.yml -------------------------------------------------------------------------------- /examples/vllm/config-shared-prefix.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-shared-prefix.yml -------------------------------------------------------------------------------- /examples/vllm/config-sweep.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-sweep.yml -------------------------------------------------------------------------------- /examples/vllm/config-synthetic.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-synthetic.yml -------------------------------------------------------------------------------- /examples/vllm/config-trace-replay.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config-trace-replay.yml -------------------------------------------------------------------------------- /examples/vllm/config.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/config.yml -------------------------------------------------------------------------------- /examples/vllm/docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/docker-compose.yml -------------------------------------------------------------------------------- /examples/vllm/main.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/main.ipynb -------------------------------------------------------------------------------- /examples/vllm/prometheus.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/examples/vllm/prometheus.yml -------------------------------------------------------------------------------- /inference_perf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/__init__.py -------------------------------------------------------------------------------- /inference_perf/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/analysis/__init__.py -------------------------------------------------------------------------------- /inference_perf/analysis/analyze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/analysis/analyze.py -------------------------------------------------------------------------------- /inference_perf/apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/apis/__init__.py -------------------------------------------------------------------------------- /inference_perf/apis/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/apis/base.py -------------------------------------------------------------------------------- /inference_perf/apis/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/apis/chat.py -------------------------------------------------------------------------------- /inference_perf/apis/completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/apis/completion.py -------------------------------------------------------------------------------- /inference_perf/apis/user_session.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/apis/user_session.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/__init__.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/base.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/config.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/simple_breaker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/simple_breaker.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/triggers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/triggers/__init__.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/triggers/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/triggers/base.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/triggers/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/triggers/config.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/triggers/consecutive.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/triggers/consecutive.py -------------------------------------------------------------------------------- /inference_perf/circuit_breaker/triggers/rate_over_window.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/circuit_breaker/triggers/rate_over_window.py -------------------------------------------------------------------------------- /inference_perf/client/filestorage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/filestorage/__init__.py -------------------------------------------------------------------------------- /inference_perf/client/filestorage/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/filestorage/base.py -------------------------------------------------------------------------------- /inference_perf/client/filestorage/gcs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/filestorage/gcs.py -------------------------------------------------------------------------------- /inference_perf/client/filestorage/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/filestorage/local.py -------------------------------------------------------------------------------- /inference_perf/client/filestorage/s3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/filestorage/s3.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/README.md -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/__init__.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/base.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/mock_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/mock_client.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/prometheus_client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/prometheus_client/__init__.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/prometheus_client/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/prometheus_client/base.py -------------------------------------------------------------------------------- /inference_perf/client/metricsclient/prometheus_client/google_managed_prometheus_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/metricsclient/prometheus_client/google_managed_prometheus_client.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/__init__.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/base.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/mock_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/mock_client.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/openai_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/openai_client.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/sglang_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/sglang_client.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/tgi_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/tgi_client.py -------------------------------------------------------------------------------- /inference_perf/client/modelserver/vllm_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/modelserver/vllm_client.py -------------------------------------------------------------------------------- /inference_perf/client/requestdatacollector/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/requestdatacollector/__init__.py -------------------------------------------------------------------------------- /inference_perf/client/requestdatacollector/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/requestdatacollector/base.py -------------------------------------------------------------------------------- /inference_perf/client/requestdatacollector/local.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/requestdatacollector/local.py -------------------------------------------------------------------------------- /inference_perf/client/requestdatacollector/multiprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/client/requestdatacollector/multiprocess.py -------------------------------------------------------------------------------- /inference_perf/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/config.py -------------------------------------------------------------------------------- /inference_perf/datagen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/__init__.py -------------------------------------------------------------------------------- /inference_perf/datagen/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/base.py -------------------------------------------------------------------------------- /inference_perf/datagen/cnn_dailymail_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/cnn_dailymail_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/hf_billsum_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/hf_billsum_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/hf_sharegpt_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/hf_sharegpt_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/infinity_instruct_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/infinity_instruct_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/mock_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/mock_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/random_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/random_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/shared_prefix_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/shared_prefix_datagen.py -------------------------------------------------------------------------------- /inference_perf/datagen/synthetic_datagen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/datagen/synthetic_datagen.py -------------------------------------------------------------------------------- /inference_perf/loadgen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/loadgen/__init__.py -------------------------------------------------------------------------------- /inference_perf/loadgen/load_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/loadgen/load_generator.py -------------------------------------------------------------------------------- /inference_perf/loadgen/load_timer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/loadgen/load_timer.py -------------------------------------------------------------------------------- /inference_perf/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/logger.py -------------------------------------------------------------------------------- /inference_perf/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/main.py -------------------------------------------------------------------------------- /inference_perf/reportgen/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/reportgen/__init__.py -------------------------------------------------------------------------------- /inference_perf/reportgen/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/reportgen/base.py -------------------------------------------------------------------------------- /inference_perf/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/__init__.py -------------------------------------------------------------------------------- /inference_perf/utils/custom_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/custom_tokenizer.py -------------------------------------------------------------------------------- /inference_perf/utils/distribution.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/distribution.py -------------------------------------------------------------------------------- /inference_perf/utils/report_file.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/report_file.py -------------------------------------------------------------------------------- /inference_perf/utils/request_queue.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/request_queue.py -------------------------------------------------------------------------------- /inference_perf/utils/trace_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/inference_perf/utils/trace_reader.py -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/pdm.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -e . # Install requirements from pyproject 2 | -------------------------------------------------------------------------------- /tests/apis/test_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/tests/apis/test_chat.py -------------------------------------------------------------------------------- /tests/apis/test_completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/tests/apis/test_completion.py -------------------------------------------------------------------------------- /tests/test_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/tests/test_config.py -------------------------------------------------------------------------------- /tests/test_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/tests/test_logger.py -------------------------------------------------------------------------------- /tests/test_trace_replay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kubernetes-sigs/inference-perf/HEAD/tests/test_trace_replay.py --------------------------------------------------------------------------------