├── .github ├── CODEOWNERS ├── pull_request_template.md └── workflows │ ├── add_label_automerge.yml │ ├── build_and_publish.yaml │ ├── build_docker.yml │ ├── build_docs.yml │ ├── lint_actions.yml │ ├── lint_code.yml │ ├── lint_docs.yml │ ├── lint_scripts.yml │ ├── matchers │ ├── mypy.json │ └── ruff.json │ ├── preview_docs.yml │ ├── publish_docs.yml │ ├── publish_to_test_pypi.yaml │ ├── reminder_comment.yml │ ├── stale.yml │ ├── test.yml │ └── type_check.yaml ├── .gitignore ├── .readthedocs.yaml ├── .shellcheckrc ├── .yapfignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DCO ├── LICENSE ├── README.md ├── RELEASING.md ├── _local_envs_for_test.sh ├── docker ├── .senlib.json ├── Dockerfile.amd64 └── simple_vllm_serve.sh ├── docs ├── .nav.yml ├── README.md ├── contributing │ ├── README.md │ ├── architecture.md │ ├── continuous_batching │ │ ├── overview.md │ │ └── tests │ │ │ ├── other_tests.md │ │ │ ├── output_tests.md │ │ │ └── scheduler_steps_tests.md │ └── images │ │ ├── vllm_v1.svg │ │ └── vllm_v1_spyre.svg ├── deploying │ ├── docker.md │ ├── k8s.md │ └── rhoai.md ├── getting_started │ └── installation.md ├── mkdocs │ └── hooks │ │ ├── generate_examples.py │ │ └── url_schemes.py ├── requirements-docs.txt ├── roadmaps │ └── q3-2025.md └── user_guide │ ├── configuration.md │ ├── env_vars.md │ ├── supported_features.md │ └── supported_models.md ├── examples ├── offline_inference │ ├── cb_spyre_inference.py │ ├── long_context.py │ └── spyre_inference.py ├── offline_inference_spyre.ipynb ├── online_inference │ ├── openai_spyre_inference.py │ ├── spyre_vllm_benchmark.py │ └── spyre_vllm_setup_container.sh ├── online_inference_spyre.ipynb └── online_inference_spyre_multiple.ipynb ├── format.sh ├── mkdocs.yaml ├── pyproject.toml ├── tests ├── aftu │ ├── graph_compare_utils.py │ └── test_compare_graphs.py ├── conftest.py ├── download_model_configs.py ├── e2e │ ├── cache │ │ ├── prompts_16k_bs1.pickle │ │ ├── prompts_32k_bs1.pickle │ │ └── prompts_8k_bs2.pickle │ ├── test_chunked_prefill.py │ ├── test_chunked_prefill_tkv_steps.py │ ├── test_logits_processors.py │ ├── test_sampling_params.py │ ├── test_spyre_async_llm.py │ ├── test_spyre_basic.py │ ├── test_spyre_cb.py │ ├── test_spyre_cb_scheduler_steps.py │ ├── test_spyre_cp_scheduler_steps.py │ ├── test_spyre_embeddings.py │ ├── test_spyre_max_new_tokens.py │ ├── test_spyre_online.py │ ├── test_spyre_prompt_logprobs.py │ ├── test_spyre_scoring.py │ ├── test_spyre_seed.py │ ├── test_spyre_stagger_basic.py │ ├── test_spyre_static_batching_limits.py │ ├── test_spyre_warmup_shapes.py │ └── test_stats_logger.py ├── fixtures │ └── model_configs │ │ ├── BAAI │ │ ├── bge-reranker-large │ │ │ └── config.json │ │ └── bge-reranker-v2-m3 │ │ │ └── config.json │ │ ├── ibm-ai-platform │ │ └── micro-g3.3-8b-instruct-1b │ │ │ └── config.json │ │ ├── ibm-granite │ │ ├── granite-3.3-8b-instruct-FP8 │ │ │ └── config.json │ │ ├── granite-3.3-8b-instruct │ │ │ └── config.json │ │ ├── granite-embedding-125m-english │ │ │ └── config.json │ │ └── granite-embedding-278m-multilingual │ │ │ └── config.json │ │ ├── intfloat │ │ └── multilingual-e5-large │ │ │ └── config.json │ │ └── sentence-transformers │ │ └── all-roberta-large-v1 │ │ └── config.json ├── hf_cache.json ├── hf_result_cache.py ├── llm_cache.py ├── llm_cache_util.py ├── models │ └── test_granite.py ├── output_util.py ├── precompilation │ └── test_disable_compilation.py ├── scheduling_utils.py ├── spyre_util.py ├── utils │ ├── test_envs.py │ ├── test_golden_token_injector.py │ ├── test_model_config_validator.py │ ├── test_spyre_model_list.py │ └── test_upstream_compatibility.py └── v1 │ └── worker │ └── test_spyre_input_batch.py ├── tools ├── check_aiu.sh ├── check_repo.sh ├── download_model.py ├── lint_actions.sh ├── lint_docs.sh ├── lint_scripts.sh ├── report_build_time_ninja.py └── type_check.sh ├── uv.lock └── vllm_spyre ├── __init__.py ├── compat_utils.py ├── compilation_utils.py ├── config ├── __init__.py ├── known_model_configs.json ├── runtime_config_validator.py └── supported_configs.yaml ├── envs.py ├── model_executor ├── __init__.py └── model_loader │ ├── __init__.py │ ├── spyre.py │ └── spyre_setup.py ├── perf_metrics.py ├── platform.py ├── utils.py └── v1 ├── __init__.py ├── core ├── __init__.py └── scheduler.py ├── metrics ├── __init__.py └── stats_logger.py ├── sample ├── golden_token_injector.py └── spyre_logits_processor.py └── worker ├── __init__.py ├── spyre_input_batch.py ├── spyre_model_runner.py └── spyre_worker.py /.github/CODEOWNERS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/CODEOWNERS -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/pull_request_template.md -------------------------------------------------------------------------------- /.github/workflows/add_label_automerge.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/add_label_automerge.yml -------------------------------------------------------------------------------- /.github/workflows/build_and_publish.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/build_and_publish.yaml -------------------------------------------------------------------------------- /.github/workflows/build_docker.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/build_docker.yml -------------------------------------------------------------------------------- /.github/workflows/build_docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/build_docs.yml -------------------------------------------------------------------------------- /.github/workflows/lint_actions.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/lint_actions.yml -------------------------------------------------------------------------------- /.github/workflows/lint_code.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/lint_code.yml -------------------------------------------------------------------------------- /.github/workflows/lint_docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/lint_docs.yml -------------------------------------------------------------------------------- /.github/workflows/lint_scripts.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/lint_scripts.yml -------------------------------------------------------------------------------- /.github/workflows/matchers/mypy.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/matchers/mypy.json -------------------------------------------------------------------------------- /.github/workflows/matchers/ruff.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/matchers/ruff.json -------------------------------------------------------------------------------- /.github/workflows/preview_docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/preview_docs.yml -------------------------------------------------------------------------------- /.github/workflows/publish_docs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/publish_docs.yml -------------------------------------------------------------------------------- /.github/workflows/publish_to_test_pypi.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/publish_to_test_pypi.yaml -------------------------------------------------------------------------------- /.github/workflows/reminder_comment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/reminder_comment.yml -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/stale.yml -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/test.yml -------------------------------------------------------------------------------- /.github/workflows/type_check.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.github/workflows/type_check.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.gitignore -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.shellcheckrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.shellcheckrc -------------------------------------------------------------------------------- /.yapfignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/.yapfignore -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/DCO -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/README.md -------------------------------------------------------------------------------- /RELEASING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/RELEASING.md -------------------------------------------------------------------------------- /_local_envs_for_test.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/_local_envs_for_test.sh -------------------------------------------------------------------------------- /docker/.senlib.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docker/.senlib.json -------------------------------------------------------------------------------- /docker/Dockerfile.amd64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docker/Dockerfile.amd64 -------------------------------------------------------------------------------- /docker/simple_vllm_serve.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docker/simple_vllm_serve.sh -------------------------------------------------------------------------------- /docs/.nav.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/.nav.yml -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/contributing/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/README.md -------------------------------------------------------------------------------- /docs/contributing/architecture.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/architecture.md -------------------------------------------------------------------------------- /docs/contributing/continuous_batching/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/continuous_batching/overview.md -------------------------------------------------------------------------------- /docs/contributing/continuous_batching/tests/other_tests.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/continuous_batching/tests/other_tests.md -------------------------------------------------------------------------------- /docs/contributing/continuous_batching/tests/output_tests.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/continuous_batching/tests/output_tests.md -------------------------------------------------------------------------------- /docs/contributing/continuous_batching/tests/scheduler_steps_tests.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/continuous_batching/tests/scheduler_steps_tests.md -------------------------------------------------------------------------------- /docs/contributing/images/vllm_v1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/images/vllm_v1.svg -------------------------------------------------------------------------------- /docs/contributing/images/vllm_v1_spyre.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/contributing/images/vllm_v1_spyre.svg -------------------------------------------------------------------------------- /docs/deploying/docker.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/deploying/docker.md -------------------------------------------------------------------------------- /docs/deploying/k8s.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/deploying/k8s.md -------------------------------------------------------------------------------- /docs/deploying/rhoai.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/deploying/rhoai.md -------------------------------------------------------------------------------- /docs/getting_started/installation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/getting_started/installation.md -------------------------------------------------------------------------------- /docs/mkdocs/hooks/generate_examples.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/mkdocs/hooks/generate_examples.py -------------------------------------------------------------------------------- /docs/mkdocs/hooks/url_schemes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/mkdocs/hooks/url_schemes.py -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/roadmaps/q3-2025.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/roadmaps/q3-2025.md -------------------------------------------------------------------------------- /docs/user_guide/configuration.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/user_guide/configuration.md -------------------------------------------------------------------------------- /docs/user_guide/env_vars.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/user_guide/env_vars.md -------------------------------------------------------------------------------- /docs/user_guide/supported_features.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/user_guide/supported_features.md -------------------------------------------------------------------------------- /docs/user_guide/supported_models.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/docs/user_guide/supported_models.md -------------------------------------------------------------------------------- /examples/offline_inference/cb_spyre_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/offline_inference/cb_spyre_inference.py -------------------------------------------------------------------------------- /examples/offline_inference/long_context.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/offline_inference/long_context.py -------------------------------------------------------------------------------- /examples/offline_inference/spyre_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/offline_inference/spyre_inference.py -------------------------------------------------------------------------------- /examples/offline_inference_spyre.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/offline_inference_spyre.ipynb -------------------------------------------------------------------------------- /examples/online_inference/openai_spyre_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/online_inference/openai_spyre_inference.py -------------------------------------------------------------------------------- /examples/online_inference/spyre_vllm_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/online_inference/spyre_vllm_benchmark.py -------------------------------------------------------------------------------- /examples/online_inference/spyre_vllm_setup_container.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/online_inference/spyre_vllm_setup_container.sh -------------------------------------------------------------------------------- /examples/online_inference_spyre.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/online_inference_spyre.ipynb -------------------------------------------------------------------------------- /examples/online_inference_spyre_multiple.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/examples/online_inference_spyre_multiple.ipynb -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/format.sh -------------------------------------------------------------------------------- /mkdocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/mkdocs.yaml -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/pyproject.toml -------------------------------------------------------------------------------- /tests/aftu/graph_compare_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/aftu/graph_compare_utils.py -------------------------------------------------------------------------------- /tests/aftu/test_compare_graphs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/aftu/test_compare_graphs.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/download_model_configs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/download_model_configs.py -------------------------------------------------------------------------------- /tests/e2e/cache/prompts_16k_bs1.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/cache/prompts_16k_bs1.pickle -------------------------------------------------------------------------------- /tests/e2e/cache/prompts_32k_bs1.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/cache/prompts_32k_bs1.pickle -------------------------------------------------------------------------------- /tests/e2e/cache/prompts_8k_bs2.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/cache/prompts_8k_bs2.pickle -------------------------------------------------------------------------------- /tests/e2e/test_chunked_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_chunked_prefill.py -------------------------------------------------------------------------------- /tests/e2e/test_chunked_prefill_tkv_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_chunked_prefill_tkv_steps.py -------------------------------------------------------------------------------- /tests/e2e/test_logits_processors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_logits_processors.py -------------------------------------------------------------------------------- /tests/e2e/test_sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_sampling_params.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_async_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_async_llm.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_basic.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_cb.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_cb.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_cb_scheduler_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_cb_scheduler_steps.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_cp_scheduler_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_cp_scheduler_steps.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_embeddings.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_max_new_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_max_new_tokens.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_online.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_online.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_prompt_logprobs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_prompt_logprobs.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_scoring.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_scoring.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_seed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_seed.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_stagger_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_stagger_basic.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_static_batching_limits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_static_batching_limits.py -------------------------------------------------------------------------------- /tests/e2e/test_spyre_warmup_shapes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_spyre_warmup_shapes.py -------------------------------------------------------------------------------- /tests/e2e/test_stats_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/e2e/test_stats_logger.py -------------------------------------------------------------------------------- /tests/fixtures/model_configs/BAAI/bge-reranker-large/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/BAAI/bge-reranker-large/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/BAAI/bge-reranker-v2-m3/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/BAAI/bge-reranker-v2-m3/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/ibm-ai-platform/micro-g3.3-8b-instruct-1b/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/ibm-ai-platform/micro-g3.3-8b-instruct-1b/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/ibm-granite/granite-3.3-8b-instruct-FP8/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/ibm-granite/granite-3.3-8b-instruct-FP8/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/ibm-granite/granite-3.3-8b-instruct/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/ibm-granite/granite-3.3-8b-instruct/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/ibm-granite/granite-embedding-125m-english/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/ibm-granite/granite-embedding-125m-english/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/ibm-granite/granite-embedding-278m-multilingual/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/ibm-granite/granite-embedding-278m-multilingual/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/intfloat/multilingual-e5-large/config.json -------------------------------------------------------------------------------- /tests/fixtures/model_configs/sentence-transformers/all-roberta-large-v1/config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/fixtures/model_configs/sentence-transformers/all-roberta-large-v1/config.json -------------------------------------------------------------------------------- /tests/hf_cache.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/hf_cache.json -------------------------------------------------------------------------------- /tests/hf_result_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/hf_result_cache.py -------------------------------------------------------------------------------- /tests/llm_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/llm_cache.py -------------------------------------------------------------------------------- /tests/llm_cache_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/llm_cache_util.py -------------------------------------------------------------------------------- /tests/models/test_granite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/models/test_granite.py -------------------------------------------------------------------------------- /tests/output_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/output_util.py -------------------------------------------------------------------------------- /tests/precompilation/test_disable_compilation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/precompilation/test_disable_compilation.py -------------------------------------------------------------------------------- /tests/scheduling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/scheduling_utils.py -------------------------------------------------------------------------------- /tests/spyre_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/spyre_util.py -------------------------------------------------------------------------------- /tests/utils/test_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/utils/test_envs.py -------------------------------------------------------------------------------- /tests/utils/test_golden_token_injector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/utils/test_golden_token_injector.py -------------------------------------------------------------------------------- /tests/utils/test_model_config_validator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/utils/test_model_config_validator.py -------------------------------------------------------------------------------- /tests/utils/test_spyre_model_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/utils/test_spyre_model_list.py -------------------------------------------------------------------------------- /tests/utils/test_upstream_compatibility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/utils/test_upstream_compatibility.py -------------------------------------------------------------------------------- /tests/v1/worker/test_spyre_input_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tests/v1/worker/test_spyre_input_batch.py -------------------------------------------------------------------------------- /tools/check_aiu.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/check_aiu.sh -------------------------------------------------------------------------------- /tools/check_repo.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/check_repo.sh -------------------------------------------------------------------------------- /tools/download_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/download_model.py -------------------------------------------------------------------------------- /tools/lint_actions.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/lint_actions.sh -------------------------------------------------------------------------------- /tools/lint_docs.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/lint_docs.sh -------------------------------------------------------------------------------- /tools/lint_scripts.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/lint_scripts.sh -------------------------------------------------------------------------------- /tools/report_build_time_ninja.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/report_build_time_ninja.py -------------------------------------------------------------------------------- /tools/type_check.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/tools/type_check.sh -------------------------------------------------------------------------------- /uv.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/uv.lock -------------------------------------------------------------------------------- /vllm_spyre/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/__init__.py -------------------------------------------------------------------------------- /vllm_spyre/compat_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/compat_utils.py -------------------------------------------------------------------------------- /vllm_spyre/compilation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/compilation_utils.py -------------------------------------------------------------------------------- /vllm_spyre/config/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_spyre/config/known_model_configs.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/config/known_model_configs.json -------------------------------------------------------------------------------- /vllm_spyre/config/runtime_config_validator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/config/runtime_config_validator.py -------------------------------------------------------------------------------- /vllm_spyre/config/supported_configs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/config/supported_configs.yaml -------------------------------------------------------------------------------- /vllm_spyre/envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/envs.py -------------------------------------------------------------------------------- /vllm_spyre/model_executor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_spyre/model_executor/model_loader/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_spyre/model_executor/model_loader/spyre.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/model_executor/model_loader/spyre.py -------------------------------------------------------------------------------- /vllm_spyre/model_executor/model_loader/spyre_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/model_executor/model_loader/spyre_setup.py -------------------------------------------------------------------------------- /vllm_spyre/perf_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/perf_metrics.py -------------------------------------------------------------------------------- /vllm_spyre/platform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/platform.py -------------------------------------------------------------------------------- /vllm_spyre/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/utils.py -------------------------------------------------------------------------------- /vllm_spyre/v1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/__init__.py -------------------------------------------------------------------------------- /vllm_spyre/v1/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_spyre/v1/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/core/scheduler.py -------------------------------------------------------------------------------- /vllm_spyre/v1/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/metrics/__init__.py -------------------------------------------------------------------------------- /vllm_spyre/v1/metrics/stats_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/metrics/stats_logger.py -------------------------------------------------------------------------------- /vllm_spyre/v1/sample/golden_token_injector.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/sample/golden_token_injector.py -------------------------------------------------------------------------------- /vllm_spyre/v1/sample/spyre_logits_processor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/sample/spyre_logits_processor.py -------------------------------------------------------------------------------- /vllm_spyre/v1/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm_spyre/v1/worker/spyre_input_batch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/worker/spyre_input_batch.py -------------------------------------------------------------------------------- /vllm_spyre/v1/worker/spyre_model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/worker/spyre_model_runner.py -------------------------------------------------------------------------------- /vllm_spyre/v1/worker/spyre_worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vllm-project/vllm-spyre/HEAD/vllm_spyre/v1/worker/spyre_worker.py --------------------------------------------------------------------------------