├── .github └── workflows │ ├── publish.yml │ ├── pylint.yml │ ├── scripts │ ├── build.sh │ ├── create_release.js │ ├── cuda-install.sh │ ├── env.sh │ └── pytorch-install.sh │ └── yapf.yml ├── .gitignore ├── .pylintrc ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── benchmarks ├── README.md ├── benchmark_latency.py ├── benchmark_serving.py ├── benchmark_throughput.py └── launch_tgi_server.sh ├── csrc ├── activation.cpp ├── activation_kernels.cu ├── attention.cpp ├── attention │ ├── attention_dtypes.h │ ├── attention_generic.cuh │ ├── attention_kernels.cu │ ├── attention_utils.cuh │ ├── dtype_bfloat16.cuh │ ├── dtype_float16.cuh │ └── dtype_float32.cuh ├── cache.cpp ├── cache_kernels.cu ├── cuda_utils.cpp ├── cuda_utils_kernels.cu ├── dispatch_utils.h ├── layernorm.cpp ├── layernorm_kernels.cu ├── pos_encoding.cpp ├── pos_encoding_kernels.cu ├── quantization.cpp ├── quantization │ └── awq │ │ ├── dequantize.cuh │ │ └── gemm_kernels.cu └── reduction_utils.cuh ├── docs ├── Makefile ├── README.md ├── make.bat ├── requirements-docs.txt └── source │ ├── assets │ ├── figures │ │ ├── perf_a100_n1_dark.png │ │ ├── perf_a100_n1_light.png │ │ ├── perf_a100_n3_dark.png │ │ ├── perf_a100_n3_light.png │ │ ├── perf_a10g_n1_dark.png │ │ ├── perf_a10g_n1_light.png │ │ ├── perf_a10g_n3_dark.png │ │ └── perf_a10g_n3_light.png │ └── logos │ │ ├── vllm-logo-only-light.png │ │ ├── vllm-logo-text-dark.png │ │ └── vllm-logo-text-light.png │ ├── conf.py │ ├── getting_started │ ├── installation.rst │ └── quickstart.rst │ ├── index.rst │ ├── models │ ├── adding_model.rst │ └── supported_models.rst │ └── serving │ ├── deploying_with_triton.rst │ ├── distributed_serving.rst │ └── run_on_sky.rst ├── examples ├── api_client.py ├── gradio_webserver.py ├── llm_engine_example.py ├── offline_inference.py ├── openai_chatcompletion_client.py └── openai_completion_client.py ├── format.sh ├── mypy.ini ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── setup.py ├── tests ├── async_engine │ ├── api_server_async_engine.py │ ├── test_api_server.py │ ├── test_async_llm_engine.py │ └── test_request_tracker.py ├── conftest.py ├── distributed │ └── test_comm_ops.py ├── engine │ └── test_detokenize.py ├── kernels │ ├── conftest.py │ ├── test_activation.py │ ├── test_attention.py │ ├── test_cache.py │ ├── test_layernorm.py │ └── test_pos_encoding.py ├── models │ └── test_models.py └── samplers │ ├── test_beam_search.py │ └── test_sampler.py └── vllm ├── __init__.py ├── block.py ├── config.py ├── core ├── __init__.py ├── block_manager.py ├── policy.py └── scheduler.py ├── engine ├── __init__.py ├── arg_utils.py ├── async_llm_engine.py ├── llm_engine.py └── ray_utils.py ├── entrypoints ├── __init__.py ├── api_server.py ├── llm.py └── openai │ ├── __init__.py │ ├── api_server.py │ └── protocol.py ├── logger.py ├── model_executor ├── __init__.py ├── input_metadata.py ├── layers │ ├── __init__.py │ ├── activation.py │ ├── attention.py │ ├── layernorm.py │ ├── quantized_linear │ │ ├── __init__.py │ │ └── awq.py │ ├── rotary_embedding.py │ └── sampler.py ├── model_loader.py ├── models │ ├── __init__.py │ ├── aquila.py │ ├── baichuan.py │ ├── bloom.py │ ├── chatglm.py │ ├── falcon.py │ ├── gpt2.py │ ├── gpt_bigcode.py │ ├── gpt_j.py │ ├── gpt_neox.py │ ├── internlm.py │ ├── llama.py │ ├── mistral.py │ ├── mpt.py │ ├── opt.py │ └── qwen.py ├── parallel_utils │ ├── README.md │ ├── __init__.py │ ├── communication_op.py │ ├── layers.py │ ├── parallel_state.py │ └── utils.py ├── quantization_utils │ ├── __init__.py │ ├── awq.py │ └── base.py ├── utils.py └── weight_utils.py ├── outputs.py ├── sampling_params.py ├── sequence.py ├── transformers_utils ├── __init__.py ├── config.py ├── configs │ ├── __init__.py │ ├── aquila.py │ ├── baichuan.py │ ├── chatglm.py │ ├── falcon.py │ ├── mistral.py │ ├── mpt.py │ └── qwen.py └── tokenizer.py ├── utils.py └── worker ├── __init__.py ├── cache_engine.py └── worker.py /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.github/workflows/pylint.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/pylint.yml -------------------------------------------------------------------------------- /.github/workflows/scripts/build.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/scripts/build.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/create_release.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/scripts/create_release.js -------------------------------------------------------------------------------- /.github/workflows/scripts/cuda-install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/scripts/cuda-install.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/scripts/env.sh -------------------------------------------------------------------------------- /.github/workflows/scripts/pytorch-install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/scripts/pytorch-install.sh -------------------------------------------------------------------------------- /.github/workflows/yapf.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.github/workflows/yapf.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.gitignore -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.pylintrc -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/CONTRIBUTING.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/README.md -------------------------------------------------------------------------------- /benchmarks/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/benchmarks/README.md -------------------------------------------------------------------------------- /benchmarks/benchmark_latency.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/benchmarks/benchmark_latency.py -------------------------------------------------------------------------------- /benchmarks/benchmark_serving.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/benchmarks/benchmark_serving.py -------------------------------------------------------------------------------- /benchmarks/benchmark_throughput.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/benchmarks/benchmark_throughput.py -------------------------------------------------------------------------------- /benchmarks/launch_tgi_server.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/benchmarks/launch_tgi_server.sh -------------------------------------------------------------------------------- /csrc/activation.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/activation.cpp -------------------------------------------------------------------------------- /csrc/activation_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/activation_kernels.cu -------------------------------------------------------------------------------- /csrc/attention.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention.cpp -------------------------------------------------------------------------------- /csrc/attention/attention_dtypes.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/attention_dtypes.h -------------------------------------------------------------------------------- /csrc/attention/attention_generic.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/attention_generic.cuh -------------------------------------------------------------------------------- /csrc/attention/attention_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/attention_kernels.cu -------------------------------------------------------------------------------- /csrc/attention/attention_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/attention_utils.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_bfloat16.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/dtype_bfloat16.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_float16.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/dtype_float16.cuh -------------------------------------------------------------------------------- /csrc/attention/dtype_float32.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/attention/dtype_float32.cuh -------------------------------------------------------------------------------- /csrc/cache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/cache.cpp -------------------------------------------------------------------------------- /csrc/cache_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/cache_kernels.cu -------------------------------------------------------------------------------- /csrc/cuda_utils.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/cuda_utils.cpp -------------------------------------------------------------------------------- /csrc/cuda_utils_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/cuda_utils_kernels.cu -------------------------------------------------------------------------------- /csrc/dispatch_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/dispatch_utils.h -------------------------------------------------------------------------------- /csrc/layernorm.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/layernorm.cpp -------------------------------------------------------------------------------- /csrc/layernorm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/layernorm_kernels.cu -------------------------------------------------------------------------------- /csrc/pos_encoding.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/pos_encoding.cpp -------------------------------------------------------------------------------- /csrc/pos_encoding_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/pos_encoding_kernels.cu -------------------------------------------------------------------------------- /csrc/quantization.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/quantization.cpp -------------------------------------------------------------------------------- /csrc/quantization/awq/dequantize.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/quantization/awq/dequantize.cuh -------------------------------------------------------------------------------- /csrc/quantization/awq/gemm_kernels.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/quantization/awq/gemm_kernels.cu -------------------------------------------------------------------------------- /csrc/reduction_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/csrc/reduction_utils.cuh -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/README.md -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements-docs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/requirements-docs.txt -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a100_n1_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a100_n1_dark.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a100_n1_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a100_n1_light.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a100_n3_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a100_n3_dark.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a100_n3_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a100_n3_light.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a10g_n1_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a10g_n1_dark.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a10g_n1_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a10g_n1_light.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a10g_n3_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a10g_n3_dark.png -------------------------------------------------------------------------------- /docs/source/assets/figures/perf_a10g_n3_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/figures/perf_a10g_n3_light.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-only-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/logos/vllm-logo-only-light.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-text-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/logos/vllm-logo-text-dark.png -------------------------------------------------------------------------------- /docs/source/assets/logos/vllm-logo-text-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/assets/logos/vllm-logo-text-light.png -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/getting_started/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/getting_started/installation.rst -------------------------------------------------------------------------------- /docs/source/getting_started/quickstart.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/getting_started/quickstart.rst -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/models/adding_model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/models/adding_model.rst -------------------------------------------------------------------------------- /docs/source/models/supported_models.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/models/supported_models.rst -------------------------------------------------------------------------------- /docs/source/serving/deploying_with_triton.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/serving/deploying_with_triton.rst -------------------------------------------------------------------------------- /docs/source/serving/distributed_serving.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/serving/distributed_serving.rst -------------------------------------------------------------------------------- /docs/source/serving/run_on_sky.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/docs/source/serving/run_on_sky.rst -------------------------------------------------------------------------------- /examples/api_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/api_client.py -------------------------------------------------------------------------------- /examples/gradio_webserver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/gradio_webserver.py -------------------------------------------------------------------------------- /examples/llm_engine_example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/llm_engine_example.py -------------------------------------------------------------------------------- /examples/offline_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/offline_inference.py -------------------------------------------------------------------------------- /examples/openai_chatcompletion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/openai_chatcompletion_client.py -------------------------------------------------------------------------------- /examples/openai_completion_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/examples/openai_completion_client.py -------------------------------------------------------------------------------- /format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/format.sh -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/mypy.ini -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/setup.py -------------------------------------------------------------------------------- /tests/async_engine/api_server_async_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/async_engine/api_server_async_engine.py -------------------------------------------------------------------------------- /tests/async_engine/test_api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/async_engine/test_api_server.py -------------------------------------------------------------------------------- /tests/async_engine/test_async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/async_engine/test_async_llm_engine.py -------------------------------------------------------------------------------- /tests/async_engine/test_request_tracker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/async_engine/test_request_tracker.py -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/conftest.py -------------------------------------------------------------------------------- /tests/distributed/test_comm_ops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/distributed/test_comm_ops.py -------------------------------------------------------------------------------- /tests/engine/test_detokenize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/engine/test_detokenize.py -------------------------------------------------------------------------------- /tests/kernels/conftest.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/conftest.py -------------------------------------------------------------------------------- /tests/kernels/test_activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/test_activation.py -------------------------------------------------------------------------------- /tests/kernels/test_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/test_attention.py -------------------------------------------------------------------------------- /tests/kernels/test_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/test_cache.py -------------------------------------------------------------------------------- /tests/kernels/test_layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/test_layernorm.py -------------------------------------------------------------------------------- /tests/kernels/test_pos_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/kernels/test_pos_encoding.py -------------------------------------------------------------------------------- /tests/models/test_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/models/test_models.py -------------------------------------------------------------------------------- /tests/samplers/test_beam_search.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/samplers/test_beam_search.py -------------------------------------------------------------------------------- /tests/samplers/test_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/tests/samplers/test_sampler.py -------------------------------------------------------------------------------- /vllm/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/__init__.py -------------------------------------------------------------------------------- /vllm/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/block.py -------------------------------------------------------------------------------- /vllm/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/config.py -------------------------------------------------------------------------------- /vllm/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/core/block_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/core/block_manager.py -------------------------------------------------------------------------------- /vllm/core/policy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/core/policy.py -------------------------------------------------------------------------------- /vllm/core/scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/core/scheduler.py -------------------------------------------------------------------------------- /vllm/engine/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/engine/arg_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/engine/arg_utils.py -------------------------------------------------------------------------------- /vllm/engine/async_llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/engine/async_llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/llm_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/engine/llm_engine.py -------------------------------------------------------------------------------- /vllm/engine/ray_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/engine/ray_utils.py -------------------------------------------------------------------------------- /vllm/entrypoints/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/entrypoints/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/entrypoints/llm.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/entrypoints/openai/api_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/entrypoints/openai/api_server.py -------------------------------------------------------------------------------- /vllm/entrypoints/openai/protocol.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/entrypoints/openai/protocol.py -------------------------------------------------------------------------------- /vllm/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/logger.py -------------------------------------------------------------------------------- /vllm/model_executor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/input_metadata.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/input_metadata.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/layers/activation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/activation.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/attention.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/layernorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/layernorm.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantized_linear/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/quantized_linear/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/quantized_linear/awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/quantized_linear/awq.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/rotary_embedding.py -------------------------------------------------------------------------------- /vllm/model_executor/layers/sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/layers/sampler.py -------------------------------------------------------------------------------- /vllm/model_executor/model_loader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/model_loader.py -------------------------------------------------------------------------------- /vllm/model_executor/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/models/aquila.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/aquila.py -------------------------------------------------------------------------------- /vllm/model_executor/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/baichuan.py -------------------------------------------------------------------------------- /vllm/model_executor/models/bloom.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/bloom.py -------------------------------------------------------------------------------- /vllm/model_executor/models/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/chatglm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/falcon.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/gpt2.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/gpt_bigcode.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_j.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/gpt_j.py -------------------------------------------------------------------------------- /vllm/model_executor/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/gpt_neox.py -------------------------------------------------------------------------------- /vllm/model_executor/models/internlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/internlm.py -------------------------------------------------------------------------------- /vllm/model_executor/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/llama.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/mistral.py -------------------------------------------------------------------------------- /vllm/model_executor/models/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/mpt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/opt.py -------------------------------------------------------------------------------- /vllm/model_executor/models/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/models/qwen.py -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/parallel_utils/README.md -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/communication_op.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/parallel_utils/communication_op.py -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/parallel_utils/layers.py -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/parallel_state.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/parallel_utils/parallel_state.py -------------------------------------------------------------------------------- /vllm/model_executor/parallel_utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/parallel_utils/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/quantization_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/quantization_utils/__init__.py -------------------------------------------------------------------------------- /vllm/model_executor/quantization_utils/awq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/quantization_utils/awq.py -------------------------------------------------------------------------------- /vllm/model_executor/quantization_utils/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/quantization_utils/base.py -------------------------------------------------------------------------------- /vllm/model_executor/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/utils.py -------------------------------------------------------------------------------- /vllm/model_executor/weight_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/model_executor/weight_utils.py -------------------------------------------------------------------------------- /vllm/outputs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/outputs.py -------------------------------------------------------------------------------- /vllm/sampling_params.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/sampling_params.py -------------------------------------------------------------------------------- /vllm/sequence.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/sequence.py -------------------------------------------------------------------------------- /vllm/transformers_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/transformers_utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/config.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/__init__.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/aquila.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/aquila.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/baichuan.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/chatglm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/chatglm.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/falcon.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mistral.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/mistral.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/mpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/mpt.py -------------------------------------------------------------------------------- /vllm/transformers_utils/configs/qwen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/configs/qwen.py -------------------------------------------------------------------------------- /vllm/transformers_utils/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/transformers_utils/tokenizer.py -------------------------------------------------------------------------------- /vllm/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/utils.py -------------------------------------------------------------------------------- /vllm/worker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /vllm/worker/cache_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/worker/cache_engine.py -------------------------------------------------------------------------------- /vllm/worker/worker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yanxiyue/vllm/HEAD/vllm/worker/worker.py --------------------------------------------------------------------------------