├── .clang-format ├── .gitignore ├── .gitmodules ├── .isort.cfg ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets ├── inference-result.png └── performance.jpg ├── examples ├── llama │ ├── build.py │ └── weight.py ├── run_conversation.py ├── torch_streamingllm │ ├── README.md │ └── run_streaming_llama.py └── utils.py ├── pytest.ini ├── requirements ├── requirements-test.txt └── requirements.txt ├── scripts ├── build_trt_llm.py └── build_trt_llm.sh ├── setup.py ├── swiftinfer ├── __init__.py ├── layers │ ├── __init__.py │ └── llama_attention.py ├── models │ ├── __init__.py │ ├── generation_utils.py │ └── llama │ │ ├── __init__.py │ │ └── modeling_llama.py └── runtime │ ├── __init__.py │ ├── generation.py │ ├── kv_cache.py │ └── model_runner.py ├── tests └── test_layers │ └── test_attn.py └── version.txt /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/.gitmodules -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/.isort.cfg -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/README.md -------------------------------------------------------------------------------- /assets/inference-result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/assets/inference-result.png -------------------------------------------------------------------------------- /assets/performance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/assets/performance.jpg -------------------------------------------------------------------------------- /examples/llama/build.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/llama/build.py -------------------------------------------------------------------------------- /examples/llama/weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/llama/weight.py -------------------------------------------------------------------------------- /examples/run_conversation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/run_conversation.py -------------------------------------------------------------------------------- /examples/torch_streamingllm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/torch_streamingllm/README.md -------------------------------------------------------------------------------- /examples/torch_streamingllm/run_streaming_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/torch_streamingllm/run_streaming_llama.py -------------------------------------------------------------------------------- /examples/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/examples/utils.py -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | addopts = --ignore=3rdparty 3 | -------------------------------------------------------------------------------- /requirements/requirements-test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | -------------------------------------------------------------------------------- /requirements/requirements.txt: -------------------------------------------------------------------------------- 1 | tensorrt-llm==0.6.0 2 | torch 3 | charset-normalizer==2.0.4 -------------------------------------------------------------------------------- /scripts/build_trt_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/scripts/build_trt_llm.py -------------------------------------------------------------------------------- /scripts/build_trt_llm.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/scripts/build_trt_llm.sh -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/setup.py -------------------------------------------------------------------------------- /swiftinfer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /swiftinfer/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/layers/__init__.py -------------------------------------------------------------------------------- /swiftinfer/layers/llama_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/layers/llama_attention.py -------------------------------------------------------------------------------- /swiftinfer/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/models/__init__.py -------------------------------------------------------------------------------- /swiftinfer/models/generation_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/models/generation_utils.py -------------------------------------------------------------------------------- /swiftinfer/models/llama/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/models/llama/__init__.py -------------------------------------------------------------------------------- /swiftinfer/models/llama/modeling_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/models/llama/modeling_llama.py -------------------------------------------------------------------------------- /swiftinfer/runtime/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/runtime/__init__.py -------------------------------------------------------------------------------- /swiftinfer/runtime/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/runtime/generation.py -------------------------------------------------------------------------------- /swiftinfer/runtime/kv_cache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/runtime/kv_cache.py -------------------------------------------------------------------------------- /swiftinfer/runtime/model_runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/swiftinfer/runtime/model_runner.py -------------------------------------------------------------------------------- /tests/test_layers/test_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcaitech/SwiftInfer/HEAD/tests/test_layers/test_attn.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 0.0.1 2 | --------------------------------------------------------------------------------