├── .clang-format ├── .clangd ├── .github └── workflows │ ├── gpu-test.yml │ ├── release-please.yml │ └── release_wheel.yml ├── .gitignore ├── .gitmodules ├── .release-please-manifest.json ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets ├── backbone-vs-sgmv.png ├── punica-tui-demo.mp4 ├── sgmv.png └── textgen.png ├── benchmarks ├── __init__.py ├── bench_backbone_vs_lora.py ├── bench_batch_decode.py ├── bench_layer_lora_decode.py ├── bench_lora_op_impls.py ├── bench_model_lora_decode.py ├── bench_model_prefill_decode.py ├── bench_sgmv.py ├── bench_sgmv_cutlass.py ├── bench_textgen.py ├── bench_textgen_lora.py ├── bench_textgen_lora_all.py ├── benchmark_utils.py ├── fastertransformer │ ├── .clang-format │ ├── CMakeLists.txt │ ├── __init__.py │ ├── build_ext.py │ ├── ft_llama.cc │ ├── ft_llama.h │ └── ft_pybind11.cc ├── nvbench │ ├── CMakeLists.txt │ ├── sgmv.cu │ └── sgmv_flashinfer.cu └── runft.py ├── ci ├── ci-punica.env.example ├── ci-punica.service ├── run-ci-build-wheel.bash ├── run-ci-gpu-tests.bash └── test-run-ci.bash ├── csrc ├── bgmv │ ├── bgmv_all.cu │ ├── bgmv_config.h │ └── bgmv_impl.cuh ├── flashinfer_adapter │ ├── flashinfer_all.cu │ ├── flashinfer_config.h │ └── flashinfer_decl.h ├── punica_ops.cc ├── rms_norm │ ├── rms_norm.h │ └── rms_norm_cutlass.cu ├── sgmv │ ├── sgmv.h │ ├── sgmv_cutlass.cu │ └── sgmv_cutlass.cuh └── sgmv_flashinfer │ ├── sgmv_all.cu │ ├── sgmv_config.h │ └── sgmv_flashinfer.cuh ├── examples ├── finetune │ ├── README.md │ ├── create-finetune-data.py │ ├── data │ │ └── dataset_info.json │ ├── dataset_info.json │ ├── finetune.sh │ └── run-llmtuner.py ├── textgen.py ├── textgen_lora.py └── tui-multi-lora.py ├── licenses ├── LICENSE.cutlass.txt ├── LICENSE.flashinfer.txt └── LICENSE.nvbench.txt ├── pyproject.toml ├── release-please-config.json ├── setup.py ├── src └── punica │ ├── __init__.py │ ├── models │ ├── __init__.py │ ├── llama.py │ └── llama_lora.py │ ├── ops │ └── __init__.py │ └── utils │ ├── __init__.py │ ├── cat_tensor.py │ ├── convert_lora_weight.py │ ├── kvcache.py │ └── lora.py ├── tests ├── test_bgmv.py ├── test_flashinfer.py ├── test_kvcache.py ├── test_rms_norm.py ├── test_sgmv.py └── test_sgmv_cutlass.py └── version.txt /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.clangd -------------------------------------------------------------------------------- /.github/workflows/gpu-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.github/workflows/gpu-test.yml -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.github/workflows/release-please.yml -------------------------------------------------------------------------------- /.github/workflows/release_wheel.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.github/workflows/release_wheel.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/.gitmodules -------------------------------------------------------------------------------- /.release-please-manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | ".": "1.1.0" 3 | } 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/CHANGELOG.md -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/README.md -------------------------------------------------------------------------------- /assets/backbone-vs-sgmv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/assets/backbone-vs-sgmv.png -------------------------------------------------------------------------------- /assets/punica-tui-demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/assets/punica-tui-demo.mp4 -------------------------------------------------------------------------------- /assets/sgmv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/assets/sgmv.png -------------------------------------------------------------------------------- /assets/textgen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/assets/textgen.png -------------------------------------------------------------------------------- /benchmarks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /benchmarks/bench_backbone_vs_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_backbone_vs_lora.py -------------------------------------------------------------------------------- /benchmarks/bench_batch_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_batch_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_layer_lora_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_layer_lora_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_lora_op_impls.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_lora_op_impls.py -------------------------------------------------------------------------------- /benchmarks/bench_model_lora_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_model_lora_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_model_prefill_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_model_prefill_decode.py -------------------------------------------------------------------------------- /benchmarks/bench_sgmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_sgmv.py -------------------------------------------------------------------------------- /benchmarks/bench_sgmv_cutlass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_sgmv_cutlass.py -------------------------------------------------------------------------------- /benchmarks/bench_textgen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_textgen.py -------------------------------------------------------------------------------- /benchmarks/bench_textgen_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_textgen_lora.py -------------------------------------------------------------------------------- /benchmarks/bench_textgen_lora_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/bench_textgen_lora_all.py -------------------------------------------------------------------------------- /benchmarks/benchmark_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/benchmark_utils.py -------------------------------------------------------------------------------- /benchmarks/fastertransformer/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/.clang-format -------------------------------------------------------------------------------- /benchmarks/fastertransformer/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/CMakeLists.txt -------------------------------------------------------------------------------- /benchmarks/fastertransformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/__init__.py -------------------------------------------------------------------------------- /benchmarks/fastertransformer/build_ext.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/build_ext.py -------------------------------------------------------------------------------- /benchmarks/fastertransformer/ft_llama.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/ft_llama.cc -------------------------------------------------------------------------------- /benchmarks/fastertransformer/ft_llama.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/ft_llama.h -------------------------------------------------------------------------------- /benchmarks/fastertransformer/ft_pybind11.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/fastertransformer/ft_pybind11.cc -------------------------------------------------------------------------------- /benchmarks/nvbench/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/nvbench/CMakeLists.txt -------------------------------------------------------------------------------- /benchmarks/nvbench/sgmv.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/nvbench/sgmv.cu -------------------------------------------------------------------------------- /benchmarks/nvbench/sgmv_flashinfer.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/nvbench/sgmv_flashinfer.cu -------------------------------------------------------------------------------- /benchmarks/runft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/benchmarks/runft.py -------------------------------------------------------------------------------- /ci/ci-punica.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/ci/ci-punica.env.example -------------------------------------------------------------------------------- /ci/ci-punica.service: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/ci/ci-punica.service -------------------------------------------------------------------------------- /ci/run-ci-build-wheel.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/ci/run-ci-build-wheel.bash -------------------------------------------------------------------------------- /ci/run-ci-gpu-tests.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/ci/run-ci-gpu-tests.bash -------------------------------------------------------------------------------- /ci/test-run-ci.bash: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/ci/test-run-ci.bash -------------------------------------------------------------------------------- /csrc/bgmv/bgmv_all.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/bgmv/bgmv_all.cu -------------------------------------------------------------------------------- /csrc/bgmv/bgmv_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/bgmv/bgmv_config.h -------------------------------------------------------------------------------- /csrc/bgmv/bgmv_impl.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/bgmv/bgmv_impl.cuh -------------------------------------------------------------------------------- /csrc/flashinfer_adapter/flashinfer_all.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/flashinfer_adapter/flashinfer_all.cu -------------------------------------------------------------------------------- /csrc/flashinfer_adapter/flashinfer_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/flashinfer_adapter/flashinfer_config.h -------------------------------------------------------------------------------- /csrc/flashinfer_adapter/flashinfer_decl.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/flashinfer_adapter/flashinfer_decl.h -------------------------------------------------------------------------------- /csrc/punica_ops.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/punica_ops.cc -------------------------------------------------------------------------------- /csrc/rms_norm/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/rms_norm/rms_norm.h -------------------------------------------------------------------------------- /csrc/rms_norm/rms_norm_cutlass.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/rms_norm/rms_norm_cutlass.cu -------------------------------------------------------------------------------- /csrc/sgmv/sgmv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv/sgmv.h -------------------------------------------------------------------------------- /csrc/sgmv/sgmv_cutlass.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv/sgmv_cutlass.cu -------------------------------------------------------------------------------- /csrc/sgmv/sgmv_cutlass.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv/sgmv_cutlass.cuh -------------------------------------------------------------------------------- /csrc/sgmv_flashinfer/sgmv_all.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv_flashinfer/sgmv_all.cu -------------------------------------------------------------------------------- /csrc/sgmv_flashinfer/sgmv_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv_flashinfer/sgmv_config.h -------------------------------------------------------------------------------- /csrc/sgmv_flashinfer/sgmv_flashinfer.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/csrc/sgmv_flashinfer/sgmv_flashinfer.cuh -------------------------------------------------------------------------------- /examples/finetune/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/README.md -------------------------------------------------------------------------------- /examples/finetune/create-finetune-data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/create-finetune-data.py -------------------------------------------------------------------------------- /examples/finetune/data/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/data/dataset_info.json -------------------------------------------------------------------------------- /examples/finetune/dataset_info.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/dataset_info.json -------------------------------------------------------------------------------- /examples/finetune/finetune.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/finetune.sh -------------------------------------------------------------------------------- /examples/finetune/run-llmtuner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/finetune/run-llmtuner.py -------------------------------------------------------------------------------- /examples/textgen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/textgen.py -------------------------------------------------------------------------------- /examples/textgen_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/textgen_lora.py -------------------------------------------------------------------------------- /examples/tui-multi-lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/examples/tui-multi-lora.py -------------------------------------------------------------------------------- /licenses/LICENSE.cutlass.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/licenses/LICENSE.cutlass.txt -------------------------------------------------------------------------------- /licenses/LICENSE.flashinfer.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/licenses/LICENSE.flashinfer.txt -------------------------------------------------------------------------------- /licenses/LICENSE.nvbench.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/licenses/LICENSE.nvbench.txt -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/pyproject.toml -------------------------------------------------------------------------------- /release-please-config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/release-please-config.json -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/setup.py -------------------------------------------------------------------------------- /src/punica/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/__init__.py -------------------------------------------------------------------------------- /src/punica/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/punica/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/models/llama.py -------------------------------------------------------------------------------- /src/punica/models/llama_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/models/llama_lora.py -------------------------------------------------------------------------------- /src/punica/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/ops/__init__.py -------------------------------------------------------------------------------- /src/punica/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/utils/__init__.py -------------------------------------------------------------------------------- /src/punica/utils/cat_tensor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/utils/cat_tensor.py -------------------------------------------------------------------------------- /src/punica/utils/convert_lora_weight.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/utils/convert_lora_weight.py -------------------------------------------------------------------------------- /src/punica/utils/kvcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/utils/kvcache.py -------------------------------------------------------------------------------- /src/punica/utils/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/src/punica/utils/lora.py -------------------------------------------------------------------------------- /tests/test_bgmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_bgmv.py -------------------------------------------------------------------------------- /tests/test_flashinfer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_flashinfer.py -------------------------------------------------------------------------------- /tests/test_kvcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_kvcache.py -------------------------------------------------------------------------------- /tests/test_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_rms_norm.py -------------------------------------------------------------------------------- /tests/test_sgmv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_sgmv.py -------------------------------------------------------------------------------- /tests/test_sgmv_cutlass.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/punica-ai/punica/HEAD/tests/test_sgmv_cutlass.py -------------------------------------------------------------------------------- /version.txt: -------------------------------------------------------------------------------- 1 | 1.1.0 2 | --------------------------------------------------------------------------------