├── .dev ├── .clang-format ├── .clang_format.hook ├── .cpplint_pre_commit.hook ├── .pre-commit-config-cpp.yaml ├── .pre-commit-config.yaml ├── clear.sh ├── commit-prepare.sh ├── init_dev.sh ├── init_prod.sh ├── init_prod_mini.sh ├── install.sh └── uninstall.sh ├── .github ├── .gitignore └── workflows │ └── issue.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── MANIFEST.in ├── README.md ├── bench ├── .gitignore ├── NVIDIA_A30.png ├── NVIDIA_A30_ffpa+acc+f16+L1_Speedup.png ├── NVIDIA_A30_ffpa+acc+f32+L1_Speedup.png ├── NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2.png ├── NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f16+L1_Speedup.png ├── NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f32+L1_Speedup.png ├── NVIDIA_GeForce_RTX_4090.png ├── NVIDIA_GeForce_RTX_4090_ffpa+acc+f16+L1_Speedup.png ├── NVIDIA_GeForce_RTX_4090_ffpa+acc+f32+L1_Speedup.png ├── NVIDIA_L20.png ├── NVIDIA_L20_ffpa+acc+f16+L1_Speedup.png ├── NVIDIA_L20_ffpa+acc+f32+L1_Speedup.png ├── bank_conflicts_check.sh └── bench.sh ├── csrc ├── .gitignore ├── cuffpa │ ├── ffpa_attn_F16F16F16_L1.cu │ ├── ffpa_attn_F16F16F32_L1.cu │ ├── ffpa_attn_templates_L1.cuh │ └── launch_templates.cuh ├── deprecated │ ├── faster_prefill_attn_F16F16F16F16_L1.cu │ └── faster_prefill_attn_F32F16F16F32_L1.cu ├── extension │ └── .gitignore └── pybind │ └── ffpa_attn_api.cc ├── env.py ├── ffpa_attn ├── .gitignore ├── __init__.py ├── interface.py └── version.py ├── include ├── .gitignore ├── cuffpa │ ├── cp_async.cuh │ ├── deprecated │ │ ├── mma_utils.cuh │ │ └── smem_swizzle.cuh │ ├── logging.cuh │ ├── mma.cuh │ ├── prefill.cuh │ ├── swizzle.cuh │ ├── utils.cuh │ └── warp.cuh └── extension │ └── .gitignore ├── requirements.txt ├── setup.cfg ├── setup.py └── tests ├── .gitignore ├── requirements.txt ├── swizzle_layout.py └── test_ffpa_attn.py /.dev/.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/.clang-format -------------------------------------------------------------------------------- /.dev/.clang_format.hook: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/.clang_format.hook -------------------------------------------------------------------------------- /.dev/.cpplint_pre_commit.hook: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/.cpplint_pre_commit.hook -------------------------------------------------------------------------------- /.dev/.pre-commit-config-cpp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/.pre-commit-config-cpp.yaml -------------------------------------------------------------------------------- /.dev/.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.dev/clear.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/clear.sh -------------------------------------------------------------------------------- /.dev/commit-prepare.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/commit-prepare.sh -------------------------------------------------------------------------------- /.dev/init_dev.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/init_dev.sh -------------------------------------------------------------------------------- /.dev/init_prod.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/init_prod.sh -------------------------------------------------------------------------------- /.dev/init_prod_mini.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/init_prod_mini.sh -------------------------------------------------------------------------------- /.dev/install.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.dev/install.sh -------------------------------------------------------------------------------- /.dev/uninstall.sh: -------------------------------------------------------------------------------- 1 | python3 -m pip uninstall ffpa-attn -y 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.github/.gitignore -------------------------------------------------------------------------------- /.github/workflows/issue.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.github/workflows/issue.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/README.md -------------------------------------------------------------------------------- /bench/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/.gitignore -------------------------------------------------------------------------------- /bench/NVIDIA_A30.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_A30.png -------------------------------------------------------------------------------- /bench/NVIDIA_A30_ffpa+acc+f16+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_A30_ffpa+acc+f16+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_A30_ffpa+acc+f32+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_A30_ffpa+acc+f32+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f16+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f16+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f32+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_3080_Laptop_GPU_WSL2_ffpa+acc+f32+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_4090.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_4090.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_4090_ffpa+acc+f16+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_4090_ffpa+acc+f16+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_GeForce_RTX_4090_ffpa+acc+f32+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_GeForce_RTX_4090_ffpa+acc+f32+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_L20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_L20.png -------------------------------------------------------------------------------- /bench/NVIDIA_L20_ffpa+acc+f16+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_L20_ffpa+acc+f16+L1_Speedup.png -------------------------------------------------------------------------------- /bench/NVIDIA_L20_ffpa+acc+f32+L1_Speedup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/NVIDIA_L20_ffpa+acc+f32+L1_Speedup.png -------------------------------------------------------------------------------- /bench/bank_conflicts_check.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/bank_conflicts_check.sh -------------------------------------------------------------------------------- /bench/bench.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/bench/bench.sh -------------------------------------------------------------------------------- /csrc/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/.gitignore -------------------------------------------------------------------------------- /csrc/cuffpa/ffpa_attn_F16F16F16_L1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/cuffpa/ffpa_attn_F16F16F16_L1.cu -------------------------------------------------------------------------------- /csrc/cuffpa/ffpa_attn_F16F16F32_L1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/cuffpa/ffpa_attn_F16F16F32_L1.cu -------------------------------------------------------------------------------- /csrc/cuffpa/ffpa_attn_templates_L1.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/cuffpa/ffpa_attn_templates_L1.cuh -------------------------------------------------------------------------------- /csrc/cuffpa/launch_templates.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/cuffpa/launch_templates.cuh -------------------------------------------------------------------------------- /csrc/deprecated/faster_prefill_attn_F16F16F16F16_L1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/deprecated/faster_prefill_attn_F16F16F16F16_L1.cu -------------------------------------------------------------------------------- /csrc/deprecated/faster_prefill_attn_F32F16F16F32_L1.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/deprecated/faster_prefill_attn_F32F16F16F32_L1.cu -------------------------------------------------------------------------------- /csrc/extension/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/extension/.gitignore -------------------------------------------------------------------------------- /csrc/pybind/ffpa_attn_api.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/csrc/pybind/ffpa_attn_api.cc -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/env.py -------------------------------------------------------------------------------- /ffpa_attn/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/ffpa_attn/.gitignore -------------------------------------------------------------------------------- /ffpa_attn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/ffpa_attn/__init__.py -------------------------------------------------------------------------------- /ffpa_attn/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/ffpa_attn/interface.py -------------------------------------------------------------------------------- /ffpa_attn/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.2" # type: ignore 2 | -------------------------------------------------------------------------------- /include/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/.gitignore -------------------------------------------------------------------------------- /include/cuffpa/cp_async.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/cp_async.cuh -------------------------------------------------------------------------------- /include/cuffpa/deprecated/mma_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/deprecated/mma_utils.cuh -------------------------------------------------------------------------------- /include/cuffpa/deprecated/smem_swizzle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/deprecated/smem_swizzle.cuh -------------------------------------------------------------------------------- /include/cuffpa/logging.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/logging.cuh -------------------------------------------------------------------------------- /include/cuffpa/mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/mma.cuh -------------------------------------------------------------------------------- /include/cuffpa/prefill.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/prefill.cuh -------------------------------------------------------------------------------- /include/cuffpa/swizzle.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/swizzle.cuh -------------------------------------------------------------------------------- /include/cuffpa/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/utils.cuh -------------------------------------------------------------------------------- /include/cuffpa/warp.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/cuffpa/warp.cuh -------------------------------------------------------------------------------- /include/extension/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/include/extension/.gitignore -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | ninja 3 | torch>=2.4.0 4 | numpy 5 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/setup.py -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/tests/.gitignore -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | -------------------------------------------------------------------------------- /tests/swizzle_layout.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/tests/swizzle_layout.py -------------------------------------------------------------------------------- /tests/test_ffpa_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xlite-dev/ffpa-attn/HEAD/tests/test_ffpa_attn.py --------------------------------------------------------------------------------