├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── benchmark ├── bench_flash_mla.py └── visualize.py ├── csrc ├── params.h ├── pybind.cpp ├── sm100 │ ├── decode │ │ └── sparse_fp8 │ │ │ ├── dequant.h │ │ │ ├── splitkv_mla.cu │ │ │ └── splitkv_mla.h │ ├── defines.h │ ├── helpers.h │ ├── intrinsics.h │ ├── prefill │ │ ├── dense │ │ │ ├── collective │ │ │ │ ├── fmha_common.hpp │ │ │ │ ├── fmha_fusion.hpp │ │ │ │ ├── sm100_fmha_fwd_epilogue_tma_warpspecialized.hpp │ │ │ │ ├── sm100_fmha_fwd_mainloop_tma_warpspecialized.hpp │ │ │ │ ├── sm100_fmha_load_tma_warpspecialized.hpp │ │ │ │ ├── sm100_fmha_mla_fwd_mainloop_tma_warpspecialized.hpp │ │ │ │ └── sm100_fmha_mla_load_tma_warpspecialized.hpp │ │ │ ├── common │ │ │ │ ├── gather_tensor.hpp │ │ │ │ ├── helper.h │ │ │ │ ├── mask.cuh │ │ │ │ ├── pipeline_mla.hpp │ │ │ │ ├── pow_2.hpp │ │ │ │ └── utils.hpp │ │ │ ├── device │ │ │ │ ├── fmha.hpp │ │ │ │ └── fmha_device_bwd.hpp │ │ │ ├── fmha_cutlass_bwd_sm100.cu │ │ │ ├── fmha_cutlass_bwd_sm100.cuh │ │ │ ├── fmha_cutlass_fwd_sm100.cu │ │ │ ├── fmha_cutlass_fwd_sm100.cuh │ │ │ ├── interface.h │ │ │ └── kernel │ │ │ │ ├── fmha_causal_tile_scheduler.hpp │ │ │ │ ├── fmha_kernel_bwd_convert.hpp │ │ │ │ ├── fmha_kernel_bwd_sum_OdO.hpp │ │ │ │ ├── fmha_options.hpp │ │ │ │ ├── fmha_tile_scheduler.hpp │ │ │ │ ├── sm100_fmha_bwd_kernel_tma_warpspecialized.hpp │ │ │ │ ├── sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp │ │ │ │ └── sm100_fmha_fwd_kernel_tma_warpspecialized.hpp │ │ └── sparse │ │ │ ├── fwd.cu │ │ │ ├── fwd.h │ │ │ ├── helpers.h │ │ │ ├── intrinsics.h │ │ │ └── ws_gemm.h │ ├── tma_cta_group2_nosplit.h │ └── ws_gemm.h ├── sm90 │ ├── decode │ │ ├── dense │ │ │ ├── config.h │ │ │ ├── splitkv_mla.cu │ │ │ ├── splitkv_mla.h │ │ │ └── traits.h │ │ └── sparse_fp8 │ │ │ ├── components │ │ │ ├── config.h │ │ │ ├── dequant.h │ │ │ ├── epilogue.h │ │ │ ├── helpers.h │ │ │ └── named_barriers.h │ │ │ ├── splitkv_mla.cu │ │ │ └── splitkv_mla.h │ └── prefill │ │ └── sparse │ │ ├── fwd.cu │ │ ├── fwd.h │ │ └── helpers.h ├── smxx │ ├── get_mla_metadata.cu │ ├── get_mla_metadata.h │ ├── mla_combine.cu │ └── mla_combine.h └── utils.h ├── docs ├── 20250422-new-kernel-deep-dive.md ├── 20250929-hopper-fp8-sparse-deep-dive.md └── assets │ └── MLA Kernel Sched.drawio.svg ├── flash_mla ├── __init__.py └── flash_mla_interface.py ├── setup.py └── tests ├── lib.py ├── quant.py ├── test_flash_mla_decoding.py ├── test_flash_mla_prefill.py └── test_fmha_sm100.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/.gitmodules -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/README.md -------------------------------------------------------------------------------- /benchmark/bench_flash_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/benchmark/bench_flash_mla.py -------------------------------------------------------------------------------- /benchmark/visualize.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/benchmark/visualize.py -------------------------------------------------------------------------------- /csrc/params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/params.h -------------------------------------------------------------------------------- /csrc/pybind.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/pybind.cpp -------------------------------------------------------------------------------- /csrc/sm100/decode/sparse_fp8/dequant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/decode/sparse_fp8/dequant.h -------------------------------------------------------------------------------- /csrc/sm100/decode/sparse_fp8/splitkv_mla.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/decode/sparse_fp8/splitkv_mla.cu -------------------------------------------------------------------------------- /csrc/sm100/decode/sparse_fp8/splitkv_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/decode/sparse_fp8/splitkv_mla.h -------------------------------------------------------------------------------- /csrc/sm100/defines.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/defines.h -------------------------------------------------------------------------------- /csrc/sm100/helpers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/helpers.h -------------------------------------------------------------------------------- /csrc/sm100/intrinsics.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/intrinsics.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/fmha_common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/fmha_common.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/fmha_fusion.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/fmha_fusion.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/sm100_fmha_fwd_epilogue_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/sm100_fmha_fwd_epilogue_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/sm100_fmha_fwd_mainloop_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/sm100_fmha_fwd_mainloop_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/sm100_fmha_load_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/sm100_fmha_load_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/sm100_fmha_mla_fwd_mainloop_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/sm100_fmha_mla_fwd_mainloop_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/collective/sm100_fmha_mla_load_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/collective/sm100_fmha_mla_load_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/gather_tensor.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/gather_tensor.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/helper.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/mask.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/mask.cuh -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/pipeline_mla.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/pipeline_mla.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/pow_2.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/pow_2.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/common/utils.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/common/utils.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/device/fmha.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/device/fmha.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/device/fmha_device_bwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/device/fmha_device_bwd.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cu -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cuh -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cu -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cuh -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/interface.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/fmha_causal_tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/fmha_causal_tile_scheduler.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/fmha_kernel_bwd_convert.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/fmha_kernel_bwd_convert.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/fmha_kernel_bwd_sum_OdO.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/fmha_kernel_bwd_sum_OdO.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/fmha_options.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/fmha_options.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/fmha_tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/fmha_tile_scheduler.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/dense/kernel/sm100_fmha_fwd_kernel_tma_warpspecialized.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/dense/kernel/sm100_fmha_fwd_kernel_tma_warpspecialized.hpp -------------------------------------------------------------------------------- /csrc/sm100/prefill/sparse/fwd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/sparse/fwd.cu -------------------------------------------------------------------------------- /csrc/sm100/prefill/sparse/fwd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/sparse/fwd.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/sparse/helpers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/sparse/helpers.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/sparse/intrinsics.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/sparse/intrinsics.h -------------------------------------------------------------------------------- /csrc/sm100/prefill/sparse/ws_gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/prefill/sparse/ws_gemm.h -------------------------------------------------------------------------------- /csrc/sm100/tma_cta_group2_nosplit.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/tma_cta_group2_nosplit.h -------------------------------------------------------------------------------- /csrc/sm100/ws_gemm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm100/ws_gemm.h -------------------------------------------------------------------------------- /csrc/sm90/decode/dense/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/dense/config.h -------------------------------------------------------------------------------- /csrc/sm90/decode/dense/splitkv_mla.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/dense/splitkv_mla.cu -------------------------------------------------------------------------------- /csrc/sm90/decode/dense/splitkv_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/dense/splitkv_mla.h -------------------------------------------------------------------------------- /csrc/sm90/decode/dense/traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/dense/traits.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/components/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/components/config.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/components/dequant.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/components/dequant.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/components/epilogue.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/components/epilogue.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/components/helpers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/components/helpers.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/components/named_barriers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/components/named_barriers.h -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/splitkv_mla.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/splitkv_mla.cu -------------------------------------------------------------------------------- /csrc/sm90/decode/sparse_fp8/splitkv_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/decode/sparse_fp8/splitkv_mla.h -------------------------------------------------------------------------------- /csrc/sm90/prefill/sparse/fwd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/prefill/sparse/fwd.cu -------------------------------------------------------------------------------- /csrc/sm90/prefill/sparse/fwd.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/prefill/sparse/fwd.h -------------------------------------------------------------------------------- /csrc/sm90/prefill/sparse/helpers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/sm90/prefill/sparse/helpers.h -------------------------------------------------------------------------------- /csrc/smxx/get_mla_metadata.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/smxx/get_mla_metadata.cu -------------------------------------------------------------------------------- /csrc/smxx/get_mla_metadata.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/smxx/get_mla_metadata.h -------------------------------------------------------------------------------- /csrc/smxx/mla_combine.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/smxx/mla_combine.cu -------------------------------------------------------------------------------- /csrc/smxx/mla_combine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/smxx/mla_combine.h -------------------------------------------------------------------------------- /csrc/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/csrc/utils.h -------------------------------------------------------------------------------- /docs/20250422-new-kernel-deep-dive.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/docs/20250422-new-kernel-deep-dive.md -------------------------------------------------------------------------------- /docs/20250929-hopper-fp8-sparse-deep-dive.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/docs/20250929-hopper-fp8-sparse-deep-dive.md -------------------------------------------------------------------------------- /docs/assets/MLA Kernel Sched.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/docs/assets/MLA Kernel Sched.drawio.svg -------------------------------------------------------------------------------- /flash_mla/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/flash_mla/__init__.py -------------------------------------------------------------------------------- /flash_mla/flash_mla_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/flash_mla/flash_mla_interface.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/setup.py -------------------------------------------------------------------------------- /tests/lib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/tests/lib.py -------------------------------------------------------------------------------- /tests/quant.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/tests/quant.py -------------------------------------------------------------------------------- /tests/test_flash_mla_decoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/tests/test_flash_mla_decoding.py -------------------------------------------------------------------------------- /tests/test_flash_mla_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/tests/test_flash_mla_prefill.py -------------------------------------------------------------------------------- /tests/test_fmha_sm100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepseek-ai/FlashMLA/HEAD/tests/test_fmha_sm100.py --------------------------------------------------------------------------------