├── .github └── workflows │ ├── _build.yml │ ├── build.yml │ ├── pre-commit.yaml │ └── publish.yml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── AUTHORS ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── assets ├── flash2_a100_fwd_bwd_benchmark.png ├── flash2_h100_fwd_bwd_benchmark.png ├── flash3_fp16_fwd.png ├── flashattention_logo.png ├── flashattn_banner.jpg ├── flashattn_banner.pdf ├── flashattn_memory.jpg ├── flashattn_speedup.jpg ├── flashattn_speedup_3090.jpg ├── flashattn_speedup_a100_d128.jpg ├── flashattn_speedup_t4.jpg ├── flashattn_speedup_t4_fwd.jpg ├── gpt2_training_curve.jpg ├── gpt2_training_efficiency.jpg ├── gpt3_training_curve.jpg └── gpt3_training_efficiency.jpg ├── benchmarks ├── benchmark_alibi.py ├── benchmark_attn.py ├── benchmark_causal.py ├── benchmark_flash_attention.py ├── benchmark_gemm.py └── cute │ ├── benchmark_block_sparsity.py │ └── benchmark_mask_mod.py ├── csrc ├── flash_attn │ ├── flash_api.cpp │ └── src │ │ ├── alibi.h │ │ ├── block_info.h │ │ ├── dropout.h │ │ ├── flash.h │ │ ├── flash_bwd_hdim128_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim128_bf16_sm80.cu │ │ ├── flash_bwd_hdim128_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim128_fp16_sm80.cu │ │ ├── flash_bwd_hdim192_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim192_bf16_sm80.cu │ │ ├── flash_bwd_hdim192_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim192_fp16_sm80.cu │ │ ├── flash_bwd_hdim256_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim256_bf16_sm80.cu │ │ ├── flash_bwd_hdim256_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim256_fp16_sm80.cu │ │ ├── flash_bwd_hdim32_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim32_bf16_sm80.cu │ │ ├── flash_bwd_hdim32_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim32_fp16_sm80.cu │ │ ├── flash_bwd_hdim64_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim64_bf16_sm80.cu │ │ ├── flash_bwd_hdim64_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim64_fp16_sm80.cu │ │ ├── flash_bwd_hdim96_bf16_causal_sm80.cu │ │ ├── flash_bwd_hdim96_bf16_sm80.cu │ │ ├── flash_bwd_hdim96_fp16_causal_sm80.cu │ │ ├── flash_bwd_hdim96_fp16_sm80.cu │ │ ├── flash_bwd_kernel.h │ │ ├── flash_bwd_launch_template.h │ │ ├── flash_bwd_preprocess_kernel.h │ │ ├── flash_fwd_hdim128_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim128_bf16_sm80.cu │ │ ├── flash_fwd_hdim128_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim128_fp16_sm80.cu │ │ ├── flash_fwd_hdim192_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim192_bf16_sm80.cu │ │ ├── flash_fwd_hdim192_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim192_fp16_sm80.cu │ │ ├── flash_fwd_hdim256_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim256_bf16_sm80.cu │ │ ├── flash_fwd_hdim256_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim256_fp16_sm80.cu │ │ ├── flash_fwd_hdim32_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim32_bf16_sm80.cu │ │ ├── flash_fwd_hdim32_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim32_fp16_sm80.cu │ │ ├── flash_fwd_hdim64_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim64_bf16_sm80.cu │ │ ├── flash_fwd_hdim64_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim64_fp16_sm80.cu │ │ ├── flash_fwd_hdim96_bf16_causal_sm80.cu │ │ ├── flash_fwd_hdim96_bf16_sm80.cu │ │ ├── flash_fwd_hdim96_fp16_causal_sm80.cu │ │ ├── flash_fwd_hdim96_fp16_sm80.cu │ │ ├── flash_fwd_kernel.h │ │ ├── flash_fwd_launch_template.h │ │ ├── flash_fwd_split_hdim128_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim128_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim128_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim128_fp16_sm80.cu │ │ ├── flash_fwd_split_hdim192_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim192_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim192_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim192_fp16_sm80.cu │ │ ├── flash_fwd_split_hdim256_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim256_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim256_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim256_fp16_sm80.cu │ │ ├── flash_fwd_split_hdim32_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim32_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim32_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim32_fp16_sm80.cu │ │ ├── flash_fwd_split_hdim64_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim64_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim64_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim64_fp16_sm80.cu │ │ ├── flash_fwd_split_hdim96_bf16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim96_bf16_sm80.cu │ │ ├── flash_fwd_split_hdim96_fp16_causal_sm80.cu │ │ ├── flash_fwd_split_hdim96_fp16_sm80.cu │ │ ├── generate_kernels.py │ │ ├── hardware_info.h │ │ ├── kernel_traits.h │ │ ├── mask.h │ │ ├── namespace_config.h │ │ ├── philox.cuh │ │ ├── philox_unpack.cuh │ │ ├── rotary.h │ │ ├── softmax.h │ │ ├── static_switch.h │ │ └── utils.h ├── flash_attn_ck │ ├── flash_api.cpp │ ├── flash_common.cpp │ ├── flash_common.hpp │ ├── mha_bwd.cpp │ ├── mha_fwd.cpp │ ├── mha_fwd_kvcache.cpp │ ├── mha_varlen_bwd.cpp │ └── mha_varlen_fwd.cpp ├── fused_dense_lib │ ├── README.md │ ├── fused_dense.cpp │ ├── fused_dense_cuda.cu │ └── setup.py └── layer_norm │ ├── README.md │ ├── ln.h │ ├── ln_api.cpp │ ├── ln_bwd_1024.cu │ ├── ln_bwd_1280.cu │ ├── ln_bwd_1536.cu │ ├── ln_bwd_2048.cu │ ├── ln_bwd_256.cu │ ├── ln_bwd_2560.cu │ ├── ln_bwd_3072.cu │ ├── ln_bwd_4096.cu │ ├── ln_bwd_512.cu │ ├── ln_bwd_5120.cu │ ├── ln_bwd_6144.cu │ ├── ln_bwd_7168.cu │ ├── ln_bwd_768.cu │ ├── ln_bwd_8192.cu │ ├── ln_bwd_kernels.cuh │ ├── ln_fwd_1024.cu │ ├── ln_fwd_1280.cu │ ├── ln_fwd_1536.cu │ ├── ln_fwd_2048.cu │ ├── ln_fwd_256.cu │ ├── ln_fwd_2560.cu │ ├── ln_fwd_3072.cu │ ├── ln_fwd_4096.cu │ ├── ln_fwd_512.cu │ ├── ln_fwd_5120.cu │ ├── ln_fwd_6144.cu │ ├── ln_fwd_7168.cu │ ├── ln_fwd_768.cu │ ├── ln_fwd_8192.cu │ ├── ln_fwd_kernels.cuh │ ├── ln_kernel_traits.h │ ├── ln_parallel_bwd_1024.cu │ ├── ln_parallel_bwd_1280.cu │ ├── ln_parallel_bwd_1536.cu │ ├── ln_parallel_bwd_2048.cu │ ├── ln_parallel_bwd_256.cu │ ├── ln_parallel_bwd_2560.cu │ ├── ln_parallel_bwd_3072.cu │ ├── ln_parallel_bwd_4096.cu │ ├── ln_parallel_bwd_512.cu │ ├── ln_parallel_bwd_5120.cu │ ├── ln_parallel_bwd_6144.cu │ ├── ln_parallel_bwd_7168.cu │ ├── ln_parallel_bwd_768.cu │ ├── ln_parallel_bwd_8192.cu │ ├── ln_parallel_fwd_1024.cu │ ├── ln_parallel_fwd_1280.cu │ ├── ln_parallel_fwd_1536.cu │ ├── ln_parallel_fwd_2048.cu │ ├── ln_parallel_fwd_256.cu │ ├── ln_parallel_fwd_2560.cu │ ├── ln_parallel_fwd_3072.cu │ ├── ln_parallel_fwd_4096.cu │ ├── ln_parallel_fwd_512.cu │ ├── ln_parallel_fwd_5120.cu │ ├── ln_parallel_fwd_6144.cu │ ├── ln_parallel_fwd_7168.cu │ ├── ln_parallel_fwd_768.cu │ ├── ln_parallel_fwd_8192.cu │ ├── ln_parallel_residual_bwd_kernels.cuh │ ├── ln_parallel_residual_fwd_kernels.cuh │ ├── ln_utils.cuh │ ├── setup.py │ └── static_switch.h ├── examples └── inference │ └── README.md ├── flash_attn ├── __init__.py ├── bert_padding.py ├── cute │ ├── .flake8 │ ├── AUTHORS │ ├── LICENSE │ ├── README.md │ ├── __init__.py │ ├── ampere_helpers.py │ ├── barrier.py │ ├── benchmark.py │ ├── blackwell_helpers.py │ ├── block_info.py │ ├── block_sparse_utils.py │ ├── block_sparsity.py │ ├── compute_block_sparsity.py │ ├── copy_utils.py │ ├── cute_dsl_utils.py │ ├── fast_math.py │ ├── flash_bwd.py │ ├── flash_bwd_postprocess.py │ ├── flash_bwd_preprocess.py │ ├── flash_bwd_sm100.py │ ├── flash_bwd_sm90.py │ ├── flash_fwd.py │ ├── flash_fwd_combine.py │ ├── flash_fwd_sm100.py │ ├── hopper_helpers.py │ ├── interface.py │ ├── mask.py │ ├── mask_definitions.py │ ├── mma_sm100_desc.py │ ├── named_barrier.py │ ├── pack_gqa.py │ ├── paged_kv.py │ ├── pipeline.py │ ├── pyproject.toml │ ├── seqlen_info.py │ ├── softmax.py │ ├── testing.py │ ├── tile_scheduler.py │ └── utils.py ├── flash_attn_interface.py ├── flash_attn_triton.py ├── flash_attn_triton_amd │ ├── Dockerfile │ ├── README.md │ ├── __init__.py │ ├── bench.py │ ├── bwd_prefill.py │ ├── bwd_prefill_fused.py │ ├── bwd_prefill_onekernel.py │ ├── bwd_prefill_split.py │ ├── bwd_ref.py │ ├── fp8.py │ ├── fwd_decode.py │ ├── fwd_prefill.py │ ├── fwd_ref.py │ ├── interface_fa.py │ ├── test.py │ ├── train.py │ └── utils.py ├── flash_attn_triton_og.py ├── flash_blocksparse_attention.py ├── flash_blocksparse_attn_interface.py ├── layers │ ├── __init__.py │ ├── patch_embed.py │ └── rotary.py ├── losses │ ├── __init__.py │ └── cross_entropy.py ├── models │ ├── __init__.py │ ├── baichuan.py │ ├── bert.py │ ├── bigcode.py │ ├── btlm.py │ ├── falcon.py │ ├── gpt.py │ ├── gpt_neox.py │ ├── gptj.py │ ├── llama.py │ ├── opt.py │ └── vit.py ├── modules │ ├── __init__.py │ ├── block.py │ ├── embedding.py │ ├── mha.py │ └── mlp.py ├── ops │ ├── __init__.py │ ├── activations.py │ ├── fused_dense.py │ ├── layer_norm.py │ ├── rms_norm.py │ └── triton │ │ ├── __init__.py │ │ ├── cross_entropy.py │ │ ├── k_activations.py │ │ ├── layer_norm.py │ │ ├── linear.py │ │ ├── mlp.py │ │ └── rotary.py ├── pyproject.toml └── utils │ ├── __init__.py │ ├── benchmark.py │ ├── distributed.py │ ├── generation.py │ ├── library.py │ ├── pretrained.py │ ├── testing.py │ └── torch.py ├── hopper ├── __init__.py ├── benchmark_attn.py ├── benchmark_flash_attention_fp8.py ├── benchmark_mla_decode.py ├── benchmark_split_kv.py ├── block.h ├── copy_sm90_bulk_reduce.hpp ├── cuda_check.h ├── epilogue_bwd.hpp ├── epilogue_fwd.hpp ├── flash.h ├── flash_api.cpp ├── flash_api_stable.cpp ├── flash_attn_interface.py ├── flash_bwd_kernel_sm80.h ├── flash_bwd_kernel_sm90.h ├── flash_bwd_launch_template.h ├── flash_bwd_postprocess_kernel.h ├── flash_bwd_preprocess_kernel.h ├── flash_fwd_combine.cu ├── flash_fwd_combine_kernel.h ├── flash_fwd_combine_launch_template.h ├── flash_fwd_kernel_sm80.h ├── flash_fwd_kernel_sm90.h ├── flash_fwd_launch_template.h ├── flash_prepare_scheduler.cu ├── generate_kernels.py ├── heuristics.h ├── instantiations │ ├── flash_bwd_hdim128_bf16_sm80.cu │ ├── flash_bwd_hdim128_bf16_sm90.cu │ ├── flash_bwd_hdim128_bf16_softcap_sm80.cu │ ├── flash_bwd_hdim128_bf16_softcap_sm90.cu │ ├── flash_bwd_hdim128_bf16_softcapall_sm90.cu │ ├── flash_bwd_hdim128_fp16_sm80.cu │ ├── flash_bwd_hdim128_fp16_sm90.cu │ ├── flash_bwd_hdim128_fp16_softcap_sm80.cu │ ├── flash_bwd_hdim128_fp16_softcap_sm90.cu │ ├── flash_bwd_hdim128_fp16_softcapall_sm90.cu │ ├── flash_bwd_hdim192_bf16_sm80.cu │ ├── flash_bwd_hdim192_bf16_sm90.cu │ ├── flash_bwd_hdim192_bf16_softcap_sm80.cu │ ├── flash_bwd_hdim192_bf16_softcap_sm90.cu │ ├── flash_bwd_hdim192_bf16_softcapall_sm90.cu │ ├── flash_bwd_hdim192_fp16_sm80.cu │ ├── flash_bwd_hdim192_fp16_sm90.cu │ ├── flash_bwd_hdim192_fp16_softcap_sm80.cu │ ├── flash_bwd_hdim192_fp16_softcap_sm90.cu │ ├── flash_bwd_hdim192_fp16_softcapall_sm90.cu │ ├── flash_bwd_hdim256_bf16_sm80.cu │ ├── flash_bwd_hdim256_bf16_sm90.cu │ ├── flash_bwd_hdim256_bf16_softcap_sm80.cu │ ├── flash_bwd_hdim256_bf16_softcap_sm90.cu │ ├── flash_bwd_hdim256_bf16_softcapall_sm90.cu │ ├── flash_bwd_hdim256_fp16_sm80.cu │ ├── flash_bwd_hdim256_fp16_sm90.cu │ ├── flash_bwd_hdim256_fp16_softcap_sm80.cu │ ├── flash_bwd_hdim256_fp16_softcap_sm90.cu │ ├── flash_bwd_hdim256_fp16_softcapall_sm90.cu │ ├── flash_bwd_hdim64_bf16_sm80.cu │ ├── flash_bwd_hdim64_bf16_sm90.cu │ ├── flash_bwd_hdim64_bf16_softcap_sm80.cu │ ├── flash_bwd_hdim64_bf16_softcap_sm90.cu │ ├── flash_bwd_hdim64_bf16_softcapall_sm90.cu │ ├── flash_bwd_hdim64_fp16_sm80.cu │ ├── flash_bwd_hdim64_fp16_sm90.cu │ ├── flash_bwd_hdim64_fp16_softcap_sm80.cu │ ├── flash_bwd_hdim64_fp16_softcap_sm90.cu │ ├── flash_bwd_hdim64_fp16_softcapall_sm90.cu │ ├── flash_bwd_hdim96_bf16_sm80.cu │ ├── flash_bwd_hdim96_bf16_sm90.cu │ ├── flash_bwd_hdim96_bf16_softcap_sm80.cu │ ├── flash_bwd_hdim96_bf16_softcap_sm90.cu │ ├── flash_bwd_hdim96_bf16_softcapall_sm90.cu │ ├── flash_bwd_hdim96_fp16_sm80.cu │ ├── flash_bwd_hdim96_fp16_sm90.cu │ ├── flash_bwd_hdim96_fp16_softcap_sm80.cu │ ├── flash_bwd_hdim96_fp16_softcap_sm90.cu │ ├── flash_bwd_hdim96_fp16_softcapall_sm90.cu │ ├── flash_fwd_hdim128_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim128_bf16_paged_sm80.cu │ ├── flash_fwd_hdim128_bf16_paged_sm90.cu │ ├── flash_fwd_hdim128_bf16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim128_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim128_bf16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim128_bf16_paged_split_sm80.cu │ ├── flash_fwd_hdim128_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim128_bf16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim128_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_bf16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim128_bf16_sm100.cu │ ├── flash_fwd_hdim128_bf16_sm80.cu │ ├── flash_fwd_hdim128_bf16_sm90.cu │ ├── flash_fwd_hdim128_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim128_bf16_softcap_sm80.cu │ ├── flash_fwd_hdim128_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim128_bf16_softcapall_sm80.cu │ ├── flash_fwd_hdim128_bf16_split_sm80.cu │ ├── flash_fwd_hdim128_bf16_split_sm90.cu │ ├── flash_fwd_hdim128_bf16_split_softcap_sm80.cu │ ├── flash_fwd_hdim128_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_bf16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim128_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim128_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim128_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim128_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim128_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_e4m3_sm90.cu │ ├── flash_fwd_hdim128_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim128_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim128_e4m3_split_sm90.cu │ ├── flash_fwd_hdim128_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim128_fp16_paged_sm80.cu │ ├── flash_fwd_hdim128_fp16_paged_sm90.cu │ ├── flash_fwd_hdim128_fp16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim128_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim128_fp16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim128_fp16_paged_split_sm80.cu │ ├── flash_fwd_hdim128_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim128_fp16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim128_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_fp16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim128_fp16_sm80.cu │ ├── flash_fwd_hdim128_fp16_sm90.cu │ ├── flash_fwd_hdim128_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim128_fp16_softcap_sm80.cu │ ├── flash_fwd_hdim128_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim128_fp16_softcapall_sm80.cu │ ├── flash_fwd_hdim128_fp16_split_sm80.cu │ ├── flash_fwd_hdim128_fp16_split_sm90.cu │ ├── flash_fwd_hdim128_fp16_split_softcap_sm80.cu │ ├── flash_fwd_hdim128_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim128_fp16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim192_128_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_paged_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_split_sm90.cu │ ├── flash_fwd_hdim192_128_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_split_sm90.cu │ ├── flash_fwd_hdim192_128_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_paged_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_split_sm90.cu │ ├── flash_fwd_hdim192_128_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim192_bf16_paged_sm80.cu │ ├── flash_fwd_hdim192_bf16_paged_sm90.cu │ ├── flash_fwd_hdim192_bf16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim192_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_bf16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim192_bf16_paged_split_sm80.cu │ ├── flash_fwd_hdim192_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim192_bf16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim192_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_bf16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim192_bf16_sm80.cu │ ├── flash_fwd_hdim192_bf16_sm90.cu │ ├── flash_fwd_hdim192_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_bf16_softcap_sm80.cu │ ├── flash_fwd_hdim192_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim192_bf16_softcapall_sm80.cu │ ├── flash_fwd_hdim192_bf16_split_sm80.cu │ ├── flash_fwd_hdim192_bf16_split_sm90.cu │ ├── flash_fwd_hdim192_bf16_split_softcap_sm80.cu │ ├── flash_fwd_hdim192_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_bf16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim192_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim192_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim192_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim192_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_e4m3_sm90.cu │ ├── flash_fwd_hdim192_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim192_e4m3_split_sm90.cu │ ├── flash_fwd_hdim192_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim192_fp16_paged_sm80.cu │ ├── flash_fwd_hdim192_fp16_paged_sm90.cu │ ├── flash_fwd_hdim192_fp16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim192_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim192_fp16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim192_fp16_paged_split_sm80.cu │ ├── flash_fwd_hdim192_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim192_fp16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim192_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_fp16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim192_fp16_sm80.cu │ ├── flash_fwd_hdim192_fp16_sm90.cu │ ├── flash_fwd_hdim192_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim192_fp16_softcap_sm80.cu │ ├── flash_fwd_hdim192_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim192_fp16_softcapall_sm80.cu │ ├── flash_fwd_hdim192_fp16_split_sm80.cu │ ├── flash_fwd_hdim192_fp16_split_sm90.cu │ ├── flash_fwd_hdim192_fp16_split_softcap_sm80.cu │ ├── flash_fwd_hdim192_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim192_fp16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim256_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim256_bf16_paged_sm80.cu │ ├── flash_fwd_hdim256_bf16_paged_sm90.cu │ ├── flash_fwd_hdim256_bf16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim256_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim256_bf16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim256_bf16_paged_split_sm80.cu │ ├── flash_fwd_hdim256_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim256_bf16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim256_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_bf16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim256_bf16_sm80.cu │ ├── flash_fwd_hdim256_bf16_sm90.cu │ ├── flash_fwd_hdim256_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim256_bf16_softcap_sm80.cu │ ├── flash_fwd_hdim256_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim256_bf16_softcapall_sm80.cu │ ├── flash_fwd_hdim256_bf16_split_sm80.cu │ ├── flash_fwd_hdim256_bf16_split_sm90.cu │ ├── flash_fwd_hdim256_bf16_split_softcap_sm80.cu │ ├── flash_fwd_hdim256_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_bf16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim256_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim256_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim256_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim256_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim256_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_e4m3_sm90.cu │ ├── flash_fwd_hdim256_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim256_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim256_e4m3_split_sm90.cu │ ├── flash_fwd_hdim256_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim256_fp16_paged_sm80.cu │ ├── flash_fwd_hdim256_fp16_paged_sm90.cu │ ├── flash_fwd_hdim256_fp16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim256_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim256_fp16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim256_fp16_paged_split_sm80.cu │ ├── flash_fwd_hdim256_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim256_fp16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim256_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_fp16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim256_fp16_sm80.cu │ ├── flash_fwd_hdim256_fp16_sm90.cu │ ├── flash_fwd_hdim256_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim256_fp16_softcap_sm80.cu │ ├── flash_fwd_hdim256_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim256_fp16_softcapall_sm80.cu │ ├── flash_fwd_hdim256_fp16_split_sm80.cu │ ├── flash_fwd_hdim256_fp16_split_sm90.cu │ ├── flash_fwd_hdim256_fp16_split_softcap_sm80.cu │ ├── flash_fwd_hdim256_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim256_fp16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim64_256_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_paged_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_split_sm90.cu │ ├── flash_fwd_hdim64_256_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_paged_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_split_sm90.cu │ ├── flash_fwd_hdim64_256_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_paged_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_split_sm90.cu │ ├── flash_fwd_hdim64_512_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_paged_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_split_sm90.cu │ ├── flash_fwd_hdim64_512_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_bf16_paged_sm80.cu │ ├── flash_fwd_hdim64_bf16_paged_sm90.cu │ ├── flash_fwd_hdim64_bf16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim64_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_bf16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim64_bf16_paged_split_sm80.cu │ ├── flash_fwd_hdim64_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_bf16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim64_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_bf16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim64_bf16_sm80.cu │ ├── flash_fwd_hdim64_bf16_sm90.cu │ ├── flash_fwd_hdim64_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_bf16_softcap_sm80.cu │ ├── flash_fwd_hdim64_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim64_bf16_softcapall_sm80.cu │ ├── flash_fwd_hdim64_bf16_split_sm80.cu │ ├── flash_fwd_hdim64_bf16_split_sm90.cu │ ├── flash_fwd_hdim64_bf16_split_softcap_sm80.cu │ ├── flash_fwd_hdim64_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_bf16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim64_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim64_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim64_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim64_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_e4m3_sm90.cu │ ├── flash_fwd_hdim64_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim64_e4m3_split_sm90.cu │ ├── flash_fwd_hdim64_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim64_fp16_paged_sm80.cu │ ├── flash_fwd_hdim64_fp16_paged_sm90.cu │ ├── flash_fwd_hdim64_fp16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim64_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim64_fp16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim64_fp16_paged_split_sm80.cu │ ├── flash_fwd_hdim64_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim64_fp16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim64_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_fp16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim64_fp16_sm80.cu │ ├── flash_fwd_hdim64_fp16_sm90.cu │ ├── flash_fwd_hdim64_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim64_fp16_softcap_sm80.cu │ ├── flash_fwd_hdim64_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim64_fp16_softcapall_sm80.cu │ ├── flash_fwd_hdim64_fp16_split_sm80.cu │ ├── flash_fwd_hdim64_fp16_split_sm90.cu │ ├── flash_fwd_hdim64_fp16_split_softcap_sm80.cu │ ├── flash_fwd_hdim64_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim64_fp16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim96_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdim96_bf16_paged_sm80.cu │ ├── flash_fwd_hdim96_bf16_paged_sm90.cu │ ├── flash_fwd_hdim96_bf16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim96_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim96_bf16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim96_bf16_paged_split_sm80.cu │ ├── flash_fwd_hdim96_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdim96_bf16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim96_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_bf16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim96_bf16_sm80.cu │ ├── flash_fwd_hdim96_bf16_sm90.cu │ ├── flash_fwd_hdim96_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim96_bf16_softcap_sm80.cu │ ├── flash_fwd_hdim96_bf16_softcap_sm90.cu │ ├── flash_fwd_hdim96_bf16_softcapall_sm80.cu │ ├── flash_fwd_hdim96_bf16_split_sm80.cu │ ├── flash_fwd_hdim96_bf16_split_sm90.cu │ ├── flash_fwd_hdim96_bf16_split_softcap_sm80.cu │ ├── flash_fwd_hdim96_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_bf16_split_softcapall_sm80.cu │ ├── flash_fwd_hdim96_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdim96_e4m3_paged_sm90.cu │ ├── flash_fwd_hdim96_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdim96_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdim96_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_e4m3_sm90.cu │ ├── flash_fwd_hdim96_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim96_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdim96_e4m3_split_sm90.cu │ ├── flash_fwd_hdim96_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdim96_fp16_paged_sm80.cu │ ├── flash_fwd_hdim96_fp16_paged_sm90.cu │ ├── flash_fwd_hdim96_fp16_paged_softcap_sm80.cu │ ├── flash_fwd_hdim96_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdim96_fp16_paged_softcapall_sm80.cu │ ├── flash_fwd_hdim96_fp16_paged_split_sm80.cu │ ├── flash_fwd_hdim96_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdim96_fp16_paged_split_softcap_sm80.cu │ ├── flash_fwd_hdim96_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_fp16_paged_split_softcapall_sm80.cu │ ├── flash_fwd_hdim96_fp16_sm80.cu │ ├── flash_fwd_hdim96_fp16_sm90.cu │ ├── flash_fwd_hdim96_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdim96_fp16_softcap_sm80.cu │ ├── flash_fwd_hdim96_fp16_softcap_sm90.cu │ ├── flash_fwd_hdim96_fp16_softcapall_sm80.cu │ ├── flash_fwd_hdim96_fp16_split_sm80.cu │ ├── flash_fwd_hdim96_fp16_split_sm90.cu │ ├── flash_fwd_hdim96_fp16_split_softcap_sm80.cu │ ├── flash_fwd_hdim96_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdim96_fp16_split_softcapall_sm80.cu │ ├── flash_fwd_hdimall_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdimall_bf16_paged_sm90.cu │ ├── flash_fwd_hdimall_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdimall_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdimall_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimall_bf16_sm90.cu │ ├── flash_fwd_hdimall_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimall_bf16_softcap_sm90.cu │ ├── flash_fwd_hdimall_bf16_split_sm90.cu │ ├── flash_fwd_hdimall_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdimall_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdimall_e4m3_paged_sm90.cu │ ├── flash_fwd_hdimall_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdimall_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdimall_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimall_e4m3_sm90.cu │ ├── flash_fwd_hdimall_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimall_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdimall_e4m3_split_sm90.cu │ ├── flash_fwd_hdimall_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdimall_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdimall_fp16_paged_sm90.cu │ ├── flash_fwd_hdimall_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdimall_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdimall_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimall_fp16_sm90.cu │ ├── flash_fwd_hdimall_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimall_fp16_softcap_sm90.cu │ ├── flash_fwd_hdimall_fp16_split_sm90.cu │ ├── flash_fwd_hdimall_fp16_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_paged_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_paged_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_paged_split_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_split_sm90.cu │ ├── flash_fwd_hdimdiff_bf16_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_paged_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_paged_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_paged_split_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_split_sm90.cu │ ├── flash_fwd_hdimdiff_e4m3_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_paged_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_paged_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_paged_split_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_paged_split_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_softcap_packgqa_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_softcap_sm90.cu │ ├── flash_fwd_hdimdiff_fp16_split_sm90.cu │ └── flash_fwd_hdimdiff_fp16_split_softcap_sm90.cu ├── mainloop_bwd_sm80.hpp ├── mainloop_bwd_sm90_tma_gmma_ws.hpp ├── mainloop_fwd_sm80.hpp ├── mainloop_fwd_sm90_tma_gmma_ws.hpp ├── mask.h ├── named_barrier.hpp ├── pack_gqa.h ├── padding.py ├── paged_kv.h ├── rotary.h ├── seqlen.h ├── setup.py ├── sm90_pipeline_no_cluster.hpp ├── softmax.h ├── static_switch.h ├── test_attn_kvcache.py ├── test_flash_attn.py ├── test_flash_attn_bwd_determinism.py ├── test_kvcache.py ├── test_torch_compile_and_export.py ├── test_util.py ├── tile_scheduler.hpp ├── tile_size.h └── utils.h ├── setup.py ├── tests ├── cute │ ├── test_block_sparsity.py │ ├── test_flash_attn.py │ ├── test_flash_attn_race_condition.py │ ├── test_flash_attn_varlen.py │ ├── test_mask_mod.py │ └── test_score_mod.py ├── layers │ └── test_rotary.py ├── losses │ ├── test_cross_entropy.py │ └── test_cross_entropy_parallel.py ├── models │ ├── test_baichuan.py │ ├── test_bert.py │ ├── test_bigcode.py │ ├── test_btlm.py │ ├── test_falcon.py │ ├── test_gpt.py │ ├── test_gpt_generation_parallel.py │ ├── test_gpt_neox.py │ ├── test_gpt_parallel.py │ ├── test_gptj.py │ ├── test_llama.py │ ├── test_opt.py │ └── test_vit.py ├── modules │ ├── test_block_parallel.py │ ├── test_embedding_parallel.py │ ├── test_mha_parallel.py │ └── test_mlp_parallel.py ├── ops │ ├── test_dropout_layer_norm.py │ ├── test_fused_dense.py │ ├── test_fused_dense_parallel.py │ └── triton │ │ └── test_layer_norm.py ├── pyproject.toml ├── test_flash_attn.py ├── test_flash_attn_ck.py ├── test_flash_attn_triton_amd.py ├── test_rotary.py └── test_util.py ├── training ├── Dockerfile ├── README.md ├── configs │ ├── callbacks │ │ ├── causality-monitor.yaml │ │ ├── default.yaml │ │ ├── ema.yaml │ │ ├── flop-count.yaml │ │ ├── gpu-monitor.yaml │ │ ├── model-summary.yaml │ │ ├── none.yaml │ │ ├── norm-monitor.yaml │ │ ├── params-log.yaml │ │ └── wandb.yaml │ ├── config.yaml │ ├── datamodule │ │ ├── openwebtext.yaml │ │ └── thepile.yaml │ ├── experiment │ │ ├── owt │ │ │ ├── base.yaml │ │ │ ├── gpt2l-flash.yaml │ │ │ ├── gpt2l-hf.yaml │ │ │ ├── gpt2l.yaml │ │ │ ├── gpt2m-flash.yaml │ │ │ ├── gpt2m-hf.yaml │ │ │ ├── gpt2m.yaml │ │ │ ├── gpt2s-flash.yaml │ │ │ ├── gpt2s-hf.yaml │ │ │ ├── gpt2s.yaml │ │ │ ├── gpt2xl-flash.yaml │ │ │ ├── gpt2xl-hf.yaml │ │ │ └── gpt2xl.yaml │ │ └── pile │ │ │ ├── base.yaml │ │ │ ├── gpt3-2.7B-flash-8k.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128-rotary-8k.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128-rotary.yaml │ │ │ ├── gpt3-2.7B-flash-hdim128.yaml │ │ │ ├── gpt3-2.7B-flash-rotary-8k.yaml │ │ │ ├── gpt3-2.7B-flash-rotary.yaml │ │ │ ├── gpt3-2.7B-flash.yaml │ │ │ ├── gpt3-2.7B-hf-hdim128.yaml │ │ │ ├── gpt3-2.7B-hf.yaml │ │ │ ├── gpt3l-flash-8k.yaml │ │ │ ├── gpt3l-flash-rotary-30B.yaml │ │ │ ├── gpt3l-flash-rotary-8k.yaml │ │ │ ├── gpt3l-flash-rotary.yaml │ │ │ ├── gpt3l-flash.yaml │ │ │ ├── gpt3l-hf.yaml │ │ │ ├── gpt3m-flash-8k.yaml │ │ │ ├── gpt3m-flash-rotary-30B.yaml │ │ │ ├── gpt3m-flash-rotary-8k.yaml │ │ │ ├── gpt3m-flash-rotary.yaml │ │ │ ├── gpt3m-flash.yaml │ │ │ ├── gpt3m-hf.yaml │ │ │ ├── gpt3s-flash-8k.yaml │ │ │ ├── gpt3s-flash-rotary-30B.yaml │ │ │ ├── gpt3s-flash-rotary-8k.yaml │ │ │ ├── gpt3s-flash-rotary.yaml │ │ │ ├── gpt3s-flash.yaml │ │ │ ├── gpt3s-hf.yaml │ │ │ ├── gpt3xl-flash-8k.yaml │ │ │ ├── gpt3xl-flash-rotary-60B.yaml │ │ │ ├── gpt3xl-flash-rotary-8k.yaml │ │ │ ├── gpt3xl-flash-rotary.yaml │ │ │ ├── gpt3xl-flash.yaml │ │ │ └── gpt3xl-hf.yaml │ ├── logger │ │ ├── comet.yaml │ │ ├── csv.yaml │ │ ├── many_loggers.yaml │ │ ├── mlflow.yaml │ │ ├── neptune.yaml │ │ ├── tensorboard.yaml │ │ └── wandb.yaml │ ├── metrics │ │ ├── acc.yaml │ │ ├── acc_ignore_index.yaml │ │ ├── acctop5.yaml │ │ ├── mse.yaml │ │ ├── num-tokens.yaml │ │ └── perplexity.yaml │ ├── mode │ │ ├── debug.yaml │ │ ├── default.yaml │ │ ├── exp.yaml │ │ ├── profile.yaml │ │ └── smoke.yaml │ ├── model │ │ ├── gpt2-hf.yaml │ │ ├── gpt2.yaml │ │ └── gpt2model │ │ │ ├── gpt2-large.yaml │ │ │ ├── gpt2-medium.yaml │ │ │ ├── gpt2-small.yaml │ │ │ └── gpt2-xlarge.yaml │ ├── optimizer │ │ ├── adam.yaml │ │ ├── adamw-apex-distributed.yaml │ │ ├── adamw-apex-zero.yaml │ │ ├── adamw-apex.yaml │ │ ├── adamw-zero.yaml │ │ ├── adamw.yaml │ │ ├── fusedlamb-ds.yaml │ │ ├── fusedlamb.yaml │ │ └── sgd.yaml │ ├── scheduler │ │ ├── cosine-warmup-timm.yaml │ │ ├── cosine-warmup.yaml │ │ ├── invsqrt.yaml │ │ ├── linear-warmup.yaml │ │ ├── multi-step.yaml │ │ ├── plateau.yaml │ │ ├── poly-warmup.yaml │ │ └── step.yaml │ ├── task │ │ └── sequence-model.yaml │ └── trainer │ │ ├── all_params.yaml │ │ ├── ddp.yaml │ │ ├── debug.yaml │ │ └── default.yaml ├── run.py ├── src │ ├── callbacks │ │ ├── __init__.py │ │ ├── causality_monitor.py │ │ ├── ema.py │ │ ├── flop_count.py │ │ ├── gpu_affinity.py │ │ ├── loss_scale_monitor.py │ │ ├── model_checkpoint.py │ │ ├── norm_monitor.py │ │ ├── params_log.py │ │ ├── speed_monitor.py │ │ └── wandb_callbacks.py │ ├── datamodules │ │ ├── datasets │ │ │ ├── detokenizer.py │ │ │ └── lm_dataset.py │ │ ├── fault_tolerant_sampler.py │ │ ├── imagenet.py │ │ ├── language_modeling_hf.py │ │ └── timm_mixup.py │ ├── distributed │ │ └── ddp_comm_hooks.py │ ├── eval.py │ ├── metrics │ │ ├── accuracy.py │ │ ├── num_tokens.py │ │ └── perplexity.py │ ├── models │ │ └── modules │ │ │ └── seq_common.py │ ├── optim │ │ ├── param_grouping.py │ │ └── timm_lr_scheduler.py │ ├── tasks │ │ └── seq.py │ ├── train.py │ └── utils │ │ ├── checkpoint.py │ │ ├── ddp_zero1.py │ │ ├── ddp_zero2.py │ │ ├── distributed.py │ │ ├── ema.py │ │ ├── flops.py │ │ ├── gpu_affinity.py │ │ └── utils.py └── tests │ └── datamodules │ └── test_language_modeling_hf.py └── usage.md /.github/workflows/_build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.github/workflows/_build.yml -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.github/workflows/build.yml -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.github/workflows/pre-commit.yaml -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.gitmodules -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Tri Dao, trid@cs.stanford.edu -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/Makefile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/README.md -------------------------------------------------------------------------------- /assets/flash2_a100_fwd_bwd_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flash2_a100_fwd_bwd_benchmark.png -------------------------------------------------------------------------------- /assets/flash2_h100_fwd_bwd_benchmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flash2_h100_fwd_bwd_benchmark.png -------------------------------------------------------------------------------- /assets/flash3_fp16_fwd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flash3_fp16_fwd.png -------------------------------------------------------------------------------- /assets/flashattention_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattention_logo.png -------------------------------------------------------------------------------- /assets/flashattn_banner.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_banner.jpg -------------------------------------------------------------------------------- /assets/flashattn_banner.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_banner.pdf -------------------------------------------------------------------------------- /assets/flashattn_memory.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_memory.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_speedup.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_3090.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_speedup_3090.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_a100_d128.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_speedup_a100_d128.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_t4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_speedup_t4.jpg -------------------------------------------------------------------------------- /assets/flashattn_speedup_t4_fwd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/flashattn_speedup_t4_fwd.jpg -------------------------------------------------------------------------------- /assets/gpt2_training_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/gpt2_training_curve.jpg -------------------------------------------------------------------------------- /assets/gpt2_training_efficiency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/gpt2_training_efficiency.jpg -------------------------------------------------------------------------------- /assets/gpt3_training_curve.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/gpt3_training_curve.jpg -------------------------------------------------------------------------------- /assets/gpt3_training_efficiency.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/assets/gpt3_training_efficiency.jpg -------------------------------------------------------------------------------- /benchmarks/benchmark_alibi.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/benchmark_alibi.py -------------------------------------------------------------------------------- /benchmarks/benchmark_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/benchmark_attn.py -------------------------------------------------------------------------------- /benchmarks/benchmark_causal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/benchmark_causal.py -------------------------------------------------------------------------------- /benchmarks/benchmark_flash_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/benchmark_flash_attention.py -------------------------------------------------------------------------------- /benchmarks/benchmark_gemm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/benchmark_gemm.py -------------------------------------------------------------------------------- /benchmarks/cute/benchmark_block_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/cute/benchmark_block_sparsity.py -------------------------------------------------------------------------------- /benchmarks/cute/benchmark_mask_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/benchmarks/cute/benchmark_mask_mod.py -------------------------------------------------------------------------------- /csrc/flash_attn/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/flash_api.cpp -------------------------------------------------------------------------------- /csrc/flash_attn/src/alibi.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/alibi.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/block_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/block_info.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/dropout.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/dropout.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim128_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim128_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim128_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim128_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim192_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim192_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim192_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim192_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim192_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim192_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim192_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim192_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim256_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim256_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim256_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim256_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim256_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim256_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim256_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim256_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim32_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim32_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim32_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim32_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim32_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim32_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim32_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim32_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim64_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim64_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim64_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim64_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim96_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim96_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim96_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim96_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim96_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim96_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_hdim96_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_hdim96_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_kernel.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_launch_template.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_bwd_preprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_bwd_preprocess_kernel.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim128_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim128_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim128_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim128_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim192_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim192_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim192_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim192_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim192_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim192_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim192_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim192_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim256_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim256_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim256_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim256_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim256_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim256_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim256_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim256_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim32_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim32_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim32_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim32_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim32_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim32_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim32_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim32_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim64_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim64_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim64_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim64_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim96_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim96_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim96_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim96_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim96_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim96_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_hdim96_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_hdim96_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_kernel.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_launch_template.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim192_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim192_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim192_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim192_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim256_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim256_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim256_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim256_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim32_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim32_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim32_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim32_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim96_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim96_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_split_hdim96_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/flash_fwd_split_hdim96_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/generate_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/generate_kernels.py -------------------------------------------------------------------------------- /csrc/flash_attn/src/hardware_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/hardware_info.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/kernel_traits.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/mask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/mask.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/namespace_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/namespace_config.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/philox.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/philox.cuh -------------------------------------------------------------------------------- /csrc/flash_attn/src/philox_unpack.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/philox_unpack.cuh -------------------------------------------------------------------------------- /csrc/flash_attn/src/rotary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/rotary.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/softmax.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/static_switch.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn/src/utils.h -------------------------------------------------------------------------------- /csrc/flash_attn_ck/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/flash_api.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/flash_common.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/flash_common.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/flash_common.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/flash_common.hpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/mha_bwd.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/mha_bwd.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/mha_fwd.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/mha_fwd.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/mha_fwd_kvcache.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/mha_fwd_kvcache.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/mha_varlen_bwd.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/mha_varlen_bwd.cpp -------------------------------------------------------------------------------- /csrc/flash_attn_ck/mha_varlen_fwd.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/flash_attn_ck/mha_varlen_fwd.cpp -------------------------------------------------------------------------------- /csrc/fused_dense_lib/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/fused_dense_lib/README.md -------------------------------------------------------------------------------- /csrc/fused_dense_lib/fused_dense.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/fused_dense_lib/fused_dense.cpp -------------------------------------------------------------------------------- /csrc/fused_dense_lib/fused_dense_cuda.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/fused_dense_lib/fused_dense_cuda.cu -------------------------------------------------------------------------------- /csrc/fused_dense_lib/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/fused_dense_lib/setup.py -------------------------------------------------------------------------------- /csrc/layer_norm/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/README.md -------------------------------------------------------------------------------- /csrc/layer_norm/ln.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln.h -------------------------------------------------------------------------------- /csrc/layer_norm/ln_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_api.cpp -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_7168.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_7168.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_8192.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_8192.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_bwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_bwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_7168.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_7168.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_8192.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_8192.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_fwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_fwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_kernel_traits.h -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_7168.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_7168.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_bwd_8192.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_bwd_8192.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_1024.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_1024.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_1280.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_1280.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_1536.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_1536.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_2048.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_2048.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_256.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_256.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_2560.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_2560.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_3072.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_3072.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_4096.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_4096.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_512.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_512.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_5120.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_5120.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_6144.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_6144.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_7168.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_7168.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_768.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_768.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_fwd_8192.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_fwd_8192.cu -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_residual_bwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_residual_bwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_parallel_residual_fwd_kernels.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_parallel_residual_fwd_kernels.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/ln_utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/ln_utils.cuh -------------------------------------------------------------------------------- /csrc/layer_norm/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/setup.py -------------------------------------------------------------------------------- /csrc/layer_norm/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/csrc/layer_norm/static_switch.h -------------------------------------------------------------------------------- /examples/inference/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/examples/inference/README.md -------------------------------------------------------------------------------- /flash_attn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/__init__.py -------------------------------------------------------------------------------- /flash_attn/bert_padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/bert_padding.py -------------------------------------------------------------------------------- /flash_attn/cute/.flake8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/.flake8 -------------------------------------------------------------------------------- /flash_attn/cute/AUTHORS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/AUTHORS -------------------------------------------------------------------------------- /flash_attn/cute/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/LICENSE -------------------------------------------------------------------------------- /flash_attn/cute/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/cute/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/__init__.py -------------------------------------------------------------------------------- /flash_attn/cute/ampere_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/ampere_helpers.py -------------------------------------------------------------------------------- /flash_attn/cute/barrier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/barrier.py -------------------------------------------------------------------------------- /flash_attn/cute/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/benchmark.py -------------------------------------------------------------------------------- /flash_attn/cute/blackwell_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/blackwell_helpers.py -------------------------------------------------------------------------------- /flash_attn/cute/block_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/block_info.py -------------------------------------------------------------------------------- /flash_attn/cute/block_sparse_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/block_sparse_utils.py -------------------------------------------------------------------------------- /flash_attn/cute/block_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/block_sparsity.py -------------------------------------------------------------------------------- /flash_attn/cute/compute_block_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/compute_block_sparsity.py -------------------------------------------------------------------------------- /flash_attn/cute/copy_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/copy_utils.py -------------------------------------------------------------------------------- /flash_attn/cute/cute_dsl_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/cute_dsl_utils.py -------------------------------------------------------------------------------- /flash_attn/cute/fast_math.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/fast_math.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_bwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_bwd.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_bwd_postprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_bwd_postprocess.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_bwd_preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_bwd_preprocess.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_bwd_sm100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_bwd_sm100.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_bwd_sm90.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_bwd_sm90.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_fwd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_fwd.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_fwd_combine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_fwd_combine.py -------------------------------------------------------------------------------- /flash_attn/cute/flash_fwd_sm100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/flash_fwd_sm100.py -------------------------------------------------------------------------------- /flash_attn/cute/hopper_helpers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/hopper_helpers.py -------------------------------------------------------------------------------- /flash_attn/cute/interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/interface.py -------------------------------------------------------------------------------- /flash_attn/cute/mask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/mask.py -------------------------------------------------------------------------------- /flash_attn/cute/mask_definitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/mask_definitions.py -------------------------------------------------------------------------------- /flash_attn/cute/mma_sm100_desc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/mma_sm100_desc.py -------------------------------------------------------------------------------- /flash_attn/cute/named_barrier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/named_barrier.py -------------------------------------------------------------------------------- /flash_attn/cute/pack_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/pack_gqa.py -------------------------------------------------------------------------------- /flash_attn/cute/paged_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/paged_kv.py -------------------------------------------------------------------------------- /flash_attn/cute/pipeline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/pipeline.py -------------------------------------------------------------------------------- /flash_attn/cute/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/pyproject.toml -------------------------------------------------------------------------------- /flash_attn/cute/seqlen_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/seqlen_info.py -------------------------------------------------------------------------------- /flash_attn/cute/softmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/softmax.py -------------------------------------------------------------------------------- /flash_attn/cute/testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/testing.py -------------------------------------------------------------------------------- /flash_attn/cute/tile_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/tile_scheduler.py -------------------------------------------------------------------------------- /flash_attn/cute/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/cute/utils.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_interface.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/Dockerfile -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/README.md -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bench.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bwd_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bwd_prefill.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bwd_prefill_fused.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bwd_prefill_fused.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bwd_prefill_onekernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bwd_prefill_onekernel.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bwd_prefill_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bwd_prefill_split.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/bwd_ref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/bwd_ref.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/fp8.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/fwd_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/fwd_decode.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/fwd_prefill.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/fwd_prefill.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/fwd_ref.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/fwd_ref.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/interface_fa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/interface_fa.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/test.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/train.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_amd/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_amd/utils.py -------------------------------------------------------------------------------- /flash_attn/flash_attn_triton_og.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_attn_triton_og.py -------------------------------------------------------------------------------- /flash_attn/flash_blocksparse_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_blocksparse_attention.py -------------------------------------------------------------------------------- /flash_attn/flash_blocksparse_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/flash_blocksparse_attn_interface.py -------------------------------------------------------------------------------- /flash_attn/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/layers/patch_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/layers/patch_embed.py -------------------------------------------------------------------------------- /flash_attn/layers/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/layers/rotary.py -------------------------------------------------------------------------------- /flash_attn/losses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/losses/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/losses/cross_entropy.py -------------------------------------------------------------------------------- /flash_attn/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/baichuan.py -------------------------------------------------------------------------------- /flash_attn/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/bert.py -------------------------------------------------------------------------------- /flash_attn/models/bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/bigcode.py -------------------------------------------------------------------------------- /flash_attn/models/btlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/btlm.py -------------------------------------------------------------------------------- /flash_attn/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/falcon.py -------------------------------------------------------------------------------- /flash_attn/models/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/gpt.py -------------------------------------------------------------------------------- /flash_attn/models/gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/gpt_neox.py -------------------------------------------------------------------------------- /flash_attn/models/gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/gptj.py -------------------------------------------------------------------------------- /flash_attn/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/llama.py -------------------------------------------------------------------------------- /flash_attn/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/opt.py -------------------------------------------------------------------------------- /flash_attn/models/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/models/vit.py -------------------------------------------------------------------------------- /flash_attn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/modules/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/modules/block.py -------------------------------------------------------------------------------- /flash_attn/modules/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/modules/embedding.py -------------------------------------------------------------------------------- /flash_attn/modules/mha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/modules/mha.py -------------------------------------------------------------------------------- /flash_attn/modules/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/modules/mlp.py -------------------------------------------------------------------------------- /flash_attn/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/ops/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/activations.py -------------------------------------------------------------------------------- /flash_attn/ops/fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/fused_dense.py -------------------------------------------------------------------------------- /flash_attn/ops/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/layer_norm.py -------------------------------------------------------------------------------- /flash_attn/ops/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/rms_norm.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /flash_attn/ops/triton/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/cross_entropy.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/k_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/k_activations.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/layer_norm.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/linear.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/mlp.py -------------------------------------------------------------------------------- /flash_attn/ops/triton/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/ops/triton/rotary.py -------------------------------------------------------------------------------- /flash_attn/pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/pyproject.toml -------------------------------------------------------------------------------- /flash_attn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flash_attn/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/benchmark.py -------------------------------------------------------------------------------- /flash_attn/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/distributed.py -------------------------------------------------------------------------------- /flash_attn/utils/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/generation.py -------------------------------------------------------------------------------- /flash_attn/utils/library.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/library.py -------------------------------------------------------------------------------- /flash_attn/utils/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/pretrained.py -------------------------------------------------------------------------------- /flash_attn/utils/testing.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/testing.py -------------------------------------------------------------------------------- /flash_attn/utils/torch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/flash_attn/utils/torch.py -------------------------------------------------------------------------------- /hopper/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.0.0.b1" 2 | -------------------------------------------------------------------------------- /hopper/benchmark_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/benchmark_attn.py -------------------------------------------------------------------------------- /hopper/benchmark_flash_attention_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/benchmark_flash_attention_fp8.py -------------------------------------------------------------------------------- /hopper/benchmark_mla_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/benchmark_mla_decode.py -------------------------------------------------------------------------------- /hopper/benchmark_split_kv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/benchmark_split_kv.py -------------------------------------------------------------------------------- /hopper/block.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/block.h -------------------------------------------------------------------------------- /hopper/copy_sm90_bulk_reduce.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/copy_sm90_bulk_reduce.hpp -------------------------------------------------------------------------------- /hopper/cuda_check.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/cuda_check.h -------------------------------------------------------------------------------- /hopper/epilogue_bwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/epilogue_bwd.hpp -------------------------------------------------------------------------------- /hopper/epilogue_fwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/epilogue_fwd.hpp -------------------------------------------------------------------------------- /hopper/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash.h -------------------------------------------------------------------------------- /hopper/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_api.cpp -------------------------------------------------------------------------------- /hopper/flash_api_stable.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_api_stable.cpp -------------------------------------------------------------------------------- /hopper/flash_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_attn_interface.py -------------------------------------------------------------------------------- /hopper/flash_bwd_kernel_sm80.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_bwd_kernel_sm80.h -------------------------------------------------------------------------------- /hopper/flash_bwd_kernel_sm90.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_bwd_kernel_sm90.h -------------------------------------------------------------------------------- /hopper/flash_bwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_bwd_launch_template.h -------------------------------------------------------------------------------- /hopper/flash_bwd_postprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_bwd_postprocess_kernel.h -------------------------------------------------------------------------------- /hopper/flash_bwd_preprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_bwd_preprocess_kernel.h -------------------------------------------------------------------------------- /hopper/flash_fwd_combine.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_combine.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_combine_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_combine_kernel.h -------------------------------------------------------------------------------- /hopper/flash_fwd_combine_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_combine_launch_template.h -------------------------------------------------------------------------------- /hopper/flash_fwd_kernel_sm80.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_kernel_sm80.h -------------------------------------------------------------------------------- /hopper/flash_fwd_kernel_sm90.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_kernel_sm90.h -------------------------------------------------------------------------------- /hopper/flash_fwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_fwd_launch_template.h -------------------------------------------------------------------------------- /hopper/flash_prepare_scheduler.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/flash_prepare_scheduler.cu -------------------------------------------------------------------------------- /hopper/generate_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/generate_kernels.py -------------------------------------------------------------------------------- /hopper/heuristics.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/heuristics.h -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_bf16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_bf16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim128_fp16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim128_fp16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_bf16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_bf16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim192_fp16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim192_fp16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_bf16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_bf16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim256_fp16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim256_fp16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_bf16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_bf16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim64_fp16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim64_fp16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_bf16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_bf16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_bwd_hdim96_fp16_softcapall_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_bwd_hdim96_fp16_softcapall_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_sm100.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim128_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_128_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_128_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim192_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim256_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_256_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_256_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_512_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_512_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim64_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_softcapall_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_softcapall_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_split_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm80.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdim96_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimall_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimall_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_bf16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_bf16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_bf16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_bf16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_bf16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_bf16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_e4m3_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_e4m3_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_e4m3_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_e4m3_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_e4m3_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_e4m3_split_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_fp16_packgqa_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_fp16_packgqa_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_fp16_softcap_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_fp16_softcap_sm90.cu -------------------------------------------------------------------------------- /hopper/instantiations/flash_fwd_hdimdiff_fp16_split_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/instantiations/flash_fwd_hdimdiff_fp16_split_sm90.cu -------------------------------------------------------------------------------- /hopper/mainloop_bwd_sm80.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/mainloop_bwd_sm80.hpp -------------------------------------------------------------------------------- /hopper/mainloop_bwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/mainloop_bwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /hopper/mainloop_fwd_sm80.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/mainloop_fwd_sm80.hpp -------------------------------------------------------------------------------- /hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /hopper/mask.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/mask.h -------------------------------------------------------------------------------- /hopper/named_barrier.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/named_barrier.hpp -------------------------------------------------------------------------------- /hopper/pack_gqa.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/pack_gqa.h -------------------------------------------------------------------------------- /hopper/padding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/padding.py -------------------------------------------------------------------------------- /hopper/paged_kv.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/paged_kv.h -------------------------------------------------------------------------------- /hopper/rotary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/rotary.h -------------------------------------------------------------------------------- /hopper/seqlen.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/seqlen.h -------------------------------------------------------------------------------- /hopper/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/setup.py -------------------------------------------------------------------------------- /hopper/sm90_pipeline_no_cluster.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/sm90_pipeline_no_cluster.hpp -------------------------------------------------------------------------------- /hopper/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/softmax.h -------------------------------------------------------------------------------- /hopper/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/static_switch.h -------------------------------------------------------------------------------- /hopper/test_attn_kvcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_attn_kvcache.py -------------------------------------------------------------------------------- /hopper/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_flash_attn.py -------------------------------------------------------------------------------- /hopper/test_flash_attn_bwd_determinism.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_flash_attn_bwd_determinism.py -------------------------------------------------------------------------------- /hopper/test_kvcache.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_kvcache.py -------------------------------------------------------------------------------- /hopper/test_torch_compile_and_export.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_torch_compile_and_export.py -------------------------------------------------------------------------------- /hopper/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/test_util.py -------------------------------------------------------------------------------- /hopper/tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/tile_scheduler.hpp -------------------------------------------------------------------------------- /hopper/tile_size.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/tile_size.h -------------------------------------------------------------------------------- /hopper/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/hopper/utils.h -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/setup.py -------------------------------------------------------------------------------- /tests/cute/test_block_sparsity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_block_sparsity.py -------------------------------------------------------------------------------- /tests/cute/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_flash_attn.py -------------------------------------------------------------------------------- /tests/cute/test_flash_attn_race_condition.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_flash_attn_race_condition.py -------------------------------------------------------------------------------- /tests/cute/test_flash_attn_varlen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_flash_attn_varlen.py -------------------------------------------------------------------------------- /tests/cute/test_mask_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_mask_mod.py -------------------------------------------------------------------------------- /tests/cute/test_score_mod.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/cute/test_score_mod.py -------------------------------------------------------------------------------- /tests/layers/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/layers/test_rotary.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/losses/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/losses/test_cross_entropy_parallel.py -------------------------------------------------------------------------------- /tests/models/test_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_baichuan.py -------------------------------------------------------------------------------- /tests/models/test_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_bert.py -------------------------------------------------------------------------------- /tests/models/test_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_bigcode.py -------------------------------------------------------------------------------- /tests/models/test_btlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_btlm.py -------------------------------------------------------------------------------- /tests/models/test_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_falcon.py -------------------------------------------------------------------------------- /tests/models/test_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_gpt.py -------------------------------------------------------------------------------- /tests/models/test_gpt_generation_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_gpt_generation_parallel.py -------------------------------------------------------------------------------- /tests/models/test_gpt_neox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_gpt_neox.py -------------------------------------------------------------------------------- /tests/models/test_gpt_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_gpt_parallel.py -------------------------------------------------------------------------------- /tests/models/test_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_gptj.py -------------------------------------------------------------------------------- /tests/models/test_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_llama.py -------------------------------------------------------------------------------- /tests/models/test_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_opt.py -------------------------------------------------------------------------------- /tests/models/test_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/models/test_vit.py -------------------------------------------------------------------------------- /tests/modules/test_block_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/modules/test_block_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_embedding_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/modules/test_embedding_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_mha_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/modules/test_mha_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_mlp_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/modules/test_mlp_parallel.py -------------------------------------------------------------------------------- /tests/ops/test_dropout_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/ops/test_dropout_layer_norm.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/ops/test_fused_dense.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/ops/test_fused_dense_parallel.py -------------------------------------------------------------------------------- /tests/ops/triton/test_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/ops/triton/test_layer_norm.py -------------------------------------------------------------------------------- /tests/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | target-version = ['py38'] -------------------------------------------------------------------------------- /tests/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/test_flash_attn.py -------------------------------------------------------------------------------- /tests/test_flash_attn_ck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/test_flash_attn_ck.py -------------------------------------------------------------------------------- /tests/test_flash_attn_triton_amd.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/test_flash_attn_triton_amd.py -------------------------------------------------------------------------------- /tests/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/test_rotary.py -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/tests/test_util.py -------------------------------------------------------------------------------- /training/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/Dockerfile -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/README.md -------------------------------------------------------------------------------- /training/configs/callbacks/causality-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/causality-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/default.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/ema.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/ema.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/flop-count.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/flop-count.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/gpu-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/gpu-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/model-summary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/model-summary.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/none.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/configs/callbacks/norm-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/norm-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/params-log.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/params-log.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/wandb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/callbacks/wandb.yaml -------------------------------------------------------------------------------- /training/configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/config.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/openwebtext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/datamodule/openwebtext.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/thepile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/datamodule/thepile.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/base.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2l-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2l-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2l.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2m-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2m-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2m.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2s-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2s-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2s.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2xl-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2xl-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/owt/gpt2xl.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/base.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/base.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-hdim128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-hf-hdim128.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-hf-hdim128.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3-2.7B-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3-2.7B-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3l-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3l-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3m-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3m-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary-30B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary-30B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3s-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3s-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-flash-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary-60B.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary-60B.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary-8k.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary-8k.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash-rotary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-flash-rotary.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/pile/gpt3xl-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/experiment/pile/gpt3xl-hf.yaml -------------------------------------------------------------------------------- /training/configs/logger/comet.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/comet.yaml -------------------------------------------------------------------------------- /training/configs/logger/csv.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/csv.yaml -------------------------------------------------------------------------------- /training/configs/logger/many_loggers.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/many_loggers.yaml -------------------------------------------------------------------------------- /training/configs/logger/mlflow.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/mlflow.yaml -------------------------------------------------------------------------------- /training/configs/logger/neptune.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/neptune.yaml -------------------------------------------------------------------------------- /training/configs/logger/tensorboard.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/tensorboard.yaml -------------------------------------------------------------------------------- /training/configs/logger/wandb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/logger/wandb.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acc.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/acc.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acc_ignore_index.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/acc_ignore_index.yaml -------------------------------------------------------------------------------- /training/configs/metrics/acctop5.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/acctop5.yaml -------------------------------------------------------------------------------- /training/configs/metrics/mse.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/mse.yaml -------------------------------------------------------------------------------- /training/configs/metrics/num-tokens.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/num-tokens.yaml -------------------------------------------------------------------------------- /training/configs/metrics/perplexity.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/metrics/perplexity.yaml -------------------------------------------------------------------------------- /training/configs/mode/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/mode/debug.yaml -------------------------------------------------------------------------------- /training/configs/mode/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/mode/default.yaml -------------------------------------------------------------------------------- /training/configs/mode/exp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/mode/exp.yaml -------------------------------------------------------------------------------- /training/configs/mode/profile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/mode/profile.yaml -------------------------------------------------------------------------------- /training/configs/mode/smoke.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/mode/smoke.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2-hf.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-large.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2model/gpt2-large.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-medium.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2model/gpt2-medium.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-small.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2model/gpt2-small.yaml -------------------------------------------------------------------------------- /training/configs/model/gpt2model/gpt2-xlarge.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/model/gpt2model/gpt2-xlarge.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adam.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adam.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex-distributed.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adamw-apex-distributed.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex-zero.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adamw-apex-zero.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-apex.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adamw-apex.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw-zero.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adamw-zero.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/adamw.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/adamw.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/fusedlamb-ds.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/fusedlamb-ds.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/fusedlamb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/fusedlamb.yaml -------------------------------------------------------------------------------- /training/configs/optimizer/sgd.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/optimizer/sgd.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/cosine-warmup-timm.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/cosine-warmup-timm.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/cosine-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/cosine-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/invsqrt.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/invsqrt.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/linear-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/linear-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/multi-step.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/multi-step.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/plateau.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/plateau.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/poly-warmup.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/poly-warmup.yaml -------------------------------------------------------------------------------- /training/configs/scheduler/step.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/scheduler/step.yaml -------------------------------------------------------------------------------- /training/configs/task/sequence-model.yaml: -------------------------------------------------------------------------------- 1 | _target_: src.tasks.seq.SequenceModel 2 | -------------------------------------------------------------------------------- /training/configs/trainer/all_params.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/trainer/all_params.yaml -------------------------------------------------------------------------------- /training/configs/trainer/ddp.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/trainer/ddp.yaml -------------------------------------------------------------------------------- /training/configs/trainer/debug.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/trainer/debug.yaml -------------------------------------------------------------------------------- /training/configs/trainer/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/configs/trainer/default.yaml -------------------------------------------------------------------------------- /training/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/run.py -------------------------------------------------------------------------------- /training/src/callbacks/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/src/callbacks/causality_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/causality_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/ema.py -------------------------------------------------------------------------------- /training/src/callbacks/flop_count.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/flop_count.py -------------------------------------------------------------------------------- /training/src/callbacks/gpu_affinity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/gpu_affinity.py -------------------------------------------------------------------------------- /training/src/callbacks/loss_scale_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/loss_scale_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/model_checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/model_checkpoint.py -------------------------------------------------------------------------------- /training/src/callbacks/norm_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/norm_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/params_log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/params_log.py -------------------------------------------------------------------------------- /training/src/callbacks/speed_monitor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/speed_monitor.py -------------------------------------------------------------------------------- /training/src/callbacks/wandb_callbacks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/callbacks/wandb_callbacks.py -------------------------------------------------------------------------------- /training/src/datamodules/datasets/detokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/datasets/detokenizer.py -------------------------------------------------------------------------------- /training/src/datamodules/datasets/lm_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/datasets/lm_dataset.py -------------------------------------------------------------------------------- /training/src/datamodules/fault_tolerant_sampler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/fault_tolerant_sampler.py -------------------------------------------------------------------------------- /training/src/datamodules/imagenet.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/imagenet.py -------------------------------------------------------------------------------- /training/src/datamodules/language_modeling_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/language_modeling_hf.py -------------------------------------------------------------------------------- /training/src/datamodules/timm_mixup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/datamodules/timm_mixup.py -------------------------------------------------------------------------------- /training/src/distributed/ddp_comm_hooks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/distributed/ddp_comm_hooks.py -------------------------------------------------------------------------------- /training/src/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/eval.py -------------------------------------------------------------------------------- /training/src/metrics/accuracy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/metrics/accuracy.py -------------------------------------------------------------------------------- /training/src/metrics/num_tokens.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/metrics/num_tokens.py -------------------------------------------------------------------------------- /training/src/metrics/perplexity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/metrics/perplexity.py -------------------------------------------------------------------------------- /training/src/models/modules/seq_common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/models/modules/seq_common.py -------------------------------------------------------------------------------- /training/src/optim/param_grouping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/optim/param_grouping.py -------------------------------------------------------------------------------- /training/src/optim/timm_lr_scheduler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/optim/timm_lr_scheduler.py -------------------------------------------------------------------------------- /training/src/tasks/seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/tasks/seq.py -------------------------------------------------------------------------------- /training/src/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/train.py -------------------------------------------------------------------------------- /training/src/utils/checkpoint.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/checkpoint.py -------------------------------------------------------------------------------- /training/src/utils/ddp_zero1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/ddp_zero1.py -------------------------------------------------------------------------------- /training/src/utils/ddp_zero2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/ddp_zero2.py -------------------------------------------------------------------------------- /training/src/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/distributed.py -------------------------------------------------------------------------------- /training/src/utils/ema.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/ema.py -------------------------------------------------------------------------------- /training/src/utils/flops.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/flops.py -------------------------------------------------------------------------------- /training/src/utils/gpu_affinity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/gpu_affinity.py -------------------------------------------------------------------------------- /training/src/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/src/utils/utils.py -------------------------------------------------------------------------------- /training/tests/datamodules/test_language_modeling_hf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/training/tests/datamodules/test_language_modeling_hf.py -------------------------------------------------------------------------------- /usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/flash-attention/HEAD/usage.md --------------------------------------------------------------------------------