├── .github
│   └── workflows
│       └── publish.yml
├── .gitignore
├── .gitmodules
├── AUTHORS
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── assets
│   ├── Customflash2_a100_fwd_bwd_benchmark.png
│   ├── flash2_a100_fwd_bwd_benchmark.png
│   ├── flash2_h100_fwd_bwd_benchmark.png
│   ├── flash3_fp16_fwd.png
│   ├── flashattention_logo.png
│   ├── flashattn_banner.jpg
│   ├── flashattn_banner.pdf
│   ├── flashattn_memory.jpg
│   ├── flashattn_speedup.jpg
│   ├── flashattn_speedup_3090.jpg
│   ├── flashattn_speedup_a100_d128.jpg
│   ├── flashattn_speedup_t4.jpg
│   ├── flashattn_speedup_t4_fwd.jpg
│   ├── gpt2_training_curve.jpg
│   ├── gpt2_training_efficiency.jpg
│   ├── gpt3_training_curve.jpg
│   └── gpt3_training_efficiency.jpg
├── autotuner
│   ├── arch
│   │   ├── A100.py
│   │   ├── RTX4090.py
│   │   ├── __init__.py
│   │   └── arch_base.py
│   ├── base_tunner.py
│   ├── code_emitter.py
│   ├── configs
│   │   ├── __init__.py
│   │   ├── base_config.py
│   │   └── fwd_config.py
│   ├── profile_attn.py
│   ├── template
│   │   ├── flash_attn_profile_interface.py
│   │   ├── flash_fwd.cu
│   │   ├── flash_fwd.h
│   │   ├── flash_fwd_launch_template_profile.h
│   │   ├── flash_profile.h
│   │   └── flash_profile_api.cpp
│   ├── test_run_tunner.py
│   └── tunner.py
├── autotunner.md
├── benchmarks
│   ├── benchmark_alibi.py
│   ├── benchmark_causal.py
│   ├── benchmark_flash_attention.py
│   ├── benchmark_gemm.py
│   ├── benchmark_head_headdim.py
│   └── benchmark_headdim.py
├── compute_sm.py
├── csrc
│   ├── flash_attn
│   │   ├── flash_api.cpp
│   │   └── src
│   │       ├── alibi.h
│   │       ├── block_info.h
│   │       ├── dropout.h
│   │       ├── flash.h
│   │       ├── flash_bwd_hdim128_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim128_bf16_sm80.cu
│   │       ├── flash_bwd_hdim128_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim128_fp16_sm80.cu
│   │       ├── flash_bwd_hdim160_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim160_bf16_sm80.cu
│   │       ├── flash_bwd_hdim160_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim160_fp16_sm80.cu
│   │       ├── flash_bwd_hdim192_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim192_bf16_sm80.cu
│   │       ├── flash_bwd_hdim192_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim192_fp16_sm80.cu
│   │       ├── flash_bwd_hdim256_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim256_bf16_sm80.cu
│   │       ├── flash_bwd_hdim256_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim256_fp16_sm80.cu
│   │       ├── flash_bwd_hdim32_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim32_bf16_sm80.cu
│   │       ├── flash_bwd_hdim32_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim32_fp16_sm80.cu
│   │       ├── flash_bwd_hdim64_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim64_bf16_sm80.cu
│   │       ├── flash_bwd_hdim64_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim64_fp16_sm80.cu
│   │       ├── flash_bwd_hdim96_bf16_causal_sm80.cu
│   │       ├── flash_bwd_hdim96_bf16_sm80.cu
│   │       ├── flash_bwd_hdim96_fp16_causal_sm80.cu
│   │       ├── flash_bwd_hdim96_fp16_sm80.cu
│   │       ├── flash_bwd_kernel.h
│   │       ├── flash_bwd_launch_template.h
│   │       ├── flash_bwd_preprocess_kernel.h
│   │       ├── flash_bwd_qkdim128_vdim256_bf16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim128_vdim256_bf16_sm80.cu
│   │       ├── flash_bwd_qkdim128_vdim256_fp16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim128_vdim256_fp16_sm80.cu
│   │       ├── flash_bwd_qkdim128_vdim256_sm80.h
│   │       ├── flash_bwd_qkdim192_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim192_vdim128_bf16_sm80.cu
│   │       ├── flash_bwd_qkdim192_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim192_vdim128_fp16_sm80.cu
│   │       ├── flash_bwd_qkdim192_vdim128_sm80.h
│   │       ├── flash_bwd_qkdim32_vdim64_bf16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim32_vdim64_bf16_sm80.cu
│   │       ├── flash_bwd_qkdim32_vdim64_fp16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim32_vdim64_fp16_sm80.cu
│   │       ├── flash_bwd_qkdim32_vdim64_sm80.h
│   │       ├── flash_bwd_qkdim64_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim64_vdim128_bf16_sm80.cu
│   │       ├── flash_bwd_qkdim64_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim64_vdim128_fp16_sm80.cu
│   │       ├── flash_bwd_qkdim64_vdim128_sm80.h
│   │       ├── flash_bwd_qkdim96_vdim192_bf16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim96_vdim192_bf16_sm80.cu
│   │       ├── flash_bwd_qkdim96_vdim192_fp16_causal_sm80.cu
│   │       ├── flash_bwd_qkdim96_vdim192_fp16_sm80.cu
│   │       ├── flash_bwd_qkdim96_vdim192_sm80.h
│   │       ├── flash_fwd_hdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim128_bf16_sm80.cu
│   │       ├── flash_fwd_hdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim128_fp16_sm80.cu
│   │       ├── flash_fwd_hdim160_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim160_bf16_sm80.cu
│   │       ├── flash_fwd_hdim160_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim160_fp16_sm80.cu
│   │       ├── flash_fwd_hdim192_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim192_bf16_sm80.cu
│   │       ├── flash_fwd_hdim192_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim192_fp16_sm80.cu
│   │       ├── flash_fwd_hdim256_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim256_bf16_sm80.cu
│   │       ├── flash_fwd_hdim256_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim256_fp16_sm80.cu
│   │       ├── flash_fwd_hdim32_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim32_bf16_sm80.cu
│   │       ├── flash_fwd_hdim32_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim32_fp16_sm80.cu
│   │       ├── flash_fwd_hdim64_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim64_bf16_sm80.cu
│   │       ├── flash_fwd_hdim64_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim64_fp16_sm80.cu
│   │       ├── flash_fwd_hdim96_bf16_causal_sm80.cu
│   │       ├── flash_fwd_hdim96_bf16_sm80.cu
│   │       ├── flash_fwd_hdim96_fp16_causal_sm80.cu
│   │       ├── flash_fwd_hdim96_fp16_sm80.cu
│   │       ├── flash_fwd_kernel.h
│   │       ├── flash_fwd_launch_template.h
│   │       ├── flash_fwd_qkdim128_vdim256_bf16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim128_vdim256_bf16_sm80.cu
│   │       ├── flash_fwd_qkdim128_vdim256_fp16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim128_vdim256_fp16_sm80.cu
│   │       ├── flash_fwd_qkdim128_vdim256_sm80.h
│   │       ├── flash_fwd_qkdim192_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim192_vdim128_bf16_sm80.cu
│   │       ├── flash_fwd_qkdim192_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim192_vdim128_fp16_sm80.cu
│   │       ├── flash_fwd_qkdim192_vdim128_sm80.h
│   │       ├── flash_fwd_qkdim32_vdim64_bf16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim32_vdim64_bf16_sm80.cu
│   │       ├── flash_fwd_qkdim32_vdim64_fp16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim32_vdim64_fp16_sm80.cu
│   │       ├── flash_fwd_qkdim32_vdim64_sm80.h
│   │       ├── flash_fwd_qkdim64_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim64_vdim128_bf16_sm80.cu
│   │       ├── flash_fwd_qkdim64_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim64_vdim128_fp16_sm80.cu
│   │       ├── flash_fwd_qkdim64_vdim128_sm80.h
│   │       ├── flash_fwd_qkdim96_vdim192_bf16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim96_vdim192_bf16_sm80.cu
│   │       ├── flash_fwd_qkdim96_vdim192_fp16_causal_sm80.cu
│   │       ├── flash_fwd_qkdim96_vdim192_fp16_sm80.cu
│   │       ├── flash_fwd_qkdim96_vdim192_sm80.h
│   │       ├── flash_fwd_split_hdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim128_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim128_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim160_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim160_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim160_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim160_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim192_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim192_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim192_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim192_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim256_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim256_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim256_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim256_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim32_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim32_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim32_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim32_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim64_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim64_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim64_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim64_fp16_sm80.cu
│   │       ├── flash_fwd_split_hdim96_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim96_bf16_sm80.cu
│   │       ├── flash_fwd_split_hdim96_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_hdim96_fp16_sm80.cu
│   │       ├── flash_fwd_split_qkdim128_vdim256_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim128_vdim256_bf16_sm80.cu
│   │       ├── flash_fwd_split_qkdim128_vdim256_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim128_vdim256_fp16_sm80.cu
│   │       ├── flash_fwd_split_qkdim192_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim192_vdim128_bf16_sm80.cu
│   │       ├── flash_fwd_split_qkdim192_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim192_vdim128_fp16_sm80.cu
│   │       ├── flash_fwd_split_qkdim32_vdim64_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim32_vdim64_bf16_sm80.cu
│   │       ├── flash_fwd_split_qkdim32_vdim64_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim32_vdim64_fp16_sm80.cu
│   │       ├── flash_fwd_split_qkdim64_vdim128_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim64_vdim128_bf16_sm80.cu
│   │       ├── flash_fwd_split_qkdim64_vdim128_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim64_vdim128_fp16_sm80.cu
│   │       ├── flash_fwd_split_qkdim96_vdim192_bf16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim96_vdim192_bf16_sm80.cu
│   │       ├── flash_fwd_split_qkdim96_vdim192_fp16_causal_sm80.cu
│   │       ├── flash_fwd_split_qkdim96_vdim192_fp16_sm80.cu
│   │       ├── generate_kernels.py
│   │       ├── generate_switch_headdim.py
│   │       ├── kernel_traits.h
│   │       ├── mask.h
│   │       ├── philox.cuh
│   │       ├── rotary.h
│   │       ├── softmax.h
│   │       ├── static_switch.h
│   │       ├── static_switch_headdim.h
│   │       └── utils.h
│   ├── flash_attn_ck
│   │   ├── flash_api.cpp
│   │   ├── flash_common.hpp
│   │   ├── mha_bwd.cpp
│   │   ├── mha_fwd.cpp
│   │   ├── mha_varlen_bwd.cpp
│   │   └── mha_varlen_fwd.cpp
│   ├── ft_attention
│   │   ├── README.md
│   │   ├── cuda_bf16_fallbacks.cuh
│   │   ├── cuda_bf16_wrapper.h
│   │   ├── decoder_masked_multihead_attention.cu
│   │   ├── decoder_masked_multihead_attention.h
│   │   ├── decoder_masked_multihead_attention_template.hpp
│   │   ├── decoder_masked_multihead_attention_utils.h
│   │   ├── ft_attention.cpp
│   │   └── setup.py
│   ├── fused_dense_lib
│   │   ├── README.md
│   │   ├── fused_dense.cpp
│   │   ├── fused_dense_cuda.cu
│   │   └── setup.py
│   ├── fused_softmax
│   │   ├── fused_softmax.cpp
│   │   ├── scaled_masked_softmax.h
│   │   ├── scaled_masked_softmax_cuda.cu
│   │   ├── scaled_upper_triang_masked_softmax.h
│   │   ├── scaled_upper_triang_masked_softmax_cuda.cu
│   │   ├── setup.py
│   │   └── type_shim.h
│   ├── layer_norm
│   │   ├── README.md
│   │   ├── ln.h
│   │   ├── ln_api.cpp
│   │   ├── ln_bwd_1024.cu
│   │   ├── ln_bwd_1280.cu
│   │   ├── ln_bwd_1536.cu
│   │   ├── ln_bwd_2048.cu
│   │   ├── ln_bwd_256.cu
│   │   ├── ln_bwd_2560.cu
│   │   ├── ln_bwd_3072.cu
│   │   ├── ln_bwd_4096.cu
│   │   ├── ln_bwd_512.cu
│   │   ├── ln_bwd_5120.cu
│   │   ├── ln_bwd_6144.cu
│   │   ├── ln_bwd_7168.cu
│   │   ├── ln_bwd_768.cu
│   │   ├── ln_bwd_8192.cu
│   │   ├── ln_bwd_kernels.cuh
│   │   ├── ln_fwd_1024.cu
│   │   ├── ln_fwd_1280.cu
│   │   ├── ln_fwd_1536.cu
│   │   ├── ln_fwd_2048.cu
│   │   ├── ln_fwd_256.cu
│   │   ├── ln_fwd_2560.cu
│   │   ├── ln_fwd_3072.cu
│   │   ├── ln_fwd_4096.cu
│   │   ├── ln_fwd_512.cu
│   │   ├── ln_fwd_5120.cu
│   │   ├── ln_fwd_6144.cu
│   │   ├── ln_fwd_7168.cu
│   │   ├── ln_fwd_768.cu
│   │   ├── ln_fwd_8192.cu
│   │   ├── ln_fwd_kernels.cuh
│   │   ├── ln_kernel_traits.h
│   │   ├── ln_parallel_bwd_1024.cu
│   │   ├── ln_parallel_bwd_1280.cu
│   │   ├── ln_parallel_bwd_1536.cu
│   │   ├── ln_parallel_bwd_2048.cu
│   │   ├── ln_parallel_bwd_256.cu
│   │   ├── ln_parallel_bwd_2560.cu
│   │   ├── ln_parallel_bwd_3072.cu
│   │   ├── ln_parallel_bwd_4096.cu
│   │   ├── ln_parallel_bwd_512.cu
│   │   ├── ln_parallel_bwd_5120.cu
│   │   ├── ln_parallel_bwd_6144.cu
│   │   ├── ln_parallel_bwd_7168.cu
│   │   ├── ln_parallel_bwd_768.cu
│   │   ├── ln_parallel_bwd_8192.cu
│   │   ├── ln_parallel_fwd_1024.cu
│   │   ├── ln_parallel_fwd_1280.cu
│   │   ├── ln_parallel_fwd_1536.cu
│   │   ├── ln_parallel_fwd_2048.cu
│   │   ├── ln_parallel_fwd_256.cu
│   │   ├── ln_parallel_fwd_2560.cu
│   │   ├── ln_parallel_fwd_3072.cu
│   │   ├── ln_parallel_fwd_4096.cu
│   │   ├── ln_parallel_fwd_512.cu
│   │   ├── ln_parallel_fwd_5120.cu
│   │   ├── ln_parallel_fwd_6144.cu
│   │   ├── ln_parallel_fwd_7168.cu
│   │   ├── ln_parallel_fwd_768.cu
│   │   ├── ln_parallel_fwd_8192.cu
│   │   ├── ln_parallel_residual_bwd_kernels.cuh
│   │   ├── ln_parallel_residual_fwd_kernels.cuh
│   │   ├── ln_utils.cuh
│   │   ├── setup.py
│   │   └── static_switch.h
│   ├── rotary
│   │   ├── rotary.cpp
│   │   ├── rotary_cuda.cu
│   │   └── setup.py
│   └── xentropy
│       ├── README.md
│       ├── interface.cpp
│       ├── setup.py
│       └── xentropy_kernel.cu
├── examples
│   └── inference
│       └── README.md
├── flex_head_fa
│   ├── __init__.py
│   ├── bert_padding.py
│   ├── flash_attn_interface.py
│   ├── flash_attn_triton.py
│   ├── flash_attn_triton_og.py
│   ├── flash_blocksparse_attention.py
│   ├── flash_blocksparse_attn_interface.py
│   ├── fused_softmax.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── patch_embed.py
│   │   └── rotary.py
│   ├── losses
│   │   ├── __init__.py
│   │   └── cross_entropy.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── baichuan.py
│   │   ├── bert.py
│   │   ├── bigcode.py
│   │   ├── btlm.py
│   │   ├── falcon.py
│   │   ├── gpt.py
│   │   ├── gpt_neox.py
│   │   ├── gptj.py
│   │   ├── llama.py
│   │   ├── opt.py
│   │   └── vit.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── block.py
│   │   ├── embedding.py
│   │   ├── mha.py
│   │   └── mlp.py
│   ├── ops
│   │   ├── __init__.py
│   │   ├── activations.py
│   │   ├── fused_dense.py
│   │   ├── layer_norm.py
│   │   ├── rms_norm.py
│   │   └── triton
│   │       ├── __init__.py
│   │       ├── cross_entropy.py
│   │       ├── k_activations.py
│   │       ├── layer_norm.py
│   │       ├── linear.py
│   │       ├── mlp.py
│   │       └── rotary.py
│   ├── pyproject.toml
│   └── utils
│       ├── __init__.py
│       ├── benchmark.py
│       ├── distributed.py
│       ├── generation.py
│       └── pretrained.py
├── headdim.json
├── hopper
│   ├── __init__.py
│   ├── benchmark_attn.py
│   ├── benchmark_flash_attention_fp8.py
│   ├── block_info.h
│   ├── epilogue_bwd_sm90_tma.hpp
│   ├── epilogue_fwd_sm90_tma.hpp
│   ├── flash.h
│   ├── flash_api.cpp
│   ├── flash_attn_interface.py
│   ├── flash_bwd_hdim128_bf16_sm90.cu
│   ├── flash_bwd_hdim128_fp16_sm90.cu
│   ├── flash_bwd_hdim256_fp16_sm90.cu
│   ├── flash_bwd_hdim64_bf16_sm90.cu
│   ├── flash_bwd_hdim64_fp16_sm90.cu
│   ├── flash_bwd_hdim96_bf16_sm90.cu
│   ├── flash_bwd_hdim96_fp16_sm90.cu
│   ├── flash_bwd_kernel.h
│   ├── flash_bwd_launch_template.h
│   ├── flash_bwd_postprocess_kernel.h
│   ├── flash_bwd_preprocess_kernel.h
│   ├── flash_bwd_qkdim128_vdim256_bf16_sm90.cu
│   ├── flash_bwd_qkdim128_vdim256_fp16_sm90.cu
│   ├── flash_bwd_qkdim32_vdim64_bf16_sm90.cu
│   ├── flash_bwd_qkdim32_vdim64_fp16_sm90.cu
│   ├── flash_bwd_qkdim64_vdim128_bf16_sm90.cu
│   ├── flash_bwd_qkdim64_vdim128_fp16_sm90.cu
│   ├── flash_fwd_hdim128_bf16_sm90.cu
│   ├── flash_fwd_hdim128_e4m3_sm90.cu
│   ├── flash_fwd_hdim128_fp16_sm90.cu
│   ├── flash_fwd_hdim256_bf16_sm90.cu
│   ├── flash_fwd_hdim256_e4m3_sm90.cu
│   ├── flash_fwd_hdim256_fp16_sm90.cu
│   ├── flash_fwd_hdim64_bf16_sm90.cu
│   ├── flash_fwd_hdim64_e4m3_sm90.cu
│   ├── flash_fwd_hdim64_fp16_sm90.cu
│   ├── flash_fwd_kernel.h
│   ├── flash_fwd_launch_template.h
│   ├── flash_fwd_qkdim128_vdim256_bf16_sm90.cu
│   ├── flash_fwd_qkdim128_vdim256_fp16_sm90.cu
│   ├── flash_fwd_qkdim32_vdim64_bf16_sm90.cu
│   ├── flash_fwd_qkdim32_vdim64_fp16_sm90.cu
│   ├── flash_fwd_qkdim64_vdim128_bf16_sm90.cu
│   ├── flash_fwd_qkdim64_vdim128_fp16_sm90.cu
│   ├── kernel_traits.h
│   ├── mainloop_bwd_sm90_tma_gmma_ws.hpp
│   ├── mainloop_fwd_sm90_tma_gmma_ws.hpp
│   ├── named_barrier.hpp
│   ├── seq_len.h
│   ├── setup.py
│   ├── softmax.h
│   ├── static_switch.h
│   ├── test.py
│   ├── test_flash_attn.py
│   ├── tile_scheduler.hpp
│   ├── tile_scheduler_bwd.hpp
│   └── utils.h
├── setup.py
├── test.py
├── tests
│   ├── layers
│   │   └── test_rotary.py
│   ├── losses
│   │   ├── test_cross_entropy.py
│   │   └── test_cross_entropy_parallel.py
│   ├── models
│   │   ├── test_baichuan.py
│   │   ├── test_bert.py
│   │   ├── test_bigcode.py
│   │   ├── test_btlm.py
│   │   ├── test_falcon.py
│   │   ├── test_gpt.py
│   │   ├── test_gpt_generation_parallel.py
│   │   ├── test_gpt_neox.py
│   │   ├── test_gpt_parallel.py
│   │   ├── test_gptj.py
│   │   ├── test_llama.py
│   │   ├── test_opt.py
│   │   └── test_vit.py
│   ├── modules
│   │   ├── test_block_parallel.py
│   │   ├── test_embedding_parallel.py
│   │   ├── test_mha_parallel.py
│   │   └── test_mlp_parallel.py
│   ├── ops
│   │   ├── test_dropout_layer_norm.py
│   │   ├── test_fused_dense.py
│   │   ├── test_fused_dense_parallel.py
│   │   └── triton
│   │       └── test_layer_norm.py
│   ├── pyproject.toml
│   ├── test_flash_attn.py
│   ├── test_flash_attn_ck.py
│   ├── test_flash_attn_head.py
│   ├── test_flash_attn_headdim.py
│   ├── test_rotary.py
│   └── test_util.py
├── training
│   ├── Dockerfile
│   ├── README.md
│   ├── configs
│   │   ├── callbacks
│   │   │   ├── causality-monitor.yaml
│   │   │   ├── default.yaml
│   │   │   ├── ema.yaml
│   │   │   ├── flop-count.yaml
│   │   │   ├── gpu-monitor.yaml
│   │   │   ├── model-summary.yaml
│   │   │   ├── none.yaml
│   │   │   ├── norm-monitor.yaml
│   │   │   ├── params-log.yaml
│   │   │   └── wandb.yaml
│   │   ├── config.yaml
│   │   ├── datamodule
│   │   │   ├── openwebtext.yaml
│   │   │   └── thepile.yaml
│   │   ├── experiment
│   │   │   ├── owt
│   │   │   │   ├── base.yaml
│   │   │   │   ├── gpt2l-flash.yaml
│   │   │   │   ├── gpt2l-hf.yaml
│   │   │   │   ├── gpt2l.yaml
│   │   │   │   ├── gpt2m-flash.yaml
│   │   │   │   ├── gpt2m-hf.yaml
│   │   │   │   ├── gpt2m.yaml
│   │   │   │   ├── gpt2s-flash.yaml
│   │   │   │   ├── gpt2s-hf.yaml
│   │   │   │   ├── gpt2s.yaml
│   │   │   │   ├── gpt2xl-flash.yaml
│   │   │   │   ├── gpt2xl-hf.yaml
│   │   │   │   └── gpt2xl.yaml
│   │   │   └── pile
│   │   │       ├── base.yaml
│   │   │       ├── gpt3-2.7B-flash-8k.yaml
│   │   │       ├── gpt3-2.7B-flash-hdim128-rotary-8k.yaml
│   │   │       ├── gpt3-2.7B-flash-hdim128-rotary.yaml
│   │   │       ├── gpt3-2.7B-flash-hdim128.yaml
│   │   │       ├── gpt3-2.7B-flash-rotary-8k.yaml
│   │   │       ├── gpt3-2.7B-flash-rotary.yaml
│   │   │       ├── gpt3-2.7B-flash.yaml
│   │   │       ├── gpt3-2.7B-hf-hdim128.yaml
│   │   │       ├── gpt3-2.7B-hf.yaml
│   │   │       ├── gpt3l-flash-8k.yaml
│   │   │       ├── gpt3l-flash-rotary-30B.yaml
│   │   │       ├── gpt3l-flash-rotary-8k.yaml
│   │   │       ├── gpt3l-flash-rotary.yaml
│   │   │       ├── gpt3l-flash.yaml
│   │   │       ├── gpt3l-hf.yaml
│   │   │       ├── gpt3m-flash-8k.yaml
│   │   │       ├── gpt3m-flash-rotary-30B.yaml
│   │   │       ├── gpt3m-flash-rotary-8k.yaml
│   │   │       ├── gpt3m-flash-rotary.yaml
│   │   │       ├── gpt3m-flash.yaml
│   │   │       ├── gpt3m-hf.yaml
│   │   │       ├── gpt3s-flash-8k.yaml
│   │   │       ├── gpt3s-flash-rotary-30B.yaml
│   │   │       ├── gpt3s-flash-rotary-8k.yaml
│   │   │       ├── gpt3s-flash-rotary.yaml
│   │   │       ├── gpt3s-flash.yaml
│   │   │       ├── gpt3s-hf.yaml
│   │   │       ├── gpt3xl-flash-8k.yaml
│   │   │       ├── gpt3xl-flash-rotary-60B.yaml
│   │   │       ├── gpt3xl-flash-rotary-8k.yaml
│   │   │       ├── gpt3xl-flash-rotary.yaml
│   │   │       ├── gpt3xl-flash.yaml
│   │   │       └── gpt3xl-hf.yaml
│   │   ├── logger
│   │   │   ├── comet.yaml
│   │   │   ├── csv.yaml
│   │   │   ├── many_loggers.yaml
│   │   │   ├── mlflow.yaml
│   │   │   ├── neptune.yaml
│   │   │   ├── tensorboard.yaml
│   │   │   └── wandb.yaml
│   │   ├── metrics
│   │   │   ├── acc.yaml
│   │   │   ├── acc_ignore_index.yaml
│   │   │   ├── acctop5.yaml
│   │   │   ├── mse.yaml
│   │   │   ├── num-tokens.yaml
│   │   │   └── perplexity.yaml
│   │   ├── mode
│   │   │   ├── debug.yaml
│   │   │   ├── default.yaml
│   │   │   ├── exp.yaml
│   │   │   ├── profile.yaml
│   │   │   └── smoke.yaml
│   │   ├── model
│   │   │   ├── gpt2-hf.yaml
│   │   │   ├── gpt2.yaml
│   │   │   └── gpt2model
│   │   │       ├── gpt2-large.yaml
│   │   │       ├── gpt2-medium.yaml
│   │   │       ├── gpt2-small.yaml
│   │   │       └── gpt2-xlarge.yaml
│   │   ├── optimizer
│   │   │   ├── adam.yaml
│   │   │   ├── adamw-apex-distributed.yaml
│   │   │   ├── adamw-apex-zero.yaml
│   │   │   ├── adamw-apex.yaml
│   │   │   ├── adamw-zero.yaml
│   │   │   ├── adamw.yaml
│   │   │   ├── fusedlamb-ds.yaml
│   │   │   ├── fusedlamb.yaml
│   │   │   └── sgd.yaml
│   │   ├── scheduler
│   │   │   ├── cosine-warmup-timm.yaml
│   │   │   ├── cosine-warmup.yaml
│   │   │   ├── invsqrt.yaml
│   │   │   ├── linear-warmup.yaml
│   │   │   ├── multi-step.yaml
│   │   │   ├── plateau.yaml
│   │   │   ├── poly-warmup.yaml
│   │   │   └── step.yaml
│   │   ├── task
│   │   │   └── sequence-model.yaml
│   │   └── trainer
│   │       ├── all_params.yaml
│   │       ├── ddp.yaml
│   │       ├── debug.yaml
│   │       └── default.yaml
│   ├── run.py
│   ├── src
│   │   ├── callbacks
│   │   │   ├── __init__.py
│   │   │   ├── causality_monitor.py
│   │   │   ├── ema.py
│   │   │   ├── flop_count.py
│   │   │   ├── gpu_affinity.py
│   │   │   ├── loss_scale_monitor.py
│   │   │   ├── model_checkpoint.py
│   │   │   ├── norm_monitor.py
│   │   │   ├── params_log.py
│   │   │   ├── speed_monitor.py
│   │   │   └── wandb_callbacks.py
│   │   ├── datamodules
│   │   │   ├── datasets
│   │   │   │   ├── detokenizer.py
│   │   │   │   └── lm_dataset.py
│   │   │   ├── fault_tolerant_sampler.py
│   │   │   ├── imagenet.py
│   │   │   ├── language_modeling_hf.py
│   │   │   └── timm_mixup.py
│   │   ├── distributed
│   │   │   └── ddp_comm_hooks.py
│   │   ├── eval.py
│   │   ├── metrics
│   │   │   ├── accuracy.py
│   │   │   ├── num_tokens.py
│   │   │   └── perplexity.py
│   │   ├── models
│   │   │   └── modules
│   │   │       └── seq_common.py
│   │   ├── optim
│   │   │   ├── param_grouping.py
│   │   │   └── timm_lr_scheduler.py
│   │   ├── tasks
│   │   │   └── seq.py
│   │   ├── train.py
│   │   └── utils
│   │       ├── checkpoint.py
│   │       ├── ddp_zero1.py
│   │       ├── ddp_zero2.py
│   │       ├── distributed.py
│   │       ├── ema.py
│   │       ├── flops.py
│   │       ├── gpu_affinity.py
│   │       └── utils.py
│   └── tests
│       └── datamodules
│           └── test_language_modeling_hf.py
└── usage.md

/AUTHORS:
--------------------------------------------------------------------------------
Tri Dao, trid@cs.stanford.edu
--------------------------------------------------------------------------------

For every other file in the tree, this dump does not inline the file body; the raw copy of each file is linked at https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/<path>, where <path> is the path shown in the tree above. The per-file link listing follows that single pattern throughout and breaks off partway through the csrc/flash_attn/src entries.
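Any listed file can be pulled directly from that raw-URL pattern. A minimal sketch using only the Python standard library; the chosen path is just an example taken from the tree, and network access is assumed:

import urllib.request

BASE = "https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/"
path = "csrc/flash_attn/src/flash_fwd_kernel.h"  # any path shown in the tree above

# Fetch the raw file and report its size; adapt the path as needed.
with urllib.request.urlopen(BASE + path) as resp:
    text = resp.read().decode("utf-8")
print(f"{path}: {len(text.splitlines())} lines")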
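The sm80 kernel sources under csrc/flash_attn/src follow a fixed naming scheme: flash_{fwd,bwd}[_split]_ plus either hdim{D} (equal QK and V head dims) or qkdim{Dqk}_vdim{Dv} (different QK and V head dims), then _{fp16|bf16}[_causal]_sm80.cu. The sketch below only illustrates that pattern; it is inferred from the file names in the tree, not taken from generate_kernels.py, whose contents are not included in this dump:

# Hypothetical sketch: enumerate kernel-source names matching the pattern seen
# under csrc/flash_attn/src. Inferred from the file names alone.
from itertools import product

HEAD_DIMS = [32, 64, 96, 128, 160, 192, 256]                          # hdim variants
QKV_DIMS = [(32, 64), (64, 128), (96, 192), (128, 256), (192, 128)]   # qkdim/vdim variants
DTYPES = ["fp16", "bf16"]

def kernel_names(direction: str, split: bool = False) -> list[str]:
    """Build names like flash_fwd_split_hdim128_bf16_causal_sm80.cu."""
    prefix = f"flash_{direction}" + ("_split" if split else "")
    names = []
    for dtype, causal in product(DTYPES, (False, True)):
        suffix = f"_{dtype}" + ("_causal" if causal else "") + "_sm80.cu"
        names += [f"{prefix}_hdim{d}{suffix}" for d in HEAD_DIMS]
        names += [f"{prefix}_qkdim{qk}_vdim{v}{suffix}" for qk, v in QKV_DIMS]
    return sorted(names)

print(len(kernel_names("fwd")), "forward sm80 .cu sources")  # 48, matching the tree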
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_sm80.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim192_vdim128_sm80.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_bf16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_bf16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_bf16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_fp16_causal_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_fp16_causal_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_fp16_sm80.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_fp16_sm80.cu -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_sm80.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/csrc/flash_attn/src/flash_fwd_qkdim32_vdim64_sm80.h -------------------------------------------------------------------------------- /csrc/flash_attn/src/flash_fwd_qkdim64_vdim128_bf16_causal_sm80.cu: -------------------------------------------------------------------------------- 
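The qkdim*/vdim* instantiations above exist because this fork lets the value head dimension differ from the query/key head dimension. The NumPy reference below makes the shapes concrete; it is illustrative only (the sizes are arbitrary, and the CUDA kernels compute the same result blockwise without ever materializing the score matrix).

```python
import numpy as np

# Reference attention with decoupled head dims (QK head dim 192, V head dim 128),
# matching the qkdim192_vdim128 instantiations above. Shapes are illustrative.
batch, heads, seqlen, d_qk, d_v = 2, 4, 128, 192, 128
rng = np.random.default_rng(0)
q = rng.standard_normal((batch, heads, seqlen, d_qk))
k = rng.standard_normal((batch, heads, seqlen, d_qk))
v = rng.standard_normal((batch, heads, seqlen, d_v))

scores = q @ k.transpose(0, 1, 3, 2) / np.sqrt(d_qk)          # (b, h, s, s)
probs = np.exp(scores - scores.max(axis=-1, keepdims=True))   # numerically stable softmax
probs /= probs.sum(axis=-1, keepdims=True)
out = probs @ v                                                # (b, h, s, d_v)
print(out.shape)  # (2, 4, 128, 128): the output inherits V's head dimension
```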
/csrc/flash_attn/src/ - split forward kernel instantiations (flash_fwd_split_*):
  flash_fwd_split_hdim{32,64,96,128,160,192,256}_{bf16,fp16}[_causal]_sm80.cu
  flash_fwd_split_qkdim{QK}_vdim{V}_{bf16,fp16}[_causal]_sm80.cu,
  for (QK, V) in (32, 64), (64, 128), (96, 192), (128, 256), (192, 128)

/csrc/flash_attn/src/ - generator scripts:
  generate_kernels.py
  generate_switch_headdim.py
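The per-configuration .cu files above are mechanical instantiations, which is why they come with generator scripts; splitting one template across many small translation units keeps compile time and memory per file manageable and lets the build parallelize. The sketch below shows the general approach only; it is not the repository's generate_kernels.py, and the stub contents, output directory, and option lists are assumptions based on the file names above.

```python
import itertools
from pathlib import Path

# Illustrative kernel-stub generator (NOT the repository's generate_kernels.py):
# emit one .cu file per (head dim, dtype, causal) combination, named like the files above.
HEAD_DIMS = [32, 64, 96, 128, 160, 192, 256]
DTYPES = ["fp16", "bf16"]
OUT_DIR = Path("generated")  # hypothetical output directory

STUB = (
    "// Auto-generated stub for hdim={hdim}, dtype={dtype}, causal={causal}.\n"
    "// A real instantiation would include flash_fwd_launch_template.h and\n"
    "// explicitly instantiate the launch function for this configuration.\n"
)

def main() -> None:
    OUT_DIR.mkdir(exist_ok=True)
    for hdim, dtype, causal in itertools.product(HEAD_DIMS, DTYPES, (False, True)):
        name = f"flash_fwd_hdim{hdim}_{dtype}{'_causal' if causal else ''}_sm80.cu"
        (OUT_DIR / name).write_text(STUB.format(hdim=hdim, dtype=dtype, causal=causal))
    print(f"wrote {len(HEAD_DIMS) * len(DTYPES) * 2} stubs to {OUT_DIR}/")

if __name__ == "__main__":
    main()
```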
/csrc/flash_attn/src/ - shared kernel headers:
  kernel_traits.h, mask.h, philox.cuh, rotary.h, softmax.h,
  static_switch.h, static_switch_headdim.h, utils.h
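Among these headers, softmax.h implements the streaming ("online") softmax used to process keys and values block by block without materializing a full score row. As a reference for that recurrence (running max, running normalizer, rescaled accumulator), here is a small NumPy sketch; it is illustrative math only, not the CUDA implementation, and the function name and block size are made up.

```python
import numpy as np

def online_softmax_weighted_sum(scores, values, block=32):
    """Streaming softmax(scores) @ values over blocks of the key axis.

    scores: (seqlen_k,) attention logits for one query; values: (seqlen_k, d_v).
    Keeps a running max m, normalizer l, and un-normalized accumulator acc,
    rescaling them whenever a new block raises the running max.
    """
    m = -np.inf
    l = 0.0
    acc = np.zeros(values.shape[1], dtype=np.float64)
    for start in range(0, len(scores), block):
        s = scores[start:start + block]
        v = values[start:start + block]
        m_new = max(m, s.max())
        scale = np.exp(m - m_new) if np.isfinite(m) else 0.0
        p = np.exp(s - m_new)
        l = l * scale + p.sum()
        acc = acc * scale + p @ v
        m = m_new
    return acc / l

rng = np.random.default_rng(0)
s, v = rng.standard_normal(128), rng.standard_normal((128, 16))
ref = (np.exp(s - s.max()) / np.exp(s - s.max()).sum()) @ v
assert np.allclose(online_softmax_weighted_sum(s, v), ref)
```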
/csrc/flash_attn_ck/:
  flash_api.cpp, flash_common.hpp, mha_bwd.cpp, mha_fwd.cpp, mha_varlen_bwd.cpp, mha_varlen_fwd.cpp

/csrc/ft_attention/:
  README.md, cuda_bf16_fallbacks.cuh, cuda_bf16_wrapper.h,
  decoder_masked_multihead_attention.cu, decoder_masked_multihead_attention.h,
  decoder_masked_multihead_attention_template.hpp, decoder_masked_multihead_attention_utils.h,
  ft_attention.cpp, setup.py

/csrc/fused_dense_lib/:
  README.md, fused_dense.cpp, fused_dense_cuda.cu, setup.py

/csrc/fused_softmax/:
  fused_softmax.cpp, scaled_masked_softmax.h, scaled_masked_softmax_cuda.cu,
  scaled_upper_triang_masked_softmax.h, scaled_upper_triang_masked_softmax_cuda.cu,
  setup.py, type_shim.h

/csrc/layer_norm/ - layer-norm kernels instantiated per hidden size (256 to 8192):
  README.md, ln.h, ln_api.cpp, ln_kernel_traits.h, ln_utils.cuh, setup.py, static_switch.h
  ln_{bwd,fwd}_kernels.cuh, ln_parallel_residual_{bwd,fwd}_kernels.cuh
  ln_{bwd,fwd}_{256,512,768,1024,1280,1536,2048,2560,3072,4096,5120,6144,7168,8192}.cu
  ln_parallel_{bwd,fwd}_{256,512,768,1024,1280,1536,2048,2560,3072,4096,5120,6144,7168,8192}.cu

/csrc/rotary/:
  rotary.cpp, rotary_cuda.cu, setup.py

/csrc/xentropy/:
  README.md, interface.cpp, setup.py, xentropy_kernel.cu

/examples/inference/:
  README.md

/flex_head_fa/ - Python package:
  __init__.py, bert_padding.py, flash_attn_interface.py, flash_attn_triton.py, flash_attn_triton_og.py,
  flash_blocksparse_attention.py, flash_blocksparse_attn_interface.py

/flex_head_fa/fused_softmax.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/fused_softmax.py -------------------------------------------------------------------------------- /flex_head_fa/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/layers/patch_embed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/layers/patch_embed.py -------------------------------------------------------------------------------- /flex_head_fa/layers/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/layers/rotary.py -------------------------------------------------------------------------------- /flex_head_fa/losses/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/losses/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/losses/cross_entropy.py -------------------------------------------------------------------------------- /flex_head_fa/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/models/baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/baichuan.py -------------------------------------------------------------------------------- /flex_head_fa/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/bert.py -------------------------------------------------------------------------------- /flex_head_fa/models/bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/bigcode.py -------------------------------------------------------------------------------- /flex_head_fa/models/btlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/btlm.py -------------------------------------------------------------------------------- /flex_head_fa/models/falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/falcon.py -------------------------------------------------------------------------------- /flex_head_fa/models/gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/gpt.py -------------------------------------------------------------------------------- /flex_head_fa/models/gpt_neox.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/gpt_neox.py -------------------------------------------------------------------------------- /flex_head_fa/models/gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/gptj.py -------------------------------------------------------------------------------- /flex_head_fa/models/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/llama.py -------------------------------------------------------------------------------- /flex_head_fa/models/opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/opt.py -------------------------------------------------------------------------------- /flex_head_fa/models/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/models/vit.py -------------------------------------------------------------------------------- /flex_head_fa/modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/modules/block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/modules/block.py -------------------------------------------------------------------------------- /flex_head_fa/modules/embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/modules/embedding.py -------------------------------------------------------------------------------- /flex_head_fa/modules/mha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/modules/mha.py -------------------------------------------------------------------------------- /flex_head_fa/modules/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/modules/mlp.py -------------------------------------------------------------------------------- /flex_head_fa/ops/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/ops/activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/activations.py -------------------------------------------------------------------------------- /flex_head_fa/ops/fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/fused_dense.py -------------------------------------------------------------------------------- /flex_head_fa/ops/layer_norm.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/layer_norm.py -------------------------------------------------------------------------------- /flex_head_fa/ops/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/rms_norm.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/cross_entropy.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/k_activations.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/k_activations.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/layer_norm.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/linear.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/mlp.py -------------------------------------------------------------------------------- /flex_head_fa/ops/triton/rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/ops/triton/rotary.py -------------------------------------------------------------------------------- /flex_head_fa/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | target-version = ['py38'] -------------------------------------------------------------------------------- /flex_head_fa/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /flex_head_fa/utils/benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/utils/benchmark.py -------------------------------------------------------------------------------- /flex_head_fa/utils/distributed.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/utils/distributed.py 
-------------------------------------------------------------------------------- /flex_head_fa/utils/generation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/utils/generation.py -------------------------------------------------------------------------------- /flex_head_fa/utils/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/flex_head_fa/utils/pretrained.py -------------------------------------------------------------------------------- /headdim.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/headdim.json -------------------------------------------------------------------------------- /hopper/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "3.0.0.b1" 2 | -------------------------------------------------------------------------------- /hopper/benchmark_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/benchmark_attn.py -------------------------------------------------------------------------------- /hopper/benchmark_flash_attention_fp8.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/benchmark_flash_attention_fp8.py -------------------------------------------------------------------------------- /hopper/block_info.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/block_info.h -------------------------------------------------------------------------------- /hopper/epilogue_bwd_sm90_tma.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/epilogue_bwd_sm90_tma.hpp -------------------------------------------------------------------------------- /hopper/epilogue_fwd_sm90_tma.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/epilogue_fwd_sm90_tma.hpp -------------------------------------------------------------------------------- /hopper/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash.h -------------------------------------------------------------------------------- /hopper/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_api.cpp -------------------------------------------------------------------------------- /hopper/flash_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_attn_interface.py -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim64_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim96_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim96_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_hdim96_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_hdim96_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_kernel.h -------------------------------------------------------------------------------- /hopper/flash_bwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_launch_template.h -------------------------------------------------------------------------------- /hopper/flash_bwd_postprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_postprocess_kernel.h -------------------------------------------------------------------------------- /hopper/flash_bwd_preprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_preprocess_kernel.h -------------------------------------------------------------------------------- /hopper/flash_bwd_qkdim128_vdim256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim128_vdim256_bf16_sm90.cu -------------------------------------------------------------------------------- 
/hopper/flash_bwd_qkdim128_vdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim128_vdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_qkdim32_vdim64_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim32_vdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_qkdim32_vdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim32_vdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_qkdim64_vdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim64_vdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_bwd_qkdim64_vdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_bwd_qkdim64_vdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim128_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim128_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim256_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim256_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim256_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim64_bf16_sm90.cu: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim64_e4m3_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim64_e4m3_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_hdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_hdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_kernel.h -------------------------------------------------------------------------------- /hopper/flash_fwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_launch_template.h -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim128_vdim256_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim128_vdim256_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim128_vdim256_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim128_vdim256_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim32_vdim64_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim32_vdim64_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim32_vdim64_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim32_vdim64_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim64_vdim128_bf16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim64_vdim128_bf16_sm90.cu -------------------------------------------------------------------------------- /hopper/flash_fwd_qkdim64_vdim128_fp16_sm90.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/flash_fwd_qkdim64_vdim128_fp16_sm90.cu -------------------------------------------------------------------------------- /hopper/kernel_traits.h: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/kernel_traits.h -------------------------------------------------------------------------------- /hopper/mainloop_bwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/mainloop_bwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /hopper/named_barrier.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/named_barrier.hpp -------------------------------------------------------------------------------- /hopper/seq_len.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/seq_len.h -------------------------------------------------------------------------------- /hopper/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/setup.py -------------------------------------------------------------------------------- /hopper/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/softmax.h -------------------------------------------------------------------------------- /hopper/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/static_switch.h -------------------------------------------------------------------------------- /hopper/test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/test.py -------------------------------------------------------------------------------- /hopper/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/test_flash_attn.py -------------------------------------------------------------------------------- /hopper/tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/tile_scheduler.hpp -------------------------------------------------------------------------------- /hopper/tile_scheduler_bwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/tile_scheduler_bwd.hpp -------------------------------------------------------------------------------- /hopper/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/hopper/utils.h -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/setup.py -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/test.py -------------------------------------------------------------------------------- /tests/layers/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/layers/test_rotary.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/losses/test_cross_entropy.py -------------------------------------------------------------------------------- /tests/losses/test_cross_entropy_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/losses/test_cross_entropy_parallel.py -------------------------------------------------------------------------------- /tests/models/test_baichuan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_baichuan.py -------------------------------------------------------------------------------- /tests/models/test_bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_bert.py -------------------------------------------------------------------------------- /tests/models/test_bigcode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_bigcode.py -------------------------------------------------------------------------------- /tests/models/test_btlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_btlm.py -------------------------------------------------------------------------------- /tests/models/test_falcon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_falcon.py -------------------------------------------------------------------------------- /tests/models/test_gpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_gpt.py -------------------------------------------------------------------------------- /tests/models/test_gpt_generation_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_gpt_generation_parallel.py -------------------------------------------------------------------------------- /tests/models/test_gpt_neox.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_gpt_neox.py -------------------------------------------------------------------------------- /tests/models/test_gpt_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_gpt_parallel.py -------------------------------------------------------------------------------- /tests/models/test_gptj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_gptj.py -------------------------------------------------------------------------------- /tests/models/test_llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_llama.py -------------------------------------------------------------------------------- /tests/models/test_opt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_opt.py -------------------------------------------------------------------------------- /tests/models/test_vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/models/test_vit.py -------------------------------------------------------------------------------- /tests/modules/test_block_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/modules/test_block_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_embedding_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/modules/test_embedding_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_mha_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/modules/test_mha_parallel.py -------------------------------------------------------------------------------- /tests/modules/test_mlp_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/modules/test_mlp_parallel.py -------------------------------------------------------------------------------- /tests/ops/test_dropout_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/ops/test_dropout_layer_norm.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/ops/test_fused_dense.py -------------------------------------------------------------------------------- /tests/ops/test_fused_dense_parallel.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/ops/test_fused_dense_parallel.py -------------------------------------------------------------------------------- /tests/ops/triton/test_layer_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/ops/triton/test_layer_norm.py -------------------------------------------------------------------------------- /tests/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | target-version = ['py38'] -------------------------------------------------------------------------------- /tests/test_flash_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_flash_attn.py -------------------------------------------------------------------------------- /tests/test_flash_attn_ck.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_flash_attn_ck.py -------------------------------------------------------------------------------- /tests/test_flash_attn_head.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_flash_attn_head.py -------------------------------------------------------------------------------- /tests/test_flash_attn_headdim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_flash_attn_headdim.py -------------------------------------------------------------------------------- /tests/test_rotary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_rotary.py -------------------------------------------------------------------------------- /tests/test_util.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/tests/test_util.py -------------------------------------------------------------------------------- /training/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/Dockerfile -------------------------------------------------------------------------------- /training/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/README.md -------------------------------------------------------------------------------- /training/configs/callbacks/causality-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/causality-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/default.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/default.yaml 
-------------------------------------------------------------------------------- /training/configs/callbacks/ema.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/ema.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/flop-count.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/flop-count.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/gpu-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/gpu-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/model-summary.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/model-summary.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/none.yaml: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training/configs/callbacks/norm-monitor.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/norm-monitor.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/params-log.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/params-log.yaml -------------------------------------------------------------------------------- /training/configs/callbacks/wandb.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/callbacks/wandb.yaml -------------------------------------------------------------------------------- /training/configs/config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/config.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/openwebtext.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/datamodule/openwebtext.yaml -------------------------------------------------------------------------------- /training/configs/datamodule/thepile.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/datamodule/thepile.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/base.yaml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/base.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2l-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2l-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2l.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2l.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2m-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2m-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2m.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2m.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2s-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s-hf.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2s-hf.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2s.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2s.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-flash.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2xl-flash.yaml -------------------------------------------------------------------------------- /training/configs/experiment/owt/gpt2xl-hf.yaml: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/training/configs/experiment/owt/gpt2xl-hf.yaml

With two exceptions noted inline, the remaining files in this dump are included by reference only; the content of each path listed below is available at https://raw.githubusercontent.com/xiayuqing0622/flex_head_fa/HEAD/ followed by that path (without the leading slash):

/training/configs/experiment/owt/gpt2xl.yaml
/training/configs/experiment/pile/base.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-8k.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary-8k.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128-rotary.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-hdim128.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-rotary-8k.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash-rotary.yaml
/training/configs/experiment/pile/gpt3-2.7B-flash.yaml
/training/configs/experiment/pile/gpt3-2.7B-hf-hdim128.yaml
/training/configs/experiment/pile/gpt3-2.7B-hf.yaml
/training/configs/experiment/pile/gpt3l-flash-8k.yaml
/training/configs/experiment/pile/gpt3l-flash-rotary-30B.yaml
/training/configs/experiment/pile/gpt3l-flash-rotary-8k.yaml
/training/configs/experiment/pile/gpt3l-flash-rotary.yaml
/training/configs/experiment/pile/gpt3l-flash.yaml
/training/configs/experiment/pile/gpt3l-hf.yaml
/training/configs/experiment/pile/gpt3m-flash-8k.yaml
/training/configs/experiment/pile/gpt3m-flash-rotary-30B.yaml
/training/configs/experiment/pile/gpt3m-flash-rotary-8k.yaml
/training/configs/experiment/pile/gpt3m-flash-rotary.yaml
/training/configs/experiment/pile/gpt3m-flash.yaml
/training/configs/experiment/pile/gpt3m-hf.yaml
/training/configs/experiment/pile/gpt3s-flash-8k.yaml
/training/configs/experiment/pile/gpt3s-flash-rotary-30B.yaml
/training/configs/experiment/pile/gpt3s-flash-rotary-8k.yaml
/training/configs/experiment/pile/gpt3s-flash-rotary.yaml
/training/configs/experiment/pile/gpt3s-flash.yaml
/training/configs/experiment/pile/gpt3s-hf.yaml
/training/configs/experiment/pile/gpt3xl-flash-8k.yaml
/training/configs/experiment/pile/gpt3xl-flash-rotary-60B.yaml
/training/configs/experiment/pile/gpt3xl-flash-rotary-8k.yaml
/training/configs/experiment/pile/gpt3xl-flash-rotary.yaml
/training/configs/experiment/pile/gpt3xl-flash.yaml
/training/configs/experiment/pile/gpt3xl-hf.yaml
/training/configs/logger/comet.yaml
/training/configs/logger/csv.yaml
/training/configs/logger/many_loggers.yaml
/training/configs/logger/mlflow.yaml
/training/configs/logger/neptune.yaml
/training/configs/logger/tensorboard.yaml
/training/configs/logger/wandb.yaml
/training/configs/metrics/acc.yaml
/training/configs/metrics/acc_ignore_index.yaml
/training/configs/metrics/acctop5.yaml
/training/configs/metrics/mse.yaml
/training/configs/metrics/num-tokens.yaml
/training/configs/metrics/perplexity.yaml
/training/configs/mode/debug.yaml
/training/configs/mode/default.yaml
/training/configs/mode/exp.yaml
/training/configs/mode/profile.yaml
/training/configs/mode/smoke.yaml
/training/configs/model/gpt2-hf.yaml
/training/configs/model/gpt2.yaml
/training/configs/model/gpt2model/gpt2-large.yaml
/training/configs/model/gpt2model/gpt2-medium.yaml
/training/configs/model/gpt2model/gpt2-small.yaml
/training/configs/model/gpt2model/gpt2-xlarge.yaml
/training/configs/optimizer/adam.yaml
/training/configs/optimizer/adamw-apex-distributed.yaml
/training/configs/optimizer/adamw-apex-zero.yaml
/training/configs/optimizer/adamw-apex.yaml
/training/configs/optimizer/adamw-zero.yaml
/training/configs/optimizer/adamw.yaml
/training/configs/optimizer/fusedlamb-ds.yaml
/training/configs/optimizer/fusedlamb.yaml
/training/configs/optimizer/sgd.yaml
/training/configs/scheduler/cosine-warmup-timm.yaml
/training/configs/scheduler/cosine-warmup.yaml
/training/configs/scheduler/invsqrt.yaml
/training/configs/scheduler/linear-warmup.yaml
/training/configs/scheduler/multi-step.yaml
/training/configs/scheduler/plateau.yaml
/training/configs/scheduler/poly-warmup.yaml
/training/configs/scheduler/step.yaml
/training/configs/task/sequence-model.yaml:
    _target_: src.tasks.seq.SequenceModel
    (inline content; see the instantiation sketch after this listing)
/training/configs/trainer/all_params.yaml
/training/configs/trainer/ddp.yaml
/training/configs/trainer/debug.yaml
/training/configs/trainer/default.yaml
/training/run.py
/training/src/callbacks/__init__.py (empty file)
/training/src/callbacks/causality_monitor.py
/training/src/callbacks/ema.py
/training/src/callbacks/flop_count.py
/training/src/callbacks/gpu_affinity.py
/training/src/callbacks/loss_scale_monitor.py
/training/src/callbacks/model_checkpoint.py
/training/src/callbacks/norm_monitor.py
/training/src/callbacks/params_log.py
/training/src/callbacks/speed_monitor.py
/training/src/callbacks/wandb_callbacks.py
/training/src/datamodules/datasets/detokenizer.py
/training/src/datamodules/datasets/lm_dataset.py
/training/src/datamodules/fault_tolerant_sampler.py
/training/src/datamodules/imagenet.py
/training/src/datamodules/language_modeling_hf.py
/training/src/datamodules/timm_mixup.py
/training/src/distributed/ddp_comm_hooks.py
/training/src/eval.py
/training/src/metrics/accuracy.py
/training/src/metrics/num_tokens.py
/training/src/metrics/perplexity.py
/training/src/models/modules/seq_common.py
/training/src/optim/param_grouping.py
/training/src/optim/timm_lr_scheduler.py
/training/src/tasks/seq.py
/training/src/train.py
/training/src/utils/checkpoint.py
/training/src/utils/ddp_zero1.py
/training/src/utils/ddp_zero2.py
/training/src/utils/distributed.py
/training/src/utils/ema.py
/training/src/utils/flops.py
/training/src/utils/gpu_affinity.py
/training/src/utils/utils.py
/training/tests/datamodules/test_language_modeling_hf.py
/usage.md
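The one config whose content survives inline in this listing, /training/configs/task/sequence-model.yaml, is a single Hydra `_target_` line. The sketch below shows how such a config is typically consumed; it assumes the training code follows the usual Hydra pattern (hydra-core and omegaconf installed), and the commented-out keyword argument and the OrderedDict demo target are illustrative assumptions, not code from this repository.

```python
# Minimal sketch (assumption: standard Hydra usage, not code from this repo).
from omegaconf import OmegaConf
from hydra.utils import instantiate

# Literal content of training/configs/task/sequence-model.yaml from the listing above.
task_cfg = OmegaConf.create({"_target_": "src.tasks.seq.SequenceModel"})

# Hydra resolves _target_ to the class src.tasks.seq.SequenceModel and calls it.
# The real constructor arguments come from the rest of the composed training config
# (see training/src/tasks/seq.py); the kwarg below is purely hypothetical.
# task = instantiate(task_cfg, cfg=full_train_cfg)  # hypothetical kwarg

# Self-contained demonstration of the same _target_ mechanism, using a target that
# exists in the standard library instead of the repository's class:
demo = instantiate(OmegaConf.create({"_target_": "collections.OrderedDict", "a": 1}))
print(demo)  # OrderedDict([('a', 1)])
```

Only the keyword arguments would change for the real class; the yaml file itself encodes nothing beyond which class Hydra should instantiate.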