├── .github └── workflows │ ├── codeql.yml │ └── dependabot.yml ├── .gitignore ├── .gitmodules ├── CODE_OF_CONDUCT.md ├── LICENSE ├── README.md ├── SECURITY.md ├── attention_engine ├── attn_engine │ ├── __init__.py │ ├── attn_engine.py │ └── linear_attn_engine.py ├── autotuner │ ├── arch │ │ ├── A100.py │ │ ├── H100.py │ │ ├── RTX4090.py │ │ ├── __init__.py │ │ └── arch_base.py │ ├── attnfwd_tunner.py │ ├── attnfwd_tunner_engine2.py │ ├── decider.py │ └── sigmoid_tunner.py ├── benchmark │ └── bench_utils.py ├── core │ ├── __init__.py │ ├── codegen │ │ ├── common.py │ │ └── tl_gen.py │ ├── lower │ │ ├── lower.py │ │ ├── lower_cute.py │ │ ├── lower_decode.py │ │ ├── lower_decode_gqa.py │ │ ├── lower_decode_mla.py │ │ ├── lower_gqa.py │ │ └── lower_linear.py │ ├── template │ │ ├── attn_template.py │ │ ├── blockattn_template.py │ │ ├── cute_template.py │ │ ├── cute_template │ │ │ ├── epilogue_bwd_sm90_tma.hpp │ │ │ ├── epilogue_fwd_sm90_tma.hpp │ │ │ ├── flash.h │ │ │ ├── flash_api.cpp │ │ │ ├── flash_attn_interface.py │ │ │ ├── flash_bwd.cu │ │ │ ├── flash_bwd_kernel.h │ │ │ ├── flash_bwd_launch_template.h │ │ │ ├── flash_bwd_postprocess_kernel.h │ │ │ ├── flash_bwd_preprocess_kernel.h │ │ │ ├── flash_fwd.cu │ │ │ ├── flash_fwd_kernel.h │ │ │ ├── flash_fwd_launch_template.h │ │ │ ├── kernel_traits.h │ │ │ ├── mainloop_bwd_sm90_tma_gmma_ws.hpp │ │ │ ├── mainloop_fwd_sm90_tma_gmma_ws.hpp │ │ │ ├── named_barrier.hpp │ │ │ ├── online_func.h │ │ │ ├── seq_len.h │ │ │ ├── softmax.h │ │ │ ├── static_switch.h │ │ │ ├── tile_scheduler.hpp │ │ │ ├── tile_scheduler_bwd.hpp │ │ │ └── utils.h │ │ ├── cute_template_kvshared │ │ │ ├── flash_api.cpp │ │ │ ├── flash_mla_interface.py │ │ │ └── kernels │ │ │ │ ├── config.h │ │ │ │ ├── get_mla_metadata.cu │ │ │ │ ├── get_mla_metadata.h │ │ │ │ ├── mla_combine.cu │ │ │ │ ├── mla_combine.h │ │ │ │ ├── params.h │ │ │ │ ├── splitkv_mla.cu │ │ │ │ ├── splitkv_mla.h │ │ │ │ ├── traits.h │ │ │ │ └── utils.h │ │ ├── linear_attn_template.py │ │ └── tl_template │ │ │ ├── attn │ │ │ ├── attn_decode_tl.py │ │ │ ├── attn_gqa_decode_tl.py │ │ │ ├── attn_gqa_tl.py │ │ │ ├── attn_tl.py │ │ │ ├── blockattn_tl.py │ │ │ └── mla_decode_tl.py │ │ │ └── linear │ │ │ └── linear_tl.py │ ├── transform │ │ ├── core.py │ │ └── graph.py │ └── utils.py └── tests │ ├── test_blockmask.py │ ├── test_sympy.py │ └── test_torchtrace.py ├── attn_script ├── Readme.md ├── blocksparseattn.py ├── gqa.py ├── gqa_inference.py ├── implementation.md ├── mamba2.py ├── mamba2_ngroup1.py ├── mha.py ├── mha_cute.py ├── mha_inference.py ├── mla_decode.py ├── mla_decode_cute.py ├── reluattn.py ├── reluattn_cute.py ├── retention.py ├── retention_linear.py ├── sigmoidattn.py ├── sigmoidattn_cute.py ├── simple_gla.py └── sparseattn.py ├── docs ├── API.md └── getting_started_example.md └── responsible_AI_FAQ.md /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/.github/workflows/codeql.yml -------------------------------------------------------------------------------- /.github/workflows/dependabot.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/.github/workflows/dependabot.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/.gitmodules -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/SECURITY.md -------------------------------------------------------------------------------- /attention_engine/attn_engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/attn_engine/__init__.py -------------------------------------------------------------------------------- /attention_engine/attn_engine/attn_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/attn_engine/attn_engine.py -------------------------------------------------------------------------------- /attention_engine/attn_engine/linear_attn_engine.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/attn_engine/linear_attn_engine.py -------------------------------------------------------------------------------- /attention_engine/autotuner/arch/A100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/arch/A100.py -------------------------------------------------------------------------------- /attention_engine/autotuner/arch/H100.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/arch/H100.py -------------------------------------------------------------------------------- /attention_engine/autotuner/arch/RTX4090.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/arch/RTX4090.py -------------------------------------------------------------------------------- /attention_engine/autotuner/arch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/arch/__init__.py -------------------------------------------------------------------------------- /attention_engine/autotuner/arch/arch_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/arch/arch_base.py -------------------------------------------------------------------------------- /attention_engine/autotuner/attnfwd_tunner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/attnfwd_tunner.py -------------------------------------------------------------------------------- /attention_engine/autotuner/attnfwd_tunner_engine2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/attnfwd_tunner_engine2.py -------------------------------------------------------------------------------- /attention_engine/autotuner/decider.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/decider.py -------------------------------------------------------------------------------- /attention_engine/autotuner/sigmoid_tunner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/autotuner/sigmoid_tunner.py -------------------------------------------------------------------------------- /attention_engine/benchmark/bench_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/benchmark/bench_utils.py -------------------------------------------------------------------------------- /attention_engine/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/__init__.py -------------------------------------------------------------------------------- /attention_engine/core/codegen/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/codegen/common.py -------------------------------------------------------------------------------- /attention_engine/core/codegen/tl_gen.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/codegen/tl_gen.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_cute.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_decode.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_decode_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_decode_gqa.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_decode_mla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_decode_mla.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_gqa.py -------------------------------------------------------------------------------- /attention_engine/core/lower/lower_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/lower/lower_linear.py -------------------------------------------------------------------------------- /attention_engine/core/template/attn_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/attn_template.py -------------------------------------------------------------------------------- /attention_engine/core/template/blockattn_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/blockattn_template.py -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template.py -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/epilogue_bwd_sm90_tma.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/epilogue_bwd_sm90_tma.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/epilogue_fwd_sm90_tma.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/epilogue_fwd_sm90_tma.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_api.cpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_attn_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_attn_interface.py -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_bwd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_bwd.cu -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_bwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_bwd_kernel.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_bwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_bwd_launch_template.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_bwd_postprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_bwd_postprocess_kernel.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_bwd_preprocess_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_bwd_preprocess_kernel.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_fwd.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_fwd.cu -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_fwd_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_fwd_kernel.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/flash_fwd_launch_template.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/flash_fwd_launch_template.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/kernel_traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/kernel_traits.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/mainloop_bwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/mainloop_bwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/mainloop_fwd_sm90_tma_gmma_ws.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/mainloop_fwd_sm90_tma_gmma_ws.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/named_barrier.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/named_barrier.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/online_func.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/online_func.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/seq_len.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/seq_len.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/softmax.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/softmax.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/static_switch.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/tile_scheduler.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/tile_scheduler.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/tile_scheduler_bwd.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/tile_scheduler_bwd.hpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template/utils.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/flash_api.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/flash_api.cpp -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/flash_mla_interface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/flash_mla_interface.py -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/config.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/get_mla_metadata.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/get_mla_metadata.cu -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/get_mla_metadata.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/get_mla_metadata.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/mla_combine.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/mla_combine.cu -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/mla_combine.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/mla_combine.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/params.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/splitkv_mla.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/splitkv_mla.cu -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/splitkv_mla.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/splitkv_mla.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/traits.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/traits.h -------------------------------------------------------------------------------- /attention_engine/core/template/cute_template_kvshared/kernels/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/cute_template_kvshared/kernels/utils.h -------------------------------------------------------------------------------- /attention_engine/core/template/linear_attn_template.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/linear_attn_template.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/attn_decode_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/attn_decode_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/attn_gqa_decode_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/attn_gqa_decode_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/attn_gqa_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/attn_gqa_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/attn_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/attn_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/blockattn_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/blockattn_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/attn/mla_decode_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/attn/mla_decode_tl.py -------------------------------------------------------------------------------- /attention_engine/core/template/tl_template/linear/linear_tl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/template/tl_template/linear/linear_tl.py -------------------------------------------------------------------------------- /attention_engine/core/transform/core.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/transform/core.py -------------------------------------------------------------------------------- /attention_engine/core/transform/graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/transform/graph.py -------------------------------------------------------------------------------- /attention_engine/core/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/core/utils.py -------------------------------------------------------------------------------- /attention_engine/tests/test_blockmask.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/tests/test_blockmask.py -------------------------------------------------------------------------------- /attention_engine/tests/test_sympy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/tests/test_sympy.py -------------------------------------------------------------------------------- /attention_engine/tests/test_torchtrace.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attention_engine/tests/test_torchtrace.py -------------------------------------------------------------------------------- /attn_script/Readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/Readme.md -------------------------------------------------------------------------------- /attn_script/blocksparseattn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/blocksparseattn.py -------------------------------------------------------------------------------- /attn_script/gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/gqa.py -------------------------------------------------------------------------------- /attn_script/gqa_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/gqa_inference.py -------------------------------------------------------------------------------- /attn_script/implementation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/implementation.md -------------------------------------------------------------------------------- /attn_script/mamba2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mamba2.py -------------------------------------------------------------------------------- /attn_script/mamba2_ngroup1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mamba2_ngroup1.py -------------------------------------------------------------------------------- /attn_script/mha.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mha.py -------------------------------------------------------------------------------- /attn_script/mha_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mha_cute.py -------------------------------------------------------------------------------- /attn_script/mha_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mha_inference.py -------------------------------------------------------------------------------- /attn_script/mla_decode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mla_decode.py -------------------------------------------------------------------------------- /attn_script/mla_decode_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/mla_decode_cute.py -------------------------------------------------------------------------------- /attn_script/reluattn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/reluattn.py -------------------------------------------------------------------------------- /attn_script/reluattn_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/reluattn_cute.py -------------------------------------------------------------------------------- /attn_script/retention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/retention.py -------------------------------------------------------------------------------- /attn_script/retention_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/retention_linear.py -------------------------------------------------------------------------------- /attn_script/sigmoidattn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/sigmoidattn.py -------------------------------------------------------------------------------- /attn_script/sigmoidattn_cute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/sigmoidattn_cute.py -------------------------------------------------------------------------------- /attn_script/simple_gla.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/simple_gla.py -------------------------------------------------------------------------------- /attn_script/sparseattn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/attn_script/sparseattn.py -------------------------------------------------------------------------------- /docs/API.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/docs/API.md -------------------------------------------------------------------------------- /docs/getting_started_example.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/docs/getting_started_example.md -------------------------------------------------------------------------------- /responsible_AI_FAQ.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/AttentionEngine/HEAD/responsible_AI_FAQ.md --------------------------------------------------------------------------------