├── .gitignore ├── CODE_OF_CONDUCT.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── doc ├── Asymmetric-RMS-Norms.svg ├── CHECKPOINT.md └── DeepSeekR1-tutel-accel.png ├── figure.svg ├── setup.py ├── tests ├── test_baseline.json └── test_tutel.py └── tutel ├── __init__.py ├── checkpoint ├── __init__.py ├── gather.py └── scatter.py ├── custom ├── __init__.py ├── antares_ops.h ├── backend.hpp ├── custom_kernel.cpp └── extensions │ ├── __init__.py │ ├── mla_decode.h │ ├── mla_stage1_a16w16_bf16.h │ ├── mla_stage2_a16w16_bf16_kvsplit16.h │ └── mla_stage2_a16w16_bf16_kvsplit32.h ├── examples ├── README.md ├── __init__.py ├── bandwidth_test.py ├── figure.svg ├── helloworld.py ├── helloworld_amp.py ├── helloworld_custom_expert_sharded.py ├── helloworld_custom_gate_expert.py ├── helloworld_ddp.py ├── helloworld_ddp_tutel.py ├── helloworld_from_scratch.py ├── helloworld_switch.py ├── modded-nanogpt-moe │ ├── LICENSE │ ├── README.md │ ├── data │ │ └── cached_fineweb10B.py │ ├── run.sh │ └── train_gpt_v0.py ├── moe_cifar10.py ├── moe_mnist.py ├── nccl_all_gather_v.py ├── nccl_all_to_all_v.py ├── nccl_allreduce_perf.py └── oai_request.py ├── experts ├── __init__.py ├── ffn.py └── llama_ffn.py ├── gates ├── __init__.py ├── cosine_top.py └── top.py ├── impls ├── __init__.py ├── communicate.py ├── fast_dispatch.py ├── jit_compiler.py ├── losses.py ├── moe_layer.py └── overlap.py ├── jit.py ├── jit_kernels ├── __init__.py ├── gating.py └── sparse.py ├── launcher ├── __init__.py ├── execl.py └── run.py ├── moe.py ├── net.py ├── ops ├── __init__.py ├── cuda │ ├── deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod │ ├── kimi_k2_sigmoid_top_k_routed_scaled_f32.mod │ ├── qwen3_moe_top_k_routed_scaled_f32.mod │ ├── scatter_sample_ids_i32.mod │ ├── scatter_top_ids_i32.mod │ ├── to_bfloat16_3d.mod │ └── token_sort_i32.mod └── rocm │ ├── deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod │ ├── fmoe_blkvect_phase_1.mod │ ├── fmoe_blkvect_phase_2.mod │ ├── fmoe_blockscal_stage_1.mod │ ├── fmoe_blockscal_stage_2.mod │ ├── fmoe_blockscal_stage_3.mod │ ├── fmoe_blockscal_stage_4.mod │ ├── fmoe_blockscal_vector_1.mod │ ├── fmoe_blockscal_vector_2.mod │ ├── fmoe_f16xf4_phase_1_top_k.mod │ ├── fmoe_f16xf4_phase_2.mod │ ├── fmoe_w8a16_stage_1.mod │ ├── fmoe_w8a16_stage_2.mod │ ├── fmoe_w8a16_stage_3.mod │ ├── fmoe_w8a16_vector_1.mod │ ├── fmoe_w8a16_vector_2.mod │ ├── fused_silu_mul_bf16.mod │ ├── gemm_down_weight_sum_bf16xf8_s_16x16_fnuz.mod │ ├── gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_bs4_v2.mod │ ├── gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_v2.mod │ ├── gemm_gate_up_silu_bf16xf8_s_16x16_fnuz.mod │ ├── gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_bs4_v2.mod │ ├── gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_v2.mod │ ├── gemv_nt_bf16xfp8_block.mod │ ├── gemv_nt_bf16xfp8_block_v2.mod │ ├── gemv_nt_bf16xfp8_row.mod │ ├── kimi_k2_sigmoid_top_k_routed_scaled_f32.mod │ ├── qwen3_moe_top_k_routed_scaled_f32.mod │ ├── qwen3_norm_rotary_kvcache_bf16.mod │ ├── rmsnorm2_bf16.mod │ ├── rmsnorm_bf16.mod │ ├── rope_gmv_bf16.mod │ ├── rope_kt_bf16.mod │ ├── rope_mla_bf16.mod │ ├── rope_q_out_bf16.mod │ ├── rope_qt_bf16_put.mod │ ├── scatter_sample_ids_i32.mod │ ├── scatter_top_ids_i32.mod │ ├── to_bfloat16_3d.mod │ ├── to_float32_3d.mod │ ├── to_float8_block.mod │ ├── to_float8_per_token.mod │ ├── token_sort_i32.mod │ └── wvc_logits_bf16.mod ├── parted ├── __init__.py ├── backend │ ├── __init__.py │ └── torch │ │ ├── __init__.py │ │ ├── config.py │ │ └── executor.py ├── patterns.py ├── solver.py └── spmdx.py └── system.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/.gitignore -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/CODE_OF_CONDUCT.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/README.md -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/SECURITY.md -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/SUPPORT.md -------------------------------------------------------------------------------- /doc/Asymmetric-RMS-Norms.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/doc/Asymmetric-RMS-Norms.svg -------------------------------------------------------------------------------- /doc/CHECKPOINT.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/doc/CHECKPOINT.md -------------------------------------------------------------------------------- /doc/DeepSeekR1-tutel-accel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/doc/DeepSeekR1-tutel-accel.png -------------------------------------------------------------------------------- /figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/figure.svg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/setup.py -------------------------------------------------------------------------------- /tests/test_baseline.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tests/test_baseline.json -------------------------------------------------------------------------------- /tests/test_tutel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tests/test_tutel.py -------------------------------------------------------------------------------- /tutel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/__init__.py -------------------------------------------------------------------------------- /tutel/checkpoint/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/checkpoint/__init__.py -------------------------------------------------------------------------------- /tutel/checkpoint/gather.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/checkpoint/gather.py -------------------------------------------------------------------------------- /tutel/checkpoint/scatter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/checkpoint/scatter.py -------------------------------------------------------------------------------- /tutel/custom/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/__init__.py -------------------------------------------------------------------------------- /tutel/custom/antares_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/antares_ops.h -------------------------------------------------------------------------------- /tutel/custom/backend.hpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/backend.hpp -------------------------------------------------------------------------------- /tutel/custom/custom_kernel.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/custom_kernel.cpp -------------------------------------------------------------------------------- /tutel/custom/extensions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/extensions/__init__.py -------------------------------------------------------------------------------- /tutel/custom/extensions/mla_decode.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/extensions/mla_decode.h -------------------------------------------------------------------------------- /tutel/custom/extensions/mla_stage1_a16w16_bf16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/extensions/mla_stage1_a16w16_bf16.h -------------------------------------------------------------------------------- /tutel/custom/extensions/mla_stage2_a16w16_bf16_kvsplit16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/extensions/mla_stage2_a16w16_bf16_kvsplit16.h -------------------------------------------------------------------------------- /tutel/custom/extensions/mla_stage2_a16w16_bf16_kvsplit32.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/custom/extensions/mla_stage2_a16w16_bf16_kvsplit32.h -------------------------------------------------------------------------------- /tutel/examples/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/README.md -------------------------------------------------------------------------------- /tutel/examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/__init__.py -------------------------------------------------------------------------------- /tutel/examples/bandwidth_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/bandwidth_test.py -------------------------------------------------------------------------------- /tutel/examples/figure.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/figure.svg -------------------------------------------------------------------------------- /tutel/examples/helloworld.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_amp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_amp.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_custom_expert_sharded.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_custom_expert_sharded.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_custom_gate_expert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_custom_gate_expert.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_ddp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_ddp.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_ddp_tutel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_ddp_tutel.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_from_scratch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_from_scratch.py -------------------------------------------------------------------------------- /tutel/examples/helloworld_switch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/helloworld_switch.py -------------------------------------------------------------------------------- /tutel/examples/modded-nanogpt-moe/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/modded-nanogpt-moe/LICENSE -------------------------------------------------------------------------------- /tutel/examples/modded-nanogpt-moe/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/modded-nanogpt-moe/README.md -------------------------------------------------------------------------------- /tutel/examples/modded-nanogpt-moe/data/cached_fineweb10B.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/modded-nanogpt-moe/data/cached_fineweb10B.py -------------------------------------------------------------------------------- /tutel/examples/modded-nanogpt-moe/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/modded-nanogpt-moe/run.sh -------------------------------------------------------------------------------- /tutel/examples/modded-nanogpt-moe/train_gpt_v0.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/modded-nanogpt-moe/train_gpt_v0.py -------------------------------------------------------------------------------- /tutel/examples/moe_cifar10.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/moe_cifar10.py -------------------------------------------------------------------------------- /tutel/examples/moe_mnist.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/moe_mnist.py -------------------------------------------------------------------------------- /tutel/examples/nccl_all_gather_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/nccl_all_gather_v.py -------------------------------------------------------------------------------- /tutel/examples/nccl_all_to_all_v.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/nccl_all_to_all_v.py -------------------------------------------------------------------------------- /tutel/examples/nccl_allreduce_perf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/nccl_allreduce_perf.py -------------------------------------------------------------------------------- /tutel/examples/oai_request.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/examples/oai_request.py -------------------------------------------------------------------------------- /tutel/experts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/experts/__init__.py -------------------------------------------------------------------------------- /tutel/experts/ffn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/experts/ffn.py -------------------------------------------------------------------------------- /tutel/experts/llama_ffn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/experts/llama_ffn.py -------------------------------------------------------------------------------- /tutel/gates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/gates/__init__.py -------------------------------------------------------------------------------- /tutel/gates/cosine_top.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/gates/cosine_top.py -------------------------------------------------------------------------------- /tutel/gates/top.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/gates/top.py -------------------------------------------------------------------------------- /tutel/impls/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/__init__.py -------------------------------------------------------------------------------- /tutel/impls/communicate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/communicate.py -------------------------------------------------------------------------------- /tutel/impls/fast_dispatch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/fast_dispatch.py -------------------------------------------------------------------------------- /tutel/impls/jit_compiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/jit_compiler.py -------------------------------------------------------------------------------- /tutel/impls/losses.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/losses.py -------------------------------------------------------------------------------- /tutel/impls/moe_layer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/moe_layer.py -------------------------------------------------------------------------------- /tutel/impls/overlap.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/impls/overlap.py -------------------------------------------------------------------------------- /tutel/jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/jit.py -------------------------------------------------------------------------------- /tutel/jit_kernels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/jit_kernels/__init__.py -------------------------------------------------------------------------------- /tutel/jit_kernels/gating.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/jit_kernels/gating.py -------------------------------------------------------------------------------- /tutel/jit_kernels/sparse.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/jit_kernels/sparse.py -------------------------------------------------------------------------------- /tutel/launcher/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/launcher/__init__.py -------------------------------------------------------------------------------- /tutel/launcher/execl.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/launcher/execl.py -------------------------------------------------------------------------------- /tutel/launcher/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/launcher/run.py -------------------------------------------------------------------------------- /tutel/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/moe.py -------------------------------------------------------------------------------- /tutel/net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/net.py -------------------------------------------------------------------------------- /tutel/ops/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/__init__.py -------------------------------------------------------------------------------- /tutel/ops/cuda/deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/kimi_k2_sigmoid_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/kimi_k2_sigmoid_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/qwen3_moe_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/qwen3_moe_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/scatter_sample_ids_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/scatter_sample_ids_i32.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/scatter_top_ids_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/scatter_top_ids_i32.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/to_bfloat16_3d.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/to_bfloat16_3d.mod -------------------------------------------------------------------------------- /tutel/ops/cuda/token_sort_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/cuda/token_sort_i32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/deepseek_r1_sigmoid_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blkvect_phase_1.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blkvect_phase_1.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blkvect_phase_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blkvect_phase_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_stage_1.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_stage_1.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_stage_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_stage_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_stage_3.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_stage_3.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_stage_4.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_stage_4.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_vector_1.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_vector_1.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_blockscal_vector_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_blockscal_vector_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_f16xf4_phase_1_top_k.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_f16xf4_phase_1_top_k.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_f16xf4_phase_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_f16xf4_phase_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_w8a16_stage_1.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_w8a16_stage_1.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_w8a16_stage_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_w8a16_stage_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_w8a16_stage_3.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_w8a16_stage_3.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_w8a16_vector_1.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_w8a16_vector_1.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fmoe_w8a16_vector_2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fmoe_w8a16_vector_2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/fused_silu_mul_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/fused_silu_mul_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_bs4_v2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_bs4_v2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_v2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_down_weight_sum_bf16xf8_s_16x16_fnuz_v2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_bs4_v2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_bs4_v2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_v2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemm_gate_up_silu_bf16xf8_s_16x16_fnuz_v2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemv_nt_bf16xfp8_block.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemv_nt_bf16xfp8_block.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemv_nt_bf16xfp8_block_v2.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemv_nt_bf16xfp8_block_v2.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/gemv_nt_bf16xfp8_row.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/gemv_nt_bf16xfp8_row.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/kimi_k2_sigmoid_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/kimi_k2_sigmoid_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/qwen3_moe_top_k_routed_scaled_f32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/qwen3_moe_top_k_routed_scaled_f32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/qwen3_norm_rotary_kvcache_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/qwen3_norm_rotary_kvcache_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rmsnorm2_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rmsnorm2_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rmsnorm_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rmsnorm_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rope_gmv_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rope_gmv_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rope_kt_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rope_kt_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rope_mla_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rope_mla_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rope_q_out_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rope_q_out_bf16.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/rope_qt_bf16_put.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/rope_qt_bf16_put.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/scatter_sample_ids_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/scatter_sample_ids_i32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/scatter_top_ids_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/scatter_top_ids_i32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/to_bfloat16_3d.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/to_bfloat16_3d.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/to_float32_3d.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/to_float32_3d.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/to_float8_block.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/to_float8_block.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/to_float8_per_token.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/to_float8_per_token.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/token_sort_i32.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/token_sort_i32.mod -------------------------------------------------------------------------------- /tutel/ops/rocm/wvc_logits_bf16.mod: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/ops/rocm/wvc_logits_bf16.mod -------------------------------------------------------------------------------- /tutel/parted/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/__init__.py -------------------------------------------------------------------------------- /tutel/parted/backend/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/backend/__init__.py -------------------------------------------------------------------------------- /tutel/parted/backend/torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/backend/torch/__init__.py -------------------------------------------------------------------------------- /tutel/parted/backend/torch/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/backend/torch/config.py -------------------------------------------------------------------------------- /tutel/parted/backend/torch/executor.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/backend/torch/executor.py -------------------------------------------------------------------------------- /tutel/parted/patterns.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/patterns.py -------------------------------------------------------------------------------- /tutel/parted/solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/solver.py -------------------------------------------------------------------------------- /tutel/parted/spmdx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/parted/spmdx.py -------------------------------------------------------------------------------- /tutel/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Tutel/HEAD/tutel/system.py --------------------------------------------------------------------------------