├── .clang-format ├── .github ├── ISSUE_TEMPLATE │ ├── 1_Report_Bug.yml │ ├── 2_Request_Feature.yml │ ├── 3_Installation.yml │ ├── 4_Call_For_Comment.yml │ └── 5_General.yml ├── PULL_REQUEST_TEMPLATE.md ├── runs-on.yml └── workflows │ ├── build-test.yml │ ├── code-format.yml │ ├── gpu-tests.yml │ ├── helpers │ ├── build_yirage_from_source.sh │ ├── free_space_on_runner.sh │ ├── install_dependencies.sh │ └── set_env.sh │ ├── pypi-deploy.yml │ └── shell-check.yml ├── .gitignore ├── .gitmodules ├── .readthedocs.yaml ├── .tools └── clang-format-15-master-1d7ec53d ├── CMakeLists.txt ├── COPYRIGHT_HEADERS.txt ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── README.pypi.md ├── benchmark ├── baselines │ └── pytorch │ │ ├── gated_mlp.py │ │ ├── gqa.py │ │ ├── lora.py │ │ ├── norm_transformer.py │ │ ├── qknorm_gqa.py │ │ └── rms_norm.py ├── c500_performance.py ├── end-to-end │ ├── ascend │ │ ├── README.md │ │ └── llama_ascend.py │ ├── chameleon.py │ ├── llama.py │ ├── lora.py │ ├── maca │ │ ├── README.md │ │ ├── __init__.py │ │ ├── chameleon_maca.py │ │ ├── llama_maca.py │ │ ├── lora_maca.py │ │ ├── ngpt_maca.py │ │ └── run_all.py │ └── ngpt.py ├── gated_mlp.py ├── group_query_attention.py ├── lora.py ├── maca_c500_benchmark.py ├── maca_full_comparison.py ├── maca_native_benchmark.py ├── maca_pytorch_comparison.py ├── maca_vs_pytorch.py ├── moe.py ├── multi_head_attention.py ├── multi_query_attention.py ├── norm_transformer.py ├── pytorch_baseline.py ├── qknorm_gqa.py ├── quick_comparison.py ├── rmsnorm.py └── saved_mugraphs │ ├── cuda │ ├── gated_mlp.json │ ├── gqa_bs1.json │ ├── gqa_bs8.json │ ├── lora.json │ ├── mha_bs1.json │ ├── mha_bs8.json │ ├── mqa_bs1.json │ ├── mqa_bs8.json │ ├── ntrans_bs1.json │ ├── ntrans_bs8.json │ ├── qknorm_gqa_bs1.json │ ├── qknorm_gqa_bs8.json │ ├── rmsnorm_bs1.json │ ├── rmsnorm_bs8.json │ ├── single_gated_mlp.json │ ├── single_lora.json │ └── single_mqa_bs1.json │ └── mps │ └── yirage_cached_mugraphs_7310eff92c865e30.json ├── cmake └── cuda.cmake ├── conda └── yirage.yml ├── config.ascend.cmake ├── config.cmake ├── config.maca.cmake ├── cpp_examples ├── chameleon.cc ├── common.h ├── dnn.cc ├── egg_tests.cc ├── gated_mlp.cc ├── group_query_attn_inc_decode.cc ├── group_query_attn_prefill.cc ├── group_query_attn_spec_decode.cc ├── lora.cc ├── mlp.cc ├── moe.cc ├── multi_head_attn_inc_decode.cc ├── multi_head_attn_prefill.cc ├── multi_head_attn_spec_decode.cc ├── multi_query_attn_inc_decode.cc ├── multi_query_attn_prefill.cc ├── multi_query_attn_spec_decode.cc ├── profile.cc └── rms.cc ├── demo ├── backend_selection_demo.py ├── checkpoint_lora.json ├── demo_blackwell │ ├── matmul-256x4096x4096.py │ ├── matmul-512x1024x256.py │ └── rmsnorm.py ├── demo_chameleon_attn.py ├── demo_gated_mlp.py ├── demo_group_query_attention.py ├── demo_hopper │ ├── linear.py │ ├── main.py │ ├── mirage_hopper_matmul.py │ └── utils.py ├── demo_jit.py ├── demo_llama3-8b.py ├── demo_lora.py ├── demo_maca_optimization.py ├── demo_rms_norm.py ├── llama3 │ ├── demo.py │ └── models │ │ ├── configuration_llama3.py │ │ ├── modeling_llama3.py │ │ └── rope.py ├── maca_superopt_test.py ├── nki_rms_norm.py ├── profiler │ ├── profile_rmsnorm.py │ └── rmsnorm.png ├── pytorch │ ├── chameleon-7b.py │ ├── llama3-8b.py │ ├── lora.py │ └── ngpt.py ├── qwen2.5 │ ├── demo.py │ ├── mirage_cached_mugraphs_64644b2e7bbffb94.json │ ├── mirage_cached_mugraphs_647dd41aeaf656b4.json │ ├── mirage_cached_mugraphs_6508b4ab6af4f866.json │ ├── mirage_cached_mugraphs_659704256b064506.json │ ├── mirage_cached_mugraphs_659774a49fe1cbf2.json │ ├── mirage_cached_mugraphs_659774eb06d5ca86.json │ └── models │ │ ├── configuration_qwen2.py │ │ ├── modeling_qwen2.py │ │ └── rope.py ├── qwen3 │ ├── demo.py │ ├── demo_30B_A3B.py │ ├── demo_30B_A3B_hopper.py │ ├── demo_chat.py │ ├── demo_hopper.py │ ├── demo_modal.py │ ├── demo_modal_ssh.py │ └── models │ │ ├── configuration_qwen3.py │ │ ├── convert.py │ │ ├── modeling_qwen3.py │ │ └── rope.py ├── reference_mugraphs │ ├── chameleon-7b.py │ ├── chameleon.py │ ├── gated_mlp.py │ ├── group_query_attention.py │ ├── group_query_attention_customized.py │ ├── group_query_attention_online.py │ ├── llama3-8b.py │ ├── lora.py │ ├── qwen_mlp.py │ ├── qwen_mlp_part2.py │ ├── qwen_prenorm.py │ └── rms_norm.py ├── runtime │ └── runtime.py └── triton_rms_norm.py ├── docker-build ├── Dockerfile └── build_wheel.sh ├── docker ├── Dockerfile ├── install_mirage.sh └── run_docker.sh ├── docs ├── Makefile ├── PROFILING_GUIDE.md ├── architecture.drawio ├── ascend_implementation_guide.md ├── ascend_integration_architecture.md ├── ascend_quick_start.md ├── backend_config_comparison.md ├── doxygen │ ├── Doxyfile │ ├── README.md │ └── theme │ │ ├── rust_customdoxygen.css │ │ ├── rust_footer.html │ │ └── rust_header.html ├── maca_quick_start.md ├── make.bat ├── requirements.txt ├── source │ ├── conf.py │ ├── cuda-transpiler.rst │ ├── images │ │ ├── gated_mlp.png │ │ ├── gpu_hierarchy.png │ │ ├── mma-non-divisible-example.drawio.svg │ │ ├── mma-thr-layout-example.drawio.svg │ │ ├── mugraph_gqa.png │ │ ├── swizzle-shift-example.drawio.svg │ │ ├── swizzle-xor-example.drawio.svg │ │ ├── tb-fusion-chain.drawio.svg │ │ ├── tb-sched-conflict-example.drawio.svg │ │ └── tensor-lifecycle.drawio.svg │ ├── index.rst │ ├── installation.rst │ ├── linear_kernel.rst │ ├── mugraph.rst │ ├── triton-transpiler.rst │ ├── tutorials │ │ ├── attention-with-kv-norm.rst │ │ ├── gated-mlp.rst │ │ ├── group-query-attention.rst │ │ ├── images │ │ │ ├── lora_kernel_graph.png │ │ │ ├── lora_performance.png │ │ │ ├── lora_ugraph.png │ │ │ ├── rms_norm_linear_original.png │ │ │ ├── rms_norm_linear_performance.png │ │ │ └── rms_norm_linear_ugraph.png │ │ ├── index.rst │ │ ├── lora.rst │ │ ├── multi-latent-attention.rst │ │ └── rms-norm-linear.rst │ ├── visualizer.rst │ └── welcome.rst ├── transpiler │ ├── mma-non-divisible-example.drawio.svg │ ├── mma-thr-layout-example.drawio.svg │ ├── nki_transpiler.md │ ├── swizzle-shift-example.drawio.svg │ ├── swizzle-xor-example.drawio.svg │ ├── tb-fusion-chain.drawio.svg │ ├── tb-sched-conflict-example.drawio.svg │ ├── tensor-lifecycle.drawio.svg │ ├── transpiler.md │ └── triton_transpiler.md └── ypk │ ├── backend_usage.md │ ├── multi_backend_design.md │ └── tma.md ├── img ├── architecture.drawio.svg ├── group_query_attnetion_spec_decode.png └── llama-3-8b-rms-norm-linear.png ├── include └── yirage │ ├── backend │ ├── ascend_backend.h │ ├── backend_interface.h │ ├── backend_registry.h │ ├── backends.h │ ├── cpu_backend.h │ ├── cuda_backend.h │ ├── cudnn_backend.h │ ├── maca_backend.h │ ├── mkl_backend.h │ ├── mps_backend.h │ ├── nki_backend.h │ └── triton_backend.h │ ├── config.h │ ├── cpu │ └── cmem_tensor.h │ ├── kernel │ ├── all_reduce.h │ ├── ascend │ │ ├── ascend_kernel.h │ │ ├── ascend_kernel_config.h │ │ └── ascend_kernels.h │ ├── chunk.h │ ├── common │ │ └── kernel_interface.h │ ├── cpu │ │ └── cpu_kernel_config.h │ ├── cuda │ │ └── cuda_kernel_config.h │ ├── cudnn │ │ └── cudnn_kernel_config.h │ ├── customized.h │ ├── device_memory_manager.h │ ├── device_tensor.h │ ├── element_binary.h │ ├── element_unary.h │ ├── graph.h │ ├── maca │ │ ├── maca_kernel.h │ │ ├── maca_kernel_config.h │ │ ├── maca_kernels.h │ │ └── maca_warp_utils.h │ ├── matmul.h │ ├── mkl │ │ └── mkl_kernel_config.h │ ├── mps │ │ └── mps_kernel_config.h │ ├── nki │ │ └── nki_kernel_config.h │ ├── operator.h │ ├── reduction.h │ ├── rms_norm.h │ ├── runtime.h │ ├── task_register.h │ └── triton │ │ └── triton_kernel_config.h │ ├── layout.h │ ├── nki_transpiler │ ├── helper_function.h │ ├── transpile.h │ └── utils.h │ ├── persistent_kernel │ ├── mpk_atoms.cuh │ ├── persistent_kernel.cuh │ ├── profiler.h │ ├── runtime_header.h │ ├── tasks │ │ ├── ampere │ │ │ ├── argmax.cuh │ │ │ ├── element_binary.cuh │ │ │ ├── element_unary.cuh │ │ │ ├── embedding.cuh │ │ │ ├── identity.cuh │ │ │ ├── linear.cuh │ │ │ ├── linear_cutlass.cuh │ │ │ ├── mma.cuh │ │ │ ├── multitoken_paged_attention.cuh │ │ │ ├── multitoken_paged_attention_32_64.cuh │ │ │ ├── multitoken_paged_attention_4_16.cuh │ │ │ ├── norm.cuh │ │ │ ├── norm_linear.cuh │ │ │ ├── norm_linear_new.cuh │ │ │ ├── reduction.cuh │ │ │ ├── rmsnorm.cuh │ │ │ ├── rotary_embedding.cuh │ │ │ ├── silu_mul.cuh │ │ │ ├── silu_mul_linear.cuh │ │ │ ├── single_batch_decoding.cuh │ │ │ ├── single_batch_extend.cuh │ │ │ ├── single_batch_gqa.cuh │ │ │ ├── smem_layout.cuh │ │ │ └── task_header.cuh │ │ ├── blackwell │ │ │ ├── argmax_sm100.cuh │ │ │ ├── attention_sm100.cuh │ │ │ ├── linear_sm100_mpk.cuh │ │ │ ├── moe_linear_sm100.cuh │ │ │ ├── mul_sum_add_sm100.cuh │ │ │ ├── norm_sm100.cuh │ │ │ ├── rotary_embedding_sm100.cuh │ │ │ ├── storage.cuh │ │ │ ├── task_header.cuh │ │ │ ├── tensor_init.cuh │ │ │ └── topk_softmax_sm100.cuh │ │ ├── common │ │ │ ├── bfloat16.h │ │ │ ├── common_header.cuh │ │ │ ├── copy_sm80.cuh │ │ │ ├── dmem_layout.cuh │ │ │ ├── utils.cuh │ │ │ └── worker_config.h │ │ ├── cute │ │ │ └── hopper │ │ │ │ ├── epilogue.cuh │ │ │ │ ├── gemm_ws.cuh │ │ │ │ ├── gemm_ws_cooperative.cuh │ │ │ │ ├── gemm_ws_mpk.cuh │ │ │ │ ├── kernel_traits.cuh │ │ │ │ └── mma_tma_ws_mainloop.cuh │ │ ├── deprecated │ │ │ └── paged_attention.cuh │ │ ├── hopper │ │ │ ├── barrier.cuh │ │ │ ├── embedding_hopper.cuh │ │ │ ├── linear_hopper.cuh │ │ │ ├── linear_swapAB_hopper.cuh │ │ │ ├── matmul_demo_hopper.cuh │ │ │ ├── moe_linear_swapAB_hopper.cuh │ │ │ ├── multitoken_paged_attention_hopper.cuh │ │ │ ├── norm_hopper.cuh │ │ │ ├── norm_linear_hopper.cuh │ │ │ ├── rmsnorm_hopper.cuh │ │ │ ├── rotary_embedding_hopper.cuh │ │ │ ├── silu_mul_hopper.cuh │ │ │ ├── smem_layout_tma.cuh │ │ │ ├── task_header.cuh │ │ │ ├── tma.cuh │ │ │ ├── tma_2d.cuh │ │ │ ├── tma_3d.cuh │ │ │ ├── tma_4d.cuh │ │ │ ├── utils.cuh │ │ │ └── wgmma.cuh │ │ └── speculative_decoding │ │ │ ├── prompt_lookup.cuh │ │ │ └── target_verify.cuh │ └── tma.cuh │ ├── search │ ├── abstract_expr │ │ ├── abstract_expr.h │ │ ├── abstract_expr_eval.h │ │ └── abstract_expr_for_ops.h │ ├── backend_strategies │ │ ├── ascend_strategy.h │ │ ├── cpu_strategy.h │ │ ├── cuda_strategy.h │ │ ├── maca_strategy.h │ │ ├── mps_strategy.h │ │ ├── nki_strategy.h │ │ └── triton_strategy.h │ ├── common │ │ └── search_strategy.h │ ├── config.h │ ├── dim_strategy.h │ ├── mps_profiler.h │ ├── op_utils.h │ ├── order.h │ ├── range_propagation │ │ ├── irange.h │ │ ├── propagation_path.h │ │ ├── range.h │ │ ├── range_set.h │ │ └── tbrange.h │ ├── search.h │ ├── search_c.h │ ├── search_context.h │ ├── search_state_manager.h │ ├── symbolic_graph │ │ ├── dim_var_assignments.h │ │ ├── op_args.h │ │ ├── symbolic_graph.h │ │ ├── symbolic_map.h │ │ ├── symbolic_op.h │ │ ├── symbolic_tensor.h │ │ ├── symbolic_tensor_dim.h │ │ ├── tensor_dim_constraint.h │ │ ├── tensor_dim_constraints.h │ │ ├── tensor_dim_expr.h │ │ └── types.h │ └── verification │ │ ├── formal_verifier.h │ │ ├── output_match.h │ │ ├── probabilistic_verifier.h │ │ └── verifier.h │ ├── threadblock │ ├── ascend │ │ ├── all_reduce.h │ │ ├── concat.h │ │ ├── element_binary.h │ │ ├── element_unary.h │ │ ├── forloop_accum.h │ │ ├── input_loader.h │ │ ├── matmul.h │ │ ├── output_saver.h │ │ ├── reduction.h │ │ └── rms_norm.h │ ├── concat.h │ ├── cuda │ │ ├── concat.h │ │ ├── element_binary.h │ │ ├── element_unary.h │ │ ├── forloop_accum.h │ │ ├── input_loader.h │ │ ├── matmul.h │ │ ├── output_saver.h │ │ ├── reduction.h │ │ └── rms_norm.h │ ├── element_binary.h │ ├── element_unary.h │ ├── forloop_accum.h │ ├── graph.h │ ├── maca │ │ ├── concat.h │ │ ├── element_binary.h │ │ ├── element_unary.h │ │ ├── forloop_accum.h │ │ ├── input_loader.h │ │ ├── matmul.h │ │ ├── output_saver.h │ │ ├── reduction.h │ │ └── rms_norm.h │ ├── matmul.h │ ├── operator.h │ ├── reduction.h │ ├── rms_norm.h │ ├── serializer │ │ ├── concat_serializer.h │ │ ├── element_binary_serializer.h │ │ ├── element_unary_serializer.h │ │ ├── forloop_accum_serializer.h │ │ ├── input_loader_serializer.h │ │ ├── kernel_params.h │ │ ├── matmul_serializer.h │ │ ├── output_saver_serializer.h │ │ ├── reduction_serializer.h │ │ └── rms_norm_serializer.h │ └── smem_tensor.h │ ├── transpiler │ ├── common.h │ ├── error_types.h │ ├── runtime │ │ ├── README.md │ │ ├── config.h │ │ ├── kernel │ │ │ ├── element_binary.h │ │ │ ├── element_unary.h │ │ │ ├── matmul.h │ │ │ └── reduction.h │ │ ├── nvshmem.h │ │ ├── runtime.h │ │ ├── threadblock │ │ │ ├── blackwell_matmul.h │ │ │ ├── blackwell_pipeline.h │ │ │ ├── element_binary.h │ │ │ ├── element_unary.h │ │ │ ├── epilogues.h │ │ │ ├── forloop_accum.h │ │ │ ├── hopper_matmul.h │ │ │ ├── input.h │ │ │ ├── matmul.h │ │ │ ├── output.h │ │ │ ├── pipeline.h │ │ │ ├── profiler.h │ │ │ ├── reduction.h │ │ │ ├── threadblock.h │ │ │ └── utils.h │ │ ├── triton_kernels.py │ │ └── utils.h │ ├── sched_tb_graph.h │ ├── structs.h │ ├── transpile.h │ ├── transpiler.h │ └── utils.h │ ├── triton_transpiler │ ├── runtime │ │ └── triton_kernels.py │ └── transpile.h │ ├── type.h │ ├── utils │ ├── ascend_helper.h │ ├── containers.h │ ├── cuda_helper.h │ ├── fingerprint_functions.h │ ├── hash_utils.h │ ├── json_utils.h │ ├── maca_helper.h │ ├── math_utils.h │ ├── static_switch.h │ └── z3_utils.h │ ├── vector_types.h │ └── warp │ └── cuda │ └── matmul.h ├── pyproject.toml ├── python ├── cython_setup.py └── yirage │ ├── __init__.py │ ├── _cython │ ├── CCore.pxd │ └── core.pyx │ ├── ascend_config.py │ ├── backend_api.py │ ├── global_config.py │ ├── graph_dataset.py │ ├── kernel.py │ ├── maca_config.py │ ├── mps_config.py │ ├── persistent_kernel.py │ ├── profiler.py │ ├── profiler_persistent.py │ ├── speculative.py │ ├── threadblock.py │ ├── triton_profiler.py │ ├── utils.py │ ├── version.py │ └── visualizer.py ├── requirements.txt ├── scripts ├── add_backend_support.py ├── analyze_multibackend.sh ├── convert_benchmark_to_multibackend.py ├── display_task_graph.py ├── draw_graph.py ├── format.sh ├── partition_graph.py ├── rename_to_yirage.sh ├── setup_maca.sh ├── update_copyright_yirage.sh ├── validate_multi_backend.sh └── verify_maca.py ├── setup.py ├── src ├── backend │ ├── ascend_backend.cc │ ├── backend_init.cc │ ├── backend_registry.cc │ ├── backend_utils.cc │ ├── backends.cc │ ├── cpu_backend.cc │ ├── cuda_backend.cc │ ├── cudnn_backend.cc │ ├── maca_backend.cc │ ├── mkl_backend.cc │ ├── mps_backend.cc │ ├── nki_backend.cc │ └── triton_backend.cc ├── base │ ├── data_type.cc │ └── layout.cc ├── kernel │ ├── all_reduce.cc │ ├── ascend │ │ ├── all_reduce_kernel.ascend │ │ ├── ascend_fingerprint_kernels.cc │ │ ├── ascend_kernel_generator.cc │ │ ├── ascend_optimizer.cc │ │ ├── customized_kernel.ascend │ │ ├── device_memory_manager.ascend │ │ ├── device_tensor_kernel.ascend │ │ ├── element_binary_kernel.ascend │ │ ├── element_unary_kernel.ascend │ │ ├── input_kernel.ascend │ │ ├── matmul_kernel.ascend │ │ ├── output_kernel.ascend │ │ ├── reduction_kernel.ascend │ │ └── rms_norm_kernel.ascend │ ├── chunk.cc │ ├── common │ │ └── kernel_factory.cc │ ├── cpu │ │ └── cpu_optimizer.cc │ ├── cuda │ │ ├── all_reduce_kernel.cu │ │ ├── cuda_optimizer.cc │ │ ├── customized_kernel.cu │ │ ├── device_tensor_kernel.cu │ │ ├── element_binary_kernel.cu │ │ ├── element_unary_kernel.cu │ │ ├── input_kernel.cu │ │ ├── matmul_kernel.cu │ │ ├── output_kernel.cu │ │ ├── reduction_kernel.cu │ │ └── rms_norm_kernel.cu │ ├── cudnn │ │ └── cudnn_optimizer.cc │ ├── customized.cc │ ├── device_memory_manager.cc │ ├── device_memory_manager.cu │ ├── device_tensor.cc │ ├── element_binary.cc │ ├── element_unary.cc │ ├── graph.cc │ ├── input.cc │ ├── maca │ │ ├── all_reduce_kernel.maca │ │ ├── customized_kernel.maca │ │ ├── device_memory_manager.maca │ │ ├── device_tensor_kernel.maca │ │ ├── element_binary_kernel.maca │ │ ├── element_unary_kernel.maca │ │ ├── input_kernel.maca │ │ ├── maca_optimizer.cc │ │ ├── matmul_kernel.maca │ │ ├── output_kernel.maca │ │ ├── reduction_kernel.maca │ │ └── rms_norm_kernel.maca │ ├── matmul.cc │ ├── mkl │ │ └── mkl_optimizer.cc │ ├── mps │ │ └── mps_optimizer.cc │ ├── nki │ │ └── nki_optimizer.cc │ ├── operator.cc │ ├── output.cc │ ├── reduction.cc │ ├── rms_norm.cc │ ├── runtime.cc │ ├── task_register.cc │ ├── triton │ │ └── triton_optimizer.cc │ └── triton_code_gen.cc ├── nki_transpiler │ ├── helper_function.cc │ ├── transpile.cc │ ├── transpile_tb.cc │ └── utils.cc ├── search │ ├── abstract_expr │ │ ├── abstract_expr.cc │ │ ├── abstract_expr_eval.cc │ │ ├── abstract_expr_for_ops.cc │ │ └── abstract_subexpr │ │ │ ├── Cargo.toml │ │ │ └── src │ │ │ └── lib.rs │ ├── backend_strategies │ │ ├── ascend_strategy.cc │ │ ├── cpu_strategy.cc │ │ ├── cuda_strategy.cc │ │ ├── maca_strategy.cc │ │ ├── mps_strategy.cc │ │ ├── nki_strategy.cc │ │ └── triton_strategy.cc │ ├── common │ │ └── search_strategy_factory.cc │ ├── config.cc │ ├── dim_strategy.cc │ ├── mps_profiler.cc │ ├── op_utils.cc │ ├── order.cc │ ├── range_propagation │ │ ├── irange.cc │ │ ├── range.cc │ │ └── tbrange.cc │ ├── search.cc │ ├── search_c.cc │ ├── search_context.cc │ ├── symbolic_graph │ │ ├── dim_var_assignments.cc │ │ ├── op_args.cc │ │ ├── symbolic_graph.cc │ │ ├── symbolic_map.cc │ │ ├── symbolic_op.cc │ │ ├── symbolic_tensor.cc │ │ ├── symbolic_tensor_dim.cc │ │ ├── tensor_dim_constraint.cc │ │ ├── tensor_dim_constraints.cc │ │ └── tensor_dim_expr.cc │ └── verification │ │ ├── formal_verifier.cc │ │ ├── formal_verifier_equiv │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ │ ├── output_match.cc │ │ └── probabilistic_verifier.cc ├── threadblock │ ├── concat.cc │ ├── cuda │ │ ├── element_unary.cu │ │ ├── input_executor.cu │ │ └── matmul.cu │ ├── element_binary.cc │ ├── element_unary.cc │ ├── forloop_accum.cc │ ├── graph.cc │ ├── input_loader.cc │ ├── matmul.cc │ ├── operator.cc │ ├── output.cc │ ├── reduction.cc │ ├── rms_norm.cc │ └── smem_tensor.cc ├── transpiler │ ├── ascend_transpiler_stub.cc │ ├── plan_dtensor_memory.cc │ ├── plan_stensor_memory.cc │ ├── plan_tb_swizzle.cc │ ├── plan_tb_swizzle_blackwell.cc │ ├── plan_tb_swizzle_hopper.cc │ ├── resolve_dtensor_meta.cc │ ├── resolve_tb_fusion.cc │ ├── resolve_tensor_layout.cc │ ├── sched_tb_graph.cc │ ├── transpile.cc │ ├── transpiler_kn.cc │ ├── transpiler_stub.cc │ ├── transpiler_tb.cc │ ├── transpiler_tb_blackwell.cc │ └── transpiler_tb_hopper.cc ├── triton_transpiler │ ├── transpile.cc │ └── transpile_tb.cc └── utils │ ├── containers.cc │ ├── cuda_helper.cu │ ├── json_utils.cc │ └── z3_utils.cc └── tests ├── CMakeLists.txt ├── ascend └── test_triton_integration.py ├── backend └── test_backend_registry.cc ├── ci-tests ├── qwen2.5 │ ├── demo.py │ ├── mirage_cached_mugraphs_64644b2e7bbffb94.json │ ├── mirage_cached_mugraphs_647dd41aeaf656b4.json │ ├── mirage_cached_mugraphs_6508b4ab6af4f866.json │ ├── mirage_cached_mugraphs_659704256b064506.json │ ├── mirage_cached_mugraphs_659774a49fe1cbf2.json │ ├── mirage_cached_mugraphs_659774eb06d5ca86.json │ ├── mirage_search_checkpoint.json │ └── models │ │ ├── configuration_qwen2.py │ │ └── modeling_qwen2.py └── run_python_tests.sh ├── python └── test_tensor_program.py ├── runtime_python ├── blackwell │ ├── sm100_linear │ │ ├── runtime_kernel_wrapper_sm100.cu │ │ ├── setup.py │ │ ├── test_matmul_mpk.py │ │ └── test_matmul_splitk.py │ └── sm100_moe │ │ ├── runtime_kernel_wrapper_sm100.cu │ │ ├── setup.py │ │ ├── test_gate_topk.py │ │ ├── test_silu_mul.py │ │ ├── test_w13_linear.py │ │ ├── test_w2_linear.py │ │ └── test_weighted_sum.py ├── cute │ └── hopper │ │ ├── gemm.cu │ │ ├── setup.py │ │ ├── test_matmul_ws_hopper.py │ │ └── test_matmul_ws_mpk.py ├── hopper-moe │ ├── runtime_kernel_wrapper_moe_hopper.cu │ ├── setup.py │ ├── test_moe_w13_linear_hopper.py │ └── test_moe_w2_linear_hopper.py ├── hopper │ ├── runtime_kernel_wrapper_hopper.cu │ ├── setup.py │ ├── test_linear_hopper.py │ ├── test_multitoken_paged_attention_hopper.py │ └── test_norm_linear_hopper.py ├── runtime_kernel_wrapper.cu ├── setup.py ├── test_alignment_norm_linear.py ├── test_argmax.py ├── test_attention │ ├── runtime_kernel_wrapper.cu │ ├── setup.py │ ├── test_decoding_flash.py │ └── test_multitoken_paged_attention.py ├── test_decoding_flash.py ├── test_decoding_no_qknorm.py ├── test_decoding_no_sfm.py ├── test_decoding_w_norm.py ├── test_embedding.py ├── test_extend_w_norm.py ├── test_linear.py ├── test_multitoken_paged_attention.py ├── test_norm_linear.py ├── test_paged_attention.py ├── test_prompt_lookup.py ├── test_rmsnorm.py ├── test_rotary_embedding.py ├── test_silu_mul_linear.py └── test_verify.py └── transpiler ├── CMakeLists.txt ├── all_testcases.h ├── config.h ├── lib.h ├── test_cuda_transpiler.cc └── testcases ├── kernel ├── elemwise.h ├── matmul.h └── reduction.h └── threadblock ├── elemwise.h ├── elemwise_bcast.h ├── io.h ├── matmul.h └── reduction.h /.clang-format: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.clang-format -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/1_Report_Bug.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/ISSUE_TEMPLATE/1_Report_Bug.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/2_Request_Feature.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/ISSUE_TEMPLATE/2_Request_Feature.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/3_Installation.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/ISSUE_TEMPLATE/3_Installation.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/4_Call_For_Comment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/ISSUE_TEMPLATE/4_Call_For_Comment.yml -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/5_General.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/ISSUE_TEMPLATE/5_General.yml -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/PULL_REQUEST_TEMPLATE.md -------------------------------------------------------------------------------- /.github/runs-on.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/runs-on.yml -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/build-test.yml -------------------------------------------------------------------------------- /.github/workflows/code-format.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/code-format.yml -------------------------------------------------------------------------------- /.github/workflows/gpu-tests.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/gpu-tests.yml -------------------------------------------------------------------------------- /.github/workflows/helpers/build_yirage_from_source.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/helpers/build_yirage_from_source.sh -------------------------------------------------------------------------------- /.github/workflows/helpers/free_space_on_runner.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/helpers/free_space_on_runner.sh -------------------------------------------------------------------------------- /.github/workflows/helpers/install_dependencies.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/helpers/install_dependencies.sh -------------------------------------------------------------------------------- /.github/workflows/helpers/set_env.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/helpers/set_env.sh -------------------------------------------------------------------------------- /.github/workflows/pypi-deploy.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/pypi-deploy.yml -------------------------------------------------------------------------------- /.github/workflows/shell-check.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.github/workflows/shell-check.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.gitignore -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.gitmodules -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.readthedocs.yaml -------------------------------------------------------------------------------- /.tools/clang-format-15-master-1d7ec53d: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/.tools/clang-format-15-master-1d7ec53d -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/CMakeLists.txt -------------------------------------------------------------------------------- /COPYRIGHT_HEADERS.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/COPYRIGHT_HEADERS.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/README.md -------------------------------------------------------------------------------- /README.pypi.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/README.pypi.md -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/gated_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/gated_mlp.py -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/gqa.py -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/lora.py -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/norm_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/norm_transformer.py -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/qknorm_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/qknorm_gqa.py -------------------------------------------------------------------------------- /benchmark/baselines/pytorch/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/baselines/pytorch/rms_norm.py -------------------------------------------------------------------------------- /benchmark/c500_performance.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/c500_performance.py -------------------------------------------------------------------------------- /benchmark/end-to-end/ascend/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/ascend/README.md -------------------------------------------------------------------------------- /benchmark/end-to-end/ascend/llama_ascend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/ascend/llama_ascend.py -------------------------------------------------------------------------------- /benchmark/end-to-end/chameleon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/chameleon.py -------------------------------------------------------------------------------- /benchmark/end-to-end/llama.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/llama.py -------------------------------------------------------------------------------- /benchmark/end-to-end/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/lora.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/README.md -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/__init__.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/chameleon_maca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/chameleon_maca.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/llama_maca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/llama_maca.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/lora_maca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/lora_maca.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/ngpt_maca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/ngpt_maca.py -------------------------------------------------------------------------------- /benchmark/end-to-end/maca/run_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/maca/run_all.py -------------------------------------------------------------------------------- /benchmark/end-to-end/ngpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/end-to-end/ngpt.py -------------------------------------------------------------------------------- /benchmark/gated_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/gated_mlp.py -------------------------------------------------------------------------------- /benchmark/group_query_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/group_query_attention.py -------------------------------------------------------------------------------- /benchmark/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/lora.py -------------------------------------------------------------------------------- /benchmark/maca_c500_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/maca_c500_benchmark.py -------------------------------------------------------------------------------- /benchmark/maca_full_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/maca_full_comparison.py -------------------------------------------------------------------------------- /benchmark/maca_native_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/maca_native_benchmark.py -------------------------------------------------------------------------------- /benchmark/maca_pytorch_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/maca_pytorch_comparison.py -------------------------------------------------------------------------------- /benchmark/maca_vs_pytorch.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/maca_vs_pytorch.py -------------------------------------------------------------------------------- /benchmark/moe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/moe.py -------------------------------------------------------------------------------- /benchmark/multi_head_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/multi_head_attention.py -------------------------------------------------------------------------------- /benchmark/multi_query_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/multi_query_attention.py -------------------------------------------------------------------------------- /benchmark/norm_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/norm_transformer.py -------------------------------------------------------------------------------- /benchmark/pytorch_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/pytorch_baseline.py -------------------------------------------------------------------------------- /benchmark/qknorm_gqa.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/qknorm_gqa.py -------------------------------------------------------------------------------- /benchmark/quick_comparison.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/quick_comparison.py -------------------------------------------------------------------------------- /benchmark/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/rmsnorm.py -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/gated_mlp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/gated_mlp.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/gqa_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/gqa_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/gqa_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/gqa_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/lora.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/lora.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/mha_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/mha_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/mha_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/mha_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/mqa_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/mqa_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/mqa_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/mqa_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/ntrans_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/ntrans_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/ntrans_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/ntrans_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/qknorm_gqa_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/qknorm_gqa_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/qknorm_gqa_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/qknorm_gqa_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/rmsnorm_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/rmsnorm_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/rmsnorm_bs8.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/rmsnorm_bs8.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/single_gated_mlp.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/single_gated_mlp.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/single_lora.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/single_lora.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/cuda/single_mqa_bs1.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/cuda/single_mqa_bs1.json -------------------------------------------------------------------------------- /benchmark/saved_mugraphs/mps/yirage_cached_mugraphs_7310eff92c865e30.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/benchmark/saved_mugraphs/mps/yirage_cached_mugraphs_7310eff92c865e30.json -------------------------------------------------------------------------------- /cmake/cuda.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cmake/cuda.cmake -------------------------------------------------------------------------------- /conda/yirage.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/conda/yirage.yml -------------------------------------------------------------------------------- /config.ascend.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/config.ascend.cmake -------------------------------------------------------------------------------- /config.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/config.cmake -------------------------------------------------------------------------------- /config.maca.cmake: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/config.maca.cmake -------------------------------------------------------------------------------- /cpp_examples/chameleon.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/chameleon.cc -------------------------------------------------------------------------------- /cpp_examples/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/common.h -------------------------------------------------------------------------------- /cpp_examples/dnn.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/dnn.cc -------------------------------------------------------------------------------- /cpp_examples/egg_tests.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/egg_tests.cc -------------------------------------------------------------------------------- /cpp_examples/gated_mlp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/gated_mlp.cc -------------------------------------------------------------------------------- /cpp_examples/group_query_attn_inc_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/group_query_attn_inc_decode.cc -------------------------------------------------------------------------------- /cpp_examples/group_query_attn_prefill.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/group_query_attn_prefill.cc -------------------------------------------------------------------------------- /cpp_examples/group_query_attn_spec_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/group_query_attn_spec_decode.cc -------------------------------------------------------------------------------- /cpp_examples/lora.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/lora.cc -------------------------------------------------------------------------------- /cpp_examples/mlp.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/mlp.cc -------------------------------------------------------------------------------- /cpp_examples/moe.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/moe.cc -------------------------------------------------------------------------------- /cpp_examples/multi_head_attn_inc_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_head_attn_inc_decode.cc -------------------------------------------------------------------------------- /cpp_examples/multi_head_attn_prefill.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_head_attn_prefill.cc -------------------------------------------------------------------------------- /cpp_examples/multi_head_attn_spec_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_head_attn_spec_decode.cc -------------------------------------------------------------------------------- /cpp_examples/multi_query_attn_inc_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_query_attn_inc_decode.cc -------------------------------------------------------------------------------- /cpp_examples/multi_query_attn_prefill.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_query_attn_prefill.cc -------------------------------------------------------------------------------- /cpp_examples/multi_query_attn_spec_decode.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/multi_query_attn_spec_decode.cc -------------------------------------------------------------------------------- /cpp_examples/profile.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/profile.cc -------------------------------------------------------------------------------- /cpp_examples/rms.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/cpp_examples/rms.cc -------------------------------------------------------------------------------- /demo/backend_selection_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/backend_selection_demo.py -------------------------------------------------------------------------------- /demo/checkpoint_lora.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/checkpoint_lora.json -------------------------------------------------------------------------------- /demo/demo_blackwell/matmul-256x4096x4096.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_blackwell/matmul-256x4096x4096.py -------------------------------------------------------------------------------- /demo/demo_blackwell/matmul-512x1024x256.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_blackwell/matmul-512x1024x256.py -------------------------------------------------------------------------------- /demo/demo_blackwell/rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_blackwell/rmsnorm.py -------------------------------------------------------------------------------- /demo/demo_chameleon_attn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_chameleon_attn.py -------------------------------------------------------------------------------- /demo/demo_gated_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_gated_mlp.py -------------------------------------------------------------------------------- /demo/demo_group_query_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_group_query_attention.py -------------------------------------------------------------------------------- /demo/demo_hopper/linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_hopper/linear.py -------------------------------------------------------------------------------- /demo/demo_hopper/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_hopper/main.py -------------------------------------------------------------------------------- /demo/demo_hopper/mirage_hopper_matmul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_hopper/mirage_hopper_matmul.py -------------------------------------------------------------------------------- /demo/demo_hopper/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_hopper/utils.py -------------------------------------------------------------------------------- /demo/demo_jit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_jit.py -------------------------------------------------------------------------------- /demo/demo_llama3-8b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_llama3-8b.py -------------------------------------------------------------------------------- /demo/demo_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_lora.py -------------------------------------------------------------------------------- /demo/demo_maca_optimization.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_maca_optimization.py -------------------------------------------------------------------------------- /demo/demo_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/demo_rms_norm.py -------------------------------------------------------------------------------- /demo/llama3/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/llama3/demo.py -------------------------------------------------------------------------------- /demo/llama3/models/configuration_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/llama3/models/configuration_llama3.py -------------------------------------------------------------------------------- /demo/llama3/models/modeling_llama3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/llama3/models/modeling_llama3.py -------------------------------------------------------------------------------- /demo/llama3/models/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/llama3/models/rope.py -------------------------------------------------------------------------------- /demo/maca_superopt_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/maca_superopt_test.py -------------------------------------------------------------------------------- /demo/nki_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/nki_rms_norm.py -------------------------------------------------------------------------------- /demo/profiler/profile_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/profiler/profile_rmsnorm.py -------------------------------------------------------------------------------- /demo/profiler/rmsnorm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/profiler/rmsnorm.png -------------------------------------------------------------------------------- /demo/pytorch/chameleon-7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/pytorch/chameleon-7b.py -------------------------------------------------------------------------------- /demo/pytorch/llama3-8b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/pytorch/llama3-8b.py -------------------------------------------------------------------------------- /demo/pytorch/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/pytorch/lora.py -------------------------------------------------------------------------------- /demo/pytorch/ngpt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/pytorch/ngpt.py -------------------------------------------------------------------------------- /demo/qwen2.5/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/demo.py -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_64644b2e7bbffb94.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_64644b2e7bbffb94.json -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_647dd41aeaf656b4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_647dd41aeaf656b4.json -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_6508b4ab6af4f866.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_6508b4ab6af4f866.json -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_659704256b064506.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_659704256b064506.json -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_659774a49fe1cbf2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_659774a49fe1cbf2.json -------------------------------------------------------------------------------- /demo/qwen2.5/mirage_cached_mugraphs_659774eb06d5ca86.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/mirage_cached_mugraphs_659774eb06d5ca86.json -------------------------------------------------------------------------------- /demo/qwen2.5/models/configuration_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/models/configuration_qwen2.py -------------------------------------------------------------------------------- /demo/qwen2.5/models/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/models/modeling_qwen2.py -------------------------------------------------------------------------------- /demo/qwen2.5/models/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen2.5/models/rope.py -------------------------------------------------------------------------------- /demo/qwen3/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo.py -------------------------------------------------------------------------------- /demo/qwen3/demo_30B_A3B.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_30B_A3B.py -------------------------------------------------------------------------------- /demo/qwen3/demo_30B_A3B_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_30B_A3B_hopper.py -------------------------------------------------------------------------------- /demo/qwen3/demo_chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_chat.py -------------------------------------------------------------------------------- /demo/qwen3/demo_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_hopper.py -------------------------------------------------------------------------------- /demo/qwen3/demo_modal.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_modal.py -------------------------------------------------------------------------------- /demo/qwen3/demo_modal_ssh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/demo_modal_ssh.py -------------------------------------------------------------------------------- /demo/qwen3/models/configuration_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/models/configuration_qwen3.py -------------------------------------------------------------------------------- /demo/qwen3/models/convert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/models/convert.py -------------------------------------------------------------------------------- /demo/qwen3/models/modeling_qwen3.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/models/modeling_qwen3.py -------------------------------------------------------------------------------- /demo/qwen3/models/rope.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/qwen3/models/rope.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/chameleon-7b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/chameleon-7b.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/chameleon.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/chameleon.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/gated_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/gated_mlp.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/group_query_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/group_query_attention.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/group_query_attention_customized.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/group_query_attention_customized.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/group_query_attention_online.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/group_query_attention_online.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/llama3-8b.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/llama3-8b.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/lora.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/qwen_mlp.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/qwen_mlp.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/qwen_mlp_part2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/qwen_mlp_part2.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/qwen_prenorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/qwen_prenorm.py -------------------------------------------------------------------------------- /demo/reference_mugraphs/rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/reference_mugraphs/rms_norm.py -------------------------------------------------------------------------------- /demo/runtime/runtime.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/runtime/runtime.py -------------------------------------------------------------------------------- /demo/triton_rms_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/demo/triton_rms_norm.py -------------------------------------------------------------------------------- /docker-build/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docker-build/Dockerfile -------------------------------------------------------------------------------- /docker-build/build_wheel.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docker-build/build_wheel.sh -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/install_mirage.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docker/install_mirage.sh -------------------------------------------------------------------------------- /docker/run_docker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docker/run_docker.sh -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/PROFILING_GUIDE.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/PROFILING_GUIDE.md -------------------------------------------------------------------------------- /docs/architecture.drawio: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/architecture.drawio -------------------------------------------------------------------------------- /docs/ascend_implementation_guide.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ascend_implementation_guide.md -------------------------------------------------------------------------------- /docs/ascend_integration_architecture.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ascend_integration_architecture.md -------------------------------------------------------------------------------- /docs/ascend_quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ascend_quick_start.md -------------------------------------------------------------------------------- /docs/backend_config_comparison.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/backend_config_comparison.md -------------------------------------------------------------------------------- /docs/doxygen/Doxyfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/doxygen/Doxyfile -------------------------------------------------------------------------------- /docs/doxygen/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/doxygen/README.md -------------------------------------------------------------------------------- /docs/doxygen/theme/rust_customdoxygen.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/doxygen/theme/rust_customdoxygen.css -------------------------------------------------------------------------------- /docs/doxygen/theme/rust_footer.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/doxygen/theme/rust_footer.html -------------------------------------------------------------------------------- /docs/doxygen/theme/rust_header.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/doxygen/theme/rust_header.html -------------------------------------------------------------------------------- /docs/maca_quick_start.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/maca_quick_start.md -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/requirements.txt -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/conf.py -------------------------------------------------------------------------------- /docs/source/cuda-transpiler.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/cuda-transpiler.rst -------------------------------------------------------------------------------- /docs/source/images/gated_mlp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/gated_mlp.png -------------------------------------------------------------------------------- /docs/source/images/gpu_hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/gpu_hierarchy.png -------------------------------------------------------------------------------- /docs/source/images/mma-non-divisible-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/mma-non-divisible-example.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/mma-thr-layout-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/mma-thr-layout-example.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/mugraph_gqa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/mugraph_gqa.png -------------------------------------------------------------------------------- /docs/source/images/swizzle-shift-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/swizzle-shift-example.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/swizzle-xor-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/swizzle-xor-example.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/tb-fusion-chain.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/tb-fusion-chain.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/tb-sched-conflict-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/tb-sched-conflict-example.drawio.svg -------------------------------------------------------------------------------- /docs/source/images/tensor-lifecycle.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/images/tensor-lifecycle.drawio.svg -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/index.rst -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/installation.rst -------------------------------------------------------------------------------- /docs/source/linear_kernel.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/linear_kernel.rst -------------------------------------------------------------------------------- /docs/source/mugraph.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/mugraph.rst -------------------------------------------------------------------------------- /docs/source/triton-transpiler.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/triton-transpiler.rst -------------------------------------------------------------------------------- /docs/source/tutorials/attention-with-kv-norm.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/attention-with-kv-norm.rst -------------------------------------------------------------------------------- /docs/source/tutorials/gated-mlp.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/gated-mlp.rst -------------------------------------------------------------------------------- /docs/source/tutorials/group-query-attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/group-query-attention.rst -------------------------------------------------------------------------------- /docs/source/tutorials/images/lora_kernel_graph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/lora_kernel_graph.png -------------------------------------------------------------------------------- /docs/source/tutorials/images/lora_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/lora_performance.png -------------------------------------------------------------------------------- /docs/source/tutorials/images/lora_ugraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/lora_ugraph.png -------------------------------------------------------------------------------- /docs/source/tutorials/images/rms_norm_linear_original.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/rms_norm_linear_original.png -------------------------------------------------------------------------------- /docs/source/tutorials/images/rms_norm_linear_performance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/rms_norm_linear_performance.png -------------------------------------------------------------------------------- /docs/source/tutorials/images/rms_norm_linear_ugraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/images/rms_norm_linear_ugraph.png -------------------------------------------------------------------------------- /docs/source/tutorials/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/index.rst -------------------------------------------------------------------------------- /docs/source/tutorials/lora.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/lora.rst -------------------------------------------------------------------------------- /docs/source/tutorials/multi-latent-attention.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/multi-latent-attention.rst -------------------------------------------------------------------------------- /docs/source/tutorials/rms-norm-linear.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/tutorials/rms-norm-linear.rst -------------------------------------------------------------------------------- /docs/source/visualizer.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/visualizer.rst -------------------------------------------------------------------------------- /docs/source/welcome.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/source/welcome.rst -------------------------------------------------------------------------------- /docs/transpiler/mma-non-divisible-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/mma-non-divisible-example.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/mma-thr-layout-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/mma-thr-layout-example.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/nki_transpiler.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/nki_transpiler.md -------------------------------------------------------------------------------- /docs/transpiler/swizzle-shift-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/swizzle-shift-example.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/swizzle-xor-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/swizzle-xor-example.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/tb-fusion-chain.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/tb-fusion-chain.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/tb-sched-conflict-example.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/tb-sched-conflict-example.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/tensor-lifecycle.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/tensor-lifecycle.drawio.svg -------------------------------------------------------------------------------- /docs/transpiler/transpiler.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/transpiler.md -------------------------------------------------------------------------------- /docs/transpiler/triton_transpiler.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/transpiler/triton_transpiler.md -------------------------------------------------------------------------------- /docs/ypk/backend_usage.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ypk/backend_usage.md -------------------------------------------------------------------------------- /docs/ypk/multi_backend_design.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ypk/multi_backend_design.md -------------------------------------------------------------------------------- /docs/ypk/tma.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/docs/ypk/tma.md -------------------------------------------------------------------------------- /img/architecture.drawio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/img/architecture.drawio.svg -------------------------------------------------------------------------------- /img/group_query_attnetion_spec_decode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/img/group_query_attnetion_spec_decode.png -------------------------------------------------------------------------------- /img/llama-3-8b-rms-norm-linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/img/llama-3-8b-rms-norm-linear.png -------------------------------------------------------------------------------- /include/yirage/backend/ascend_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/ascend_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/backend_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/backend_interface.h -------------------------------------------------------------------------------- /include/yirage/backend/backend_registry.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/backend_registry.h -------------------------------------------------------------------------------- /include/yirage/backend/backends.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/backends.h -------------------------------------------------------------------------------- /include/yirage/backend/cpu_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/cpu_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/cuda_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/cuda_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/cudnn_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/cudnn_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/maca_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/maca_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/mkl_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/mkl_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/mps_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/mps_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/nki_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/nki_backend.h -------------------------------------------------------------------------------- /include/yirage/backend/triton_backend.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/backend/triton_backend.h -------------------------------------------------------------------------------- /include/yirage/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/config.h -------------------------------------------------------------------------------- /include/yirage/cpu/cmem_tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/cpu/cmem_tensor.h -------------------------------------------------------------------------------- /include/yirage/kernel/all_reduce.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/all_reduce.h -------------------------------------------------------------------------------- /include/yirage/kernel/ascend/ascend_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/ascend/ascend_kernel.h -------------------------------------------------------------------------------- /include/yirage/kernel/ascend/ascend_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/ascend/ascend_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/ascend/ascend_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/ascend/ascend_kernels.h -------------------------------------------------------------------------------- /include/yirage/kernel/chunk.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/chunk.h -------------------------------------------------------------------------------- /include/yirage/kernel/common/kernel_interface.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/common/kernel_interface.h -------------------------------------------------------------------------------- /include/yirage/kernel/cpu/cpu_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/cpu/cpu_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/cuda/cuda_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/cuda/cuda_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/cudnn/cudnn_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/cudnn/cudnn_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/customized.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/customized.h -------------------------------------------------------------------------------- /include/yirage/kernel/device_memory_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/device_memory_manager.h -------------------------------------------------------------------------------- /include/yirage/kernel/device_tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/device_tensor.h -------------------------------------------------------------------------------- /include/yirage/kernel/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/element_binary.h -------------------------------------------------------------------------------- /include/yirage/kernel/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/element_unary.h -------------------------------------------------------------------------------- /include/yirage/kernel/graph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/graph.h -------------------------------------------------------------------------------- /include/yirage/kernel/maca/maca_kernel.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/maca/maca_kernel.h -------------------------------------------------------------------------------- /include/yirage/kernel/maca/maca_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/maca/maca_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/maca/maca_kernels.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/maca/maca_kernels.h -------------------------------------------------------------------------------- /include/yirage/kernel/maca/maca_warp_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/maca/maca_warp_utils.h -------------------------------------------------------------------------------- /include/yirage/kernel/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/matmul.h -------------------------------------------------------------------------------- /include/yirage/kernel/mkl/mkl_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/mkl/mkl_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/mps/mps_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/mps/mps_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/nki/nki_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/nki/nki_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/kernel/operator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/operator.h -------------------------------------------------------------------------------- /include/yirage/kernel/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/reduction.h -------------------------------------------------------------------------------- /include/yirage/kernel/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/rms_norm.h -------------------------------------------------------------------------------- /include/yirage/kernel/runtime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/runtime.h -------------------------------------------------------------------------------- /include/yirage/kernel/task_register.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/task_register.h -------------------------------------------------------------------------------- /include/yirage/kernel/triton/triton_kernel_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/kernel/triton/triton_kernel_config.h -------------------------------------------------------------------------------- /include/yirage/layout.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/layout.h -------------------------------------------------------------------------------- /include/yirage/nki_transpiler/helper_function.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/nki_transpiler/helper_function.h -------------------------------------------------------------------------------- /include/yirage/nki_transpiler/transpile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/nki_transpiler/transpile.h -------------------------------------------------------------------------------- /include/yirage/nki_transpiler/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/nki_transpiler/utils.h -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/mpk_atoms.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/mpk_atoms.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/persistent_kernel.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/persistent_kernel.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/profiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/profiler.h -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/runtime_header.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/runtime_header.h -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/argmax.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/argmax.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/element_binary.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/element_binary.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/element_unary.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/element_unary.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/embedding.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/embedding.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/identity.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/identity.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/linear.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/linear.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/linear_cutlass.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/linear_cutlass.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/mma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/mma.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention_32_64.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention_32_64.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention_4_16.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/multitoken_paged_attention_4_16.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/norm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/norm.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/norm_linear.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/norm_linear.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/norm_linear_new.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/norm_linear_new.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/reduction.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/reduction.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/rmsnorm.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/rmsnorm.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/rotary_embedding.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/rotary_embedding.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/silu_mul.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/silu_mul.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/silu_mul_linear.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/silu_mul_linear.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/single_batch_decoding.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/single_batch_decoding.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/single_batch_extend.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/single_batch_extend.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/single_batch_gqa.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/single_batch_gqa.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/smem_layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/smem_layout.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/ampere/task_header.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/ampere/task_header.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/argmax_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/argmax_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/attention_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/attention_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/linear_sm100_mpk.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/linear_sm100_mpk.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/moe_linear_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/moe_linear_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/mul_sum_add_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/mul_sum_add_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/norm_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/norm_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/rotary_embedding_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/rotary_embedding_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/storage.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/storage.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/task_header.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/task_header.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/tensor_init.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/tensor_init.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/blackwell/topk_softmax_sm100.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/blackwell/topk_softmax_sm100.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/bfloat16.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/bfloat16.h -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/common_header.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/common_header.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/copy_sm80.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/copy_sm80.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/dmem_layout.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/dmem_layout.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/utils.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/common/worker_config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/common/worker_config.h -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/epilogue.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/epilogue.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws_cooperative.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws_cooperative.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws_mpk.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/gemm_ws_mpk.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/kernel_traits.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/kernel_traits.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/cute/hopper/mma_tma_ws_mainloop.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/cute/hopper/mma_tma_ws_mainloop.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/deprecated/paged_attention.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/deprecated/paged_attention.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/barrier.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/barrier.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/embedding_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/embedding_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/linear_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/linear_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/linear_swapAB_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/linear_swapAB_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/matmul_demo_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/matmul_demo_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/moe_linear_swapAB_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/moe_linear_swapAB_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/multitoken_paged_attention_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/multitoken_paged_attention_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/norm_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/norm_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/norm_linear_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/norm_linear_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/rmsnorm_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/rmsnorm_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/rotary_embedding_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/rotary_embedding_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/silu_mul_hopper.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/silu_mul_hopper.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/smem_layout_tma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/smem_layout_tma.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/task_header.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/task_header.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/tma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/tma.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/tma_2d.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/tma_2d.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/tma_3d.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/tma_3d.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/tma_4d.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/tma_4d.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/utils.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/utils.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/hopper/wgmma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/hopper/wgmma.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/speculative_decoding/prompt_lookup.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/speculative_decoding/prompt_lookup.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tasks/speculative_decoding/target_verify.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tasks/speculative_decoding/target_verify.cuh -------------------------------------------------------------------------------- /include/yirage/persistent_kernel/tma.cuh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/persistent_kernel/tma.cuh -------------------------------------------------------------------------------- /include/yirage/search/abstract_expr/abstract_expr.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/abstract_expr/abstract_expr.h -------------------------------------------------------------------------------- /include/yirage/search/abstract_expr/abstract_expr_eval.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/abstract_expr/abstract_expr_eval.h -------------------------------------------------------------------------------- /include/yirage/search/abstract_expr/abstract_expr_for_ops.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/abstract_expr/abstract_expr_for_ops.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/ascend_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/ascend_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/cpu_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/cpu_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/cuda_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/cuda_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/maca_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/maca_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/mps_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/mps_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/nki_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/nki_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/backend_strategies/triton_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/backend_strategies/triton_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/common/search_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/common/search_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/config.h -------------------------------------------------------------------------------- /include/yirage/search/dim_strategy.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/dim_strategy.h -------------------------------------------------------------------------------- /include/yirage/search/mps_profiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/mps_profiler.h -------------------------------------------------------------------------------- /include/yirage/search/op_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/op_utils.h -------------------------------------------------------------------------------- /include/yirage/search/order.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/order.h -------------------------------------------------------------------------------- /include/yirage/search/range_propagation/irange.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/range_propagation/irange.h -------------------------------------------------------------------------------- /include/yirage/search/range_propagation/propagation_path.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/range_propagation/propagation_path.h -------------------------------------------------------------------------------- /include/yirage/search/range_propagation/range.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/range_propagation/range.h -------------------------------------------------------------------------------- /include/yirage/search/range_propagation/range_set.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/range_propagation/range_set.h -------------------------------------------------------------------------------- /include/yirage/search/range_propagation/tbrange.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/range_propagation/tbrange.h -------------------------------------------------------------------------------- /include/yirage/search/search.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/search.h -------------------------------------------------------------------------------- /include/yirage/search/search_c.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/search_c.h -------------------------------------------------------------------------------- /include/yirage/search/search_context.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/search_context.h -------------------------------------------------------------------------------- /include/yirage/search/search_state_manager.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/search_state_manager.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/dim_var_assignments.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/dim_var_assignments.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/op_args.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/op_args.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/symbolic_graph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/symbolic_graph.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/symbolic_map.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/symbolic_map.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/symbolic_op.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/symbolic_op.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/symbolic_tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/symbolic_tensor.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/symbolic_tensor_dim.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/symbolic_tensor_dim.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/tensor_dim_constraint.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/tensor_dim_constraint.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/tensor_dim_constraints.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/tensor_dim_constraints.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/tensor_dim_expr.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/tensor_dim_expr.h -------------------------------------------------------------------------------- /include/yirage/search/symbolic_graph/types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/symbolic_graph/types.h -------------------------------------------------------------------------------- /include/yirage/search/verification/formal_verifier.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/verification/formal_verifier.h -------------------------------------------------------------------------------- /include/yirage/search/verification/output_match.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/verification/output_match.h -------------------------------------------------------------------------------- /include/yirage/search/verification/probabilistic_verifier.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/verification/probabilistic_verifier.h -------------------------------------------------------------------------------- /include/yirage/search/verification/verifier.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/search/verification/verifier.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/all_reduce.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/all_reduce.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/concat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/concat.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/element_binary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/element_unary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/forloop_accum.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/forloop_accum.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/input_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/input_loader.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/matmul.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/output_saver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/output_saver.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/reduction.h -------------------------------------------------------------------------------- /include/yirage/threadblock/ascend/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/ascend/rms_norm.h -------------------------------------------------------------------------------- /include/yirage/threadblock/concat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/concat.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/concat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/concat.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/element_binary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/element_unary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/forloop_accum.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/forloop_accum.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/input_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/input_loader.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/matmul.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/output_saver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/output_saver.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/reduction.h -------------------------------------------------------------------------------- /include/yirage/threadblock/cuda/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/cuda/rms_norm.h -------------------------------------------------------------------------------- /include/yirage/threadblock/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/element_binary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/element_unary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/forloop_accum.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/forloop_accum.h -------------------------------------------------------------------------------- /include/yirage/threadblock/graph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/graph.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/concat.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/concat.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/element_binary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/element_unary.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/forloop_accum.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/forloop_accum.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/input_loader.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/input_loader.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/matmul.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/output_saver.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/output_saver.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/reduction.h -------------------------------------------------------------------------------- /include/yirage/threadblock/maca/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/maca/rms_norm.h -------------------------------------------------------------------------------- /include/yirage/threadblock/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/matmul.h -------------------------------------------------------------------------------- /include/yirage/threadblock/operator.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/operator.h -------------------------------------------------------------------------------- /include/yirage/threadblock/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/reduction.h -------------------------------------------------------------------------------- /include/yirage/threadblock/rms_norm.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/rms_norm.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/concat_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/concat_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/element_binary_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/element_binary_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/element_unary_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/element_unary_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/forloop_accum_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/forloop_accum_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/input_loader_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/input_loader_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/kernel_params.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/kernel_params.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/matmul_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/matmul_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/output_saver_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/output_saver_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/reduction_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/reduction_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/serializer/rms_norm_serializer.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/serializer/rms_norm_serializer.h -------------------------------------------------------------------------------- /include/yirage/threadblock/smem_tensor.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/threadblock/smem_tensor.h -------------------------------------------------------------------------------- /include/yirage/transpiler/common.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/common.h -------------------------------------------------------------------------------- /include/yirage/transpiler/error_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/error_types.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/README.md -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/config.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/kernel/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/kernel/element_binary.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/kernel/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/kernel/element_unary.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/kernel/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/kernel/matmul.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/kernel/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/kernel/reduction.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/nvshmem.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/nvshmem.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/runtime.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/runtime.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/blackwell_matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/blackwell_matmul.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/blackwell_pipeline.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/blackwell_pipeline.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/element_binary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/element_binary.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/element_unary.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/element_unary.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/epilogues.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/epilogues.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/forloop_accum.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/forloop_accum.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/hopper_matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/hopper_matmul.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/input.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/input.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/matmul.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/output.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/output.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/pipeline.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/pipeline.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/profiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/profiler.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/reduction.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/threadblock.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/threadblock.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/threadblock/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/threadblock/utils.h -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/triton_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/triton_kernels.py -------------------------------------------------------------------------------- /include/yirage/transpiler/runtime/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/runtime/utils.h -------------------------------------------------------------------------------- /include/yirage/transpiler/sched_tb_graph.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/sched_tb_graph.h -------------------------------------------------------------------------------- /include/yirage/transpiler/structs.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/structs.h -------------------------------------------------------------------------------- /include/yirage/transpiler/transpile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/transpile.h -------------------------------------------------------------------------------- /include/yirage/transpiler/transpiler.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/transpiler.h -------------------------------------------------------------------------------- /include/yirage/transpiler/utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/transpiler/utils.h -------------------------------------------------------------------------------- /include/yirage/triton_transpiler/runtime/triton_kernels.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/triton_transpiler/runtime/triton_kernels.py -------------------------------------------------------------------------------- /include/yirage/triton_transpiler/transpile.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/triton_transpiler/transpile.h -------------------------------------------------------------------------------- /include/yirage/type.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/type.h -------------------------------------------------------------------------------- /include/yirage/utils/ascend_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/ascend_helper.h -------------------------------------------------------------------------------- /include/yirage/utils/containers.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/containers.h -------------------------------------------------------------------------------- /include/yirage/utils/cuda_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/cuda_helper.h -------------------------------------------------------------------------------- /include/yirage/utils/fingerprint_functions.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/fingerprint_functions.h -------------------------------------------------------------------------------- /include/yirage/utils/hash_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/hash_utils.h -------------------------------------------------------------------------------- /include/yirage/utils/json_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/json_utils.h -------------------------------------------------------------------------------- /include/yirage/utils/maca_helper.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/maca_helper.h -------------------------------------------------------------------------------- /include/yirage/utils/math_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/math_utils.h -------------------------------------------------------------------------------- /include/yirage/utils/static_switch.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/static_switch.h -------------------------------------------------------------------------------- /include/yirage/utils/z3_utils.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/utils/z3_utils.h -------------------------------------------------------------------------------- /include/yirage/vector_types.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/vector_types.h -------------------------------------------------------------------------------- /include/yirage/warp/cuda/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/include/yirage/warp/cuda/matmul.h -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/pyproject.toml -------------------------------------------------------------------------------- /python/cython_setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/cython_setup.py -------------------------------------------------------------------------------- /python/yirage/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/__init__.py -------------------------------------------------------------------------------- /python/yirage/_cython/CCore.pxd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/_cython/CCore.pxd -------------------------------------------------------------------------------- /python/yirage/_cython/core.pyx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/_cython/core.pyx -------------------------------------------------------------------------------- /python/yirage/ascend_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/ascend_config.py -------------------------------------------------------------------------------- /python/yirage/backend_api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/backend_api.py -------------------------------------------------------------------------------- /python/yirage/global_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/global_config.py -------------------------------------------------------------------------------- /python/yirage/graph_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/graph_dataset.py -------------------------------------------------------------------------------- /python/yirage/kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/kernel.py -------------------------------------------------------------------------------- /python/yirage/maca_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/maca_config.py -------------------------------------------------------------------------------- /python/yirage/mps_config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/mps_config.py -------------------------------------------------------------------------------- /python/yirage/persistent_kernel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/persistent_kernel.py -------------------------------------------------------------------------------- /python/yirage/profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/profiler.py -------------------------------------------------------------------------------- /python/yirage/profiler_persistent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/profiler_persistent.py -------------------------------------------------------------------------------- /python/yirage/speculative.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/speculative.py -------------------------------------------------------------------------------- /python/yirage/threadblock.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/threadblock.py -------------------------------------------------------------------------------- /python/yirage/triton_profiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/triton_profiler.py -------------------------------------------------------------------------------- /python/yirage/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/utils.py -------------------------------------------------------------------------------- /python/yirage/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/version.py -------------------------------------------------------------------------------- /python/yirage/visualizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/python/yirage/visualizer.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/add_backend_support.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/add_backend_support.py -------------------------------------------------------------------------------- /scripts/analyze_multibackend.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/analyze_multibackend.sh -------------------------------------------------------------------------------- /scripts/convert_benchmark_to_multibackend.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/convert_benchmark_to_multibackend.py -------------------------------------------------------------------------------- /scripts/display_task_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/display_task_graph.py -------------------------------------------------------------------------------- /scripts/draw_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/draw_graph.py -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/format.sh -------------------------------------------------------------------------------- /scripts/partition_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/partition_graph.py -------------------------------------------------------------------------------- /scripts/rename_to_yirage.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/rename_to_yirage.sh -------------------------------------------------------------------------------- /scripts/setup_maca.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/setup_maca.sh -------------------------------------------------------------------------------- /scripts/update_copyright_yirage.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/update_copyright_yirage.sh -------------------------------------------------------------------------------- /scripts/validate_multi_backend.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/validate_multi_backend.sh -------------------------------------------------------------------------------- /scripts/verify_maca.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/scripts/verify_maca.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/setup.py -------------------------------------------------------------------------------- /src/backend/ascend_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/ascend_backend.cc -------------------------------------------------------------------------------- /src/backend/backend_init.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/backend_init.cc -------------------------------------------------------------------------------- /src/backend/backend_registry.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/backend_registry.cc -------------------------------------------------------------------------------- /src/backend/backend_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/backend_utils.cc -------------------------------------------------------------------------------- /src/backend/backends.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/backends.cc -------------------------------------------------------------------------------- /src/backend/cpu_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/cpu_backend.cc -------------------------------------------------------------------------------- /src/backend/cuda_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/cuda_backend.cc -------------------------------------------------------------------------------- /src/backend/cudnn_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/cudnn_backend.cc -------------------------------------------------------------------------------- /src/backend/maca_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/maca_backend.cc -------------------------------------------------------------------------------- /src/backend/mkl_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/mkl_backend.cc -------------------------------------------------------------------------------- /src/backend/mps_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/mps_backend.cc -------------------------------------------------------------------------------- /src/backend/nki_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/nki_backend.cc -------------------------------------------------------------------------------- /src/backend/triton_backend.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/backend/triton_backend.cc -------------------------------------------------------------------------------- /src/base/data_type.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/base/data_type.cc -------------------------------------------------------------------------------- /src/base/layout.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/base/layout.cc -------------------------------------------------------------------------------- /src/kernel/all_reduce.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/all_reduce.cc -------------------------------------------------------------------------------- /src/kernel/ascend/all_reduce_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/all_reduce_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/ascend_fingerprint_kernels.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/ascend_fingerprint_kernels.cc -------------------------------------------------------------------------------- /src/kernel/ascend/ascend_kernel_generator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/ascend_kernel_generator.cc -------------------------------------------------------------------------------- /src/kernel/ascend/ascend_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/ascend_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/ascend/customized_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/customized_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/device_memory_manager.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/device_memory_manager.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/device_tensor_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/device_tensor_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/element_binary_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/element_binary_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/element_unary_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/element_unary_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/input_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/input_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/matmul_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/matmul_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/output_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/output_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/reduction_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/reduction_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/ascend/rms_norm_kernel.ascend: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/ascend/rms_norm_kernel.ascend -------------------------------------------------------------------------------- /src/kernel/chunk.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/chunk.cc -------------------------------------------------------------------------------- /src/kernel/common/kernel_factory.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/common/kernel_factory.cc -------------------------------------------------------------------------------- /src/kernel/cpu/cpu_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cpu/cpu_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/cuda/all_reduce_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/all_reduce_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/cuda_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/cuda_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/cuda/customized_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/customized_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/device_tensor_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/device_tensor_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/element_binary_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/element_binary_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/element_unary_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/element_unary_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/input_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/input_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/matmul_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/matmul_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/output_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/output_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/reduction_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/reduction_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cuda/rms_norm_kernel.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cuda/rms_norm_kernel.cu -------------------------------------------------------------------------------- /src/kernel/cudnn/cudnn_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/cudnn/cudnn_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/customized.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/customized.cc -------------------------------------------------------------------------------- /src/kernel/device_memory_manager.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/device_memory_manager.cc -------------------------------------------------------------------------------- /src/kernel/device_memory_manager.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/device_memory_manager.cu -------------------------------------------------------------------------------- /src/kernel/device_tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/device_tensor.cc -------------------------------------------------------------------------------- /src/kernel/element_binary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/element_binary.cc -------------------------------------------------------------------------------- /src/kernel/element_unary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/element_unary.cc -------------------------------------------------------------------------------- /src/kernel/graph.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/graph.cc -------------------------------------------------------------------------------- /src/kernel/input.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/input.cc -------------------------------------------------------------------------------- /src/kernel/maca/all_reduce_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/all_reduce_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/customized_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/customized_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/device_memory_manager.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/device_memory_manager.maca -------------------------------------------------------------------------------- /src/kernel/maca/device_tensor_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/device_tensor_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/element_binary_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/element_binary_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/element_unary_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/element_unary_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/input_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/input_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/maca_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/maca_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/maca/matmul_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/matmul_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/output_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/output_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/reduction_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/reduction_kernel.maca -------------------------------------------------------------------------------- /src/kernel/maca/rms_norm_kernel.maca: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/maca/rms_norm_kernel.maca -------------------------------------------------------------------------------- /src/kernel/matmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/matmul.cc -------------------------------------------------------------------------------- /src/kernel/mkl/mkl_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/mkl/mkl_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/mps/mps_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/mps/mps_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/nki/nki_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/nki/nki_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/operator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/operator.cc -------------------------------------------------------------------------------- /src/kernel/output.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/output.cc -------------------------------------------------------------------------------- /src/kernel/reduction.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/reduction.cc -------------------------------------------------------------------------------- /src/kernel/rms_norm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/rms_norm.cc -------------------------------------------------------------------------------- /src/kernel/runtime.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/runtime.cc -------------------------------------------------------------------------------- /src/kernel/task_register.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/task_register.cc -------------------------------------------------------------------------------- /src/kernel/triton/triton_optimizer.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/triton/triton_optimizer.cc -------------------------------------------------------------------------------- /src/kernel/triton_code_gen.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/kernel/triton_code_gen.cc -------------------------------------------------------------------------------- /src/nki_transpiler/helper_function.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/nki_transpiler/helper_function.cc -------------------------------------------------------------------------------- /src/nki_transpiler/transpile.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/nki_transpiler/transpile.cc -------------------------------------------------------------------------------- /src/nki_transpiler/transpile_tb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/nki_transpiler/transpile_tb.cc -------------------------------------------------------------------------------- /src/nki_transpiler/utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/nki_transpiler/utils.cc -------------------------------------------------------------------------------- /src/search/abstract_expr/abstract_expr.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/abstract_expr/abstract_expr.cc -------------------------------------------------------------------------------- /src/search/abstract_expr/abstract_expr_eval.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/abstract_expr/abstract_expr_eval.cc -------------------------------------------------------------------------------- /src/search/abstract_expr/abstract_expr_for_ops.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/abstract_expr/abstract_expr_for_ops.cc -------------------------------------------------------------------------------- /src/search/abstract_expr/abstract_subexpr/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/abstract_expr/abstract_subexpr/Cargo.toml -------------------------------------------------------------------------------- /src/search/abstract_expr/abstract_subexpr/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/abstract_expr/abstract_subexpr/src/lib.rs -------------------------------------------------------------------------------- /src/search/backend_strategies/ascend_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/ascend_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/cpu_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/cpu_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/cuda_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/cuda_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/maca_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/maca_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/mps_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/mps_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/nki_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/nki_strategy.cc -------------------------------------------------------------------------------- /src/search/backend_strategies/triton_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/backend_strategies/triton_strategy.cc -------------------------------------------------------------------------------- /src/search/common/search_strategy_factory.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/common/search_strategy_factory.cc -------------------------------------------------------------------------------- /src/search/config.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/config.cc -------------------------------------------------------------------------------- /src/search/dim_strategy.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/dim_strategy.cc -------------------------------------------------------------------------------- /src/search/mps_profiler.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/mps_profiler.cc -------------------------------------------------------------------------------- /src/search/op_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/op_utils.cc -------------------------------------------------------------------------------- /src/search/order.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/order.cc -------------------------------------------------------------------------------- /src/search/range_propagation/irange.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/range_propagation/irange.cc -------------------------------------------------------------------------------- /src/search/range_propagation/range.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/range_propagation/range.cc -------------------------------------------------------------------------------- /src/search/range_propagation/tbrange.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/range_propagation/tbrange.cc -------------------------------------------------------------------------------- /src/search/search.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/search.cc -------------------------------------------------------------------------------- /src/search/search_c.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/search_c.cc -------------------------------------------------------------------------------- /src/search/search_context.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/search_context.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/dim_var_assignments.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/dim_var_assignments.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/op_args.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/op_args.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/symbolic_graph.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/symbolic_graph.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/symbolic_map.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/symbolic_map.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/symbolic_op.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/symbolic_op.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/symbolic_tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/symbolic_tensor.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/symbolic_tensor_dim.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/symbolic_tensor_dim.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/tensor_dim_constraint.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/tensor_dim_constraint.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/tensor_dim_constraints.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/tensor_dim_constraints.cc -------------------------------------------------------------------------------- /src/search/symbolic_graph/tensor_dim_expr.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/symbolic_graph/tensor_dim_expr.cc -------------------------------------------------------------------------------- /src/search/verification/formal_verifier.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/verification/formal_verifier.cc -------------------------------------------------------------------------------- /src/search/verification/formal_verifier_equiv/Cargo.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/verification/formal_verifier_equiv/Cargo.toml -------------------------------------------------------------------------------- /src/search/verification/formal_verifier_equiv/src/lib.rs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/verification/formal_verifier_equiv/src/lib.rs -------------------------------------------------------------------------------- /src/search/verification/output_match.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/verification/output_match.cc -------------------------------------------------------------------------------- /src/search/verification/probabilistic_verifier.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/search/verification/probabilistic_verifier.cc -------------------------------------------------------------------------------- /src/threadblock/concat.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/concat.cc -------------------------------------------------------------------------------- /src/threadblock/cuda/element_unary.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/cuda/element_unary.cu -------------------------------------------------------------------------------- /src/threadblock/cuda/input_executor.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/cuda/input_executor.cu -------------------------------------------------------------------------------- /src/threadblock/cuda/matmul.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/cuda/matmul.cu -------------------------------------------------------------------------------- /src/threadblock/element_binary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/element_binary.cc -------------------------------------------------------------------------------- /src/threadblock/element_unary.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/element_unary.cc -------------------------------------------------------------------------------- /src/threadblock/forloop_accum.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/forloop_accum.cc -------------------------------------------------------------------------------- /src/threadblock/graph.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/graph.cc -------------------------------------------------------------------------------- /src/threadblock/input_loader.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/input_loader.cc -------------------------------------------------------------------------------- /src/threadblock/matmul.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/matmul.cc -------------------------------------------------------------------------------- /src/threadblock/operator.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/operator.cc -------------------------------------------------------------------------------- /src/threadblock/output.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/output.cc -------------------------------------------------------------------------------- /src/threadblock/reduction.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/reduction.cc -------------------------------------------------------------------------------- /src/threadblock/rms_norm.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/rms_norm.cc -------------------------------------------------------------------------------- /src/threadblock/smem_tensor.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/threadblock/smem_tensor.cc -------------------------------------------------------------------------------- /src/transpiler/ascend_transpiler_stub.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/ascend_transpiler_stub.cc -------------------------------------------------------------------------------- /src/transpiler/plan_dtensor_memory.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/plan_dtensor_memory.cc -------------------------------------------------------------------------------- /src/transpiler/plan_stensor_memory.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/plan_stensor_memory.cc -------------------------------------------------------------------------------- /src/transpiler/plan_tb_swizzle.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/plan_tb_swizzle.cc -------------------------------------------------------------------------------- /src/transpiler/plan_tb_swizzle_blackwell.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/plan_tb_swizzle_blackwell.cc -------------------------------------------------------------------------------- /src/transpiler/plan_tb_swizzle_hopper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/plan_tb_swizzle_hopper.cc -------------------------------------------------------------------------------- /src/transpiler/resolve_dtensor_meta.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/resolve_dtensor_meta.cc -------------------------------------------------------------------------------- /src/transpiler/resolve_tb_fusion.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/resolve_tb_fusion.cc -------------------------------------------------------------------------------- /src/transpiler/resolve_tensor_layout.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/resolve_tensor_layout.cc -------------------------------------------------------------------------------- /src/transpiler/sched_tb_graph.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/sched_tb_graph.cc -------------------------------------------------------------------------------- /src/transpiler/transpile.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpile.cc -------------------------------------------------------------------------------- /src/transpiler/transpiler_kn.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpiler_kn.cc -------------------------------------------------------------------------------- /src/transpiler/transpiler_stub.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpiler_stub.cc -------------------------------------------------------------------------------- /src/transpiler/transpiler_tb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpiler_tb.cc -------------------------------------------------------------------------------- /src/transpiler/transpiler_tb_blackwell.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpiler_tb_blackwell.cc -------------------------------------------------------------------------------- /src/transpiler/transpiler_tb_hopper.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/transpiler/transpiler_tb_hopper.cc -------------------------------------------------------------------------------- /src/triton_transpiler/transpile.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/triton_transpiler/transpile.cc -------------------------------------------------------------------------------- /src/triton_transpiler/transpile_tb.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/triton_transpiler/transpile_tb.cc -------------------------------------------------------------------------------- /src/utils/containers.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/utils/containers.cc -------------------------------------------------------------------------------- /src/utils/cuda_helper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/utils/cuda_helper.cu -------------------------------------------------------------------------------- /src/utils/json_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/utils/json_utils.cc -------------------------------------------------------------------------------- /src/utils/z3_utils.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/src/utils/z3_utils.cc -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(transpiler) 2 | -------------------------------------------------------------------------------- /tests/ascend/test_triton_integration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ascend/test_triton_integration.py -------------------------------------------------------------------------------- /tests/backend/test_backend_registry.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/backend/test_backend_registry.cc -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/demo.py -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_64644b2e7bbffb94.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_64644b2e7bbffb94.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_647dd41aeaf656b4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_647dd41aeaf656b4.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_6508b4ab6af4f866.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_6508b4ab6af4f866.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659704256b064506.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659704256b064506.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659774a49fe1cbf2.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659774a49fe1cbf2.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659774eb06d5ca86.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_cached_mugraphs_659774eb06d5ca86.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/mirage_search_checkpoint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/mirage_search_checkpoint.json -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/models/configuration_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/models/configuration_qwen2.py -------------------------------------------------------------------------------- /tests/ci-tests/qwen2.5/models/modeling_qwen2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/qwen2.5/models/modeling_qwen2.py -------------------------------------------------------------------------------- /tests/ci-tests/run_python_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/ci-tests/run_python_tests.sh -------------------------------------------------------------------------------- /tests/python/test_tensor_program.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/python/test_tensor_program.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_linear/runtime_kernel_wrapper_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_linear/runtime_kernel_wrapper_sm100.cu -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_linear/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_linear/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_linear/test_matmul_mpk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_linear/test_matmul_mpk.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_linear/test_matmul_splitk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_linear/test_matmul_splitk.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/runtime_kernel_wrapper_sm100.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/runtime_kernel_wrapper_sm100.cu -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/test_gate_topk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/test_gate_topk.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/test_silu_mul.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/test_silu_mul.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/test_w13_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/test_w13_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/test_w2_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/test_w2_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/blackwell/sm100_moe/test_weighted_sum.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/blackwell/sm100_moe/test_weighted_sum.py -------------------------------------------------------------------------------- /tests/runtime_python/cute/hopper/gemm.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/cute/hopper/gemm.cu -------------------------------------------------------------------------------- /tests/runtime_python/cute/hopper/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/cute/hopper/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/cute/hopper/test_matmul_ws_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/cute/hopper/test_matmul_ws_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/cute/hopper/test_matmul_ws_mpk.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/cute/hopper/test_matmul_ws_mpk.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper-moe/runtime_kernel_wrapper_moe_hopper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper-moe/runtime_kernel_wrapper_moe_hopper.cu -------------------------------------------------------------------------------- /tests/runtime_python/hopper-moe/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper-moe/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper-moe/test_moe_w13_linear_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper-moe/test_moe_w13_linear_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper-moe/test_moe_w2_linear_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper-moe/test_moe_w2_linear_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper/runtime_kernel_wrapper_hopper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper/runtime_kernel_wrapper_hopper.cu -------------------------------------------------------------------------------- /tests/runtime_python/hopper/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper/test_linear_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper/test_linear_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper/test_multitoken_paged_attention_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper/test_multitoken_paged_attention_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/hopper/test_norm_linear_hopper.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/hopper/test_norm_linear_hopper.py -------------------------------------------------------------------------------- /tests/runtime_python/runtime_kernel_wrapper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/runtime_kernel_wrapper.cu -------------------------------------------------------------------------------- /tests/runtime_python/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/test_alignment_norm_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_alignment_norm_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/test_argmax.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_argmax.py -------------------------------------------------------------------------------- /tests/runtime_python/test_attention/runtime_kernel_wrapper.cu: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_attention/runtime_kernel_wrapper.cu -------------------------------------------------------------------------------- /tests/runtime_python/test_attention/setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_attention/setup.py -------------------------------------------------------------------------------- /tests/runtime_python/test_attention/test_decoding_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_attention/test_decoding_flash.py -------------------------------------------------------------------------------- /tests/runtime_python/test_attention/test_multitoken_paged_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_attention/test_multitoken_paged_attention.py -------------------------------------------------------------------------------- /tests/runtime_python/test_decoding_flash.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_decoding_flash.py -------------------------------------------------------------------------------- /tests/runtime_python/test_decoding_no_qknorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_decoding_no_qknorm.py -------------------------------------------------------------------------------- /tests/runtime_python/test_decoding_no_sfm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_decoding_no_sfm.py -------------------------------------------------------------------------------- /tests/runtime_python/test_decoding_w_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_decoding_w_norm.py -------------------------------------------------------------------------------- /tests/runtime_python/test_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_embedding.py -------------------------------------------------------------------------------- /tests/runtime_python/test_extend_w_norm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_extend_w_norm.py -------------------------------------------------------------------------------- /tests/runtime_python/test_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/test_multitoken_paged_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_multitoken_paged_attention.py -------------------------------------------------------------------------------- /tests/runtime_python/test_norm_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_norm_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/test_paged_attention.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_paged_attention.py -------------------------------------------------------------------------------- /tests/runtime_python/test_prompt_lookup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_prompt_lookup.py -------------------------------------------------------------------------------- /tests/runtime_python/test_rmsnorm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_rmsnorm.py -------------------------------------------------------------------------------- /tests/runtime_python/test_rotary_embedding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_rotary_embedding.py -------------------------------------------------------------------------------- /tests/runtime_python/test_silu_mul_linear.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_silu_mul_linear.py -------------------------------------------------------------------------------- /tests/runtime_python/test_verify.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/runtime_python/test_verify.py -------------------------------------------------------------------------------- /tests/transpiler/CMakeLists.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/CMakeLists.txt -------------------------------------------------------------------------------- /tests/transpiler/all_testcases.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/all_testcases.h -------------------------------------------------------------------------------- /tests/transpiler/config.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/config.h -------------------------------------------------------------------------------- /tests/transpiler/lib.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/lib.h -------------------------------------------------------------------------------- /tests/transpiler/test_cuda_transpiler.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/test_cuda_transpiler.cc -------------------------------------------------------------------------------- /tests/transpiler/testcases/kernel/elemwise.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/kernel/elemwise.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/kernel/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/kernel/matmul.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/kernel/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/kernel/reduction.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/threadblock/elemwise.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/threadblock/elemwise.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/threadblock/elemwise_bcast.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/threadblock/elemwise_bcast.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/threadblock/io.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/threadblock/io.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/threadblock/matmul.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/threadblock/matmul.h -------------------------------------------------------------------------------- /tests/transpiler/testcases/threadblock/reduction.h: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenxingqiang/YiRage/HEAD/tests/transpiler/testcases/threadblock/reduction.h --------------------------------------------------------------------------------