├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── documentation_request.md │ ├── feature_request.md │ └── submit_question.md └── workflows │ ├── labeler.yml │ └── stale.yml ├── .gitignore ├── CHANGELOG.md ├── CMakeLists.txt ├── LICENSE ├── MUSA.cmake ├── README.md ├── README_CN.md ├── cmake ├── CTestTestfile.configure.cmake ├── googletest.cmake └── version_extended.h.in ├── examples ├── 00_basic_gemm │ ├── CMakeLists.txt │ └── basic_gemm.mu ├── 01_quyuan_gemm_with_collective_builder │ ├── CMakeLists.txt │ └── collective_builder.mu ├── 02_mp31_fp8_gemm_with_collective_builder │ ├── CMakeLists.txt │ └── mp31_fp8_gemm.mu ├── 03_mp31_fp8_scaling_gemm │ ├── CMakeLists.txt │ ├── README.md │ ├── fp8_scaling_gemm.mu │ └── reference │ │ └── host │ │ └── gemm_with_groupwise_scaling.hpp ├── CMakeLists.txt └── common │ ├── gather_tensor.hpp │ └── helper.h ├── experimental ├── CMakeLists.txt ├── README.md └── mp31_flash_attention_fwd │ ├── CMakeLists.txt │ ├── flash_attention.mu │ ├── flash_attn_fwd.hpp │ ├── fwd_params.hpp │ ├── kernel_traits.hpp │ ├── online_softmax.hpp │ └── reference_attention.hpp ├── include ├── mute │ ├── address_space.hpp │ ├── algorithm │ │ ├── axpby.hpp │ │ ├── clear.hpp │ │ ├── cooperative_copy.hpp │ │ ├── cooperative_gemm.hpp │ │ ├── copy.hpp │ │ ├── fill.hpp │ │ ├── functional.hpp │ │ ├── gemm.hpp │ │ ├── prefer.hpp │ │ ├── prefetch.hpp │ │ ├── tensor_algorithms.hpp │ │ └── tuple_algorithms.hpp │ ├── arch │ │ ├── copy.hpp │ │ ├── copy_mp31.hpp │ │ ├── copy_mp31_desc.hpp │ │ ├── copy_mp31_tme.hpp │ │ ├── mma.hpp │ │ ├── mma_mp22.hpp │ │ ├── mma_mp31.hpp │ │ ├── mma_mp31_desc.hpp │ │ ├── mma_mp31_sqmma.hpp │ │ ├── tce_desc.hpp │ │ └── util.hpp │ ├── atom │ │ ├── copy_atom.hpp │ │ ├── copy_traits.hpp │ │ ├── copy_traits_mp31.hpp │ │ ├── copy_traits_mp31_tme.hpp │ │ ├── copy_traits_mp31_tme_swizzle.hpp │ │ ├── mma_atom.hpp │ │ ├── mma_traits.hpp │ │ ├── mma_traits_mp22.hpp │ │ ├── mma_traits_mp31.hpp │ │ └── mma_traits_mp31_sqmma.hpp │ ├── 
config.hpp │ ├── container │ │ ├── alignment.hpp │ │ ├── array.hpp │ │ ├── array_aligned.hpp │ │ ├── array_subbyte.hpp │ │ ├── bit_field.hpp │ │ ├── musa_types.hpp │ │ ├── tuple.hpp │ │ └── type_list.hpp │ ├── int_tuple.hpp │ ├── layout.hpp │ ├── layout_composed.hpp │ ├── numeric │ │ ├── arithmetic_tuple.hpp │ │ ├── bfloat.hpp │ │ ├── complex.hpp │ │ ├── float8.hpp │ │ ├── half.hpp │ │ ├── int.hpp │ │ ├── integer_sequence.hpp │ │ ├── integer_subbyte.hpp │ │ ├── integral_constant.hpp │ │ ├── integral_ratio.hpp │ │ ├── math.hpp │ │ ├── numeric_types.hpp │ │ ├── real.hpp │ │ ├── tfloat.hpp │ │ └── uint128.hpp │ ├── pointer.hpp │ ├── pointer_base.hpp │ ├── pointer_flagged.hpp │ ├── pointer_sparse.hpp │ ├── pointer_swizzle.hpp │ ├── stride.hpp │ ├── swizzle.hpp │ ├── swizzle_layout.hpp │ ├── tensor.hpp │ ├── tensor_predicate.hpp │ ├── tensor_zip.hpp │ ├── tile.hpp │ ├── underscore.hpp │ └── util │ │ ├── debug.hpp │ │ ├── print.hpp │ │ └── type_traits.hpp └── mutlass │ ├── aligned_buffer.h │ ├── arch │ ├── arch.h │ ├── barrier.hpp │ ├── cache_operation.h │ ├── memory.h │ ├── mma.h │ └── simd.h │ ├── array.h │ ├── array_planar_complex.h │ ├── array_subbyte.h │ ├── bfloat16.h │ ├── blas3.h │ ├── blas3_types.h │ ├── complex.h │ ├── constants.h │ ├── conv │ ├── conv2d_problem_size.h │ ├── conv3d_problem_size.h │ └── convolution.h │ ├── coord.h │ ├── core_io.h │ ├── detail │ ├── collective.hpp │ ├── dependent_false.hpp │ ├── helper_macros.hpp │ ├── layout.hpp │ ├── mainloop_fusion_helper_scale_factor.hpp │ └── mma.hpp │ ├── device_kernel.h │ ├── epilogue │ ├── collective │ │ ├── builders │ │ │ ├── mp22_builder.inl │ │ │ ├── mp31_builder.inl │ │ │ └── mp31_builder_common.inl │ │ ├── collective_builder.hpp │ │ ├── collective_epilogue.hpp │ │ ├── default_epilogue.hpp │ │ ├── detail.hpp │ │ ├── epilogue_tensor_broadcast.hpp │ │ ├── mp22_epilogue_evt.hpp │ │ ├── mp22_epilogue_vectorized.hpp │ │ └── mp31_epilogue_tme.hpp │ ├── dispatch_policy.hpp │ ├── fusion │ │ ├── callbacks.hpp 
│ │ └── operations.hpp │ └── thread │ │ ├── activation.h │ │ ├── linear_combination.h │ │ ├── linear_combination_params.h │ │ └── scale_type.h │ ├── fast_math.h │ ├── float8.h │ ├── floating_point_mtrtc.h │ ├── functional.h │ ├── gemm │ ├── collective │ │ ├── builders │ │ │ ├── common.inl │ │ │ ├── mp22_gemm_builder.inl │ │ │ ├── mp31_sqmma_builder.inl │ │ │ └── mp31_sqmma_common.inl │ │ ├── collective_builder.hpp │ │ ├── collective_mma.hpp │ │ ├── mp22_mma_twostage.hpp │ │ ├── mp31_mma_tme_sqmma_ss.hpp │ │ ├── mp31_mma_tme_sqmma_ss_scaling.hpp │ │ └── scaling_accumulation.hpp │ ├── device │ │ └── gemm_universal_adapter.h │ ├── dispatch_policy.hpp │ ├── gemm.h │ ├── gemm_enumerated_types.h │ ├── kernel │ │ ├── gemm_universal.hpp │ │ ├── mp22_gemm.hpp │ │ ├── mp31_gemm_tme.hpp │ │ ├── static_tile_scheduler.hpp │ │ ├── tile_scheduler.hpp │ │ └── tile_scheduler_params.hpp │ └── threadblock │ │ └── threadblock_swizzle.h │ ├── gemm_coord.h │ ├── gemm_coord.hpp │ ├── half.h │ ├── integer_subbyte.h │ ├── kernel_hardware_info.h │ ├── kernel_hardware_info.hpp │ ├── kernel_launch.h │ ├── layout │ ├── layout.h │ ├── matrix.h │ ├── permute.h │ ├── pitch_linear.h │ ├── tensor.h │ └── vector.h │ ├── matrix.h │ ├── matrix_coord.h │ ├── matrix_shape.h │ ├── musa_host_adapter.hpp │ ├── mutlass.h │ ├── numeric_conversion.h │ ├── numeric_size.h │ ├── numeric_types.h │ ├── pipeline │ ├── mp31_pipeline.hpp │ └── pipeline.hpp │ ├── pitch_linear_coord.h │ ├── platform │ └── platform.h │ ├── predicate_vector.h │ ├── quaternion.h │ ├── real.h │ ├── relatively_equal.h │ ├── subbyte_reference.h │ ├── tensor_coord.h │ ├── tensor_ref.h │ ├── tensor_ref_planar_complex.h │ ├── tensor_view.h │ ├── tensor_view_planar_complex.h │ ├── tfloat32.h │ ├── trace.h │ ├── transform │ └── thread │ │ ├── transpose.h │ │ └── unary_op.h │ ├── uint128.h │ ├── version.h │ └── workspace.h ├── media ├── docs │ └── quickstart.md └── images │ └── mutlass-0.2.0-gemm-performance.png ├── python ├── mutlass_library │ 
├── __init__.py │ ├── gemm_operation.py │ ├── generator.py │ ├── library.py │ └── manifest.py └── setup_library.py ├── test ├── CMakeLists.txt └── unit │ ├── CMakeLists.txt │ ├── common │ ├── filter_architecture.cpp │ ├── mma_inst_rr_test.hpp │ └── mutlass_unit_test.h │ ├── gemm │ ├── CMakeLists.txt │ └── device │ │ ├── CMakeLists.txt │ │ ├── default_gemm_configuration.hpp │ │ ├── gemm_testbed_3x.hpp │ │ ├── mp22_gemm_f32_f32_f32_simt.mu │ │ ├── mp22_gemm_tensorop.mu │ │ ├── mp31_gemm_sqmma_ss.mu │ │ └── testbed_utils.h │ ├── mute │ ├── CMakeLists.txt │ ├── core │ │ ├── CMakeLists.txt │ │ ├── array_subbyte.cpp │ │ ├── bitfield.cpp │ │ ├── coalesce.cpp │ │ ├── compact_xmajor.cpp │ │ ├── compare.cpp │ │ ├── complement.cpp │ │ ├── composition.cpp │ │ ├── constants.cpp │ │ ├── core_unit.cpp │ │ ├── int_tuple.cpp │ │ ├── inverse_left.cpp │ │ ├── inverse_right.cpp │ │ ├── logical_divide.cpp │ │ ├── logical_product.cpp │ │ ├── math.cpp │ │ ├── mixedbits.cpp │ │ ├── nullspace.cpp │ │ ├── pointer.cpp │ │ ├── reverse.cpp │ │ ├── transform.cpp │ │ └── tuple.cpp │ ├── mma │ │ ├── CMakeLists.txt │ │ ├── mp22_mma.mu │ │ └── mp31_mma.mu │ ├── mp31 │ │ ├── CMakeLists.txt │ │ ├── mp31_robust_buffer_access.mu │ │ └── mp31_sqmma.mu │ └── quyuan │ │ ├── CMakeLists.txt │ │ ├── mp22_mma.mu │ │ └── vectorization_auto.mu │ └── test_unit.cpp └── tools ├── CMakeLists.txt ├── library ├── CMakeLists.txt ├── include │ └── mutlass │ │ └── library │ │ ├── arch_mappings.h │ │ ├── descriptions.h │ │ ├── handle.h │ │ ├── library.h │ │ ├── manifest.h │ │ ├── operation_table.h │ │ ├── singleton.h │ │ ├── types.h │ │ └── util.h └── src │ ├── gemm_operation_3x.hpp │ ├── handle.mu │ ├── library_internal.h │ ├── manifest.cpp │ ├── operation_table.mu │ ├── reference │ ├── gemm_fp32out.mu │ ├── gemm_fp_other.mu │ ├── gemm_int8_canonical.mu │ ├── gemm_reference_operation.h │ ├── gemm_s8_s8_s32.mu │ ├── gemm_u8_u8_s32.mu │ └── initialize_reference_operations.mu │ ├── singleton.mu │ └── util.mu ├── profiler 
├── CMakeLists.txt ├── include │ └── mutlass │ │ └── profiler │ │ ├── debug.h │ │ ├── device_allocation.h │ │ ├── device_context.h │ │ ├── enumerated_types.h │ │ ├── gemm_operation_profiler.h │ │ ├── gpu_timer.h │ │ ├── mutlass_profiler.h │ │ ├── operation_profiler.h │ │ ├── options.h │ │ ├── performance_report.h │ │ ├── performance_result.h │ │ └── problem_space.h └── src │ ├── device_allocation.mu │ ├── device_context.mu │ ├── enumerated_types.cpp │ ├── gemm_operation_profiler.mu │ ├── gpu_timer.cpp │ ├── main.cpp │ ├── mutlass_profiler.mu │ ├── operation_profiler.mu │ ├── options.mu │ ├── performance_report.cpp │ ├── performance_result.mu │ └── problem_space.cpp └── util ├── CMakeLists.txt └── include └── mutlass └── util ├── GPU_Clock.hpp ├── command_line.h ├── debug.h ├── device_dump.h ├── device_groupnorm.h ├── device_layernorm.h ├── device_memory.h ├── device_nchw_to_nhwc.h ├── device_nhwc_padding.h ├── device_nhwc_pooling.h ├── device_nhwc_to_nchw.h ├── device_rmsnorm.h ├── device_utils.h ├── distribution.h ├── exceptions.h ├── gett_commandline.hpp ├── helper_cuda.hpp ├── host_reorder.h ├── host_tensor.h ├── host_tensor_planar_complex.h ├── host_uncompress.h ├── index_sequence.h ├── packed_stride.hpp ├── print_error.hpp ├── reference ├── detail │ ├── inner_product.h │ └── linear_to_coordinate.h ├── device │ ├── convolution.h │ ├── gemm.h │ ├── gemm_complex.h │ ├── gemm_planar_complex.h │ ├── gett.hpp │ ├── kernel │ │ ├── gemm.h │ │ ├── tensor_elementwise.h │ │ └── tensor_foreach.h │ ├── rank_2k_complex.h │ ├── tensor_compare.h │ ├── tensor_fill.h │ ├── tensor_foreach.h │ ├── tensor_reduce.h │ ├── tensor_relu.h │ └── thread │ │ └── gemm.h └── host │ ├── convolution.h │ ├── error_metrics.h │ ├── gemm.h │ ├── gemm_complex.h │ ├── gemm_planar_complex.h │ ├── gett.hpp │ ├── rank_2k.h │ ├── rank_2k_complex.h │ ├── rank_k_complex.h │ ├── symm.h │ ├── symm_complex.h │ ├── tensor_compare.h │ ├── tensor_compare.hpp │ ├── tensor_copy.h │ ├── tensor_elementwise.h │ ├── 
tensor_fill.h │ ├── tensor_fill.hpp │ ├── tensor_foreach.h │ ├── tensor_norm.h │ ├── tensor_reduce.h │ ├── tensor_reduce.hpp │ ├── trmm.h │ └── trmm_complex.h ├── tensor_view_io.h └── type_traits.h /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a bug report to help us improve MUTLASS 4 | title: "[BUG]" 5 | labels: "? - Needs Triage, bug" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Steps/Code to reproduce bug** 14 | Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports to craft a minimal bug report. This helps us reproduce the issue you're having and resolve the issue more quickly. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Environment details (please complete the following information):** 20 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)] 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/documentation_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Documentation request 3 | about: Report incorrect or needed documentation to improve MUTLASS 4 | title: "[DOC]" 5 | labels: "? - Needs Triage, documentation" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Report incorrect documentation 11 | 12 | **Location of incorrect documentation** 13 | Provide links and line numbers if applicable. 14 | 15 | **Describe the problems or issues found in the documentation** 16 | A clear and concise description of what you found to be incorrect. 
17 | 18 | **Steps taken to verify documentation is incorrect** 19 | List any steps you have taken: 20 | 21 | **Suggested fix for documentation** 22 | Detail proposed changes to fix the documentation if you have any. 23 | 24 | --- 25 | 26 | ## Report needed documentation 27 | 28 | **Report needed documentation** 29 | A clear and concise description of what documentation you believe is needed and why. 30 | 31 | **Describe the documentation you'd like** 32 | A clear and concise description of what you want to happen. 33 | 34 | **Steps taken to search for needed documentation** 35 | List any steps you have taken: 36 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for MUTLASS 4 | title: "[FEA]" 5 | labels: "? - Needs Triage, feature request" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I wish I could use MUTLASS to do [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context, code examples, or references to existing implementations about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/submit_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Submit question 3 | about: Ask a general question about MUTLASS 4 | title: "[QST]" 5 | labels: "? 
- Needs Triage, question" 6 | assignees: '' 7 | 8 | --- 9 | 10 | **What is your question?** 11 | -------------------------------------------------------------------------------- /.github/workflows/labeler.yml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Labeler" 2 | on: 3 | - pull_request_target 4 | 5 | jobs: 6 | triage: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/labeler@main 10 | with: 11 | repo-token: "${{ secrets.GITHUB_TOKEN }}" 12 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Mark inactive issues and pull requests 2 | 3 | on: 4 | schedule: 5 | - cron: "0 * * * *" 6 | 7 | jobs: 8 | mark-inactive-30d: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Mark 30 day inactive issues and pull requests 12 | uses: actions/stale@v3 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | stale-issue-message: > 16 | This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days. 17 | Please close this issue if no further response or action is needed. 18 | Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. 19 | This issue will be labeled `inactive-90d` if there is no activity in the next 60 days. 20 | stale-issue-label: "inactive-30d" 21 | exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" 22 | days-before-issue-stale: 30 23 | days-before-issue-close: -1 24 | stale-pr-message: > 25 | This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days. 26 | Please close this PR if it is no longer required. 27 | Otherwise, please respond with a comment indicating any updates. 28 | This PR will be labeled `inactive-90d` if there is no activity in the next 60 days. 
29 | stale-pr-label: "inactive-30d" 30 | exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" 31 | days-before-pr-stale: 30 32 | days-before-pr-close: -1 33 | operations-per-run: 50 34 | mark-inactive-90d: 35 | runs-on: ubuntu-latest 36 | steps: 37 | - name: Mark 90 day inactive issues and pull requests 38 | uses: actions/stale@v3 39 | with: 40 | repo-token: ${{ secrets.GITHUB_TOKEN }} 41 | stale-issue-message: > 42 | This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days. 43 | Please close this issue if no further response or action is needed. 44 | Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. 45 | stale-issue-label: "inactive-90d" 46 | exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" 47 | days-before-issue-stale: 90 48 | days-before-issue-close: -1 49 | stale-pr-message: > 50 | This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days. 51 | Please close this PR if it is no longer required. 52 | Otherwise, please respond with a comment indicating any updates. 
53 | stale-pr-label: "inactive-90d" 54 | exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" 55 | days-before-pr-stale: 90 56 | days-before-pr-close: -1 57 | operations-per-run: 50 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # PyCache files 2 | __pycache__/ 3 | mutlass_library.egg-info/ 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # MooreThreads MUTLASS Changelog 2 | 3 | ## [0.2.0](https://github.com/MooreThreads/mutlass/tags/v0.2.0) (2025-02-26) 4 | 5 | - MP31 Features: 6 | - Squad-level MMA(SQMMA) and Warp-level MMA primitives with rich data types (TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8 etc.). 7 | - Tensor Memory Engine(TME) and [RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu) primitives. 8 | - New GEMM mainloop and epilogue targeting MP31 architecture that achieve high performance with TME and SQMMA. 9 | - New tile scheduler to support CTA swizzle for MP31 kernels. 10 | - New experimental directory housing the implementations that are not yet stable and may have significant changes in the future. 11 | - [Prototype of Flash Attention Forward](./experimental/mp31_flash_attention_fwd/) targeting MP31 architecture with TME, RobustBufferAccess and SQMMA. 12 | - New [FP8 GEMM with groupwise scaling](./examples/03_mp31_fp8_scaling_gemm/). 13 | - Upgrade the backend from CUTLASS/CuTe 3.5.0 to CUTLASS/CuTe 3.6.0. 
14 | 15 | 16 | ## [0.1.1](https://github.com/MooreThreads/mutlass/tags/v0.1.1) (2024-09-30) 17 | 18 | - [MuTe](./include/mute), a core library and backend adapted from CUTLASS CuTe 19 | - Quyuan Features 20 | - MMA primitives: TensorFloat32, BFloat16, Float16, INT8 21 | - FMA/MMA GEMM Kernels targeting the Quyuan architecture 22 | - Note: this is a beta release. Further updates to MUTLASS will include performance improvements, feature enablement, and possible breaking changes to the API 23 | - MUTLASS Profiler, Library, and Utilities 24 | - Two examples that demonstrate the usage of the [low-level API](./examples/00_basic_gemm) and the [collective builders](./examples/01_quyuan_gemm_with_collective_builder) to build GEMM kernels 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [中文版](./README_CN.md) 2 | 3 | # MUTLASS 0.2.0 4 | 5 | _MUTLASS 0.2.0 - February 2025_ 6 | 7 | MUTLASS(MUSA Templates for Linear Algebra Subroutines) is a header-only library for implementing high-performance matrix-matrix multiplication (GEMM) within MUSA(**M**eta-computing **U**nified **S**ystem **A**rchitecture). It incorporates strategies for hierarchical decomposition and data movement similar to those used to implement muDNN. 8 | 9 | See the [Quick Start Guide](./media/docs/quickstart.md) to get started quickly. 10 | 11 | Note: MUTLASS uses the CuTe library, introduced in CUTLASS 3.x, as the backend, and thus is incompatible with most implementations of CUTLASS 2.x. 12 | 13 | # What's New in MUTLASS 0.2.0 14 | 15 | MUTLASS 0.2.0 is an update to MUTLASS adding: 16 | 17 | - MP31 Features: 18 | - Squad-level MMA(SQMMA) and Warp-level MMA primitives with rich data types (TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8 etc.). 
19 | - Tensor Memory Engine(TME) and [RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu) primitives. 20 | - New GEMM mainloop and epilogue targeting MP31 architecture that achieve high performance with TME and SQMMA. 21 | - New tile scheduler to support CTA swizzle for MP31 kernels. 22 | - New experimental directory housing the implementations that are not yet stable and may have significant changes in the future. 23 | - [Prototype of Flash Attention Forward](./experimental/mp31_flash_attention_fwd/) targeting MP31 architecture with TME, RobustBufferAccess and SQMMA. 24 | - New [FP8 GEMM with groupwise scaling](./examples/03_mp31_fp8_scaling_gemm/). 25 | - Upgrade the backend from CUTLASS/CuTe 3.5.0 to CUTLASS/CuTe 3.6.0. 26 | 27 | 28 | Minimum requirements: 29 | 30 | - Architecture: Quyuan 31 | 32 | - Compiler: MCC 4.0.0 33 | 34 | - MUSA Toolkit version: 4.0.0 35 | 36 | 37 | **See the [CHANGELOG](./CHANGELOG.md) for a detailed listing of releases and updates.** 38 | 39 | # Performance 40 | 41 | 42 |

43 | 44 | The above figure shows the relative performance of the tensorop GEMM compared with muDNN. The performance of the TF32 data type will be further optimized in the next release. 45 | 46 | # Documentation 47 | 48 | - [Quick Start Guide](./media/docs/quickstart.md) - build and run MUTLASS 49 | 50 | # Building MUTLASS 51 | 52 | MUTLASS is a header-only template library and does not need to be built to be used by other projects. Client applications should target MUTLASS's `include/` directory in their include paths. 53 | 54 | MUTLASS unit tests, examples, and utilities can be built with CMake. The minimum version of CMake is given in the [QuickStart guide](./media/docs/quickstart.md). 55 | 56 | Create a build directory within the MUTLASS project, then run CMake. By default MUTLASS will build kernels for MUSA architecture versions 2.2 and 3.1. 57 | -------------------------------------------------------------------------------- /README_CN.md: -------------------------------------------------------------------------------- 1 | [English](./README.md) 2 | 3 | # MUTLASS 0.2.0 4 | 5 | *MUTLASS 0.2.0 - 2025年2月* 6 | 7 | MUTLASS(MUSA Templates for Linear Algebra Subroutines)是用于在MUSA(**M**eta-computing **U**nified **S**ystem **A**rchitecture)上实现高性能矩阵乘法运算的纯头文件库,采用了与实现muDNN类似的分层分解和数据搬运策略。 8 | 9 | 参考[快速入门指南](./media/docs/quickstart.md)来快速入门使用。 10 | 11 | 注意:MUTLASS使用了在CUTLASS 3.x引入的CuTe库做为后端,因此与大多数CUTLASS 2.x的实现并不兼容。 12 | 13 | # MUTLASS 0.2.0新增 14 | 15 | MUTLASS 0.2.0是MUTLASS的一次版本更新,添加了: 16 | 17 | - MP31特性: 18 | 19 | - 支持丰富数据类型的Squad-level MMA(SQMMA)和Warp-level MMA原语,包含TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8等多种精度。 20 | 21 | - Tensor Memory Engine(TME)及[RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu)原语。 22 | 23 | - 新适用于MP31架构的矩阵乘法核心循环及后处理实现,基于TME和SQMMA实现高性能的矩阵乘法计算。 24 | 25 | - 新适用于MP31架构算子的Tile调度器,用于实现更好的线程组调度。 26 | 27 | - 新的*experimental*目录,用于存放尚未稳定或可能在未来有重大改变的代码实现。 28 | 29 | - 
针对MP31架构的[FlashAttention前向原型](./experimental/mp31_flash_attention_fwd/),运用了TME、RobustBufferAccess和SQMMA等新特性。 30 | 31 | - 新的[Groupwise Scaling FP8矩阵乘法](./examples/03_mp31_fp8_scaling_gemm/)。 32 | 33 | - 将后端库从CUTLASS/CuTe 3.5.0升级到CUTLASS/CuTe 3.6.0。 34 | 35 | 36 | 最低要求: 37 | 38 | - 架构:曲院 39 | 40 | - 编译器:MCC 4.0.0 41 | 42 | - MUSA工具包:4.0.0 43 | 44 | 45 | **参考[变更日志](./CHANGELOG.md)获取更详细的发布及更新信息。** 46 | 47 | # 性能 48 | 49 | # 文档 50 | 51 | - [快速入门指南](./media/docs/quickstart.md) - 编译和运行MUTLASS 52 | 53 | 54 | # 编译MUTLASS 55 | 56 | MUTLASS是一个模板纯头文件库,因此在被其他项目使用时不需要单独编译。用户应用将MUTLASS的`include/`目录指定到项目头文件路径中即可使用。 57 | 58 | MUTLASS的单元测试、实例和工具都使用CMake进行编译构建。编译构建所需要的最低CMake版本在[快速入门指南](./media/docs/quickstart.md)中给出。 59 | 60 | 在MUTLASS中创建一个单独的build目录,并执行CMake即可编译。默认情况下,MUTLASS会编译MUSA架构2.2和3.1的实现。 61 | -------------------------------------------------------------------------------- /cmake/CTestTestfile.configure.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 
18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | # Generated file 31 | 32 | set(TEST_SETS_SUPPORTED @TEST_SETS_SUPPORTED@) 33 | 34 | set(TEST_EXE_PATH @TEST_EXE_PATH@) 35 | set(TEST_EXE_WORKING_DIRECTORY @TEST_EXE_WORKING_DIRECTORY@) 36 | set(MUTLASS_USE_EXTENDED_ADD_TEST_FORMAT @TEST_USE_EXTENDED_FORMAT@) 37 | 38 | if (DEFINED ENV{MUTLASS_TEST_EXECUTION_ENVIRONMENT}) 39 | set(_MUTLASS_TEST_EXECUTION_ENVIRONMENT $ENV{MUTLASS_TEST_EXECUTION_ENVIRONMENT}) 40 | else() 41 | set(_MUTLASS_TEST_EXECUTION_ENVIRONMENT @MUTLASS_TEST_EXECUTION_ENVIRONMENT@) 42 | endif() 43 | 44 | @_INLINE_PER_TEST_CODE@ 45 | -------------------------------------------------------------------------------- /cmake/googletest.cmake: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. 
Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | 30 | include(FetchContent) 31 | 32 | set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against") 33 | 34 | if(GOOGLETEST_DIR) 35 | set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override") 36 | endif() 37 | 38 | set(GTEST_REPOSITORY "https://github.com/google/googletest.git" CACHE STRING "GoogleTest repo to fetch") 39 | 40 | FetchContent_Declare( 41 | googletest 42 | GIT_REPOSITORY ${GTEST_REPOSITORY} 43 | GIT_TAG v1.13.0 44 | ) 45 | 46 | FetchContent_GetProperties(googletest) 47 | 48 | if(NOT googletest_POPULATED) 49 | FetchContent_Populate(googletest) 50 | if (MSVC) 51 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) 52 | endif() 53 | add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) 54 | endif() 55 | -------------------------------------------------------------------------------- /cmake/version_extended.h.in: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #define MUTLASS_BUILD @MUTLASS_VERSION_BUILD@ 35 | #define MUTLASS_REVISION "@MUTLASS_REVISION@" 36 | -------------------------------------------------------------------------------- /examples/00_basic_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | mutlass_example_add_executable( 30 | 00_basic_gemm 31 | basic_gemm.mu 32 | ) 33 | -------------------------------------------------------------------------------- /examples/01_quyuan_gemm_with_collective_builder/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. 
Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | mutlass_example_add_executable( 30 | 01_collective_builder 31 | collective_builder.mu 32 | ) 33 | -------------------------------------------------------------------------------- /examples/02_mp31_fp8_gemm_with_collective_builder/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | mutlass_example_add_executable( 30 | 02_mp31_fp8_gemm 31 | mp31_fp8_gemm.mu 32 | ) 33 | -------------------------------------------------------------------------------- /examples/03_mp31_fp8_scaling_gemm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | mutlass_example_add_executable( 30 | 03_fp8_scaling_gemm 31 | fp8_scaling_gemm.mu 32 | ) 33 | -------------------------------------------------------------------------------- /examples/03_mp31_fp8_scaling_gemm/README.md: -------------------------------------------------------------------------------- 1 | # FP8 Groupwise-Scaling GEMM 2 | 3 | ## Double Accumulator algorithm 4 | 5 | In the double accumulator algorithm, we maintain two accumulators. One is used to store the final result, and the other is used for the scaled accumulation of groups. 6 | 7 | Due to the limitation of the total number of registers, compared with the standard FP8 GEMM, the tile shape in this method will be halved. 8 | 9 | 10 | ## Iterative algorithm 11 | 12 | In the iterative algorithm, we continuously maintain and update the group scaling coefficients and apply them to the same accumulator. Therefore, we can use the same tile shape as the standard FP8 GEMM to further improve performance. 13 | 14 | It can be simply described by the following formula. 15 | 16 | $S_0 \cdot A_0B_0 + S_1\cdot A_1B_1+S_2\cdot A_2B_2=((\frac{S_0}{S_1}\cdot A_0B_0 + A_1B_1)\cdot \frac{S_1}{S_2} +A_2B_2)\cdot S_2$ 17 | 18 | We will release the implementation of this algorithm in the future. 
19 | 20 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | set(MUTLASS_EXAMPLES_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common) 31 | 32 | add_custom_target(mutlass_examples) 33 | add_custom_target(test_examples) 34 | # Builds example NAME from the unparsed source arguments, links it against MUTLASS, mutlass_tools_util_includes, musart and musa_driver, and registers test_examples_${NAME}. 35 | function(mutlass_example_add_executable NAME) 36 | 37 | set(options) 38 | set(oneValueArgs DISABLE_TESTS) 39 | set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS) 40 | cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 41 | 42 | if (NOT DEFINED __DISABLE_TESTS) 43 | set(__DISABLE_TESTS OFF) 44 | endif() 45 | 46 | mutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS}) 47 | 48 | add_dependencies(mutlass_examples ${NAME}) 49 | 50 | target_link_libraries( 51 | ${NAME} 52 | PRIVATE 53 | MUTLASS 54 | mutlass_tools_util_includes 55 | musart 56 | musa_driver 57 | ) 58 | 59 | target_include_directories( 60 | ${NAME} 61 | PRIVATE 62 | ${MUTLASS_EXAMPLES_COMMON_SOURCE_DIR} 63 | ${MUTLASS_EXAMPLES_UTILS_DIR} 64 | ) 65 | 66 | install( 67 | TARGETS ${NAME} 68 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 69 | ) 70 | 71 | mutlass_add_executable_tests( 72 | test_examples_${NAME} ${NAME} 73 | DEPENDS ${__DEPENDS} 74 | DEPENDEES test_examples ${__DEPENDEES} 75 | TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS} 76 | DISABLE_EXECUTABLE_INSTALL_RULE 77 | DISABLE_TESTS ${__DISABLE_TESTS} 78 | ) 79 | 80 | endfunction() 81 | # Every entry must name an existing subdirectory of examples/; add_subdirectory fails at configure time otherwise. 82 | foreach(EXAMPLE 83 | 00_basic_gemm 84 | 01_quyuan_gemm_with_collective_builder 85 | 
02_mp31_fp8_gemm_with_collective_builder 86 | 03_mp31_fp8_scaling_gemm 87 | ) 88 | 89 | add_subdirectory(${EXAMPLE}) 90 | 91 | endforeach() 92 | -------------------------------------------------------------------------------- /experimental/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | set(MUTLASS_EXPERIMENTAL_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common) 30 | 31 | add_custom_target(mutlass_experimental) 32 | add_custom_target(test_experimental) 33 | 34 | foreach(EXPERIMENTAL 35 | mp31_flash_attention_fwd 36 | ) 37 | 38 | add_subdirectory(${EXPERIMENTAL}) 39 | 40 | endforeach() 41 | -------------------------------------------------------------------------------- /experimental/README.md: -------------------------------------------------------------------------------- 1 | # Experimental 2 | 3 | This directory is intended for the development of "experimental" implementations. These kernels or functions are not well-tested and do not have a stable API. Thus, they are not released as a part of the examples. We also do not recommend relying on the implementations here in formal products. 4 | 5 | When a kernel or function becomes mature, it will be moved to the examples for demonstration, or integrated into MUTLASS. 6 | -------------------------------------------------------------------------------- /experimental/mp31_flash_attention_fwd/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
2 | # SPDX-License-Identifier: BSD-3-Clause 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | function(mutlass_experimental_mp31_fa_fwd_add_executable NAME) 30 | set(options) 31 | set(oneValueArgs DISABLE_TESTS) 32 | set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS) 33 | 34 | cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 35 | 36 | if (NOT DEFINED __DISABLE_TESTS) 37 | set(__DISABLE_TESTS OFF) 38 | endif() 39 | 40 | # add -fmusa-flush-denormals-to-zero to enable fast exp2 instruction 41 | list(APPEND MUTLASS_MUSA_MCC_FLAGS -fmusa-flush-denormals-to-zero) 42 | 43 | mutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS}) 44 | 45 | add_dependencies(mutlass_experimental ${NAME}) 46 | 47 | target_link_libraries( 48 | ${NAME} 49 | PRIVATE 50 | MUTLASS 51 | mutlass_tools_util_includes 52 | musa_driver 53 | musart 54 | ) 55 | 56 | target_include_directories( 57 | ${NAME} 58 | PRIVATE 59 | ${MUTLASS_EXPERIMENTAL_COMMON_SOURCE_DIR} 60 | ) 61 | 62 | install( 63 | TARGETS ${NAME} 64 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} 65 | ) 66 | 67 | mutlass_add_executable_tests( 68 | test_examples_${NAME} ${NAME} 69 | DEPENDS ${__DEPENDS} 70 | DEPENDEES test_experimental ${__DEPENDEES} 71 | TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS} 72 | DISABLE_EXECUTABLE_INSTALL_RULE 73 | DISABLE_TESTS ${__DISABLE_TESTS} 74 | ) 75 | 76 | endfunction() 77 | 78 | mutlass_experimental_mp31_fa_fwd_add_executable( 79 | mp31_fa_fwd 80 | flash_attention.mu 81 | ) 82 | -------------------------------------------------------------------------------- /include/mute/address_space.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * SPDX-License-Identifier: BSD-3-Clause 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the copyright holder nor the names of its 16 | * contributors may be used to endorse or promote products derived from 17 | * this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | * 30 | **************************************************************************************************/ 31 | 32 | #pragma once 33 | 34 | namespace mute { 35 | 36 | enum class AddressSpace { 37 | Generic = 0, 38 | Global = 1, 39 | Shared = 3, 40 | }; 41 | 42 | template 43 | MUTE_HOST_DEVICE constexpr 44 | void __attribute__((address_space(static_cast(AS))))* 45 | make_ptr_with_address_space(uint64_t ptr) { 46 | return reinterpret_cast(AS))))*>(ptr); 47 | } 48 | 49 | } // namespace mute 50 | -------------------------------------------------------------------------------- /include/mute/algorithm/clear.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | 38 | #include 39 | 40 | namespace mute 41 | { 42 | 43 | // 44 | // Accept mutable temporaries 45 | // 46 | template 47 | MUTE_HOST_DEVICE 48 | void 49 | clear(Tensor&& tensor) 50 | { 51 | return clear(tensor); 52 | } 53 | 54 | // 55 | // Set elements to zero 56 | // 57 | template 58 | MUTE_HOST_DEVICE 59 | void 60 | clear(Tensor& tensor) 61 | { 62 | using T = typename Tensor::value_type; 63 | 64 | fill(tensor, T{}); 65 | } 66 | 67 | } // end namespace mute 68 | -------------------------------------------------------------------------------- /include/mute/algorithm/fill.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace mute 40 | { 41 | 42 | // 43 | // Accept mutable temporaries 44 | // 45 | template 46 | MUTE_HOST_DEVICE 47 | void 48 | fill(Tensor&& tensor, T const& value) 49 | { 50 | return fill(tensor, value); 51 | } 52 | 53 | namespace detail 54 | { 55 | 56 | // Prefer fill(tensor.data(), value), if possible 57 | template 58 | MUTE_HOST_DEVICE 59 | auto 60 | fill(Tensor& tensor, T const& value, prefer<1>) 61 | -> decltype(fill(tensor.data(), value)) 62 | { 63 | fill(tensor.data(), value); 64 | } 65 | 66 | // Default implementation 67 | template 68 | MUTE_HOST_DEVICE 69 | void 70 | fill(Tensor& tensor, T const& value, prefer<0>) 71 | { 72 | MUTE_UNROLL 73 | for (int i = 0; i < size(tensor); ++i) { 74 | tensor(i) = value; 75 | } 76 | } 77 | 78 | } // end namespace detail 79 | 80 | template 81 | MUTE_HOST_DEVICE 82 | void 83 | fill(Tensor& tensor, T const& value) 84 | { 85 | return detail::fill(tensor, value, prefer<1>{}); 86 | } 87 | 88 | } // end namespace mute 89 | -------------------------------------------------------------------------------- /include/mute/algorithm/prefer.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | namespace mute 35 | { 36 | 37 | // Infinite types that inherit from each other 38 | template 39 | struct prefer : prefer {}; 40 | 41 | template <> 42 | struct prefer<0> {}; 43 | 44 | // Can be used to preferentially overload implementations 45 | // A higher N in prefer has higher priority. 
46 | 47 | } // end namespace mute 48 | -------------------------------------------------------------------------------- /include/mute/arch/mma.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include // MUTE_HOST_DEVICE 35 | #include // mute::fma 36 | #include // mute::fma 37 | 38 | namespace mute 39 | { 40 | 41 | // 42 | // Direct FMA for any type 43 | // 44 | 45 | template 46 | struct UniversalFMA 47 | { 48 | using DRegisters = D[1]; 49 | using ARegisters = A[1]; 50 | using BRegisters = B[1]; 51 | using CRegisters = C[1]; 52 | 53 | MUTE_HOST_DEVICE static constexpr void 54 | fma(D & d, 55 | A const& a, 56 | B const& b, 57 | C const& c) 58 | { 59 | // Forward to an ADL/mute free function for these types 60 | using mute::fma; 61 | fma(d, a, b, c); 62 | } 63 | }; 64 | 65 | } // end namespace mute 66 | -------------------------------------------------------------------------------- /include/mute/arch/tce_desc.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * SPDX-License-Identifier: BSD-3-Clause 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * 1. 
Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * 2. Redistributions in binary form must reproduce the above copyright notice, 12 | * this list of conditions and the following disclaimer in the documentation 13 | * and/or other materials provided with the distribution. 14 | * 15 | * 3. Neither the name of the copyright holder nor the names of its 16 | * contributors may be used to endorse or promote products derived from 17 | * this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | * 30 | **************************************************************************************************/ 31 | 32 | #pragma once 33 | 34 | namespace mute { 35 | namespace TCE { 36 | 37 | // TCE MMA Major enum: K = 0, MN = 1 (presumably names which mode of an MMA operand is contiguous -- TODO confirm) 38 | enum class Major { 39 | K = 0, 40 | MN = 1 41 | }; 42 | 43 | } // namespace TCE 44 | } // namespace mute 45 | -------------------------------------------------------------------------------- /include/mute/container/array_aligned.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | #include 36 | 37 | namespace mute 38 | { 39 | /* A mute::array subclass declared with MUTE_ALIGNAS(Alignment); it adds no members of its own */ 40 | template 41 | struct MUTE_ALIGNAS(Alignment) array_aligned : mute::array {}; 42 | 43 | } // end namespace mute 44 | -------------------------------------------------------------------------------- /include/mute/numeric/bfloat.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace mute { 40 | 41 | using mutlass::bfloat16_t; 42 | 43 | // 44 | // Display utilities: stream a bfloat16_t as its float conversion; compiled out when __MUSACC_RTC__ is defined 45 | // 46 | 47 | #if !defined(__MUSACC_RTC__) 48 | MUTE_HOST std::ostream& operator<<(std::ostream& os, bfloat16_t const& v) 49 | { 50 | return os << float(v); 51 | } 52 | #endif 53 | 54 | } // end namespace mute 55 | -------------------------------------------------------------------------------- /include/mute/numeric/complex.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include // MUTE_HOST_DEVICE 35 | 36 | #include // mutlass::complex, mutlass::real, mutlass::imag, mutlass::is_complex 37 | namespace mute 38 | { 39 | 40 | using mutlass::complex; 41 | using mutlass::is_complex; 42 | using mutlass::RealType; 43 | using mutlass::real; 44 | using mutlass::imag; 45 | using mutlass::conj; 46 | 47 | template 48 | static constexpr auto is_complex_v = is_complex::value; 49 | 50 | /// Fused multiply-add for complex numbers: d = a * b + c, computed as four real fma steps. NOTE(review): d is written before a and b are fully read, so d aliasing a or b looks unsafe (d aliasing c is fine) -- confirm 51 | template 52 | MUTE_HOST_DEVICE constexpr 53 | void 54 | fma(complex & d, 55 | complex const& a, 56 | complex const& b, 57 | complex const& c) 58 | { 59 | fma(d.real(), a.real(), b.real(), c.real()); 60 | fma(d.imag(), a.real(), b.imag(), c.imag()); 61 | fma(d.real(), -a.imag(), b.imag(), d.real()); 62 | fma(d.imag(), a.imag(), b.real(), d.imag()); 63 | } 64 | 65 | /// In-place fused multiply-add for triplets: c = a * b + c, forwarded to the four-argument overload with c as both output and addend 66 | template 67 | MUTE_HOST_DEVICE constexpr 68 | void 69 | fma(complex const& a, 70 | complex const& b, 71 | complex & c) 72 | { 73 | return fma(c, a, b, c); 74 | } 75 | 76 | } // end namespace mute 77 | -------------------------------------------------------------------------------- /include/mute/numeric/float8.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace mute { 40 | 41 | using mutlass::float_e4m3_t; 42 | using mutlass::float_e5m2_t; 43 | 44 | } // end namespace mute 45 | -------------------------------------------------------------------------------- /include/mute/numeric/half.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | #include 36 | #include 37 | 38 | namespace mute { 39 | 40 | using mutlass::half_t; 41 | 42 | } // end namespace mute 43 | -------------------------------------------------------------------------------- /include/mute/numeric/real.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | namespace mute 37 | { 38 | 39 | /// Generic add: c = a + b 40 | template 41 | MUTE_HOST_DEVICE constexpr 42 | void 43 | add(C& c, A const& a, B const& b) 44 | { 45 | c = a + b; 46 | } 47 | 48 | /// Generic multiply: c = a * b 49 | template 50 | MUTE_HOST_DEVICE constexpr 51 | void 52 | mul(C& c, A const& a, B const& b) 53 | { 54 | c = a * b; 55 | } 56 | /// Generic fused multiply-add: d = a * b + c, written with the operand types' own operator* and operator+ 57 | template 58 | MUTE_HOST_DEVICE constexpr 59 | void 60 | fma(D& d, A const& a, B const& b, C const& c) 61 | { 62 | d = a * b + c; 63 | } 64 | 65 | /// In-place fused multiply-add for triplets: c = a * b + c, forwarded to the four-argument fma with c as both output and addend 66 | template 67 | MUTE_HOST_DEVICE constexpr 68 | void 69 | fma(A const& a, B const& b, C& c) 70 | { 71 | return fma(c, a, b, c); 72 | } 73 | 74 | } // end namespace mute 75 | -------------------------------------------------------------------------------- /include/mute/numeric/tfloat.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | #include 38 | 39 | namespace mute { 40 | 41 | using mutlass::tfloat32_t; 42 | 43 | // 44 | // Display utilities: stream a tfloat32_t as its float conversion; compiled out when __MUSACC_RTC__ is defined 45 | // 46 | 47 | #if !defined(__MUSACC_RTC__) 48 | MUTE_HOST std::ostream& operator<<(std::ostream& os, tfloat32_t const& v) 49 | { 50 | return os << float(v); 51 | } 52 | #endif 53 | 54 | } // end namespace mute 55 | -------------------------------------------------------------------------------- /include/mute/tensor_predicate.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | 38 | namespace mute 39 | { 40 | /* Callable that ignores its coordinate arguments and returns a const reference to the stored val_ */ 41 | template 42 | struct ConstantTensor 43 | { 44 | template 45 | MUTE_HOST_DEVICE constexpr 46 | T const& 47 | operator()(Coords const&...) const { 48 | return val_; 49 | } 50 | 51 | T val_; 52 | }; 53 | /* Predicate callable that ignores its coordinate arguments and returns a value-initialized true_type */ 54 | struct TrivialPredTensor 55 | { 56 | template 57 | MUTE_HOST_DEVICE constexpr 58 | true_type 59 | operator()(Coords const&...) const { 60 | return {}; 61 | } 62 | }; 63 | /* Predicate callable that forwards its coordinates to fn_. fn_ is held by const reference, so the Fn passed to the constructor must outlive this object */ 64 | template 65 | struct FunctionPredTensor 66 | { 67 | MUTE_HOST_DEVICE constexpr 68 | FunctionPredTensor(Fn const& fn) : fn_(fn) {} 69 | 70 | template 71 | MUTE_HOST_DEVICE constexpr 72 | auto 73 | operator()(Coords const&... coords) const { 74 | return fn_(coords...); 75 | } 76 | 77 | Fn const& fn_; 78 | }; 79 | 80 | } // end namespace mute 81 | -------------------------------------------------------------------------------- /include/mute/tile.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | #include 37 | 38 | namespace mute 39 | { 40 | 41 | // 42 | // A Tile is not a Layout, it's a tuple of Layouts or Tiles or Underscores 43 | // 44 | 45 | template 46 | using Tile = tuple; 47 | // is_tile is an alias of is_tuple, so it accepts exactly what is_tuple accepts 48 | template 49 | using is_tile = is_tuple; 50 | // Builds a Tile from the given layouts, passing them through to the Tile constructor 51 | template 52 | MUTE_HOST_DEVICE constexpr 53 | auto 54 | make_tile(Layouts const&... 
layouts) 55 | { 56 | return Tile(layouts...); 57 | } 58 | 59 | } // end namespace mute 60 | -------------------------------------------------------------------------------- /include/mutlass/arch/arch.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Defines tags for architecture-specific configurations. 34 | */ 35 | 36 | #pragma once 37 | 38 | #include "mutlass/mutlass.h" 39 | 40 | //////////////////////////////////////////////////////////////////////////////////////////////////// 41 | 42 | namespace mutlass { 43 | namespace arch { 44 | 45 | //////////////////////////////////////////////////////////////////////////////////////////////////// 46 | /// Architecture tag type whose kMinComputeCapability is 22 47 | struct Mp22 { 48 | static int const kMinComputeCapability = 22; 49 | }; 50 | /// Architecture tag type whose kMinComputeCapability is 31 51 | struct Mp31 { 52 | static int const kMinComputeCapability = 31; 53 | }; 54 | 55 | //////////////////////////////////////////////////////////////////////////////////////////////////// 56 | 57 | } // namespace arch 58 | } // namespace mutlass 59 | 60 | //////////////////////////////////////////////////////////////////////////////////////////////////// 61 | -------------------------------------------------------------------------------- /include/mutlass/arch/cache_operation.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! 
\file 33 | \brief Directives related to cache operations 34 | */ 35 | #pragma once 36 | 37 | #include "mutlass/mutlass.h" 38 | 39 | namespace mutlass { 40 | namespace arch { 41 | 42 | //////////////////////////////////////////////////////////////////////////////////////////////////// 43 | 44 | /// Controls intrinsic cache operations; wraps the Kind enumeration so its values are spelled CacheOperation::Always, etc. 45 | struct CacheOperation { 46 | enum Kind { 47 | /// Cache at all levels - accessed again 48 | Always, 49 | /// Cache at global level 50 | Global, 51 | /// Streaming - likely to be accessed once 52 | Streaming, 53 | /// Indicates the line will not be used again 54 | LastUse, 55 | /// Don't cache, and fetch again 56 | Volatile, 57 | /// Write back at all coherent levels 58 | WriteBack, 59 | /// Write through to system memory 60 | WriteThrough 61 | }; 62 | }; 63 | 64 | //////////////////////////////////////////////////////////////////////////////////////////////////// 65 | 66 | } // namespace arch 67 | } // namespace mutlass 68 | -------------------------------------------------------------------------------- /include/mutlass/detail/collective.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include "mute/container/tuple.hpp" 35 | #include "mute/layout.hpp" // mute::size(shape) 36 | 37 | ///////////////////////////////////////////////////////////////////////////////////////////////// 38 | 39 | namespace mutlass::gemm::collective { 40 | 41 | ///////////////////////////////////////////////////////////////////////////////////////////////// 42 | 43 | namespace detail { 44 | 45 | template 46 | struct deduce_mixed_width_dtype { 47 | static_assert(I >= 0u && I <= 2u, "Valid indices are 0, 1, and 2, which represent Operand, Scale, and Bias, respectively."); 48 | 49 | private: 50 | using underlying_tuple = mute::conditional_t::value, Tuple, mute::tuple>; 51 | static constexpr size_t valid_index = mute::min(I, mute::tuple_size_v - 1); 52 | 53 | public: 54 | using type = mute::conditional_t<(I < mute::tuple_size_v), 55 | mute::tuple_element_t, 56 | void>; 57 | }; 58 | 59 | template 60 | using deduce_mixed_width_dtype_t = typename deduce_mixed_width_dtype::type; 61 | 62 | } // namespace detail 63 | 64 | ///////////////////////////////////////////////////////////////////////////////////////////////// 65 | 66 | } // namespace mutlass::gemm::collective 67 | -------------------------------------------------------------------------------- /include/mutlass/epilogue/collective/collective_epilogue.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include 35 | 36 | ///////////////////////////////////////////////////////////////////////////////////////////////// 37 | 38 | namespace mutlass::epilogue::collective { 39 | 40 | ///////////////////////////////////////////////////////////////////////////////////////////////// 41 | 42 | template < 43 | class DispatchPolicy, 44 | class... Args 45 | > 46 | class CollectiveEpilogue { 47 | static_assert(mutlass::detail::dependent_false, "Could not find an epilogue specialization."); 48 | }; 49 | 50 | ///////////////////////////////////////////////////////////////////////////////////////////////// 51 | 52 | } // namespace mutlass::epilogue::collective 53 | 54 | ///////////////////////////////////////////////////////////////////////////////////////////////// 55 | 56 | #include "detail.hpp" 57 | #include "default_epilogue.hpp" 58 | #include "epilogue_tensor_broadcast.hpp" 59 | #include "mp31_epilogue_tme.hpp" 60 | 61 | ///////////////////////////////////////////////////////////////////////////////////////////////// 62 | -------------------------------------------------------------------------------- /include/mutlass/epilogue/dispatch_policy.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include "mutlass/numeric_conversion.h" 35 | #include "mutlass/epilogue/thread/scale_type.h" 36 | 37 | ////////////////////////////////////////////////////////////////////////////// 38 | 39 | namespace mutlass::epilogue { 40 | 41 | ////////////////////////////////////////////////////////////////////////////// 42 | // 43 | // Builder Epilogue Schedules 44 | // 45 | ////////////////////////////////////////////////////////////////////////////// 46 | 47 | struct NoSmem {}; 48 | struct WithTme {}; 49 | struct EpilogueSimtVectorized {}; 50 | ////////////////////////////////////////////////////////////////////////////// 51 | 52 | template < 53 | int Stages_, 54 | int FragmentSize_ 55 | > 56 | struct Mp22CollectiveEpilogue { 57 | constexpr static int Stages = Stages_; 58 | constexpr static int FragmentSize = FragmentSize_; 59 | }; 60 | 61 | template < 62 | int StagesC_, 63 | int StagesD_, 64 | int FragmentSize_, 65 | bool ReuseSmemC_, 66 | bool DelayTmeStore_ = false 67 | > 68 | struct Mp31CollectiveEpilogue { 69 | constexpr static int StagesC = StagesC_; 70 | constexpr static int StagesD = StagesD_; 71 | constexpr static int FragmentSize = FragmentSize_; 72 | constexpr static bool ReuseSmemC = ReuseSmemC_; 73 | constexpr static bool DelayTmeStore = DelayTmeStore_; 74 | }; 75 | 76 | ////////////////////////////////////////////////////////////////////////////// 77 | 78 | } // namespace mutlass::epilogue 79 | -------------------------------------------------------------------------------- /include/mutlass/epilogue/thread/linear_combination_params.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! 
\file 33 | \brief 34 | */ 35 | 36 | #pragma once 37 | 38 | ///////////////////////////////////////////////////////////////////////////////////////////////// 39 | 40 | namespace mutlass { 41 | namespace epilogue { 42 | namespace thread { 43 | 44 | ///////////////////////////////////////////////////////////////////////////////////////////////// 45 | 46 | struct LinearCombinationParams { 47 | uint64_t alpha_data[2]; 48 | uint64_t beta_data[2]; 49 | 50 | MUTLASS_HOST_DEVICE 51 | LinearCombinationParams() 52 | : alpha_data {0lu, 0lu}, beta_data {0lu, 0lu} 53 | { } 54 | 55 | template 56 | MUTLASS_HOST_DEVICE 57 | LinearCombinationParams(ElementCompute alpha, ElementCompute beta) 58 | : alpha_data {0lu, 0lu}, beta_data {0lu, 0lu} 59 | { 60 | #if defined(__MUSA_ARCH__) 61 | reinterpret_cast(alpha_data) = alpha; 62 | reinterpret_cast(beta_data) = beta; 63 | #else 64 | memcpy( alpha_data, &alpha, sizeof(ElementCompute) ); 65 | memcpy( beta_data, &beta, sizeof(ElementCompute) ); 66 | #endif 67 | } 68 | }; 69 | 70 | ///////////////////////////////////////////////////////////////////////////////////////////////// 71 | 72 | } // namespace thread 73 | } // namespace epilogue 74 | } // namespace mutlass 75 | 76 | ///////////////////////////////////////////////////////////////////////////////////////////////// 77 | -------------------------------------------------------------------------------- /include/mutlass/epilogue/thread/scale_type.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Enum defines the behaviors of the epilogue. 
34 | */ 35 | 36 | #pragma once 37 | 38 | #include "mutlass/mutlass.h" 39 | 40 | ///////////////////////////////////////////////////////////////////////////////////////////////// 41 | 42 | namespace mutlass { 43 | namespace epilogue { 44 | namespace thread { 45 | 46 | ///////////////////////////////////////////////////////////////////////////////////////////////// 47 | 48 | /// Specifies internal data type for computation 49 | /// Note : 50 | /// 1. Scalar means alpha/beta is a single value from host(constant param) or device memory. 51 | /// 2. Vector means alpha/beta is a vector always from device memory. 52 | struct ScaleType { 53 | enum Kind { 54 | Default, // D = scalar_alpha x Acc + scalar_beta x C 55 | NoBetaScaling, // D = scalar_alpha x Acc + C 56 | OnlyAlphaScaling, // D = scalar_alpha x Acc 57 | PerChannelScaling, // D = vector_alpha x Acc + vector_beta x C 58 | OnlyAlphaPerChannelScaling, // D = vector_alpha x Acc 59 | Nothing // D = Acc 60 | }; 61 | }; 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////// 64 | 65 | } // namespace thread 66 | } // namespace epilogue 67 | } // namespace mutlass 68 | -------------------------------------------------------------------------------- /include/mutlass/floating_point_mtrtc.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | 33 | #pragma once 34 | 35 | namespace mutlass { 36 | 37 | /////////////////////////////////////////////////////////////////////////////////////////////////// 38 | 39 | // All floating-point numbers can be put in one of these categories. 
/* Floating-point classification codes: each enumerator is paired with a same-named macro defined to the same value (0 through 4). */ 40 | enum { 41 | FP_NAN = 42 | # define FP_NAN 0 43 | FP_NAN, 44 | FP_INFINITE = 45 | # define FP_INFINITE 1 46 | FP_INFINITE, 47 | FP_ZERO = 48 | # define FP_ZERO 2 49 | FP_ZERO, 50 | FP_SUBNORMAL = 51 | # define FP_SUBNORMAL 3 52 | FP_SUBNORMAL, 53 | FP_NORMAL = 54 | # define FP_NORMAL 4 55 | FP_NORMAL 56 | }; 57 | 58 | /////////////////////////////////////////////////////////////////////////////////////////////////// 59 | 60 | } // namespace mutlass 61 | 62 | /////////////////////////////////////////////////////////////////////////////////////////////////// 63 | -------------------------------------------------------------------------------- /include/mutlass/gemm/collective/builders/mp31_sqmma_common.inl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "mutlass/gemm/gemm.h" 4 | #include "mute/atom/mma_traits_mp31_sqmma.hpp" 5 | 6 | ///////////////////////////////////////////////////////////////////////////////////////////////// 7 | 8 | namespace mutlass::gemm::collective { 9 | 10 | ///////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | namespace detail { 13 | /* NOTE(review): the template parameter and argument lists in this file (after each bare 'template', after 'MMA_Traits', 'is_mn_major_A', 'is_mn_major_B') appear to have been lost from this copy; restore them from the original header before compiling. */ 14 | // 15 | // Some named constants 16 | // 17 | /* NOTE(review): presumably the byte alignment required for TME transfers - confirm. */ constexpr int tme_alignment_bytes = 4; 18 | /* 196608 = 192 * 1024 bytes */ constexpr int mp31_smem_capacity_bytes = 196608; 19 | 20 | /* Yields TCE::Major::MN when is_mn_major_A() holds, otherwise TCE::Major::K. */ template 21 | constexpr mute::TCE::Major 22 | sqmma_ss_tag_to_major_A() { 23 | if constexpr (mutlass::gemm::detail::is_mn_major_A()) { 24 | return mute::TCE::Major::MN; 25 | } 26 | else { 27 | return mute::TCE::Major::K; 28 | } 29 | } 30 | 31 | /* Same selection for operand B, keyed on is_mn_major_B(). */ template 32 | constexpr mute::TCE::Major 33 | sqmma_ss_tag_to_major_B() { 34 | if constexpr (mutlass::gemm::detail::is_mn_major_B()) { 35 | return mute::TCE::Major::MN; 36 | } 37 | else { 38 | return mute::TCE::Major::K; 39 | } 40 | } 41 | 42 | /* Returns the canonical GEMM shared-memory atom layout built by make_canonical_gemm_smem_atom_layout(); AtomM and AtomK are modes 0 and 2 of the MMA atom's Shape_MNK. */ template 43 | MUTE_HOST_DEVICE constexpr 44 | auto 45 | ss_smem_selector_A() 46 | { 47 | using AtomOpTraits = MMA_Traits; 48 | using AtomOpShape = typename
AtomOpTraits::Shape_MNK; 49 | 50 | using AtomM = decltype(get<0>(AtomOpShape{})); 51 | using AtomK = decltype(get<2>(AtomOpShape{})); 52 | 53 | return mute::MP31::SQMMA::make_canonical_gemm_smem_atom_layout(); 54 | } 55 | 56 | /* Operand-B counterpart: AtomN and AtomK are modes 1 and 2 of the MMA atom's Shape_MNK. */ template 57 | MUTE_HOST_DEVICE constexpr 58 | auto 59 | ss_smem_selector_B() 60 | { 61 | using AtomOpTraits = MMA_Traits; 62 | using AtomOpShape = typename AtomOpTraits::Shape_MNK; 63 | 64 | using AtomN = decltype(get<1>(AtomOpShape{})); 65 | using AtomK = decltype(get<2>(AtomOpShape{})); 66 | 67 | return mute::MP31::SQMMA::make_canonical_gemm_smem_atom_layout(); 68 | } 69 | 70 | /* True when, for both operands, (element bit width * alignment / 8) is a multiple of RequiredAlignment. */ template 71 | constexpr bool 72 | is_aligned() { 73 | return ((mute::sizeof_bits_v * AlignmentA / 8) % RequiredAlignment == 0) && 74 | ((mute::sizeof_bits_v * AlignmentB / 8) % RequiredAlignment == 0); 75 | } 76 | 77 | /* NOTE(review): the is_same_v arguments are missing here; by its name this presumably tests that both operand element types are FP8 types - confirm. */ template 78 | constexpr bool 79 | is_input_fp8() { 80 | return ((mute::is_same_v || mute::is_same_v) && 81 | (mute::is_same_v || mute::is_same_v)); 82 | } 83 | 84 | } // namespace detail 85 | 86 | ///////////////////////////////////////////////////////////////////////////////////////////////// 87 | 88 | } // namespace mutlass::gemm::collective 89 | 90 | ///////////////////////////////////////////////////////////////////////////////////////////////// 91 | -------------------------------------------------------------------------------- /include/mutlass/gemm/collective/collective_mma.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include "mutlass/detail/dependent_false.hpp" 35 | 36 | ///////////////////////////////////////////////////////////////////////////////////////////////// 37 | 38 | namespace mutlass::gemm::collective { 39 | 40 | ///////////////////////////////////////////////////////////////////////////////////////////////// 41 | 42 | template < 43 | class DispatchPolicy, 44 | class TileShape, 45 | class ElementA, 46 | class StrideA, 47 | class ElementB, 48 | class StrideB, 49 | class TiledMma, 50 | class GmemTiledCopyA, 51 | class SmemLayoutAtomA, 52 | class SmemCopyAtomA, 53 | class TransformA, 54 | class GmemTiledCopyB, 55 | class SmemLayoutAtomB, 56 | class SmemCopyAtomB, 57 | class TransformB 58 | > 59 | struct CollectiveMma { 60 | static_assert(mutlass::detail::dependent_false == 0, "Could not find a mainloop specialization."); 61 | }; 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////// 64 | 65 | } // namespace mutlass::gemm::collective 66 | 67 | ///////////////////////////////////////////////////////////////////////////////////////////////// 68 | 69 | #include "mutlass/gemm/collective/mp22_mma_twostage.hpp" 70 | #include "mutlass/gemm/collective/mp31_mma_tme_sqmma_ss.hpp" 71 | #include "mutlass/gemm/collective/mp31_mma_tme_sqmma_ss_scaling.hpp" 72 | ///////////////////////////////////////////////////////////////////////////////////////////////// 73 | -------------------------------------------------------------------------------- /include/mutlass/gemm/threadblock/threadblock_swizzle.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Implements several possible threadblock-swizzling functions mapping blockIdx to 34 | GEMM problems. 
35 | */ 36 | 37 | #pragma once 38 | 39 | ///////////////////////////////////////////////////////////////////////////////////////////////// 40 | 41 | namespace mutlass { 42 | namespace gemm { 43 | namespace threadblock { 44 | 45 | ///////////////////////////////////////////////////////////////////////////////////////////////// 46 | 47 | /// Threadblock swizzling function for GEMMs 48 | template 49 | struct GemmIdentityThreadblockSwizzle { }; 50 | 51 | ///////////////////////////////////////////////////////////////////////////////////////////////// 52 | 53 | } // namespace threadblock 54 | } // namespace gemm 55 | } // namespace mutlass 56 | 57 | -------------------------------------------------------------------------------- /include/mutlass/gemm_coord.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | 33 | /*! \file 34 | \brief Utilities to convert a MuTe tuple to a GemmCoord or BatchedGemmCoord 35 | */ 36 | 37 | #pragma once 38 | 39 | #include "mute/layout.hpp" 40 | #include "mutlass/gemm_coord.h" 41 | 42 | namespace mutlass { 43 | namespace gemm { 44 | 45 | ///////////////////////////////////////////////////////////////////////////////////////////////// 46 | 47 | template 48 | MUTLASS_HOST_DEVICE 49 | auto 50 | to_gemm_coord(Tuple tuple) { 51 | static_assert(mute::rank(tuple) <= 4, "Can only convert tuples of rank <= 4."); 52 | 53 | if constexpr (mute::rank(tuple) <= 3) { 54 | auto tuple_mnk = mute::append<3>(tuple, mute::Int<0>{}); 55 | return GemmCoord(mute::size<0>(tuple_mnk), mute::size<1>(tuple_mnk), mute::size<2>(tuple_mnk)); 56 | } 57 | else { 58 | return BatchedGemmCoord(mute::size<0>(tuple), mute::size<1>(tuple), mute::size<2>(tuple), mute::size<3>(tuple)); 59 | } 60 | } 61 | 62 | ///////////////////////////////////////////////////////////////////////////////////////////////// 63 | 64 | } // namespace gemm 65 | } // 
namespace mutlass 66 | 67 | ///////////////////////////////////////////////////////////////////////////////////////////////// 68 | -------------------------------------------------------------------------------- /include/mutlass/kernel_hardware_info.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #if !defined(__MUSACC_RTC__) 35 | #include "musa_runtime.h" 36 | 37 | #include "mutlass/trace.h" 38 | #endif 39 | 40 | namespace mutlass { 41 | 42 | struct KernelHardwareInfo { 43 | // 44 | // Data members 45 | // 46 | int device_id = 0; 47 | int sm_count = 0; 48 | 49 | // 50 | // Methods 51 | // 52 | 53 | #if !defined(__MUSACC_RTC__) 54 | static inline int 55 | query_device_multiprocessor_count(int device_id = 0) { 56 | musaError_t result = musaGetDevice(&device_id); 57 | if (result != musaSuccess) { 58 | MUTLASS_TRACE_HOST( 59 | " musaGetDevice() returned error " 60 | << musaGetErrorString(result)); 61 | return 0; 62 | } 63 | int multiprocessor_count; 64 | result = musaDeviceGetAttribute(&multiprocessor_count, 65 | musaDevAttrMultiProcessorCount, device_id); 66 | if (result != musaSuccess) { 67 | MUTLASS_TRACE_HOST( 68 | " musaDeviceGetAttribute() returned error " 69 | << musaGetErrorString(result)); 70 | return 0; 71 | } 72 | return multiprocessor_count; 73 | } 74 | #endif 75 | }; 76 | 77 | } // namespace mutlass 78 | -------------------------------------------------------------------------------- /include/mutlass/kernel_hardware_info.hpp: -------------------------------------------------------------------------------- 1 | 
/*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | // Simply import .h version of header so as to avoid breaking any existing MUTLASS builds 35 | // after .hpp was changed to .h 36 | #include "mutlass/kernel_hardware_info.h" 37 | -------------------------------------------------------------------------------- /include/mutlass/kernel_launch.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Defines structures and helpers to launch MUSA kernels within MUTLASS. 34 | */ 35 | 36 | #pragma once 37 | 38 | #include "mutlass/mutlass.h" 39 | 40 | namespace mutlass { 41 | 42 | /////////////////////////////////////////////////////////////////////////////////////////////////// 43 | 44 | /// Structure containing the basic launch configuration of a MUSA kernel. 
45 | struct KernelLaunchConfiguration { 46 | 47 | /// MUSA grid dimensions 48 | dim3 grid; 49 | 50 | /// MUSA threablock dimensions 51 | dim3 block; 52 | 53 | /// Bytes of dynamically allocated SMEM in addition to static SMEM 54 | size_t dynamic_smem; 55 | 56 | // 57 | // Methods 58 | // 59 | 60 | /// Constructs a KernellaunchConfiguration object 61 | MUTLASS_HOST_DEVICE 62 | KernelLaunchConfiguration( 63 | dim3 _grid = dim3(1,1,1), 64 | dim3 _block = dim3(1,1,1), 65 | size_t _dynamic_smem = 0 66 | ): 67 | grid(_grid), 68 | block(_block), 69 | dynamic_smem(_dynamic_smem) { } 70 | }; 71 | 72 | /////////////////////////////////////////////////////////////////////////////////////////////////// 73 | 74 | } // namespace mutlass 75 | -------------------------------------------------------------------------------- /include/mutlass/layout/layout.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Defines layout functions used by TensorRef and derived classes. 34 | 35 | Layout functions map logical coordinates to linear memory. They often require additional 36 | data to describe strides between elements. 37 | 38 | Layout functions must implement all members in the public interface of IdentityTensorLayout<> 39 | defined in mutlass/tensor_ref.h. 
40 | */ 41 | #pragma once 42 | 43 | #include "mutlass/mutlass.h" 44 | #include "mutlass/matrix_coord.h" 45 | #include "mutlass/layout/matrix.h" 46 | #include "mutlass/layout/pitch_linear.h" 47 | #include "mutlass/layout/tensor.h" 48 | #include "mutlass/layout/vector.h" 49 | 50 | /////////////////////////////////////////////////////////////////////////////////////////////////// 51 | 52 | namespace mutlass { 53 | namespace layout { 54 | 55 | /////////////////////////////////////////////////////////////////////////////////////////////////// 56 | 57 | 58 | /////////////////////////////////////////////////////////////////////////////////////////////////// 59 | 60 | } // namespace layout 61 | } // namespace mutlass 62 | 63 | /////////////////////////////////////////////////////////////////////////////////////////////////// 64 | -------------------------------------------------------------------------------- /include/mutlass/matrix_shape.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! 
\file 33 | \brief Defines a Shape template for matrix tiles 34 | */ 35 | #pragma once 36 | 37 | #include "mutlass/mutlass.h" 38 | #include "mutlass/coord.h" 39 | 40 | namespace mutlass { 41 | 42 | ///////////////////////////////////////////////////////////////////////////////////////////////// 43 | 44 | /// Describes the size of a matrix tile 45 | template < 46 | int Row_, ///< rows of a matrix 47 | int Column_ ///< columns of a matrix 48 | > 49 | struct MatrixShape { 50 | static int const kRow = Row_; ///< rows of a matrix 51 | static int const kColumn = Column_; ///< columns of a matrix 52 | static int const kCount = Row_ * Column_; ///< total number of elements in a matrix 53 | 54 | // 55 | // Static member functions 56 | // 57 | 58 | MUTLASS_HOST_DEVICE 59 | static Coord<2> toCoord() { 60 | return make_Coord(kRow, kColumn); 61 | } 62 | }; 63 | 64 | ///////////////////////////////////////////////////////////////////////////////////////////////// 65 | 66 | } // namespace mutlass 67 | -------------------------------------------------------------------------------- /include/mutlass/pipeline/pipeline.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2023 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | //////////////////////////////////////////////////////////////////////////////////////////////////// 35 | 36 | #include "mutlass/pipeline/mp31_pipeline.hpp" 37 | //////////////////////////////////////////////////////////////////////////////////////////////////// 38 | -------------------------------------------------------------------------------- /include/mutlass/real.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /** 33 | \file 34 | \brief This class provides helpers to support real<> and complex<> types in generic code. 35 | */ 36 | 37 | #pragma once 38 | 39 | 40 | #include // MUTLASS_DEVICE 41 | 42 | namespace mutlass { 43 | 44 | /// Used to determine the real-valued underlying type of a numeric type T. 45 | template 46 | struct RealType { 47 | using Type = T; 48 | 49 | /// Number of elements 50 | static int const kExtent = 1; 51 | 52 | MUTLASS_HOST_DEVICE 53 | static T from_real(double x) { 54 | return static_cast(x); 55 | } 56 | }; 57 | 58 | template 59 | MUTLASS_HOST_DEVICE 60 | static T from_real(double r) { 61 | return T(r); 62 | } 63 | 64 | 65 | } // namespace mutlass 66 | -------------------------------------------------------------------------------- /include/mutlass/trace.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Helpers for optionally tracing through code when debugging. 34 | 35 | This file is to be included after all other headers. 
#pragma once

////////////////////////////////////////////////////////////////////////////////////////////////////

// Tracing options
//
// MUTLASS_DEBUG_TRACE_LEVEL may be defined by the build; it defaults to 0 (tracing off).
#ifndef MUTLASS_DEBUG_TRACE_LEVEL
#define MUTLASS_DEBUG_TRACE_LEVEL 0
#endif

// MUTLASS_TRACE_HOST(x) streams `x` to std::cout, prefixed with the source file and line, when
// the trace level is non-zero and __MUSA_ARCH__ is not defined. In every other configuration it
// expands to nothing, so its argument is not evaluated.
//
// The enabled form expands to a brace-enclosed block rather than a single expression, so it
// should not be used as the unbraced body of an `if` that has an `else`.
#if MUTLASS_DEBUG_TRACE_LEVEL
#include <iostream>   // std::cout, std::endl used by the macro below
#include "mutlass/core_io.h"
#if defined(__MUSA_ARCH__)
#define MUTLASS_TRACE_HOST(x)
#else
#define MUTLASS_TRACE_HOST(x) { std::cout << __FILE__ << ":" << __LINE__ << " " << x << std::endl; }
#endif
#else
#define MUTLASS_TRACE_HOST(x)
#endif

////////////////////////////////////////////////////////////////////////////////////////////////////
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma once

#include <cstdint>
#include <string>

#define MUTLASS_MAJOR 0
#define MUTLASS_MINOR 2
#define MUTLASS_PATCH 0

// MUTLASS_BUILD and MUTLASS_REVISION come from the generated header when the build defines
// MUTLASS_VERSIONS_GENERATED; otherwise they fall back to 0 and the empty string.
#ifdef MUTLASS_VERSIONS_GENERATED
#include "mutlass/version_extended.h"
#else
#define MUTLASS_BUILD 0
#define MUTLASS_REVISION ""
#endif

// Packed numeric version: major*100 + minor*10 + patch. A minor or patch component of 10 or
// more would overlap the next field; the encoding is kept as-is for compatibility.
#define MUTLASS_VERSION ((MUTLASS_MAJOR)*100 + (MUTLASS_MINOR)*10 + MUTLASS_PATCH)

namespace mutlass {

  /// Packed numeric version (see MUTLASS_VERSION).
  inline constexpr uint32_t getVersion() {
    return MUTLASS_VERSION;
  }

  /// Major version component.
  inline constexpr uint32_t getVersionMajor() {
    return MUTLASS_MAJOR;
  }

  /// Minor version component.
  inline constexpr uint32_t getVersionMinor() {
    return MUTLASS_MINOR;
  }

  /// Patch version component.
  inline constexpr uint32_t getVersionPatch() {
    return MUTLASS_PATCH;
  }

  /// Build number. The `+ 0` keeps the expression valid if MUTLASS_BUILD expands to nothing.
  inline constexpr uint32_t getVersionBuild() {
    return MUTLASS_BUILD + 0;
  }

  /// Returns "major.minor.patch", with ".build" appended when the build number is non-zero.
  ///
  /// The string is assembled from the numeric components. The previous implementation
  /// returned the literal text "@MUTLASS_VERSION@", a CMake placeholder that is only replaced
  /// if this header is passed through configure_file().
  inline std::string getVersionString() {
    std::string version = std::to_string(getVersionMajor()) + "." +
                          std::to_string(getVersionMinor()) + "." +
                          std::to_string(getVersionPatch());
    if (getVersionBuild()) {
      version += "." + std::to_string(getVersionBuild());
    }
    return version;
  }

  /// Returns the revision recorded in MUTLASS_REVISION ("" when no generated header is used).
  ///
  /// The previous implementation returned the unsubstituted placeholder "@MUTLASS_REVISION@".
  // NOTE(review): assumes the generated version_extended.h defines MUTLASS_REVISION as a
  // string literal, as the fallback above does -- confirm against cmake/version_extended.h.in.
  inline std::string getGitRevision() {
    return MUTLASS_REVISION;
  }

} // namespace mutlass
import os
import sys

from . import gemm_operation

if '-m' not in sys.argv:
    # Do not import generator when running python -m mutlass_library.generator to
    # avoid double-import warnings.
    # While the interpreter is still locating a module requested with -m, sys.argv[0] is
    # set to "-m", which is what this membership test detects.
    from . import generator

from . import library
from . import manifest

# Make enum types from library.py accessible via mutlass_library.*
from .library import *

# Set up `source_path` to point to the path containing the MUTLASS source.
# Check first if the package directory contains a `source` subdirectory -- this will
# be the case when the package has been installed via pip. Otherwise,
# default to the root of MUTLASS (two levels above this package directory).
install_source_path = os.path.join(__path__[0], 'source')
if os.path.isdir(install_source_path):
    source_path = install_source_path
else:
    source_path = os.path.join(__path__[0], '../..')
from setuptools import setup


def perform_setup():
    """Call setuptools' ``setup`` with the metadata of the ``mutlass_library`` package."""
    package_metadata = {
        'name': 'mutlass_library',
        'version': '0.1.0',
        'description': 'MUTLASS library generation scripts',
        'packages': ['mutlass_library'],
    }
    setup(**package_metadata)


# Only run the setup when this file is executed as a script.
if __name__ == '__main__':
    perform_setup()
# When GoogleTest unit tests are disabled, define an empty `test_unit` target
# (presumably so that targets depending on that name still resolve -- confirm);
# otherwise descend into the `unit` subdirectory.
if (NOT MUTLASS_ENABLE_GTEST_UNIT_TESTS)
  add_custom_target(test_unit)
else()
  add_subdirectory(unit)
endif()
# Descend into the device-level GEMM unit tests.
add_subdirectory(device)

# Aggregate target: depends on the device-level GEMM unit-test target.
add_custom_target(
  mutlass_test_unit_gemm
  DEPENDS
  mutlass_test_unit_gemm_device
  )

# Aggregate target: depends on `test_unit_gemm_device`.
add_custom_target(
  test_unit_gemm
  DEPENDS
  test_unit_gemm_device
  )
3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Aggregate target: depends on the mp22 and mp31 device GEMM test executables
# declared further below.
add_custom_target(
  mutlass_test_unit_gemm_device
  DEPENDS
  mutlass_test_unit_gemm_device_mp22
  mutlass_test_unit_gemm_device_mp31
  )

# Aggregate target over the per-architecture `test_unit_gemm_device_*` targets.
# NOTE(review): those targets are not declared in this file; presumably they are
# created by mutlass_test_unit_add_executable -- confirm.
add_custom_target(
  test_unit_gemm_device
  DEPENDS
  test_unit_gemm_device_mp22
  test_unit_gemm_device_mp31
  )

# mp22 device GEMM tests, declared via the project helper
# mutlass_test_unit_add_executable (defined elsewhere).
mutlass_test_unit_add_executable(
  mutlass_test_unit_gemm_device_mp22
  mp22_gemm_f32_f32_f32_simt.mu
  mp22_gemm_tensorop.mu
  )

# mp31 device GEMM tests.
mutlass_test_unit_add_executable(
  mutlass_test_unit_gemm_device_mp31
  mp31_gemm_sqmma_ss.mu
  )
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /*! \file 33 | \brief Tests for device-wide GEMM interface 34 | */ 35 | 36 | #pragma once 37 | 38 | #include "mutlass/mutlass.h" 39 | 40 | inline char const *to_string(mutlass::Status status) { 41 | 42 | switch (status) { 43 | case mutlass::Status::kSuccess: return "kSuccess"; 44 | case mutlass::Status::kErrorMisalignedOperand: return "kErrorMisalignedOperand"; 45 | case mutlass::Status::kErrorInvalidLayout: return "kErrorInvalidLayout"; 46 | case mutlass::Status::kErrorInvalidProblem: return "kErrorInvalidProblem"; 47 | case mutlass::Status::kErrorNotSupported: return "kErrorNotSupported"; 48 | case mutlass::Status::kErrorWorkspaceNull: return "kErrorWorkspaceNull"; 49 | case mutlass::Status::kErrorInternal: return "kErrorInternal"; 50 | case mutlass::Status::kInvalid: return "kInvalid"; 51 | default: break; 52 | } 53 | return "invalid"; 54 | } 55 | -------------------------------------------------------------------------------- /test/unit/mute/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Descend into each MuTe unit-test subdirectory.
add_subdirectory(core)
add_subdirectory(quyuan)
add_subdirectory(mp31)
add_subdirectory(mma)


# Aggregate target: depends on the four MuTe unit-test executables
# (core, quyuan, mp31, mma).
add_custom_target(
  mutlass_test_unit_mute
  DEPENDS
  mutlass_test_unit_mute_core
  mutlass_test_unit_mute_quyuan
  mutlass_test_unit_mute_mp31
  mutlass_test_unit_mute_mma
  )

# Aggregate target over the matching `test_unit_mute_*` targets.
# NOTE(review): those targets are not declared in this file; presumably they are
# created by mutlass_test_unit_add_executable in the subdirectories -- confirm.
add_custom_target(
  test_unit_mute
  DEPENDS
  test_unit_mute_core
  test_unit_mute_quyuan
  test_unit_mute_mp31
  test_unit_mute_mma
  )
# MuTe core unit tests, all plain .cpp sources, declared via the project helper
# mutlass_test_unit_add_executable (defined elsewhere).
# NOTE(review): WITHOUT_MUSA is an option of that helper; presumably it builds
# the executable without MUSA device compilation -- confirm against the macro.
mutlass_test_unit_add_executable(
  mutlass_test_unit_mute_core
  WITHOUT_MUSA
  array_subbyte.cpp
  bitfield.cpp
  coalesce.cpp
  compact_xmajor.cpp
  compare.cpp
  complement.cpp
  composition.cpp
  constants.cpp
  core_unit.cpp
  inverse_left.cpp
  inverse_right.cpp
  logical_divide.cpp
  logical_product.cpp
  math.cpp
  mixedbits.cpp
  nullspace.cpp
  pointer.cpp
  reverse.cpp
  transform.cpp
  tuple.cpp
  int_tuple.cpp
)
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "mutlass_unit_test.h"
// NOTE(review): the three #include directives below carry no header name in
// this copy of the file, and several template argument lists in the test are
// missing (e.g. `make_integer_sequence{}`, `integral_constant;`, `is_same_v`).
// They look stripped by text extraction -- restore them from the upstream file
// before building.
#include

#include
#include

// Compile-time checks over every element `c` of two integer sequences.
TEST(MuTe_core, MakeIntegerSequence) {
  // First sequence (its element type and length are missing in this copy).
  mute::for_each(mute::make_integer_sequence{}, [](auto c) {
    using c_type = decltype(c);
    constexpr auto c_value = c_type::value;
    using expected_type = mute::integral_constant;
    static_assert(mute::is_same_v);
    static_assert(mute::is_same_v);
    static_assert(mute::is_constant::value);
    // Multiplying by the static 0 must yield the static constant 0.
    static_assert(mute::is_constant<0, decltype(c * mute::Int<0>{})>::value);
    // Multiplying by the static 2 must yield the static constant 2*c_value.
    static_assert(mute::is_constant<2*c_value, decltype(c * mute::Int<2>{})>::value);
  });

  // Second sequence: same checks (presumably for a different element type --
  // TODO confirm once the template arguments are restored).
  mute::for_each(mute::make_integer_sequence{}, [](auto c) {
    using c_type = decltype(c);
    constexpr auto c_value = c_type::value;
    using expected_type = mute::integral_constant;
    static_assert(mute::is_same_v);
    static_assert(mute::is_same_v);
    static_assert(mute::is_constant::value);
    static_assert(mute::is_constant<0, decltype(c * mute::Int<0>{})>::value);
    static_assert(mute::is_constant<2*c_value, decltype(c * mute::Int<2>{})>::value);
  });
}
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | /** \file 33 | \brief Unit tests for MuTe core 34 | */ 35 | 36 | #include 37 | 38 | int main(int argc, char* arg[]) { 39 | ::testing::InitGoogleTest(&argc, arg); 40 | return RUN_ALL_TESTS(); 41 | } 42 | -------------------------------------------------------------------------------- /test/unit/mute/core/mixedbits.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
#include "mutlass_unit_test.h"

// NOTE(review): the two #include directives below carry no header name in this
// copy of the file (they look stripped by text extraction) -- restore them from
// the upstream file before building.
#include
#include

// Checks that the bitwise operators on values returned by make_mixed_bits agree
// with the same operators applied to their uint32_t conversions.
TEST(MuTe_core, MixedBits)
{
  using namespace mute;

  // S0, F0, S1, F1 iterate over make_int_sequence<8> (compile-time values);
  // d0 and d1 are runtime values in [0, 8).
  for_each(make_int_sequence<8>{}, [&](auto S0) {
    for_each(make_int_sequence<8>{}, [&](auto F0) {
      for_each(make_int_sequence<8>{}, [&](auto S1) {
        for_each(make_int_sequence<8>{}, [&](auto F1) {
          for (uint32_t d0 = 0; d0 < 8; ++d0) {
            for (uint32_t d1 = 0; d1 < 8; ++d1) {
              auto m0 = make_mixed_bits(S0, d0, F0);
              auto m1 = make_mixed_bits(S1, d1, F1);
              // Each check compares "operate, then convert" with "convert, then operate".
              //print(m0); print(" & "); print(m1); print(" = "); print(m0 & m1); print("\n");
              EXPECT_EQ(uint32_t(m0 & m1), uint32_t(m0) & uint32_t(m1));
              //print(m0); print(" | "); print(m1); print(" = "); print(m0 | m1); print("\n");
              EXPECT_EQ(uint32_t(m0 | m1), uint32_t(m0) | uint32_t(m1));
              //print(m0); print(" ^ "); print(m1); print(" = "); print(m0 ^ m1); print("\n");
              EXPECT_EQ(uint32_t(m0 ^ m1), uint32_t(m0) ^ uint32_t(m1));
            }
          }
        });
      });
    });
  });
}
/*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "mutlass_unit_test.h"

// NOTE(review): the three #include directives below carry no header name in
// this copy of the file, and the template arguments of `complex` (including the
// one inside the static_cast) are missing. They look stripped by text
// extraction -- restore them from the upstream file before building.
#include
#include
#include

// Applies `conjugate` in place to a 4-element tensor of complex values and
// compares each element with the expected result.
TEST(MuTe_core, Transform) {
  using namespace mute;
  // Input values and the expected output; the expected entries differ from the
  // input only in the sign of the second component.
  complex array[4] = {{0,0}, {1,0}, {0,1}, {1,1}};
  complex correct[4] = {{0,0}, {1,0}, {0,-1}, {1,-1}};
  // View the raw array as a tensor with a single mode of extent 4.
  auto tensor = make_tensor(static_cast*>(array), make_layout(make_shape(4)));
  conjugate conj;
  transform(tensor, conj);
  for (int i = 0; i < 4; ++i)
  {
    EXPECT_EQ(tensor(i), correct[i]);
  }
}
# MuTe MMA unit tests (mp31 and mp22 sources), declared via the project helper
# mutlass_test_unit_add_executable (defined elsewhere).
mutlass_test_unit_add_executable(
  mutlass_test_unit_mute_mma
  mp31_mma.mu
  mp22_mma.mu
)
# MuTe mp31 unit tests, declared via the project helper
# mutlass_test_unit_add_executable (defined elsewhere).
mutlass_test_unit_add_executable(
  mutlass_test_unit_mute_mp31
  mp31_sqmma.mu
  mp31_robust_buffer_access.mu
)
Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | mutlass_test_unit_add_executable( 30 | mutlass_test_unit_mute_quyuan 31 | vectorization_auto.mu 32 | ) 33 | -------------------------------------------------------------------------------- /test/unit/test_unit.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 
11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /** \file 33 | \brief Unit tests for MUTLASS core 34 | */ 35 | #include "common/mutlass_unit_test.h" 36 | /// Test-runner entry point: calls FilterArchitecture() (declared outside this file; presumably restricts the tests to the current device architecture - TODO confirm), hands the command line to GoogleTest, and returns the RUN_ALL_TESTS() result as the exit code. 37 | int main(int argc, char* arg[]) { 38 | FilterArchitecture(); 39 | ::testing::InitGoogleTest(&argc, arg); 40 | return RUN_ALL_TESTS(); 41 | } 42 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | # The util subdirectory is always added. 30 | add_subdirectory(util) 31 | # The library subdirectory is added only when MUTLASS_ENABLE_LIBRARY is set. 32 | if (MUTLASS_ENABLE_LIBRARY) 33 | add_subdirectory(library) 34 | endif() 35 | # The profiler requires the library: enabling it without MUTLASS_ENABLE_LIBRARY reports configuration errors instead of adding the profiler subdirectory. 36 | if (MUTLASS_ENABLE_PROFILER) 37 | if (NOT MUTLASS_ENABLE_LIBRARY) 38 | message(SEND_ERROR "Build conflict: The MUTLASS profiler requires the MUTLASS library.") 39 | message(SEND_ERROR " MUTLASS_ENABLE_PROFILER = ${MUTLASS_ENABLE_PROFILER}") 40 | message(SEND_ERROR " MUTLASS_ENABLE_LIBRARY = ${MUTLASS_ENABLE_LIBRARY}") 41 | else() 42 | add_subdirectory(profiler) 43 | endif() 44 | endif() 45 | -------------------------------------------------------------------------------- /tools/library/include/mutlass/library/arch_mappings.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission.
19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | #include "mutlass/arch/mma.h" 35 | #include "mutlass/arch/arch.h" 36 | 37 | ///////////////////////////////////////////////////////////////////////////////////////////////// 38 | 39 | namespace mutlass { 40 | namespace library { 41 | 42 | ///////////////////////////////////////////////////////////////////////////////////////////////// 43 | /// ArchMap is only declared here; it is defined through the explicit specializations below, each of which provides the integer constants kMin and kMax (presumably the lowest and highest supported architecture version - TODO confirm against the code that reads them). 44 | template struct ArchMap; 45 | 46 | template <> struct ArchMap { 47 | static int const kMin = 22; 48 | static int const kMax = 1024; 49 | }; 50 | 51 | template <> struct ArchMap { 52 | static int const kMin = 22; 53 | static int const kMax = 22; 54 | }; 55 | 56 | template <> struct ArchMap { 57 | static int const kMin = 31; 58 | static int const kMax = 31; 59 | }; 60 | 61 | ///////////////////////////////////////////////////////////////////////////////////////////////// 62 | 63 | } // namespace library 64 | } // namespace mutlass 65 | 66 | ///////////////////////////////////////////////////////////////////////////////////////////////// 67 | 
-------------------------------------------------------------------------------- /tools/library/include/mutlass/library/singleton.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | 33 | #pragma once 34 | 35 | #include "mutlass/library/library.h" 36 | #include "mutlass/library/manifest.h" 37 | #include "mutlass/library/operation_table.h" 38 | 39 | ///////////////////////////////////////////////////////////////////////////////////////////////// 40 | 41 | namespace mutlass { 42 | namespace library { 43 | 44 | ///////////////////////////////////////////////////////////////////////////////////////////////// 45 | 46 | /// Holds one Manifest together with the OperationTable populated from it; the shared instance is obtained through get() 47 | class Singleton { 48 | public: 49 | 50 | /// Manifest object 51 | Manifest manifest; 52 | 53 | /// Operation table referencing the Manifest 54 | OperationTable operation_table; 55 | 56 | public: 57 | /// Constructor; the definition in singleton.mu initializes the manifest and then appends it to the operation table 58 | Singleton(); 59 | /// Returns a const reference to the shared instance 60 | static Singleton const &get(); 61 | }; 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////// 64 | 65 | } // namespace library 66 | } // namespace mutlass 67 | 68 | ///////////////////////////////////////////////////////////////////////////////////////////////// 69 | 70 | -------------------------------------------------------------------------------- /tools/library/src/reference/gemm_int8_canonical.mu: -------------------------------------------------------------------------------- 1 | 
/*************************************************************************************************** 2 | * Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | /* \file 33 | \brief Instantiates GEMM reference implementations. 34 | */ 35 | 36 | #include "mutlass/mutlass.h" 37 | #include "mutlass/library/library.h" 38 | #include "mutlass/library/manifest.h" 39 | 40 | #include "gemm_reference_operation.h" 41 | 42 | ///////////////////////////////////////////////////////////////////////////////////////////////// 43 | 44 | namespace mutlass { 45 | namespace library { 46 | 47 | /////////////////////////////////////////////////////////////////////////////////////////////////// 48 | /// Forwards `manifest` to make_gemm_real_canonical_layouts instantiated with (int8_t, int8_t, int32_t, int32_t, int32_t); presumably this registers the int8 reference GEMM operations, and the role of each template argument is defined by that helper - TODO confirm in its definition. 49 | void initialize_gemm_reference_operations_int8_canonical(Manifest &manifest) { 50 | make_gemm_real_canonical_layouts< 51 | int8_t, 52 | int8_t, 53 | int32_t, 54 | int32_t, 55 | int32_t 56 | >(manifest); 57 | 58 | } 59 | 60 | /////////////////////////////////////////////////////////////////////////////////////////////////// 61 | 62 | } // namespace library 63 | } // namespace mutlass 64 | 65 | /////////////////////////////////////////////////////////////////////////////////////////////////// 66 | 67 | -------------------------------------------------------------------------------- /tools/library/src/singleton.mu: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer.
11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | 33 | #include 34 | #include "mutlass/library/library.h" 35 | #include "mutlass/library/manifest.h" 36 | #include "mutlass/library/operation_table.h" 37 | #include "mutlass/library/singleton.h" 38 | 39 | ///////////////////////////////////////////////////////////////////////////////////////////////// 40 | 41 | namespace mutlass { 42 | namespace library { 43 | 44 | ///////////////////////////////////////////////////////////////////////////////////////////////// 45 | /// Initializes the manifest first, then appends it to the operation table. 46 | Singleton::Singleton() { 47 | 48 | manifest.initialize(); 49 | 50 | operation_table.append(manifest); 51 | } 52 | /// Returns the function-local static instance, which is constructed the first time get() is called. 53 | Singleton const & Singleton::get() { 54 | static Singleton instance; 55 | return instance; 56 | } 57 | 58 | ///////////////////////////////////////////////////////////////////////////////////////////////// 59 | 60 | } // namespace library 61 | } // namespace mutlass 62 | 63 | ///////////////////////////////////////////////////////////////////////////////////////////////// 64 | -------------------------------------------------------------------------------- /tools/profiler/include/mutlass/profiler/debug.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2.
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /* \file 33 | \brief Debug-print macro (debugprof) for the profiler 34 | */ 35 | 36 | #pragma once 37 | 38 | #include <cstdio> 39 | 40 | //#define report(x) { std::cout << "\033[31m" << __FILE__ << ":" << __LINE__ << " " << x << "\033[0m" << std::endl; } 41 | //#define report(x) {} 42 | 43 | // Enable/Disable Profiler debug prints 44 | //#define DEBUG_PROFILER 45 | 46 | //RED 31m // profiler prints debug messages in red (NOTE: debugprof below actually emits 33m, i.e. yellow) 47 | //YELLOW 33m // ir prints debug messages in yellow 48 | // debugprof(...) expands to nothing unless DEBUG_PROFILER is defined; when it is defined, it printf's the file name, the line number and the caller's printf-style message wrapped in the 33m color escape, followed by a reset and a newline. 49 | #ifndef DEBUG_PROFILER 50 | #define debugprof(...) 51 | #else 52 | #define debugprof(...) 
do { \ 53 | printf("\033[33m[DEBUG PROF] %s:%d | ", __FILE__, __LINE__); \ 54 | printf(__VA_ARGS__); \ 55 | printf("\033[0m\n"); \ 56 | } while (0) 57 | #endif 58 | -------------------------------------------------------------------------------- /tools/profiler/include/mutlass/profiler/gpu_timer.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | /* \file 33 | \brief Declares GpuTimer, a timer that records MUSA events on a stream 34 | */ 35 | 36 | #pragma once 37 | 38 | #include 39 | #include "mutlass/mutlass.h" 40 | 41 | namespace mutlass { 42 | namespace profiler { 43 | 44 | ///////////////////////////////////////////////////////////////////////////////////////////////// 45 | /// Timer built on a pair of MUSA events; start/stop record events on a stream and duration() reports the elapsed time 46 | struct GpuTimer { 47 | /// Event pair used by the timer (presumably [0] = start and [1] = stop - TODO confirm in the implementation) 48 | musaEvent_t events[2]; 49 | 50 | // 51 | // Methods 52 | // 53 | 54 | GpuTimer(); 55 | ~GpuTimer(); 56 | 57 | /// Records a start event in the stream 58 | void start(musaStream_t stream = nullptr); 59 | 60 | /// Records a stop event in the stream 61 | void stop(musaStream_t stream = nullptr); 62 | 63 | /// Records a stop event in the stream and synchronizes on the stream 64 | void stop_and_wait(musaStream_t stream = nullptr); 65 | 66 | /// Returns the duration in milliseconds (NOTE(review): presumably averaged over `iterations`, which defaults to 1 - confirm in the implementation) 67 | double duration(int iterations = 1) const; 68 | }; 69 | 70 | ///////////////////////////////////////////////////////////////////////////////////////////////// 71 | 72 | } // namespace profiler 73 | } // namespace mutlass 74 | -------------------------------------------------------------------------------- /tools/profiler/src/main.cpp: -------------------------------------------------------------------------------- 1 | 
/*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | /* \file 33 | \brief Entry point of the MUTLASS profiler executable 34 | */ 35 | 36 | #include 37 | 38 | #include "mutlass/profiler/options.h" 39 | 40 | #include "mutlass/profiler/mutlass_profiler.h" 41 | 42 | /////////////////////////////////////////////////////////////////////////////////////////////////// 43 | /// Wraps the command line in a CommandLine object, builds the profiler Options from it, constructs a MutlassProfiler with those options, and returns the result of invoking the profiler as the process exit code. 44 | int main(int argc, char const *arg[]) { 45 | 46 | mutlass::CommandLine cmdline(argc, arg); 47 | mutlass::profiler::Options options(cmdline); 48 | 49 | mutlass::profiler::MutlassProfiler profiler(options); 50 | 51 | return profiler(); 52 | } 53 | 54 | /////////////////////////////////////////////////////////////////////////////////////////////////// 55 | -------------------------------------------------------------------------------- /tools/profiler/src/performance_result.mu: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3.
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | /* \file 33 | \brief 34 | */ 35 | 36 | #pragma once 37 | 38 | #include 39 | 40 | #include "mutlass/mutlass.h" 41 | 42 | // MUTLASS Profiler includes 43 | #include "mutlass/profiler/enumerated_types.h" 44 | #include "mutlass/profiler/performance_result.h" 45 | 46 | // MUTLASS Library includes 47 | #include "mutlass/library/library.h" 48 | #include "mutlass/library/util.h" 49 | 50 | namespace mutlass { 51 | namespace profiler { 52 | 53 | ///////////////////////////////////////////////////////////////////////////////////////////////// 54 | 55 | 56 | ///////////////////////////////////////////////////////////////////////////////////////////////// 57 | 58 | } // namespace profiler 59 | } // namespace mutlass 60 | 61 | ///////////////////////////////////////////////////////////////////////////////////////////////// 62 | 63 | -------------------------------------------------------------------------------- /tools/util/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # 1. Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # 2. Redistributions in binary form must reproduce the above copyright notice, 12 | # this list of conditions and the following disclaimer in the documentation 13 | # and/or other materials provided with the distribution. 14 | # 15 | # 3. 
Neither the name of the copyright holder nor the names of its 16 | # contributors may be used to endorse or promote products derived from 17 | # this software without specific prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Header-only (INTERFACE) target that exposes the headers under
# tools/util/include, plus a namespaced alias for in-tree consumers.
add_library(mutlass_tools_util_includes INTERFACE)
add_library(mt::mutlass::tools::util ALIAS mutlass_tools_util_includes)
set_target_properties(mutlass_tools_util_includes PROPERTIES EXPORT_NAME tools::util)

# Include path differs between the build tree and an installed package, so it
# is expressed with generator expressions.
# NOTE(review): both expressions were reduced to a bare `$` in this listing;
# they are reconstructed here to match the install() rule below -- confirm
# against the upstream file.
target_include_directories(
  mutlass_tools_util_includes
  INTERFACE
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  $<INSTALL_INTERFACE:include>
  )

# Ship the headers themselves.
install(
  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/
  )

# Register the target in the MtMutlass export set.
install(
  TARGETS mutlass_tools_util_includes
  EXPORT MtMutlass
  )
-------------------------------------------------------------------------------- /tools/util/include/mutlass/util/GPU_Clock.hpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads").
All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | **************************************************************************************************/ 32 | 33 | #pragma once 34 | 35 | #include 36 | 37 | struct GPU_Clock 38 | { 39 | GPU_Clock() { 40 | musaEventCreate(&start_); 41 | musaEventCreate(&stop_); 42 | musaEventRecord(start_); 43 | } 44 | 45 | ~GPU_Clock() { 46 | musaEventDestroy(start_); 47 | musaEventDestroy(stop_); 48 | } 49 | 50 | void start() { 51 | musaEventRecord(start_); 52 | } 53 | 54 | float milliseconds() { 55 | musaEventRecord(stop_); 56 | musaEventSynchronize(stop_); 57 | float time; 58 | musaEventElapsedTime(&time, start_, stop_); 59 | return time; 60 | } 61 | 62 | float seconds() { 63 | return milliseconds() * float(1e-3); 64 | } 65 | 66 | private: 67 | musaEvent_t start_, stop_; 68 | }; 69 | -------------------------------------------------------------------------------- /tools/util/include/mutlass/util/exceptions.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. 
Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | * 31 | ******************************************************************************/ 32 | 33 | #pragma once 34 | 35 | /** 36 | * \file 37 | * \brief C++ exception semantics for MUSA error codes 38 | */ 39 | 40 | #include 41 | #include 42 | #include 43 | 44 | #include "mutlass/platform/platform.h" 45 | 46 | namespace mutlass { 47 | 48 | /// C++ exception wrapper for MUSA \p musaError_t 49 | class musa_exception : public std::exception { 50 | public: 51 | /// Constructor 52 | musa_exception(const char* msg = "", musaError_t err = musaErrorUnknown) : msg(msg), err(err) {} 53 | 54 | /// Returns the underlying MUSA \p musaError_t 55 | musaError_t musaError() const { return err; } 56 | 57 | protected: 58 | /// Explanatory string 59 | const char* msg; 60 | 61 | /// Underlying MUSA \p musaError_t 62 | musaError_t err; 63 | }; 64 | 65 | /// Writes a musa_exception instance to an output stream 66 | inline std::ostream& operator<<(std::ostream& out, musa_exception const& e) { 67 | return out << e.what() << ": " << musaGetErrorString(e.musaError()); 68 | } 69 | 70 | } // namespace mutlass 71 | -------------------------------------------------------------------------------- /tools/util/include/mutlass/util/index_sequence.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | 33 | #pragma once 34 | 35 | #include "mutlass/mutlass.h" 36 | #include "mutlass/numeric_types.h" 37 | 38 | // integer_sequence moved to mutlass/numeric_types.h 39 | 40 | -------------------------------------------------------------------------------- /tools/util/include/mutlass/util/reference/host/error_metrics.h: -------------------------------------------------------------------------------- 1 | 2 | /*************************************************************************************************** 3 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 
4 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 5 | * SPDX-License-Identifier: BSD-3-Clause 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are met: 9 | * 10 | * 1. Redistributions of source code must retain the above copyright notice, this 11 | * list of conditions and the following disclaimer. 12 | * 13 | * 2. Redistributions in binary form must reproduce the above copyright notice, 14 | * this list of conditions and the following disclaimer in the documentation 15 | * and/or other materials provided with the distribution. 16 | * 17 | * 3. Neither the name of the copyright holder nor the names of its 18 | * contributors may be used to endorse or promote products derived from 19 | * this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | * 32 | **************************************************************************************************/ 33 | #pragma once 34 | 35 | #include 36 | 37 | #include "mutlass/mutlass.h" 38 | #include "mutlass/complex.h" 39 | #include "mutlass/util/reference/host/tensor_reduce.h" 40 | #include "mutlass/core_io.h" 41 | 42 | namespace mutlass { 43 | namespace reference { 44 | namespace host { 45 | 46 | /// Helper to compute the relative error metric for tensor A_computed w.r.t. to tensor A_reference 47 | template < 48 | typename Element, 49 | typename Layout, 50 | typename ComputeType = double 51 | > 52 | ComputeType TensorRelativeErrorMetric( 53 | TensorView view_A_computed, 54 | TensorView view_B_reference, 55 | ComputeType identity = ComputeType() 56 | ) { 57 | 58 | return mutlass::reference::host::TensorNormDiff(view_A_computed, view_B_reference, identity) / 59 | mutlass::reference::host::TensorNorm(view_B_reference, identity); 60 | } 61 | 62 | 63 | /////////////////////////////////////////////////////////////////////////////////////////////////// 64 | 65 | } // namespace host 66 | } // namespace reference 67 | } // namespace mutlass 68 | -------------------------------------------------------------------------------- /tools/util/include/mutlass/util/reference/host/tensor_norm.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************************************** 2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved. 3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * 1. 
Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * 2. Redistributions in binary form must reproduce the above copyright notice, 13 | * this list of conditions and the following disclaimer in the documentation 14 | * and/or other materials provided with the distribution. 15 | * 16 | * 3. Neither the name of the copyright holder nor the names of its 17 | * contributors may be used to endorse or promote products derived from 18 | * this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | **************************************************************************************************/ 32 | #pragma once 33 | 34 | 35 | #include "mutlass/mutlass.h" 36 | 37 | // The contents of this file have been moved to 'tensor_reduce' to cover other types of reductions. 38 | 39 | #include "mutlass/util/reference/host/tensor_reduce.h" 40 | 41 | /////////////////////////////////////////////////////////////////////////////////////////////////// 42 | 43 | 44 | --------------------------------------------------------------------------------