├── .github
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ ├── documentation_request.md
│ ├── feature_request.md
│ └── submit_question.md
└── workflows
│ ├── labeler.yml
│ └── stale.yml
├── .gitignore
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE
├── MUSA.cmake
├── README.md
├── README_CN.md
├── cmake
├── CTestTestfile.configure.cmake
├── googletest.cmake
└── version_extended.h.in
├── examples
├── 00_basic_gemm
│ ├── CMakeLists.txt
│ └── basic_gemm.mu
├── 01_quyuan_gemm_with_collective_builder
│ ├── CMakeLists.txt
│ └── collective_builder.mu
├── 02_mp31_fp8_gemm_with_collective_builder
│ ├── CMakeLists.txt
│ └── mp31_fp8_gemm.mu
├── 03_mp31_fp8_scaling_gemm
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── fp8_scaling_gemm.mu
│ └── reference
│ │ └── host
│ │ └── gemm_with_groupwise_scaling.hpp
├── CMakeLists.txt
└── common
│ ├── gather_tensor.hpp
│ └── helper.h
├── experimental
├── CMakeLists.txt
├── README.md
└── mp31_flash_attention_fwd
│ ├── CMakeLists.txt
│ ├── flash_attention.mu
│ ├── flash_attn_fwd.hpp
│ ├── fwd_params.hpp
│ ├── kernel_traits.hpp
│ ├── online_softmax.hpp
│ └── reference_attention.hpp
├── include
├── mute
│ ├── address_space.hpp
│ ├── algorithm
│ │ ├── axpby.hpp
│ │ ├── clear.hpp
│ │ ├── cooperative_copy.hpp
│ │ ├── cooperative_gemm.hpp
│ │ ├── copy.hpp
│ │ ├── fill.hpp
│ │ ├── functional.hpp
│ │ ├── gemm.hpp
│ │ ├── prefer.hpp
│ │ ├── prefetch.hpp
│ │ ├── tensor_algorithms.hpp
│ │ └── tuple_algorithms.hpp
│ ├── arch
│ │ ├── copy.hpp
│ │ ├── copy_mp31.hpp
│ │ ├── copy_mp31_desc.hpp
│ │ ├── copy_mp31_tme.hpp
│ │ ├── mma.hpp
│ │ ├── mma_mp22.hpp
│ │ ├── mma_mp31.hpp
│ │ ├── mma_mp31_desc.hpp
│ │ ├── mma_mp31_sqmma.hpp
│ │ ├── tce_desc.hpp
│ │ └── util.hpp
│ ├── atom
│ │ ├── copy_atom.hpp
│ │ ├── copy_traits.hpp
│ │ ├── copy_traits_mp31.hpp
│ │ ├── copy_traits_mp31_tme.hpp
│ │ ├── copy_traits_mp31_tme_swizzle.hpp
│ │ ├── mma_atom.hpp
│ │ ├── mma_traits.hpp
│ │ ├── mma_traits_mp22.hpp
│ │ ├── mma_traits_mp31.hpp
│ │ └── mma_traits_mp31_sqmma.hpp
│ ├── config.hpp
│ ├── container
│ │ ├── alignment.hpp
│ │ ├── array.hpp
│ │ ├── array_aligned.hpp
│ │ ├── array_subbyte.hpp
│ │ ├── bit_field.hpp
│ │ ├── musa_types.hpp
│ │ ├── tuple.hpp
│ │ └── type_list.hpp
│ ├── int_tuple.hpp
│ ├── layout.hpp
│ ├── layout_composed.hpp
│ ├── numeric
│ │ ├── arithmetic_tuple.hpp
│ │ ├── bfloat.hpp
│ │ ├── complex.hpp
│ │ ├── float8.hpp
│ │ ├── half.hpp
│ │ ├── int.hpp
│ │ ├── integer_sequence.hpp
│ │ ├── integer_subbyte.hpp
│ │ ├── integral_constant.hpp
│ │ ├── integral_ratio.hpp
│ │ ├── math.hpp
│ │ ├── numeric_types.hpp
│ │ ├── real.hpp
│ │ ├── tfloat.hpp
│ │ └── uint128.hpp
│ ├── pointer.hpp
│ ├── pointer_base.hpp
│ ├── pointer_flagged.hpp
│ ├── pointer_sparse.hpp
│ ├── pointer_swizzle.hpp
│ ├── stride.hpp
│ ├── swizzle.hpp
│ ├── swizzle_layout.hpp
│ ├── tensor.hpp
│ ├── tensor_predicate.hpp
│ ├── tensor_zip.hpp
│ ├── tile.hpp
│ ├── underscore.hpp
│ └── util
│ │ ├── debug.hpp
│ │ ├── print.hpp
│ │ └── type_traits.hpp
└── mutlass
│ ├── aligned_buffer.h
│ ├── arch
│ ├── arch.h
│ ├── barrier.hpp
│ ├── cache_operation.h
│ ├── memory.h
│ ├── mma.h
│ └── simd.h
│ ├── array.h
│ ├── array_planar_complex.h
│ ├── array_subbyte.h
│ ├── bfloat16.h
│ ├── blas3.h
│ ├── blas3_types.h
│ ├── complex.h
│ ├── constants.h
│ ├── conv
│ ├── conv2d_problem_size.h
│ ├── conv3d_problem_size.h
│ └── convolution.h
│ ├── coord.h
│ ├── core_io.h
│ ├── detail
│ ├── collective.hpp
│ ├── dependent_false.hpp
│ ├── helper_macros.hpp
│ ├── layout.hpp
│ ├── mainloop_fusion_helper_scale_factor.hpp
│ └── mma.hpp
│ ├── device_kernel.h
│ ├── epilogue
│ ├── collective
│ │ ├── builders
│ │ │ ├── mp22_builder.inl
│ │ │ ├── mp31_builder.inl
│ │ │ └── mp31_builder_common.inl
│ │ ├── collective_builder.hpp
│ │ ├── collective_epilogue.hpp
│ │ ├── default_epilogue.hpp
│ │ ├── detail.hpp
│ │ ├── epilogue_tensor_broadcast.hpp
│ │ ├── mp22_epilogue_evt.hpp
│ │ ├── mp22_epilogue_vectorized.hpp
│ │ └── mp31_epilogue_tme.hpp
│ ├── dispatch_policy.hpp
│ ├── fusion
│ │ ├── callbacks.hpp
│ │ └── operations.hpp
│ └── thread
│ │ ├── activation.h
│ │ ├── linear_combination.h
│ │ ├── linear_combination_params.h
│ │ └── scale_type.h
│ ├── fast_math.h
│ ├── float8.h
│ ├── floating_point_mtrtc.h
│ ├── functional.h
│ ├── gemm
│ ├── collective
│ │ ├── builders
│ │ │ ├── common.inl
│ │ │ ├── mp22_gemm_builder.inl
│ │ │ ├── mp31_sqmma_builder.inl
│ │ │ └── mp31_sqmma_common.inl
│ │ ├── collective_builder.hpp
│ │ ├── collective_mma.hpp
│ │ ├── mp22_mma_twostage.hpp
│ │ ├── mp31_mma_tme_sqmma_ss.hpp
│ │ ├── mp31_mma_tme_sqmma_ss_scaling.hpp
│ │ └── scaling_accumulation.hpp
│ ├── device
│ │ └── gemm_universal_adapter.h
│ ├── dispatch_policy.hpp
│ ├── gemm.h
│ ├── gemm_enumerated_types.h
│ ├── kernel
│ │ ├── gemm_universal.hpp
│ │ ├── mp22_gemm.hpp
│ │ ├── mp31_gemm_tme.hpp
│ │ ├── static_tile_scheduler.hpp
│ │ ├── tile_scheduler.hpp
│ │ └── tile_scheduler_params.hpp
│ └── threadblock
│ │ └── threadblock_swizzle.h
│ ├── gemm_coord.h
│ ├── gemm_coord.hpp
│ ├── half.h
│ ├── integer_subbyte.h
│ ├── kernel_hardware_info.h
│ ├── kernel_hardware_info.hpp
│ ├── kernel_launch.h
│ ├── layout
│ ├── layout.h
│ ├── matrix.h
│ ├── permute.h
│ ├── pitch_linear.h
│ ├── tensor.h
│ └── vector.h
│ ├── matrix.h
│ ├── matrix_coord.h
│ ├── matrix_shape.h
│ ├── musa_host_adapter.hpp
│ ├── mutlass.h
│ ├── numeric_conversion.h
│ ├── numeric_size.h
│ ├── numeric_types.h
│ ├── pipeline
│ ├── mp31_pipeline.hpp
│ └── pipeline.hpp
│ ├── pitch_linear_coord.h
│ ├── platform
│ └── platform.h
│ ├── predicate_vector.h
│ ├── quaternion.h
│ ├── real.h
│ ├── relatively_equal.h
│ ├── subbyte_reference.h
│ ├── tensor_coord.h
│ ├── tensor_ref.h
│ ├── tensor_ref_planar_complex.h
│ ├── tensor_view.h
│ ├── tensor_view_planar_complex.h
│ ├── tfloat32.h
│ ├── trace.h
│ ├── transform
│ └── thread
│ │ ├── transpose.h
│ │ └── unary_op.h
│ ├── uint128.h
│ ├── version.h
│ └── workspace.h
├── media
├── docs
│ └── quickstart.md
└── images
│ └── mutlass-0.2.0-gemm-performance.png
├── python
├── mutlass_library
│ ├── __init__.py
│ ├── gemm_operation.py
│ ├── generator.py
│ ├── library.py
│ └── manifest.py
└── setup_library.py
├── test
├── CMakeLists.txt
└── unit
│ ├── CMakeLists.txt
│ ├── common
│ ├── filter_architecture.cpp
│ ├── mma_inst_rr_test.hpp
│ └── mutlass_unit_test.h
│ ├── gemm
│ ├── CMakeLists.txt
│ └── device
│ │ ├── CMakeLists.txt
│ │ ├── default_gemm_configuration.hpp
│ │ ├── gemm_testbed_3x.hpp
│ │ ├── mp22_gemm_f32_f32_f32_simt.mu
│ │ ├── mp22_gemm_tensorop.mu
│ │ ├── mp31_gemm_sqmma_ss.mu
│ │ └── testbed_utils.h
│ ├── mute
│ ├── CMakeLists.txt
│ ├── core
│ │ ├── CMakeLists.txt
│ │ ├── array_subbyte.cpp
│ │ ├── bitfield.cpp
│ │ ├── coalesce.cpp
│ │ ├── compact_xmajor.cpp
│ │ ├── compare.cpp
│ │ ├── complement.cpp
│ │ ├── composition.cpp
│ │ ├── constants.cpp
│ │ ├── core_unit.cpp
│ │ ├── int_tuple.cpp
│ │ ├── inverse_left.cpp
│ │ ├── inverse_right.cpp
│ │ ├── logical_divide.cpp
│ │ ├── logical_product.cpp
│ │ ├── math.cpp
│ │ ├── mixedbits.cpp
│ │ ├── nullspace.cpp
│ │ ├── pointer.cpp
│ │ ├── reverse.cpp
│ │ ├── transform.cpp
│ │ └── tuple.cpp
│ ├── mma
│ │ ├── CMakeLists.txt
│ │ ├── mp22_mma.mu
│ │ └── mp31_mma.mu
│ ├── mp31
│ │ ├── CMakeLists.txt
│ │ ├── mp31_robust_buffer_access.mu
│ │ └── mp31_sqmma.mu
│ └── quyuan
│ │ ├── CMakeLists.txt
│ │ ├── mp22_mma.mu
│ │ └── vectorization_auto.mu
│ └── test_unit.cpp
└── tools
├── CMakeLists.txt
├── library
├── CMakeLists.txt
├── include
│ └── mutlass
│ │ └── library
│ │ ├── arch_mappings.h
│ │ ├── descriptions.h
│ │ ├── handle.h
│ │ ├── library.h
│ │ ├── manifest.h
│ │ ├── operation_table.h
│ │ ├── singleton.h
│ │ ├── types.h
│ │ └── util.h
└── src
│ ├── gemm_operation_3x.hpp
│ ├── handle.mu
│ ├── library_internal.h
│ ├── manifest.cpp
│ ├── operation_table.mu
│ ├── reference
│ ├── gemm_fp32out.mu
│ ├── gemm_fp_other.mu
│ ├── gemm_int8_canonical.mu
│ ├── gemm_reference_operation.h
│ ├── gemm_s8_s8_s32.mu
│ ├── gemm_u8_u8_s32.mu
│ └── initialize_reference_operations.mu
│ ├── singleton.mu
│ └── util.mu
├── profiler
├── CMakeLists.txt
├── include
│ └── mutlass
│ │ └── profiler
│ │ ├── debug.h
│ │ ├── device_allocation.h
│ │ ├── device_context.h
│ │ ├── enumerated_types.h
│ │ ├── gemm_operation_profiler.h
│ │ ├── gpu_timer.h
│ │ ├── mutlass_profiler.h
│ │ ├── operation_profiler.h
│ │ ├── options.h
│ │ ├── performance_report.h
│ │ ├── performance_result.h
│ │ └── problem_space.h
└── src
│ ├── device_allocation.mu
│ ├── device_context.mu
│ ├── enumerated_types.cpp
│ ├── gemm_operation_profiler.mu
│ ├── gpu_timer.cpp
│ ├── main.cpp
│ ├── mutlass_profiler.mu
│ ├── operation_profiler.mu
│ ├── options.mu
│ ├── performance_report.cpp
│ ├── performance_result.mu
│ └── problem_space.cpp
└── util
├── CMakeLists.txt
└── include
└── mutlass
└── util
├── GPU_Clock.hpp
├── command_line.h
├── debug.h
├── device_dump.h
├── device_groupnorm.h
├── device_layernorm.h
├── device_memory.h
├── device_nchw_to_nhwc.h
├── device_nhwc_padding.h
├── device_nhwc_pooling.h
├── device_nhwc_to_nchw.h
├── device_rmsnorm.h
├── device_utils.h
├── distribution.h
├── exceptions.h
├── gett_commandline.hpp
├── helper_cuda.hpp
├── host_reorder.h
├── host_tensor.h
├── host_tensor_planar_complex.h
├── host_uncompress.h
├── index_sequence.h
├── packed_stride.hpp
├── print_error.hpp
├── reference
├── detail
│ ├── inner_product.h
│ └── linear_to_coordinate.h
├── device
│ ├── convolution.h
│ ├── gemm.h
│ ├── gemm_complex.h
│ ├── gemm_planar_complex.h
│ ├── gett.hpp
│ ├── kernel
│ │ ├── gemm.h
│ │ ├── tensor_elementwise.h
│ │ └── tensor_foreach.h
│ ├── rank_2k_complex.h
│ ├── tensor_compare.h
│ ├── tensor_fill.h
│ ├── tensor_foreach.h
│ ├── tensor_reduce.h
│ ├── tensor_relu.h
│ └── thread
│ │ └── gemm.h
└── host
│ ├── convolution.h
│ ├── error_metrics.h
│ ├── gemm.h
│ ├── gemm_complex.h
│ ├── gemm_planar_complex.h
│ ├── gett.hpp
│ ├── rank_2k.h
│ ├── rank_2k_complex.h
│ ├── rank_k_complex.h
│ ├── symm.h
│ ├── symm_complex.h
│ ├── tensor_compare.h
│ ├── tensor_compare.hpp
│ ├── tensor_copy.h
│ ├── tensor_elementwise.h
│ ├── tensor_fill.h
│ ├── tensor_fill.hpp
│ ├── tensor_foreach.h
│ ├── tensor_norm.h
│ ├── tensor_reduce.h
│ ├── tensor_reduce.hpp
│ ├── trmm.h
│ └── trmm_complex.h
├── tensor_view_io.h
└── type_traits.h
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a bug report to help us improve MUTLASS
4 | title: "[BUG]"
5 | labels: "? - Needs Triage, bug"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 |
13 | **Steps/Code to reproduce bug**
14 | Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports to craft a minimal bug report. This helps us reproduce the issue you're having and resolve the issue more quickly.
15 |
16 | **Expected behavior**
17 | A clear and concise description of what you expected to happen.
18 |
19 | **Environment details (please complete the following information):**
20 | - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)]
21 |
22 | **Additional context**
23 | Add any other context about the problem here.
24 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/documentation_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Documentation request
3 | about: Report incorrect or needed documentation to improve MUTLASS
4 | title: "[DOC]"
5 | labels: "? - Needs Triage, documentation"
6 | assignees: ''
7 |
8 | ---
9 |
10 | ## Report incorrect documentation
11 |
12 | **Location of incorrect documentation**
13 | Provide links and line numbers if applicable.
14 |
15 | **Describe the problems or issues found in the documentation**
16 | A clear and concise description of what you found to be incorrect.
17 |
18 | **Steps taken to verify documentation is incorrect**
19 | List any steps you have taken:
20 |
21 | **Suggested fix for documentation**
22 | Detail proposed changes to fix the documentation if you have any.
23 |
24 | ---
25 |
26 | ## Report needed documentation
27 |
28 | **Report needed documentation**
29 | A clear and concise description of what documentation you believe is needed and why.
30 |
31 | **Describe the documentation you'd like**
32 | A clear and concise description of what you want to happen.
33 |
34 | **Steps taken to search for needed documentation**
35 | List any steps you have taken:
36 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for MUTLASS
4 | title: "[FEA]"
5 | labels: "? - Needs Triage, feature request"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I wish I could use MUTLASS to do [...]
12 |
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 |
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 |
19 | **Additional context**
20 | Add any other context, code examples, or references to existing implementations about the feature request here.
21 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/submit_question.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Submit question
3 | about: Ask a general question about MUTLASS
4 | title: "[QST]"
5 | labels: "? - Needs Triage, question"
6 | assignees: ''
7 |
8 | ---
9 |
10 | **What is your question?**
11 |
--------------------------------------------------------------------------------
/.github/workflows/labeler.yml:
--------------------------------------------------------------------------------
1 | name: "Pull Request Labeler"
2 | on:
3 | - pull_request_target
4 |
5 | jobs:
6 | triage:
7 | runs-on: ubuntu-latest
8 | steps:
9 | - uses: actions/labeler@main
10 | with:
11 | repo-token: "${{ secrets.GITHUB_TOKEN }}"
12 |
--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
1 | name: Mark inactive issues and pull requests
2 |
3 | on:
4 | schedule:
5 | - cron: "0 * * * *"
6 |
7 | jobs:
8 | mark-inactive-30d:
9 | runs-on: ubuntu-latest
10 | steps:
11 | - name: Mark 30 day inactive issues and pull requests
12 | uses: actions/stale@v3
13 | with:
14 | repo-token: ${{ secrets.GITHUB_TOKEN }}
15 | stale-issue-message: >
16 | This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days.
17 | Please close this issue if no further response or action is needed.
18 | Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
19 | This issue will be labeled `inactive-90d` if there is no activity in the next 60 days.
20 | stale-issue-label: "inactive-30d"
21 | exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
22 | days-before-issue-stale: 30
23 | days-before-issue-close: -1
24 | stale-pr-message: >
25 | This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days.
26 | Please close this PR if it is no longer required.
27 | Otherwise, please respond with a comment indicating any updates.
28 | This PR will be labeled `inactive-90d` if there is no activity in the next 60 days.
29 | stale-pr-label: "inactive-30d"
30 | exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
31 | days-before-pr-stale: 30
32 | days-before-pr-close: -1
33 | operations-per-run: 50
34 | mark-inactive-90d:
35 | runs-on: ubuntu-latest
36 | steps:
37 | - name: Mark 90 day inactive issues and pull requests
38 | uses: actions/stale@v3
39 | with:
40 | repo-token: ${{ secrets.GITHUB_TOKEN }}
41 | stale-issue-message: >
42 | This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days.
43 | Please close this issue if no further response or action is needed.
44 | Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
45 | stale-issue-label: "inactive-90d"
46 | exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
47 | days-before-issue-stale: 90
48 | days-before-issue-close: -1
49 | stale-pr-message: >
50 | This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days.
51 | Please close this PR if it is no longer required.
52 | Otherwise, please respond with a comment indicating any updates.
53 | stale-pr-label: "inactive-90d"
54 | exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
55 | days-before-pr-stale: 90
56 | days-before-pr-close: -1
57 | operations-per-run: 50
58 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # PyCache files
2 | __pycache__/
3 | mutlass_library.egg-info/
4 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # MooreThreads MUTLASS Changelog
2 |
3 | ## [0.2.0](https://github.com/MooreThreads/mutlass/tags/v0.2.0) (2025-02-26)
4 |
5 | - MP31 Features:
6 | - Squad-level MMA(SQMMA) and Warp-level MMA primitives with rich data types (TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8 etc.).
7 | - Tensor Memory Engine(TME) and [RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu) primitives.
8 | - New GEMM mainloop and epilogue targeting MP31 architecture that achieve high performance with TME and SQMMA.
9 | - New tile scheduler to support CTA swizzle for MP31 kernels.
10 | - New experimental directory housing the implementations that are not yet stable and may have significant changes in the future.
11 | - [Prototype of Flash Attention Forward](./experimental/mp31_flash_attention_fwd/) targeting MP31 architecture with TME, RobustBufferAccess and SQMMA.
12 | - New [FP8 GEMM with groupwise scaling](./examples/03_mp31_fp8_scaling_gemm/).
13 | - Upgrade the backend from CUTLASS/CuTe 3.5.0 to CUTLASS/CuTe 3.6.0.
14 |
15 |
16 | ## [0.1.1](https://github.com/MooreThreads/mutlass/tags/v0.1.1) (2024-09-30)
17 |
18 | - [MuTe](./include/mute), a core library and backend adapted from CUTLASS CuTe
19 | - Quyuan Features
20 | - MMA primitives: TensorFloat32, BFloat16, Float16, INT8
21 | - FMA/MMA GEMM Kernels targeting the Quyuan architecture
22 | - Note: this is a beta release. Further updates to MUTLASS will include performance improvements, feature enablement, and possible breaking changes to the API
23 | - MUTLASS Profiler, Library, and Utilities
24 | - Two examples that demonstrate the usage of the [low-level API](./examples/00_basic_gemm) and the [collective builders](./examples/01_quyuan_gemm_with_collective_builder) to build GEMM kernels
25 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [中文版](./README_CN.md)
2 |
3 | # MUTLASS 0.2.0
4 |
5 | _MUTLASS 0.2.0 - February 2025_
6 |
7 | MUTLASS(MUSA Templates for Linear Algebra Subroutines) is a header-only library for implementing high-performance matrix-matrix multiplication (GEMM) within MUSA(**M**eta-computing **U**nified **S**ystem **A**rchitecture). It incorporates strategies for hierarchical decomposition and data movement similar to those used to implement muDNN.
8 |
9 | See the [Quick Start Guide](./media/docs/quickstart.md) to get started quickly.
10 |
11 | Note: MUTLASS uses the CuTe library, introduced in CUTLASS 3.x, as the backend, and thus is incompatible with most implementations of CUTLASS 2.x.
12 |
13 | # What's New in MUTLASS 0.2.0
14 |
15 | MUTLASS 0.2.0 is an update to MUTLASS adding:
16 |
17 | - MP31 Features:
18 | - Squad-level MMA(SQMMA) and Warp-level MMA primitives with rich data types (TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8 etc.).
19 | - Tensor Memory Engine(TME) and [RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu) primitives.
20 | - New GEMM mainloop and epilogue targeting MP31 architecture that achieve high performance with TME and SQMMA.
21 | - New tile scheduler to support CTA swizzle for MP31 kernels.
22 | - New experimental directory housing the implementations that are not yet stable and may have significant changes in the future.
23 | - [Prototype of Flash Attention Forward](./experimental/mp31_flash_attention_fwd/) targeting MP31 architecture with TME, RobustBufferAccess and SQMMA.
24 | - New [FP8 GEMM with groupwise scaling](./examples/03_mp31_fp8_scaling_gemm/).
25 | - Upgrade the backend from CUTLASS/CuTe 3.5.0 to CUTLASS/CuTe 3.6.0.
26 |
27 |
28 | Minimum requirements:
29 |
30 | - Architecture: Quyuan
31 |
32 | - Compiler: MCC 4.0.0
33 |
34 | - MUSA Toolkit version: 4.0.0
35 |
36 |
37 | **See the [CHANGELOG](./CHANGELOG.md) for a detailed listing of releases and updates.**
38 |
39 | # Performance
40 |
41 |
42 | ![MUTLASS 0.2.0 GEMM performance](./media/images/mutlass-0.2.0-gemm-performance.png)
43 |
44 | The above figure shows the relative performance of the tensorop GEMM compared with muDNN. The performance of the TF32 data type will be further optimized in the next release.
45 |
46 | # Documentation
47 |
48 | - [Quick Start Guide](./media/docs/quickstart.md) - build and run MUTLASS
49 |
50 | # Building MUTLASS
51 |
52 | MUTLASS is a header-only template library and does not need to be built to be used by other projects. Client applications should target MUTLASS's `include/` directory in their include paths.
53 |
54 | MUTLASS unit tests, examples, and utilities can be built with CMake. The minimum version of CMake is given in the [QuickStart guide](./media/docs/quickstart.md).
55 |
56 | Create a build directory within the MUTLASS project, then run CMake. By default MUTLASS will build kernels for MUSA architecture versions 2.2 and 3.1.
57 |
--------------------------------------------------------------------------------
/README_CN.md:
--------------------------------------------------------------------------------
1 | [English](./README.md)
2 |
3 | # MUTLASS 0.2.0
4 |
5 | *MUTLASS 0.2.0 - 2025年2月*
6 |
7 | MUTLASS(MUSA Templates for Linear Algebra Subroutines)是用于在MUSA(**M**eta-computing **U**nified **S**ystem **A**rchitecture)上实现高性能矩阵乘法运算的纯头文件库,采用了与实现muDNN类似的分层分解和数据搬运策略。
8 |
9 | 参考[快速入门指南](./media/docs/quickstart.md)来快速入门使用。
10 |
11 | 注意:MUTLASS使用了在CUTLASS 3.x引入的CuTe库作为后端,因此与大多数CUTLASS 2.x的实现并不兼容。
12 |
13 | # MUTLASS 0.2.0新增
14 |
15 | MUTLASS 0.2.0是MUTLASS的一次版本更新,添加了:
16 |
17 | - MP31特性:
18 |
19 | - 支持丰富数据类型的Squad-level MMA(SQMMA)和Warp-level MMA原语,包含TF32/FP16/BF16/[FP8](./examples/02_mp31_fp8_gemm_with_collective_builder)/S8等多种精度。
20 |
21 | - Tensor Memory Engine(TME)及[RobustBufferAccess](./test/unit/mute/mp31/mp31_robust_buffer_access.mu)原语。
22 |
23 | - 新适用于MP31架构的矩阵乘法核心循环及后处理实现,基于TME和SQMMA实现高性能的矩阵乘法计算。
24 |
25 | - 新适用于MP31架构算子的Tile调度器,用于实现更好的线程组调度。
26 |
27 | - 新的*experimental*目录,用于存放尚未稳定或可能在未来有重大改变的代码实现。
28 |
29 | - 针对MP31架构的[FlashAttention前向原型](./experimental/mp31_flash_attention_fwd/),运用了TME、RobustBufferAccess和SQMMA等新特性。
30 |
31 | - 新的[Groupwise Scaling FP8矩阵乘法](./examples/03_mp31_fp8_scaling_gemm/)。
32 |
33 | - 将后端库从CUTLASS/CuTe 3.5.0升级到CUTLASS/CuTe 3.6.0。
34 |
35 |
36 | 最低要求:
37 |
38 | - 架构:曲院
39 |
40 | - 编译器:MCC 4.0.0
41 |
42 | - MUSA工具包:4.0.0
43 |
44 |
45 | **参考[变更日志](./CHANGELOG.md)获取更详细的发布及更新信息。**
46 |
47 | # 性能
48 |
49 | # 文档
50 |
51 | - [快速入门指南](./media/docs/quickstart.md) - 编译和运行MUTLASS
52 |
53 |
54 | # 编译MUTLASS
55 |
56 | MUTLASS是一个模板纯头文件库,因此在被其他项目使用时不需要单独编译。用户应用将MUTLASS的`include/`目录指定到项目头文件路径中即可使用。
57 |
58 | MUTLASS的单元测试、实例和工具都使用CMake进行编译构建。编译构建所需要的最低CMake版本在[快速入门指南](./media/docs/quickstart.md)中给出。
59 |
60 | 在MUTLASS中创建一个单独的build目录,并执行CMake即可编译。默认情况下,MUTLASS会编译MUSA架构2.2和3.1的实现。
61 |
--------------------------------------------------------------------------------
/cmake/CTestTestfile.configure.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | # Generated file
31 |
32 | set(TEST_SETS_SUPPORTED @TEST_SETS_SUPPORTED@)
33 |
34 | set(TEST_EXE_PATH @TEST_EXE_PATH@)
35 | set(TEST_EXE_WORKING_DIRECTORY @TEST_EXE_WORKING_DIRECTORY@)
36 | set(MUTLASS_USE_EXTENDED_ADD_TEST_FORMAT @TEST_USE_EXTENDED_FORMAT@)
37 |
38 | if (DEFINED ENV{MUTLASS_TEST_EXECUTION_ENVIRONMENT})
39 | set(_MUTLASS_TEST_EXECUTION_ENVIRONMENT $ENV{MUTLASS_TEST_EXECUTION_ENVIRONMENT})
40 | else()
41 | set(_MUTLASS_TEST_EXECUTION_ENVIRONMENT @MUTLASS_TEST_EXECUTION_ENVIRONMENT@)
42 | endif()
43 |
44 | @_INLINE_PER_TEST_CODE@
45 |
--------------------------------------------------------------------------------
/cmake/googletest.cmake:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | include(FetchContent)
31 |
32 | set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
33 |
34 | if(GOOGLETEST_DIR)
35 | set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
36 | endif()
37 |
38 | set(GTEST_REPOSITORY "https://github.com/google/googletest.git" CACHE STRING "GoogleTest repo to fetch")
39 |
40 | FetchContent_Declare(
41 | googletest
42 | GIT_REPOSITORY ${GTEST_REPOSITORY}
43 | GIT_TAG v1.13.0
44 | )
45 |
46 | FetchContent_GetProperties(googletest)
47 |
48 | if(NOT googletest_POPULATED)
49 | FetchContent_Populate(googletest)
50 | if (MSVC)
51 | set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
52 | endif()
53 | add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
54 | endif()
55 |
--------------------------------------------------------------------------------
/cmake/version_extended.h.in:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #define MUTLASS_BUILD @MUTLASS_VERSION_BUILD@
35 | #define MUTLASS_REVISION "@MUTLASS_REVISION@"
36 |
--------------------------------------------------------------------------------
/examples/00_basic_gemm/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_example_add_executable( # helper defined in examples/CMakeLists.txt
30 | 00_basic_gemm # NAME: the executable target created for this example
31 | basic_gemm.mu # remaining arguments are forwarded to mutlass_add_executable
32 | )
33 |
--------------------------------------------------------------------------------
/examples/01_quyuan_gemm_with_collective_builder/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_example_add_executable( # helper defined in examples/CMakeLists.txt
30 | 01_collective_builder # NAME: the executable target created for this example
31 | collective_builder.mu # remaining arguments are forwarded to mutlass_add_executable
32 | )
33 |
--------------------------------------------------------------------------------
/examples/02_mp31_fp8_gemm_with_collective_builder/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_example_add_executable( # helper defined in examples/CMakeLists.txt
30 | 02_mp31_fp8_gemm # NAME: the executable target created for this example
31 | mp31_fp8_gemm.mu # remaining arguments are forwarded to mutlass_add_executable
32 | )
33 |
--------------------------------------------------------------------------------
/examples/03_mp31_fp8_scaling_gemm/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_example_add_executable( # helper defined in examples/CMakeLists.txt
30 | 03_fp8_scling_gemm # NAME of the target. NOTE(review): "scling" looks like a typo for "scaling" (the directory is 03_mp31_fp8_scaling_gemm); renaming would change the public target name, so confirm before fixing
31 | fp8_scaling_gemm.mu # remaining arguments are forwarded to mutlass_add_executable
32 | )
33 |
--------------------------------------------------------------------------------
/examples/03_mp31_fp8_scaling_gemm/README.md:
--------------------------------------------------------------------------------
1 | # FP8 Groupwise-Scaling GEMM
2 |
3 | ## Double Accumulator algorithm
4 |
5 | In the double accumulator algorithm, we maintain two accumulators. One is used to store the final result, and the other is used for the scaled accumulation of groups.
6 |
7 | Due to the limitation of the total number of registers, compared with the standard FP8 GEMM, the tile shape in this method will be halved.
8 |
9 |
10 | ## Iterative algorithm
11 |
12 | In the iterative algorithm, we continuously maintain and update the group scaling coefficients and apply them to the same accumulator. Therefore, we can use the same tile shape as the standard FP8 GEMM to further improve performance.
13 |
14 | It can be simply described by the following formula.
15 |
16 | $S_0 \cdot A_0B_0 + S_1\cdot A_1B_1+S_2\cdot A_2B_2=((\frac{S_0}{S_1}\cdot A_0B_0 + A_1B_1)\cdot \frac{S_1}{S_2} +A_2B_2)\cdot S_2$
17 |
18 | We will release the implementation of this algorithm in the future.
19 |
20 |
--------------------------------------------------------------------------------
/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | set(MUTLASS_EXAMPLES_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/common) # examples/common; added to every example's include path by mutlass_example_add_executable
31 | # Umbrella targets: mutlass_example_add_executable attaches each example to mutlass_examples and passes test_examples as a DEPENDEES entry.
32 | add_custom_target(mutlass_examples)
33 | add_custom_target(test_examples)
34 |
35 | function(mutlass_example_add_executable NAME)
36 | # Registers one example: creates executable NAME via mutlass_add_executable, links and installs it, then calls mutlass_add_executable_tests for it.
37 | set(options)
38 | set(oneValueArgs DISABLE_TESTS)
39 | set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS)
40 | cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
41 | # With the "_" prefix the parsed values are named __DISABLE_TESTS, __DEPENDS, __DEPENDEES, __TEST_COMMAND_OPTIONS and __UNPARSED_ARGUMENTS.
42 | if (NOT DEFINED __DISABLE_TESTS)
43 | set(__DISABLE_TESTS OFF)
44 | endif()
45 | # Every argument that is not one of the keywords above is forwarded unchanged.
46 | mutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS})
47 | # Make the mutlass_examples umbrella target depend on this example.
48 | add_dependencies(mutlass_examples ${NAME})
49 |
50 | target_link_libraries(
51 | ${NAME}
52 | PRIVATE
53 | MUTLASS
54 | mutlass_tools_util_includes
55 | musart
56 | musa_driver
57 | )
58 |
59 | target_include_directories(
60 | ${NAME}
61 | PRIVATE
62 | ${MUTLASS_EXAMPLES_COMMON_SOURCE_DIR}
63 | ${MUTLASS_EXAMPLES_UTILS_DIR} # NOTE(review): not set in this file - presumably defined by a parent CMakeLists.txt; confirm
64 | )
65 |
66 | install(
67 | TARGETS ${NAME}
68 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
69 | )
70 | # Test registration; test_examples is always added to the DEPENDEES list, DISABLE_TESTS is OFF unless the caller passed it.
71 | mutlass_add_executable_tests(
72 | test_examples_${NAME} ${NAME}
73 | DEPENDS ${__DEPENDS}
74 | DEPENDEES test_examples ${__DEPENDEES}
75 | TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS}
76 | DISABLE_EXECUTABLE_INSTALL_RULE
77 | DISABLE_TESTS ${__DISABLE_TESTS}
78 | )
79 |
80 | endfunction()
81 |
82 | foreach(EXAMPLE
83 | 00_basic_gemm
84 | 01_quyuan_gemm_with_collective_builder
85 | 02_mp31_fp8_gemm_with_collective_builder
86 | 03_mp31_fp8_scaling_gemm
87 | )
88 | # Each entry must be the exact name of a subdirectory of examples/ that contains its own CMakeLists.txt.
89 | add_subdirectory(${EXAMPLE})
90 |
91 | endforeach()
92 |
--------------------------------------------------------------------------------
/experimental/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | set(MUTLASS_EXPERIMENTAL_COMMON_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../examples/common) # points at examples/common, relative to this directory
30 | # Umbrella targets for the experimental executables and their tests.
31 | add_custom_target(mutlass_experimental)
32 | add_custom_target(test_experimental)
33 | # Add every experimental subdirectory listed below.
34 | foreach(EXPERIMENTAL
35 | mp31_flash_attention_fwd
36 | )
37 |
38 | add_subdirectory(${EXPERIMENTAL})
39 |
40 | endforeach()
41 |
--------------------------------------------------------------------------------
/experimental/README.md:
--------------------------------------------------------------------------------
1 | # Experimental
2 |
3 | This directory is intended for the development of "experimental" implementations. These kernels or functions are not well-tested and do not have a stable API. Thus, they are not released as a part of the examples. We also do not recommend relying on the implementations here in formal products.
4 |
5 | When a kernel or function becomes mature, it will be moved to the examples for demonstration, or integrated into MUTLASS.
6 |
--------------------------------------------------------------------------------
/experimental/mp31_flash_attention_fwd/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | function(mutlass_experimental_mp31_fa_fwd_add_executable NAME) # creates executable NAME, links and installs it, then calls mutlass_add_executable_tests for it
30 | set(options)
31 | set(oneValueArgs DISABLE_TESTS)
32 | set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS)
33 | # With the "_" prefix the parsed values are named __DISABLE_TESTS, __DEPENDS, __DEPENDEES, __TEST_COMMAND_OPTIONS and __UNPARSED_ARGUMENTS.
34 | cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
35 |
36 | if (NOT DEFINED __DISABLE_TESTS)
37 | set(__DISABLE_TESTS OFF)
38 | endif()
39 |
40 | # Add -fmusa-flush-denormals-to-zero to enable the fast exp2 instruction.
41 | list(APPEND MUTLASS_MUSA_MCC_FLAGS -fmusa-flush-denormals-to-zero) # modified in function scope only, so the caller's variable is unchanged; presumably read by mutlass_add_executable below - confirm
42 | # Every argument that is not one of the keywords above is forwarded unchanged.
43 | mutlass_add_executable(${NAME} ${__UNPARSED_ARGUMENTS})
44 | # Make the mutlass_experimental umbrella target depend on this executable.
45 | add_dependencies(mutlass_experimental ${NAME})
46 |
47 | target_link_libraries(
48 | ${NAME}
49 | PRIVATE
50 | MUTLASS
51 | mutlass_tools_util_includes
52 | musa_driver
53 | musart
54 | )
55 |
56 | target_include_directories(
57 | ${NAME}
58 | PRIVATE
59 | ${MUTLASS_EXPERIMENTAL_COMMON_SOURCE_DIR}
60 | )
61 |
62 | install(
63 | TARGETS ${NAME}
64 | RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
65 | )
66 | # NOTE(review): the test name keeps the "test_examples_" prefix although it is attached to test_experimental - confirm this is intended.
67 | mutlass_add_executable_tests(
68 | test_examples_${NAME} ${NAME}
69 | DEPENDS ${__DEPENDS}
70 | DEPENDEES test_experimental ${__DEPENDEES}
71 | TEST_COMMAND_OPTIONS ${__TEST_COMMAND_OPTIONS}
72 | DISABLE_EXECUTABLE_INSTALL_RULE
73 | DISABLE_TESTS ${__DISABLE_TESTS}
74 | )
75 |
76 | endfunction()
77 |
78 | mutlass_experimental_mp31_fa_fwd_add_executable( # helper defined earlier in this file
79 | mp31_fa_fwd # NAME: the executable target
80 | flash_attention.mu # remaining arguments are forwarded to mutlass_add_executable
81 | )
82 |
--------------------------------------------------------------------------------
/include/mute/address_space.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * SPDX-License-Identifier: BSD-3-Clause
4 | *
5 | * Redistribution and use in source and binary forms, with or without
6 | * modification, are permitted provided that the following conditions are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright notice, this
9 | * list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above copyright notice,
12 | * this list of conditions and the following disclaimer in the documentation
13 | * and/or other materials provided with the distribution.
14 | *
15 | * 3. Neither the name of the copyright holder nor the names of its
16 | * contributors may be used to endorse or promote products derived from
17 | * this software without specific prior written permission.
18 | *
19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | *
30 | **************************************************************************************************/
31 |
32 | #pragma once
33 |
34 | namespace mute {
35 |
36 | enum class AddressSpace { // the numeric value of an enumerator is what make_ptr_with_address_space passes to __attribute__((address_space(...)))
37 | Generic = 0,
38 | Global = 1,
39 | Shared = 3,
40 | };
41 | // Reinterprets the 64-bit integer `ptr` as a void pointer qualified with address space AS. NOTE(review): the template parameter list and the cast types were missing from this listing; `int` as the cast type is an assumption - confirm against the repository.
42 | template <AddressSpace AS>
43 | MUTE_HOST_DEVICE constexpr
44 | void __attribute__((address_space(static_cast<int>(AS))))*
45 | make_ptr_with_address_space(uint64_t ptr) {
46 | return reinterpret_cast<void __attribute__((address_space(static_cast<int>(AS))))*>(ptr);
47 | }
48 |
49 | } // namespace mute
50 |
--------------------------------------------------------------------------------
/include/mute/algorithm/clear.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 |
38 | #include
39 |
40 | namespace mute
41 | {
42 |
43 | // NOTE(review): template parameter lists restored (they were missing from this listing) - confirm against the repository.
44 | // Accept mutable temporaries: the rvalue tensor is named here and passed on as an lvalue to clear(Tensor&)
45 | //
46 | template <class Engine, class Layout>
47 | MUTE_HOST_DEVICE
48 | void
49 | clear(Tensor<Engine,Layout>&& tensor)
50 | {
51 | return clear(tensor);
52 | }
53 |
54 | // NOTE(review): template parameter lists restored (they were missing from this listing) - confirm against the repository.
55 | // Set elements to zero: fills the tensor with a value-initialized element, T{}, where T is the tensor's value_type
56 | //
57 | template <class Engine, class Layout>
58 | MUTE_HOST_DEVICE
59 | void
60 | clear(Tensor<Engine,Layout>& tensor)
61 | {
62 | using T = typename Tensor<Engine,Layout>::value_type;
63 |
64 | fill(tensor, T{});
65 | }
66 |
67 | } // end namespace mute
68 |
--------------------------------------------------------------------------------
/include/mute/algorithm/fill.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | namespace mute
40 | {
41 |
42 | // NOTE(review): template parameter list restored (it was missing from this listing) - confirm against the repository.
43 | // Accept mutable temporaries: the tensor is named here and passed on as an lvalue to fill(Tensor&, T const&)
44 | //
45 | template <class Tensor, class T>
46 | MUTE_HOST_DEVICE
47 | void
48 | fill(Tensor&& tensor, T const& value)
49 | {
50 | return fill(tensor, value);
51 | }
52 |
53 | namespace detail
54 | {
55 | // NOTE(review): template parameter lists restored (they were missing from this listing) - confirm against the repository.
56 | // Prefer fill(tensor.data(), value), if possible: this overload only participates when that expression is well-formed
57 | template <class Tensor, class T>
58 | MUTE_HOST_DEVICE
59 | auto
60 | fill(Tensor& tensor, T const& value, prefer<1>)
61 | -> decltype(fill(tensor.data(), value))
62 | {
63 | fill(tensor.data(), value);
64 | }
65 |
66 | // Default implementation: assign value to every element, indexed 0 .. size(tensor)-1
67 | template <class Tensor, class T>
68 | MUTE_HOST_DEVICE
69 | void
70 | fill(Tensor& tensor, T const& value, prefer<0>)
71 | {
72 | MUTE_UNROLL
73 | for (int i = 0; i < size(tensor); ++i) {
74 | tensor(i) = value;
75 | }
76 | }
77 |
78 | } // end namespace detail
79 | // Assign value to every element of tensor; dispatches to detail::fill starting at the highest-priority overload (prefer<1>). NOTE(review): template parameter list restored - confirm against the repository.
80 | template <class Tensor, class T>
81 | MUTE_HOST_DEVICE
82 | void
83 | fill(Tensor& tensor, T const& value)
84 | {
85 | return detail::fill(tensor, value, prefer<1>{});
86 | }
87 |
88 | } // end namespace mute
89 |
--------------------------------------------------------------------------------
/include/mute/algorithm/prefer.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | namespace mute
35 | {
36 |
37 | // Infinite types that inherit from each other: prefer<N> derives from prefer<N-1>, ending at prefer<0>
38 | template <int N>
39 | struct prefer : prefer<N-1> {};
40 |
41 | template <>
42 | struct prefer<0> {};
43 | // NOTE(review): the template arguments were missing from this listing; the type of N (int here) is an assumption - upstream may use std::size_t, confirm.
44 | // Can be used to preferentially overload implementations
45 | // Higher N in prefer<N> have higher priority.
46 |
47 | } // end namespace mute
48 |
--------------------------------------------------------------------------------
/include/mute/arch/mma.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include // MUTE_HOST_DEVICE
35 | #include // mute::fma
36 | #include // mute::fma
37 |
38 | namespace mute
39 | {
40 |
41 | //
42 | // Direct FMA for any type: a one-element operation that forwards to an fma(d, a, b, c) free function.
43 | // Each *Registers alias is a one-element array of the corresponding operand type.
44 |
45 | template <class D, class A = D, class B = A, class C = D>
46 | struct UniversalFMA
47 | {
48 |   using DRegisters = D[1];
49 |   using ARegisters = A[1];
50 |   using BRegisters = B[1];
51 |   using CRegisters = C[1];
52 |
53 |   MUTE_HOST_DEVICE static constexpr void
54 |   fma(D      & d,
55 |       A const& a,
56 |       B const& b,
57 |       C const& c)
58 |   {
59 |     // Unqualified call: ADL may pick a type-specific fma, with mute::fma made visible as the fallback
60 |     using mute::fma;
61 |     fma(d, a, b, c);
62 |   }
63 | };
64 |
65 | } // end namespace mute
66 |
--------------------------------------------------------------------------------
/include/mute/arch/tce_desc.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * SPDX-License-Identifier: BSD-3-Clause
4 | *
5 | * Redistribution and use in source and binary forms, with or without
6 | * modification, are permitted provided that the following conditions are met:
7 | *
8 | * 1. Redistributions of source code must retain the above copyright notice, this
9 | * list of conditions and the following disclaimer.
10 | *
11 | * 2. Redistributions in binary form must reproduce the above copyright notice,
12 | * this list of conditions and the following disclaimer in the documentation
13 | * and/or other materials provided with the distribution.
14 | *
15 | * 3. Neither the name of the copyright holder nor the names of its
16 | * contributors may be used to endorse or promote products derived from
17 | * this software without specific prior written permission.
18 | *
19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | *
30 | **************************************************************************************************/
31 |
32 | #pragma once
33 |
34 | namespace mute {
35 | namespace TCE {
36 |
37 | // TCE MMA Major enum: tags an operand's major mode as either K or MN.
38 | enum class Major {
39 | K = 0, // K-major (NOTE(review): presumably K is the contiguous mode -- confirm)
40 | MN = 1 // MN-major (NOTE(review): presumably M or N is the contiguous mode -- confirm)
41 | };
42 |
43 | } // namespace TCE
44 | } // namespace mute
45 |
--------------------------------------------------------------------------------
/include/mute/container/array_aligned.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 | #include
36 |
37 | namespace mute
38 | {
39 | // A mute::array<T, N> whose alignment is raised to Alignment through MUTE_ALIGNAS.
40 | template <class T, size_t N, size_t Alignment = 16>
41 | struct MUTE_ALIGNAS(Alignment) array_aligned : mute::array<T, N> {};
42 |
43 | } // end namespace mute
44 |
--------------------------------------------------------------------------------
/include/mute/numeric/bfloat.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | namespace mute {
40 |
41 | using mutlass::bfloat16_t;
42 |
43 | //
44 | // Stream output (only compiled when __MUSACC_RTC__ is not defined)
45 | //
46 | // Writes a bfloat16_t to the stream by first converting it to float.
47 | #if !defined(__MUSACC_RTC__)
48 | MUTE_HOST std::ostream& operator<<(std::ostream& os, bfloat16_t const& v)
49 | {
50 |   return os << static_cast<float>(v);
51 | }
52 | #endif
53 |
54 | } // end namespace mute
55 |
--------------------------------------------------------------------------------
/include/mute/numeric/complex.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include // MUTE_HOST_DEVICE
35 |
36 | #include // mutlass::complex, mutlass::real, mutlass::imag, mutlass::is_complex
37 | namespace mute
38 | {
39 |
40 | using mutlass::complex;
41 | using mutlass::is_complex;
42 | using mutlass::RealType;
43 | using mutlass::real;
44 | using mutlass::imag;
45 | using mutlass::conj;
46 | // Variable-template shorthand for is_complex<T>::value.
47 | template <class T>
48 | static constexpr auto is_complex_v = is_complex<T>::value;
49 |
50 | /// Fused multiply-add for complex numbers: d = a * b + c, built from four scalar fma(d, a, b, c) calls
51 | template <class D, class A, class B, class C>
52 | MUTE_HOST_DEVICE constexpr
53 | void
54 | fma(complex<D>      & d,
55 |     complex<A> const& a,
56 |     complex<B> const& b,
57 |     complex<C> const& c)
58 | {
59 |   fma(d.real(),  a.real(), b.real(), c.real());   // real part: start from a.re * b.re + c.re
60 |   fma(d.imag(),  a.real(), b.imag(), c.imag());   // imag part: start from a.re * b.im + c.im
61 |   fma(d.real(), -a.imag(), b.imag(), d.real());   // real part: fold in -a.im * b.im
62 |   fma(d.imag(),  a.imag(), b.real(), d.imag());   // imag part: fold in  a.im * b.re
63 | }
64 |
65 | /// Fused multiply-add for triplets: accumulates in place, using c as both the addend and the destination
66 | template <class A, class B, class C>
67 | MUTE_HOST_DEVICE constexpr
68 | void
69 | fma(complex<A> const& a,
70 |     complex<B> const& b,
71 |     complex<C>      & c)
72 | {
73 |   return fma(c, a, b, c);   // delegate to the four-operand complex overload
74 | }
75 |
76 | } // end namespace mute
77 |
--------------------------------------------------------------------------------
/include/mute/numeric/float8.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | namespace mute {
40 |
41 | using mutlass::float_e4m3_t;
42 | using mutlass::float_e5m2_t;
43 |
44 | } // end namespace mute
45 |
--------------------------------------------------------------------------------
/include/mute/numeric/half.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 | #include
36 | #include
37 |
38 | namespace mute {
39 |
40 | using mutlass::half_t;
41 |
42 | } // end namespace mute
43 |
--------------------------------------------------------------------------------
/include/mute/numeric/real.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | namespace mute
37 | {
38 |
39 | /// Generic add: stores a + b into c
40 | template <class C, class A, class B>
41 | MUTE_HOST_DEVICE constexpr
42 | void
43 | add(C& c, A const& a, B const& b)
44 | {
45 |   c = a + b;
46 | }
47 |
48 | /// Generic multiply: stores a * b into c
49 | template <class C, class A, class B>
50 | MUTE_HOST_DEVICE constexpr
51 | void
52 | mul(C& c, A const& a, B const& b)
53 | {
54 |   c = a * b;
55 | }
56 | /// Generic fused multiply-add: stores a * b + c into d
57 | template <class D, class A, class B, class C>
58 | MUTE_HOST_DEVICE constexpr
59 | void
60 | fma(D& d, A const& a, B const& b, C const& c)
61 | {
62 |   d = a * b + c;
63 | }
64 |
65 | /// Fused multiply-add for triplets: accumulates in place, using c as both the addend and the destination
66 | template <class A, class B, class C>
67 | MUTE_HOST_DEVICE constexpr
68 | void
69 | fma(A const& a, B const& b, C& c)
70 | {
71 |   return fma(c, a, b, c);   // delegate to the four-operand overload
72 | }
73 |
74 | } // end namespace mute
75 |
--------------------------------------------------------------------------------
/include/mute/numeric/tfloat.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | namespace mute {
40 |
41 | using mutlass::tfloat32_t;
42 |
43 | //
44 | // Stream output (only compiled when __MUSACC_RTC__ is not defined)
45 | //
46 | // Writes a tfloat32_t to the stream by first converting it to float.
47 | #if !defined(__MUSACC_RTC__)
48 | MUTE_HOST std::ostream& operator<<(std::ostream& os, tfloat32_t const& v)
49 | {
50 |   return os << static_cast<float>(v);
51 | }
52 | #endif
53 |
54 | } // end namespace mute
55 |
--------------------------------------------------------------------------------
/include/mute/tensor_predicate.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 |
38 | namespace mute
39 | {
40 | // Callable that ignores whatever coordinates it is given and always yields the stored value.
41 | template <class T>
42 | struct ConstantTensor
43 | {
44 |   template <class... Coords>
45 |   MUTE_HOST_DEVICE constexpr
46 |   T const&
47 |   operator()(Coords const&...) const {
48 |     return val_;   // coordinates are unnamed and unused
49 |   }
50 |
51 |   T val_;          // the value returned for every coordinate
52 | };
53 | // Predicate callable that yields a default-constructed true_type for every coordinate.
54 | struct TrivialPredTensor
55 | {
56 |   template <class... Coords>
57 |   MUTE_HOST_DEVICE constexpr
58 |   true_type
59 |   operator()(Coords const&...) const {
60 |     return {};
61 |   }
62 | };
63 | // Predicate callable that forwards its coordinates to a user-supplied functor.
64 | template <class Fn>
65 | struct FunctionPredTensor
66 | {
67 |   MUTE_HOST_DEVICE constexpr
68 |   FunctionPredTensor(Fn const& fn) : fn_(fn) {}   // binds a reference; no copy of fn is made
69 |
70 |   template <class... Coords>
71 |   MUTE_HOST_DEVICE constexpr
72 |   auto
73 |   operator()(Coords const&... coords) const {
74 |     return fn_(coords...);
75 |   }
76 |
77 |   Fn const& fn_;   // held by reference: the functor must outlive this object
78 | };
79 |
80 | } // end namespace mute
81 |
--------------------------------------------------------------------------------
/include/mute/tile.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include
35 |
36 | #include
37 |
38 | namespace mute
39 | {
40 |
41 | //
42 | // A Tile is not a Layout, it's a tuple of Layouts or Tiles or Underscores
43 | //
44 |
45 | template <class... Layouts>
46 | using Tile = tuple<Layouts...>;
47 | // Trait alias: a type counts as a tile exactly when is_tuple reports it as a tuple.
48 | template <class Tile>
49 | using is_tile = is_tuple<Tile>;
50 | // Builds a Tile<Layouts...> from the given layouts, passed by const reference.
51 | template <class... Layouts>
52 | MUTE_HOST_DEVICE constexpr
53 | auto
54 | make_tile(Layouts const&... layouts)
55 | {
56 |   return Tile<Layouts...>(layouts...);
57 | }
58 |
59 | } // end namespace mute
60 |
--------------------------------------------------------------------------------
/include/mutlass/arch/arch.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Defines tags for architecture-specific configurations.
34 | */
35 |
36 | #pragma once
37 |
38 | #include "mutlass/mutlass.h"
39 |
40 | ////////////////////////////////////////////////////////////////////////////////////////////////////
41 |
42 | namespace mutlass {
43 | namespace arch {
44 |
45 | ////////////////////////////////////////////////////////////////////////////////////////////////////
46 | // Architecture tag type for Mp22.
47 | struct Mp22 {
48 | static int const kMinComputeCapability = 22; // minimum compute capability associated with this tag
49 | };
50 | // Architecture tag type for Mp31.
51 | struct Mp31 {
52 | static int const kMinComputeCapability = 31; // minimum compute capability associated with this tag
53 | };
54 |
55 | ////////////////////////////////////////////////////////////////////////////////////////////////////
56 |
57 | } // namespace arch
58 | } // namespace mutlass
59 |
60 | ////////////////////////////////////////////////////////////////////////////////////////////////////
61 |
--------------------------------------------------------------------------------
/include/mutlass/arch/cache_operation.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Directives related to cache operations
34 | */
35 | #pragma once
36 |
37 | #include "mutlass/mutlass.h"
38 |
39 | namespace mutlass {
40 | namespace arch {
41 |
42 | ////////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | /// Enumerates the cache behaviours that can be requested for an intrinsic memory operation.
45 | struct CacheOperation {
46 | enum Kind {
47 | /// Keep the line at every cache level; it is expected to be accessed again.
48 | Always = 0,
49 | /// Keep the line at the global cache level.
50 | Global = 1,
51 | /// Streaming access; the line is likely to be touched only once.
52 | Streaming = 2,
53 | /// The line will not be used again after this access.
54 | LastUse = 3,
55 | /// Bypass caching and fetch the data again on every access.
56 | Volatile = 4,
57 | /// Write back at all coherent levels.
58 | WriteBack = 5,
59 | /// Write through to system memory.
60 | WriteThrough = 6
61 | };
62 | };
63 |
64 | ////////////////////////////////////////////////////////////////////////////////////////////////////
65 |
66 | } // namespace arch
67 | } // namespace mutlass
68 |
--------------------------------------------------------------------------------
/include/mutlass/detail/collective.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include "mute/container/tuple.hpp"
35 | #include "mute/layout.hpp" // mute::size(shape)
36 |
37 | /////////////////////////////////////////////////////////////////////////////////////////////////
38 |
39 | namespace mutlass::gemm::collective {
40 |
41 | /////////////////////////////////////////////////////////////////////////////////////////////////
42 |
43 | namespace detail {
44 | /// Picks element I of Tuple (a bare type is treated as a one-element tuple); yields void when there is no element I.
45 | template <size_t I, class Tuple>
46 | struct deduce_mixed_width_dtype {
47 | static_assert(I >= 0u && I <= 2u, "Valid indices are 0, 1, and 2, which represent Operand, Scale, and Bias, respectively.");
48 |
49 | private:
50 | using underlying_tuple = mute::conditional_t<mute::is_tuple<Tuple>::value, Tuple, mute::tuple<Tuple>>;  // wrap a non-tuple type in a 1-tuple
51 | static constexpr size_t valid_index = mute::min(I, mute::tuple_size_v<underlying_tuple> - 1);  // clamped so tuple_element_t below is always well-formed
52 |
53 | public:
54 | using type = mute::conditional_t<(I < mute::tuple_size_v<underlying_tuple>),  // in range -> element type, otherwise void
55 | mute::tuple_element_t<valid_index, underlying_tuple>,
56 | void>;
57 | };
58 |
59 | template <size_t I, class Tuple>  // shorthand for deduce_mixed_width_dtype<I, Tuple>::type
60 | using deduce_mixed_width_dtype_t = typename deduce_mixed_width_dtype<I, Tuple>::type;
61 |
62 | } // namespace detail
63 |
64 | /////////////////////////////////////////////////////////////////////////////////////////////////
65 |
66 | } // namespace mutlass::gemm::collective
67 |
--------------------------------------------------------------------------------
/include/mutlass/epilogue/collective/collective_epilogue.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include <mutlass/detail/dependent_false.hpp>
35 |
36 | /////////////////////////////////////////////////////////////////////////////////////////////////
37 |
38 | namespace mutlass::epilogue::collective {
39 |
40 | /////////////////////////////////////////////////////////////////////////////////////////////////
41 | /// Primary template: selected only when no specialization matches DispatchPolicy, in which case it fails to compile.
42 | template <
43 | class DispatchPolicy,
44 | class... Args
45 | >
46 | class CollectiveEpilogue {  // the assert depends on a template parameter so it is only evaluated on instantiation
47 | static_assert(mutlass::detail::dependent_false<DispatchPolicy>, "Could not find an epilogue specialization.");
48 | };
49 |
50 | /////////////////////////////////////////////////////////////////////////////////////////////////
51 |
52 | } // namespace mutlass::epilogue::collective
53 |
54 | /////////////////////////////////////////////////////////////////////////////////////////////////
55 |
56 | #include "detail.hpp"
57 | #include "default_epilogue.hpp"
58 | #include "epilogue_tensor_broadcast.hpp"
59 | #include "mp31_epilogue_tme.hpp"
60 |
61 | /////////////////////////////////////////////////////////////////////////////////////////////////
62 |
--------------------------------------------------------------------------------
/include/mutlass/epilogue/dispatch_policy.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include "mutlass/numeric_conversion.h"
35 | #include "mutlass/epilogue/thread/scale_type.h"
36 |
37 | //////////////////////////////////////////////////////////////////////////////
38 |
39 | namespace mutlass::epilogue {
40 |
41 | //////////////////////////////////////////////////////////////////////////////
42 | //
43 | // Builder Epilogue Schedules
44 | //
45 | //////////////////////////////////////////////////////////////////////////////
46 |
47 | struct NoSmem {}; // empty tag type; one of the builder epilogue schedules listed in this section
48 | struct WithTme {}; // empty tag type; one of the builder epilogue schedules listed in this section
49 | struct EpilogueSimtVectorized {}; // empty tag type; one of the builder epilogue schedules listed in this section
50 | //////////////////////////////////////////////////////////////////////////////
51 |
52 | // Policy type Mp22CollectiveEpilogue: re-exports its template arguments as compile-time constants.
53 | template <int Stages_, int FragmentSize_>
54 | struct Mp22CollectiveEpilogue {
55 | // Value of the Stages_ argument.
56 | static constexpr int Stages = Stages_;
57 | // Value of the FragmentSize_ argument.
58 | static constexpr int FragmentSize = FragmentSize_;
59 | };
60 |
61 | // Policy type Mp31CollectiveEpilogue: every template argument is re-exported as a compile-time constant.
62 | template <
63 | int StagesC_, int StagesD_,
64 | int FragmentSize_,
65 | bool ReuseSmemC_,
66 | bool DelayTmeStore_ = false  // optional; false unless specified
67 | >
68 | struct Mp31CollectiveEpilogue {
69 | static constexpr int StagesC = StagesC_;
70 | static constexpr int StagesD = StagesD_;
71 | static constexpr int FragmentSize = FragmentSize_;
72 | static constexpr bool ReuseSmemC = ReuseSmemC_;
73 | static constexpr bool DelayTmeStore = DelayTmeStore_;
74 | };
75 |
76 | //////////////////////////////////////////////////////////////////////////////
77 |
78 | } // namespace mutlass::epilogue
79 |
--------------------------------------------------------------------------------
/include/mutlass/epilogue/thread/linear_combination_params.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Type-erased storage for the alpha and beta scalars of a linear-combination epilogue.
34 | */
35 |
36 | #pragma once
37 |
38 | /////////////////////////////////////////////////////////////////////////////////////////////////
39 |
40 | namespace mutlass {
41 | namespace epilogue {
42 | namespace thread {
43 |
44 | /////////////////////////////////////////////////////////////////////////////////////////////////
45 |
46 | struct LinearCombinationParams {  // type-erased alpha/beta: each scalar is kept as raw bytes in a 16-byte buffer
47 | uint64_t alpha_data[2];  // object representation of alpha; bytes beyond sizeof(ElementCompute) stay zero
48 | uint64_t beta_data[2];   // object representation of beta; same layout as alpha_data
49 |
50 | /// Default constructor: both buffers are zero-filled.
51 | MUTLASS_HOST_DEVICE
52 | LinearCombinationParams() : alpha_data {0lu, 0lu}, beta_data {0lu, 0lu} { }
53 | /// Stores alpha and beta of any type that fits in the buffers (enforced at compile time).
54 | template <typename ElementCompute>
55 | MUTLASS_HOST_DEVICE
56 | LinearCombinationParams(ElementCompute alpha, ElementCompute beta)
57 | : alpha_data {0lu, 0lu}, beta_data {0lu, 0lu}
58 | {
59 | static_assert(sizeof(ElementCompute) <= sizeof(alpha_data), "ElementCompute does not fit in LinearCombinationParams storage.");
60 | #if defined(__MUSA_ARCH__)
61 | reinterpret_cast<ElementCompute&>(alpha_data) = alpha;  // taken when __MUSA_ARCH__ is defined: assign through a reference onto the buffer
62 | reinterpret_cast<ElementCompute&>(beta_data) = beta;
63 | #else
64 | memcpy( alpha_data, &alpha, sizeof(ElementCompute) );  // otherwise: byte-wise copy of the value into the buffer
65 | memcpy( beta_data, &beta, sizeof(ElementCompute) );
66 | #endif
67 | }
68 | };
69 |
70 | /////////////////////////////////////////////////////////////////////////////////////////////////
71 |
72 | } // namespace thread
73 | } // namespace epilogue
74 | } // namespace mutlass
75 |
76 | /////////////////////////////////////////////////////////////////////////////////////////////////
77 |
--------------------------------------------------------------------------------
/include/mutlass/epilogue/thread/scale_type.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Enum defines the behaviors of the epilogue.
34 | */
35 |
36 | #pragma once
37 |
38 | #include "mutlass/mutlass.h"
39 |
40 | /////////////////////////////////////////////////////////////////////////////////////////////////
41 |
42 | namespace mutlass {
43 | namespace epilogue {
44 | namespace thread {
45 |
46 | /////////////////////////////////////////////////////////////////////////////////////////////////
47 |
48 | /// Enumerates how the epilogue scales the accumulator (Acc) and the source (C) to form D.
49 | /// Note :
50 | /// 1. "Scalar" alpha/beta is a single value coming from the host (constant param) or from device memory.
51 | /// 2. "Vector" alpha/beta is a vector that always comes from device memory.
52 | struct ScaleType {
53 | enum Kind {
54 | Default = 0, // D = scalar_alpha x Acc + scalar_beta x C
55 | NoBetaScaling = 1, // D = scalar_alpha x Acc + C
56 | OnlyAlphaScaling = 2, // D = scalar_alpha x Acc
57 | PerChannelScaling = 3, // D = vector_alpha x Acc + vector_beta x C
58 | OnlyAlphaPerChannelScaling = 4, // D = vector_alpha x Acc
59 | Nothing = 5 // D = Acc
60 | };
61 | };
62 |
63 | /////////////////////////////////////////////////////////////////////////////////////////////////
64 |
65 | } // namespace thread
66 | } // namespace epilogue
67 | } // namespace mutlass
68 |
--------------------------------------------------------------------------------
/include/mutlass/floating_point_mtrtc.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #pragma once
34 |
35 | namespace mutlass {
36 |
37 | ///////////////////////////////////////////////////////////////////////////////////////////////////
38 |
39 | // Floating-point classification categories: any floating-point number can be put in one of them.
40 | enum { // each category is declared as an enumerator and as a same-named macro with the same value
41 | FP_NAN = // enumerator name; not macro-expanded here, assuming FP_NAN has no earlier definition
42 | # define FP_NAN 0
43 | FP_NAN, // the macro is defined by now, so this expands to 0 (FP_NAN = 0)
44 | FP_INFINITE = // same pattern for the remaining categories
45 | # define FP_INFINITE 1
46 | FP_INFINITE, // expands to 1
47 | FP_ZERO =
48 | # define FP_ZERO 2
49 | FP_ZERO, // expands to 2
50 | FP_SUBNORMAL =
51 | # define FP_SUBNORMAL 3
52 | FP_SUBNORMAL, // expands to 3
53 | FP_NORMAL =
54 | # define FP_NORMAL 4
55 | FP_NORMAL // expands to 4
56 | };
57 |
58 | ///////////////////////////////////////////////////////////////////////////////////////////////////
59 |
60 | } // namespace mutlass
61 |
62 | ///////////////////////////////////////////////////////////////////////////////////////////////////
63 |
--------------------------------------------------------------------------------
/include/mutlass/gemm/collective/builders/mp31_sqmma_common.inl:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | #include "mutlass/gemm/gemm.h"
4 | #include "mute/atom/mma_traits_mp31_sqmma.hpp"
5 |
6 | /////////////////////////////////////////////////////////////////////////////////////////////////
7 |
8 | namespace mutlass::gemm::collective {
9 |
10 | /////////////////////////////////////////////////////////////////////////////////////////////////
11 |
12 | namespace detail {
13 |
14 | // Named constants for the MP31 SQMMA builder helpers in this file.
15 | // NOTE(review): both values are in bytes per their names; the hardware meaning is not shown here - confirm.
16 | //
17 | constexpr int tme_alignment_bytes = 4;
18 | constexpr int mp31_smem_capacity_bytes = 192 * 1024;  // == 196608
19 |
20 | template // NOTE(review): the template parameter list is missing in this listing, as are the template arguments of is_mn_major_A below - restore from the original header
21 | constexpr mute::TCE::Major
22 | sqmma_ss_tag_to_major_A() { // compile-time mapping from the is_mn_major_A classification to a TCE major mode
23 | if constexpr (mutlass::gemm::detail::is_mn_major_A()) {
24 | return mute::TCE::Major::MN; // classified as MN-major
25 | }
26 | else {
27 | return mute::TCE::Major::K; // everything else is reported as K-major
28 | }
29 | }
30 |
31 | template // NOTE(review): the template parameter list is missing in this listing, as are the template arguments of is_mn_major_B below - restore from the original header
32 | constexpr mute::TCE::Major
33 | sqmma_ss_tag_to_major_B() { // compile-time mapping from the is_mn_major_B classification to a TCE major mode
34 | if constexpr (mutlass::gemm::detail::is_mn_major_B()) {
35 | return mute::TCE::Major::MN; // classified as MN-major
36 | }
37 | else {
38 | return mute::TCE::Major::K; // everything else is reported as K-major
39 | }
40 | }
41 |
42 | template // NOTE(review): the template parameter list is missing in this listing - restore from the original header
43 | MUTE_HOST_DEVICE constexpr
44 | auto
45 | ss_smem_selector_A() // returns whatever make_canonical_gemm_smem_atom_layout produces for the A-side query below
46 | {
47 | using AtomOpTraits = MMA_Traits; // NOTE(review): the template argument of MMA_Traits is missing in this listing
48 | using AtomOpShape = typename AtomOpTraits::Shape_MNK;
49 |
50 | using AtomM = decltype(get<0>(AtomOpShape{})); // type of mode 0 of Shape_MNK
51 | using AtomK = decltype(get<2>(AtomOpShape{})); // type of mode 2 of Shape_MNK
52 |
53 | return mute::MP31::SQMMA::make_canonical_gemm_smem_atom_layout(); // NOTE(review): template arguments missing in this listing; presumably built from AtomM/AtomK - confirm
54 | }
55 |
56 | template // NOTE(review): the template parameter list is missing in this listing - restore from the original header
57 | MUTE_HOST_DEVICE constexpr
58 | auto
59 | ss_smem_selector_B() // returns whatever make_canonical_gemm_smem_atom_layout produces for the B-side query below
60 | {
61 | using AtomOpTraits = MMA_Traits; // NOTE(review): the template argument of MMA_Traits is missing in this listing
62 | using AtomOpShape = typename AtomOpTraits::Shape_MNK;
63 |
64 | using AtomN = decltype(get<1>(AtomOpShape{})); // type of mode 1 of Shape_MNK
65 | using AtomK = decltype(get<2>(AtomOpShape{})); // type of mode 2 of Shape_MNK
66 |
67 | return mute::MP31::SQMMA::make_canonical_gemm_smem_atom_layout(); // NOTE(review): template arguments missing in this listing; presumably built from AtomN/AtomK - confirm
68 | }
69 |
70 | template // NOTE(review): the template parameter list (which declares AlignmentA, AlignmentB, RequiredAlignment) is missing in this listing
71 | constexpr bool
72 | is_aligned() { // true only if both terms below are multiples of RequiredAlignment
73 | return ((mute::sizeof_bits_v * AlignmentA / 8) % RequiredAlignment == 0) && // A: bits * AlignmentA / 8, i.e. a byte count; sizeof_bits_v has lost its type argument here
74 | ((mute::sizeof_bits_v * AlignmentB / 8) % RequiredAlignment == 0); // B: same computation with AlignmentB
75 | }
76 |
77 | template // NOTE(review): the template parameter list is missing in this listing - restore from the original header
78 | constexpr bool
79 | is_input_fp8() { // NOTE(review): going by the name, reports whether both inputs are FP8 types; the is_same_v type arguments are missing here - confirm
80 | return ((mute::is_same_v || mute::is_same_v) && // first operand matches one of two candidate types
81 | (mute::is_same_v || mute::is_same_v)); // and so does the second operand
82 | }
83 |
84 | } // namespace detail
85 |
86 | /////////////////////////////////////////////////////////////////////////////////////////////////
87 |
88 | } // namespace mutlass::gemm::collective
89 |
90 | /////////////////////////////////////////////////////////////////////////////////////////////////
91 |
--------------------------------------------------------------------------------
/include/mutlass/gemm/collective/collective_mma.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include "mutlass/detail/dependent_false.hpp"
35 |
36 | /////////////////////////////////////////////////////////////////////////////////////////////////
37 |
38 | namespace mutlass::gemm::collective {
39 |
40 | /////////////////////////////////////////////////////////////////////////////////////////////////
41 | /// Primary template: selected only when no mainloop specialization matches, in which case it fails to compile.
42 | template <
43 | class DispatchPolicy,
44 | class TileShape,
45 | class ElementA,
46 | class StrideA,
47 | class ElementB,
48 | class StrideB,
49 | class TiledMma,
50 | class GmemTiledCopyA,
51 | class SmemLayoutAtomA,
52 | class SmemCopyAtomA,
53 | class TransformA,
54 | class GmemTiledCopyB,
55 | class SmemLayoutAtomB,
56 | class SmemCopyAtomB,
57 | class TransformB
58 | >
59 | struct CollectiveMma {  // assert the dependent-false value directly: comparing it with 0 would yield true and never fire
60 | static_assert(mutlass::detail::dependent_false<ElementA>, "Could not find a mainloop specialization.");
61 | };
62 |
63 | /////////////////////////////////////////////////////////////////////////////////////////////////
64 |
65 | } // namespace mutlass::gemm::collective
66 |
67 | /////////////////////////////////////////////////////////////////////////////////////////////////
68 |
69 | #include "mutlass/gemm/collective/mp22_mma_twostage.hpp"
70 | #include "mutlass/gemm/collective/mp31_mma_tme_sqmma_ss.hpp"
71 | #include "mutlass/gemm/collective/mp31_mma_tme_sqmma_ss_scaling.hpp"
72 | /////////////////////////////////////////////////////////////////////////////////////////////////
73 |
--------------------------------------------------------------------------------
/include/mutlass/gemm/threadblock/threadblock_swizzle.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Implements several possible threadblock-swizzling functions mapping blockIdx to
34 | GEMM problems.
35 | */
36 |
37 | #pragma once
38 |
39 | /////////////////////////////////////////////////////////////////////////////////////////////////
40 |
41 | namespace mutlass {
42 | namespace gemm {
43 | namespace threadblock {
44 |
45 | /////////////////////////////////////////////////////////////////////////////////////////////////
46 | /// Threadblock swizzling function for GEMMs. Empty tag type here; the parameter N is not used.
47 | /// NOTE(review): parameter list restored as <int N = 1> to match upstream CUTLASS -- confirm.
48 | template <int N = 1>
49 | struct GemmIdentityThreadblockSwizzle { };
50 |
51 | /////////////////////////////////////////////////////////////////////////////////////////////////
52 |
53 | } // namespace threadblock
54 | } // namespace gemm
55 | } // namespace mutlass
56 |
57 |
--------------------------------------------------------------------------------
/include/mutlass/gemm_coord.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | /*! \file
34 | \brief Utilities to convert a MuTe tuple to a GemmCoord or BatchedGemmCoord
35 | */
36 |
37 | #pragma once
38 |
39 | #include "mute/layout.hpp"
40 | #include "mutlass/gemm_coord.h"
41 |
42 | namespace mutlass {
43 | namespace gemm {
44 |
45 | /////////////////////////////////////////////////////////////////////////////////////////////////
46 | /// Converts a MuTe tuple of rank <= 4 into a GemmCoord (rank <= 3) or a BatchedGemmCoord (rank 4).
47 | template <class Tuple>
48 | MUTLASS_HOST_DEVICE
49 | auto
50 | to_gemm_coord(Tuple tuple) {
51 |   static_assert(mute::rank(tuple) <= 4, "Can only convert tuples of rank <= 4.");
52 |
53 |   if constexpr (mute::rank(tuple) <= 3) {
54 |     auto tuple_mnk = mute::append<3>(tuple, mute::Int<0>{});  // extend to three modes, using Int<0> as filler
55 |     return GemmCoord(mute::size<0>(tuple_mnk), mute::size<1>(tuple_mnk), mute::size<2>(tuple_mnk));
56 |   }
57 |   else {
58 |     return BatchedGemmCoord(mute::size<0>(tuple), mute::size<1>(tuple), mute::size<2>(tuple), mute::size<3>(tuple));
59 |   }
60 | }
61 |
62 | /////////////////////////////////////////////////////////////////////////////////////////////////
63 |
64 | } // namespace gemm
65 | } // namespace mutlass
66 |
67 | /////////////////////////////////////////////////////////////////////////////////////////////////
68 |
--------------------------------------------------------------------------------
/include/mutlass/kernel_hardware_info.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #if !defined(__MUSACC_RTC__)
35 | #include "musa_runtime.h"
36 |
37 | #include "mutlass/trace.h"
38 | #endif
39 |
40 | namespace mutlass {
41 |
42 | struct KernelHardwareInfo {
43 | // Pairs a device ordinal with a multiprocessor count and offers a host-side query for the latter.
44 | // Data members
45 | //
46 | int device_id = 0; // device ordinal; defaults to 0
47 | int sm_count = 0; // presumably the multiprocessor count; defaults to 0 and is not filled in by anything in this struct
48 |
49 | //
50 | // Methods
51 | //
52 | // query_device_multiprocessor_count(): returns the musaDevAttrMultiProcessorCount attribute, or 0 if either runtime call fails.
53 | #if !defined(__MUSACC_RTC__)
54 | static inline int
55 | query_device_multiprocessor_count(int device_id = 0) {
56 | musaError_t result = musaGetDevice(&device_id); // NOTE(review): writes into the parameter, so the caller's argument is never used -- confirm this is intended
57 | if (result != musaSuccess) {
58 | MUTLASS_TRACE_HOST(
59 | " musaGetDevice() returned error "
60 | << musaGetErrorString(result));
61 | return 0; // failure is reported as a count of 0, not as an error code
62 | }
63 | int multiprocessor_count; // written by musaDeviceGetAttribute below
64 | result = musaDeviceGetAttribute(&multiprocessor_count,
65 | musaDevAttrMultiProcessorCount, device_id);
66 | if (result != musaSuccess) {
67 | MUTLASS_TRACE_HOST(
68 | " musaDeviceGetAttribute() returned error "
69 | << musaGetErrorString(result));
70 | return 0; // same 0-on-failure convention as above
71 | }
72 | return multiprocessor_count;
73 | }
74 | #endif
75 | };
76 |
77 | } // namespace mutlass
78 |
--------------------------------------------------------------------------------
/include/mutlass/kernel_hardware_info.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | // Simply import .h version of header so as to avoid breaking any existing MUTLASS builds
35 | // after .hpp was changed to .h
36 | #include "mutlass/kernel_hardware_info.h"
37 |
--------------------------------------------------------------------------------
/include/mutlass/kernel_launch.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Defines structures and helpers to launch MUSA kernels within MUTLASS.
34 | */
35 |
36 | #pragma once
37 |
38 | #include "mutlass/mutlass.h"
39 |
40 | namespace mutlass {
41 |
42 | ///////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | /// Basic launch parameters of a MUSA kernel: grid shape, threadblock shape and dynamic SMEM size.
45 | struct KernelLaunchConfiguration {
46 |
47 |   /// Grid dimensions of the launch
48 |   dim3 grid;
49 |
50 |   /// Threadblock dimensions of the launch
51 |   dim3 block;
52 |
53 |   /// Dynamically allocated SMEM in bytes, on top of any static SMEM
54 |   size_t dynamic_smem;
55 |
56 |   //
57 |   // Methods
58 |   //
59 |
60 |   /// Builds a KernelLaunchConfiguration; defaults are a 1x1x1 grid, a 1x1x1 block and no dynamic SMEM
61 |   MUTLASS_HOST_DEVICE
62 |   KernelLaunchConfiguration(dim3 _grid = dim3(1,1,1),
63 |                             dim3 _block = dim3(1,1,1),
64 |                             size_t _dynamic_smem = 0)
65 |     : grid(_grid)
66 |     , block(_block)
67 |     , dynamic_smem(_dynamic_smem)
68 |   {
69 |   }
70 | };
71 |
72 | ///////////////////////////////////////////////////////////////////////////////////////////////////
73 |
74 | } // namespace mutlass
75 |
--------------------------------------------------------------------------------
/include/mutlass/layout/layout.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Defines layout functions used by TensorRef and derived classes.
34 |
35 | Layout functions map logical coordinates to linear memory. They often require additional
36 | data to describe strides between elements.
37 |
38 | Layout functions must implement all members in the public interface of IdentityTensorLayout<>
39 | defined in mutlass/tensor_ref.h.
40 | */
41 | #pragma once
42 |
43 | #include "mutlass/mutlass.h"
44 | #include "mutlass/matrix_coord.h"
45 | #include "mutlass/layout/matrix.h"
46 | #include "mutlass/layout/pitch_linear.h"
47 | #include "mutlass/layout/tensor.h"
48 | #include "mutlass/layout/vector.h"
49 |
50 | ///////////////////////////////////////////////////////////////////////////////////////////////////
51 |
52 | namespace mutlass {
53 | namespace layout {
54 |
55 | ///////////////////////////////////////////////////////////////////////////////////////////////////
56 |
57 |
58 | ///////////////////////////////////////////////////////////////////////////////////////////////////
59 |
60 | } // namespace layout
61 | } // namespace mutlass
62 |
63 | ///////////////////////////////////////////////////////////////////////////////////////////////////
64 |
--------------------------------------------------------------------------------
/include/mutlass/matrix_shape.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Defines a Shape template for matrix tiles
34 | */
35 | #pragma once
36 |
37 | #include "mutlass/mutlass.h"
38 | #include "mutlass/coord.h"
39 |
40 | namespace mutlass {
41 |
42 | /////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | /// Compile-time description of the extent of a matrix tile
45 | template <
46 |   int Row_,     ///< number of rows in the tile
47 |   int Column_   ///< number of columns in the tile
48 | >
49 | struct MatrixShape {
50 |   static const int kRow = Row_;               ///< number of rows
51 |   static const int kColumn = Column_;         ///< number of columns
52 |   static const int kCount = kRow * kColumn;   ///< number of elements (rows times columns)
53 |
54 |   //
55 |   // Static member functions
56 |   //
57 |   /// Returns the tile extent as a rank-2 coordinate built from (kRow, kColumn)
58 |   MUTLASS_HOST_DEVICE
59 |   static Coord<2> toCoord() {
60 |     return make_Coord(kRow, kColumn);
61 |   }
62 | };
63 |
64 | /////////////////////////////////////////////////////////////////////////////////////////////////
65 |
66 | } // namespace mutlass
67 |
--------------------------------------------------------------------------------
/include/mutlass/pipeline/pipeline.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2023 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | ////////////////////////////////////////////////////////////////////////////////////////////////////
35 |
36 | #include "mutlass/pipeline/mp31_pipeline.hpp"
37 | ////////////////////////////////////////////////////////////////////////////////////////////////////
38 |
--------------------------------------------------------------------------------
/include/mutlass/real.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /**
33 | \file
34 | \brief This class provides helpers to support real<> and complex<> types in generic code.
35 | */
36 |
37 | #pragma once
38 |
39 |
40 | #include <mutlass/detail/helper_macros.hpp> // MUTLASS_DEVICE
41 |
42 | namespace mutlass {
43 |
44 | /// Used to determine the real-valued underlying type of a numeric type T (this template maps T to itself).
45 | template <typename T>
46 | struct RealType {
47 |   using Type = T;
48 |
49 |   /// Number of elements
50 |   static int const kExtent = 1;
51 |   /// Converts a double to T with a static_cast
52 |   MUTLASS_HOST_DEVICE
53 |   static T from_real(double x) {
54 |     return static_cast<T>(x);
55 |   }
56 | };
57 | /// Free-function form: builds a T from the real value r by direct construction, T(r).
58 | template <typename T>
59 | MUTLASS_HOST_DEVICE
60 | static T from_real(double r) {
61 |   return T(r);
62 | }
63 |
64 |
65 | } // namespace mutlass
66 |
--------------------------------------------------------------------------------
/include/mutlass/trace.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Helpers for optionally tracing through code when debugging.
34 |
35 | This file is to be included after all other headers.
36 | */
37 |
38 | #pragma once
39 |
40 | ////////////////////////////////////////////////////////////////////////////////////////////////////
41 |
42 | // Tracing options: MUTLASS_DEBUG_TRACE_LEVEL defaults to 0, which makes MUTLASS_TRACE_HOST expand to nothing.
43 | #ifndef MUTLASS_DEBUG_TRACE_LEVEL
44 | #define MUTLASS_DEBUG_TRACE_LEVEL 0
45 | #endif
46 | // With a non-zero level the macro prints file:line and the streamed expression to std::cout, except when __MUSA_ARCH__ is defined.
47 | #if MUTLASS_DEBUG_TRACE_LEVEL
48 | #include <iostream>
49 | #include "mutlass/core_io.h"
50 | #if defined(__MUSA_ARCH__)
51 | #define MUTLASS_TRACE_HOST(x)
52 | #else
53 | #define MUTLASS_TRACE_HOST(x) { std::cout << __FILE__ << ":" << __LINE__ << " " << x << std::endl; }
54 | #endif
55 | #else
56 | #define MUTLASS_TRACE_HOST(x)
57 | #endif
58 |
59 | ////////////////////////////////////////////////////////////////////////////////////////////////////
60 |
61 |
--------------------------------------------------------------------------------
/include/mutlass/version.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #pragma once
34 |
35 | #include <cstdint>
36 | #include <string>
37 | // Version components of this MUTLASS release. NOTE(review): build scripts may parse these lines textually -- keep the format stable.
38 | #define MUTLASS_MAJOR 0
39 | #define MUTLASS_MINOR 2
40 | #define MUTLASS_PATCH 0
41 | // MUTLASS_BUILD and MUTLASS_REVISION come from version_extended.h when MUTLASS_VERSIONS_GENERATED is defined; otherwise they fall back to 0 and "".
42 | #ifdef MUTLASS_VERSIONS_GENERATED
43 | #include "mutlass/version_extended.h"
44 | #else
45 | #define MUTLASS_BUILD 0
46 | #define MUTLASS_REVISION ""
47 | #endif
48 | // Single-integer encoding of the version: major * 100 + minor * 10 + patch.
49 | #define MUTLASS_VERSION ((MUTLASS_MAJOR)*100 + (MUTLASS_MINOR)*10 + MUTLASS_PATCH)
50 |
51 | namespace mutlass {
52 | /// Packed version number, i.e. the value of MUTLASS_VERSION.
53 | constexpr uint32_t getVersion() { return MUTLASS_VERSION; }
54 |
55 | /// Major version component (MUTLASS_MAJOR).
56 | constexpr uint32_t getVersionMajor() { return MUTLASS_MAJOR; }
57 |
58 | /// Minor version component (MUTLASS_MINOR).
59 | constexpr uint32_t getVersionMinor() { return MUTLASS_MINOR; }
60 |
61 | /// Patch version component (MUTLASS_PATCH).
62 | constexpr uint32_t getVersionPatch() { return MUTLASS_PATCH; }
63 |
64 | /// Build number (MUTLASS_BUILD).
65 | /// The "+ 0" presumably keeps the expression valid if MUTLASS_BUILD expands to nothing -- confirm.
66 | constexpr uint32_t getVersionBuild() { return MUTLASS_BUILD + 0; }
67 |
68 | /// Returns "major.minor.patch", followed by ".build" when the build number is non-zero.
69 | inline std::string getVersionString() {
70 |   // Built from the macros: this header is included as-is, so an "@...@" placeholder would never be substituted.
71 |   std::string version = std::to_string(MUTLASS_MAJOR) + "." + std::to_string(MUTLASS_MINOR) +
72 |                         "." + std::to_string(MUTLASS_PATCH);
73 |   if (getVersionBuild()) { version += "." + std::to_string(getVersionBuild()); }
74 |   return version;
75 | }
76 |
77 | inline std::string getGitRevision() {
78 | return "@MUTLASS_REVISION@";
79 | }
80 |
81 | } // namespace mutlass
82 |
--------------------------------------------------------------------------------
/media/images/mutlass-0.2.0-gemm-performance.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MooreThreads/mutlass/68f1bf1806f5435246518bbeecd3aa810704e3ae/media/images/mutlass-0.2.0-gemm-performance.png
--------------------------------------------------------------------------------
/python/mutlass_library/__init__.py:
--------------------------------------------------------------------------------
1 | #################################################################################################
2 | #
3 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
4 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | #################################################################################################
33 |
34 | import os
35 | import sys
36 |
37 | from . import gemm_operation
38 |
39 | if '-m' not in sys.argv:
40 | # Do not import generator when running python -m mutlass_library.generator to
41 | # avoid double-import warnings
42 | from . import generator
43 |
44 | from . import library
45 | from . import manifest
46 |
47 | # Make enum types from library.py accessible via mutlass_library.*
48 | from .library import *
49 |
50 | # Set up `source_path` to point to the path containing the MUTLASS source.
51 | # Check first if the path contains a `source` subdirectory -- this will
52 | # be the case when the package has been installed via pip. Otherwise,
53 | # default to the root of MUTLASS.
54 | install_source_path = os.path.join(__path__[0], 'source')  # candidate: a `source` directory inside this package
55 | if os.path.isdir(install_source_path):
56 | source_path = install_source_path
57 | else:
58 | source_path = os.path.join(__path__[0], '../..')  # two levels above the package directory; the '..' segments are left unnormalized
59 |
--------------------------------------------------------------------------------
/python/setup_library.py:
--------------------------------------------------------------------------------
1 | #################################################################################################
2 | #
3 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
4 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 | # SPDX-License-Identifier: BSD-3-Clause
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | #
32 | #################################################################################################
33 |
34 | from setuptools import setup
35 |
36 |
37 | def perform_setup():  # Calls setuptools.setup() to describe the `mutlass_library` package.
38 | setup(
39 | name='mutlass_library',
40 | version='0.1.0',  # NOTE(review): version.h defines MUTLASS_MAJOR/MINOR/PATCH as 0/2/0 — confirm whether this should track the library version
41 | description='MUTLASS library generation scripts',
42 | packages=['mutlass_library']  # only this one package is listed
43 | )
44 |
45 |
46 | if __name__ == '__main__':  # run the setup only when this file is executed as a script
47 | perform_setup()
48 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 |
31 | if (MUTLASS_ENABLE_GTEST_UNIT_TESTS) # descend into unit/ only when the GoogleTest unit tests are enabled
32 | add_subdirectory(unit)
33 | else()
34 | add_custom_target(test_unit) # command-less `test_unit` target for the disabled case — presumably so anything referencing it still resolves; confirm
35 | endif()
36 |
--------------------------------------------------------------------------------
/test/unit/gemm/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 |
31 | add_subdirectory(device) # the device-level GEMM test targets referenced below are declared in device/
32 |
33 | add_custom_target( # aggregate target: depends only on the device-level GEMM test target
34 | mutlass_test_unit_gemm
35 | DEPENDS
36 | mutlass_test_unit_gemm_device
37 | )
38 |
39 | add_custom_target( # aggregate target for the `test_unit_*` family; forwards to test_unit_gemm_device
40 | test_unit_gemm
41 | DEPENDS
42 | test_unit_gemm_device
43 | )
44 |
--------------------------------------------------------------------------------
/test/unit/gemm/device/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | add_custom_target( # aggregate target over the per-architecture GEMM test executables declared below
31 | mutlass_test_unit_gemm_device
32 | DEPENDS
33 | mutlass_test_unit_gemm_device_mp22
34 | mutlass_test_unit_gemm_device_mp31
35 | )
36 |
37 | add_custom_target( # NOTE(review): the test_unit_*_mp22/mp31 targets are not declared in this file — presumably created by mutlass_test_unit_add_executable; confirm
38 | test_unit_gemm_device
39 | DEPENDS
40 | test_unit_gemm_device_mp22
41 | test_unit_gemm_device_mp31
42 | )
43 |
44 | mutlass_test_unit_add_executable( # MP22 GEMM tests: first argument is the target name, the rest are its sources
45 | mutlass_test_unit_gemm_device_mp22
46 | mp22_gemm_f32_f32_f32_simt.mu
47 | mp22_gemm_tensorop.mu
48 | )
49 |
50 | mutlass_test_unit_add_executable( # MP31 GEMM tests
51 | mutlass_test_unit_gemm_device_mp31
52 | mp31_gemm_sqmma_ss.mu
53 | )
54 |
--------------------------------------------------------------------------------
/test/unit/gemm/device/testbed_utils.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /*! \file
33 | \brief Tests for device-wide GEMM interface
34 | */
35 |
36 | #pragma once
37 |
38 | #include "mutlass/mutlass.h"
39 |
40 | inline char const *to_string(mutlass::Status status) { // Maps a mutlass::Status value to its enumerator name as a string literal.
41 |
42 | switch (status) {
43 | case mutlass::Status::kSuccess: return "kSuccess";
44 | case mutlass::Status::kErrorMisalignedOperand: return "kErrorMisalignedOperand";
45 | case mutlass::Status::kErrorInvalidLayout: return "kErrorInvalidLayout";
46 | case mutlass::Status::kErrorInvalidProblem: return "kErrorInvalidProblem";
47 | case mutlass::Status::kErrorNotSupported: return "kErrorNotSupported";
48 | case mutlass::Status::kErrorWorkspaceNull: return "kErrorWorkspaceNull";
49 | case mutlass::Status::kErrorInternal: return "kErrorInternal";
50 | case mutlass::Status::kInvalid: return "kInvalid";
51 | default: break; // any value not listed above falls through to the "invalid" return
52 | }
53 | return "invalid";
54 | }
55 |
--------------------------------------------------------------------------------
/test/unit/mute/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | add_subdirectory(core) # one subdirectory per MuTe test group; each name matches a target suffix used below
31 | add_subdirectory(quyuan)
32 | add_subdirectory(mp31)
33 | add_subdirectory(mma)
34 |
35 |
36 | add_custom_target( # aggregate target over the four MuTe test groups
37 | mutlass_test_unit_mute
38 | DEPENDS
39 | mutlass_test_unit_mute_core
40 | mutlass_test_unit_mute_quyuan
41 | mutlass_test_unit_mute_mp31
42 | mutlass_test_unit_mute_mma
43 | )
44 |
45 | add_custom_target( # same grouping for the `test_unit_*` family — presumably the run-tests counterparts; confirm
46 | test_unit_mute
47 | DEPENDS
48 | test_unit_mute_core
49 | test_unit_mute_quyuan
50 | test_unit_mute_mp31
51 | test_unit_mute_mma
52 | )
53 |
--------------------------------------------------------------------------------
/test/unit/mute/core/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | mutlass_test_unit_add_executable( # declares the MuTe core unit-test executable from the .cpp sources listed below
31 | mutlass_test_unit_mute_core
32 | WITHOUT_MUSA # option consumed by mutlass_test_unit_add_executable — presumably a host-only build, since every source here is .cpp; confirm
33 | array_subbyte.cpp
34 | bitfield.cpp
35 | coalesce.cpp
36 | compact_xmajor.cpp
37 | compare.cpp
38 | complement.cpp
39 | composition.cpp
40 | constants.cpp
41 | core_unit.cpp
42 | inverse_left.cpp
43 | inverse_right.cpp
44 | logical_divide.cpp
45 | logical_product.cpp
46 | math.cpp
47 | mixedbits.cpp
48 | nullspace.cpp
49 | pointer.cpp
50 | reverse.cpp
51 | transform.cpp
52 | tuple.cpp
53 | int_tuple.cpp
54 | )
55 |
--------------------------------------------------------------------------------
/test/unit/mute/core/constants.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #include "mutlass_unit_test.h"
34 | #include
35 |
36 | #include
37 | #include
38 |
39 | TEST(MuTe_core, MakeIntegerSequence) { // NOTE(review): several template argument lists look stripped in this listing (`make_integer_sequence{}`, `integral_constant`, `is_same_v`, `is_constant::value`) — restore them from the repository before compiling
40 | mute::for_each(mute::make_integer_sequence{}, [](auto c) { // first pass: visits each element of the sequence as a compile-time constant `c`
41 | using c_type = decltype(c);
42 | constexpr auto c_value = c_type::value;
43 | using expected_type = mute::integral_constant;
44 | static_assert(mute::is_same_v);
45 | static_assert(mute::is_same_v);
46 | static_assert(mute::is_constant::value);
47 | static_assert(mute::is_constant<0, decltype(c * mute::Int<0>{})>::value); // c * Int<0> must be the compile-time constant 0
48 | static_assert(mute::is_constant<2*c_value, decltype(c * mute::Int<2>{})>::value); // c * Int<2> must be the compile-time constant 2*c_value
49 | });
50 |
51 | mute::for_each(mute::make_integer_sequence{}, [](auto c) { // second pass: same checks — presumably over a different element type or length; the arguments are not visible here
52 | using c_type = decltype(c);
53 | constexpr auto c_value = c_type::value;
54 | using expected_type = mute::integral_constant;
55 | static_assert(mute::is_same_v);
56 | static_assert(mute::is_same_v);
57 | static_assert(mute::is_constant::value);
58 | static_assert(mute::is_constant<0, decltype(c * mute::Int<0>{})>::value);
59 | static_assert(mute::is_constant<2*c_value, decltype(c * mute::Int<2>{})>::value);
60 | });
61 | }
62 |
--------------------------------------------------------------------------------
/test/unit/mute/core/core_unit.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /** \file
33 | \brief Unit tests for MuTe core
34 | */
35 |
36 | #include
37 |
38 | int main(int argc, char* arg[]) { // Entry point of the MuTe core unit-test executable.
39 | ::testing::InitGoogleTest(&argc, arg); // GoogleTest parses the command line here
40 | return RUN_ALL_TESTS(); // the GoogleTest result becomes the process exit code
41 | }
42 |
--------------------------------------------------------------------------------
/test/unit/mute/core/mixedbits.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #include "mutlass_unit_test.h"
34 |
35 | #include
36 | #include
37 |
38 | TEST(MuTe_core, MixedBits) // Checks that &, | and ^ on two mixed-bits values agree with the same operators applied to their uint32_t conversions.
39 | {
40 | using namespace mute;
41 |
42 | for_each(make_int_sequence<8>{}, [&](auto S0) { // S0, F0, S1, F1 each iterate a compile-time make_int_sequence<8> (presumably 0..7)
43 | for_each(make_int_sequence<8>{}, [&](auto F0) {
44 | for_each(make_int_sequence<8>{}, [&](auto S1) {
45 | for_each(make_int_sequence<8>{}, [&](auto F1) {
46 | for (uint32_t d0 = 0; d0 < 8; ++d0) { // d0, d1: runtime operands 0..7
47 | for (uint32_t d1 = 0; d1 < 8; ++d1) {
48 | auto m0 = make_mixed_bits(S0, d0, F0); // first operand, built from (S0, d0, F0)
49 | auto m1 = make_mixed_bits(S1, d1, F1); // second operand, built from (S1, d1, F1)
50 | //print(m0); print(" & "); print(m1); print(" = "); print(m0 & m1); print("\n");
51 | EXPECT_EQ(uint32_t(m0 & m1), uint32_t(m0) & uint32_t(m1));
52 | //print(m0); print(" | "); print(m1); print(" = "); print(m0 | m1); print("\n");
53 | EXPECT_EQ(uint32_t(m0 | m1), uint32_t(m0) | uint32_t(m1));
54 | //print(m0); print(" ^ "); print(m1); print(" = "); print(m0 ^ m1); print("\n");
55 | EXPECT_EQ(uint32_t(m0 ^ m1), uint32_t(m0) ^ uint32_t(m1));
56 | }
57 | }
58 | });
59 | });
60 | });
61 | });
62 | }
63 |
--------------------------------------------------------------------------------
/test/unit/mute/core/transform.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #include "mutlass_unit_test.h"
34 |
35 | #include
36 | #include
37 | #include
38 |
39 | TEST(MuTe_core, Transform) { // Checks that transform() applies a functor to every tensor element in place.
40 | using namespace mute;
41 | complex<float> array[4] = {{0,0}, {1,0}, {0,1}, {1,1}}; // inputs; NOTE(review): <float> restored to match the upstream CuTe test - confirm
42 | complex<float> correct[4] = {{0,0}, {1,0}, {0,-1}, {1,-1}}; // expected results: first component kept, second component negated
43 | auto tensor = make_tensor(static_cast<complex<float>*>(array), make_layout(make_shape(4))); // 4-element tensor built over `array`
44 | conjugate conj;
45 | transform(tensor, conj); // the results are read back from `tensor` itself below
46 | for (int i = 0; i < 4; ++i)
47 | {
48 | EXPECT_EQ(tensor(i), correct[i]);
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/test/unit/mute/mma/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_test_unit_add_executable( # project helper defined outside this file
30 | mutlass_test_unit_mute_mma # presumably the name of the test executable to create - confirm against the helper
31 | mp31_mma.mu # remaining arguments: the .mu sources passed to the helper
32 | mp22_mma.mu
33 | )
--------------------------------------------------------------------------------
/test/unit/mute/mp31/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_test_unit_add_executable( # project helper defined outside this file
30 | mutlass_test_unit_mute_mp31 # presumably the name of the test executable to create - confirm against the helper
31 | mp31_sqmma.mu # remaining arguments: the .mu sources passed to the helper
32 | mp31_robust_buffer_access.mu
33 | )
34 |
--------------------------------------------------------------------------------
/test/unit/mute/quyuan/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # SPDX-License-Identifier: BSD-3-Clause
3 | #
4 | # Redistribution and use in source and binary forms, with or without
5 | # modification, are permitted provided that the following conditions are met:
6 | #
7 | # 1. Redistributions of source code must retain the above copyright notice, this
8 | # list of conditions and the following disclaimer.
9 | #
10 | # 2. Redistributions in binary form must reproduce the above copyright notice,
11 | # this list of conditions and the following disclaimer in the documentation
12 | # and/or other materials provided with the distribution.
13 | #
14 | # 3. Neither the name of the copyright holder nor the names of its
15 | # contributors may be used to endorse or promote products derived from
16 | # this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 |
29 | mutlass_test_unit_add_executable( # project helper defined outside this file
30 | mutlass_test_unit_mute_quyuan # presumably the name of the test executable to create - confirm against the helper
31 | vectorization_auto.mu # the single .mu source passed to the helper
32 | )
33 |
--------------------------------------------------------------------------------
/test/unit/test_unit.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /** \file
33 | \brief Unit tests for MUTLASS core
34 | */
35 | #include "common/mutlass_unit_test.h"
36 |
37 | int main(int argc, char* arg[]) { // Entry point of the unit-test executable.
38 | FilterArchitecture(); // helper declared outside this file; called before GoogleTest sees the command line
39 | ::testing::InitGoogleTest(&argc, arg); // GoogleTest parses and removes the flags it recognizes from argc/arg
40 | return RUN_ALL_TESTS(); // process exit code is the value RUN_ALL_TESTS() returns (0 when every test passes)
41 | }
42 |
--------------------------------------------------------------------------------
/tools/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
30 | add_subdirectory(util) # added unconditionally
31 | # The library directory is only added when MUTLASS_ENABLE_LIBRARY is set.
32 | if (MUTLASS_ENABLE_LIBRARY)
33 | add_subdirectory(library)
34 | endif()
35 | # The profiler directory is only added when both the profiler and the library are enabled.
36 | if (MUTLASS_ENABLE_PROFILER)
37 | if (NOT MUTLASS_ENABLE_LIBRARY) # profiler requested without the library: report a configuration error
38 | message(SEND_ERROR "Build conflict: The MUTLASS profiler requires the MUTLASS library.") # SEND_ERROR: processing continues, generation is skipped
39 | message(SEND_ERROR " MUTLASS_ENABLE_PROFILER = ${MUTLASS_ENABLE_PROFILER}")
40 | message(SEND_ERROR " MUTLASS_ENABLE_LIBRARY = ${MUTLASS_ENABLE_LIBRARY}")
41 | else()
42 | add_subdirectory(profiler)
43 | endif()
44 | endif()
45 |
--------------------------------------------------------------------------------
/tools/library/include/mutlass/library/arch_mappings.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 | #include "mutlass/arch/mma.h"
35 | #include "mutlass/arch/arch.h"
36 |
37 | /////////////////////////////////////////////////////////////////////////////////////////////////
38 |
39 | namespace mutlass {
40 | namespace library {
41 |
42 | /////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | template struct ArchMap; // Primary template, declared only; the specializations below supply kMin/kMax. NOTE(review): the template parameter list is missing in this copy - restore it from the original header.
45 | // NOTE(review): the specialization argument lists below are also missing, so the three specializations are indistinguishable as written.
46 | template <> struct ArchMap {
47 | static int const kMin = 22; // presumably the lowest architecture number this mapping covers - TODO confirm
48 | static int const kMax = 1024; // presumably the highest architecture number this mapping covers - TODO confirm
49 | };
50 |
51 | template <> struct ArchMap {
52 | static int const kMin = 22; // kMin == kMax here: a single architecture value
53 | static int const kMax = 22;
54 | };
55 |
56 | template <> struct ArchMap {
57 | static int const kMin = 31; // kMin == kMax here: a single architecture value
58 | static int const kMax = 31;
59 | };
60 |
61 | /////////////////////////////////////////////////////////////////////////////////////////////////
62 |
63 | } // namespace library
64 | } // namespace mutlass
65 |
66 | /////////////////////////////////////////////////////////////////////////////////////////////////
67 |
--------------------------------------------------------------------------------
/tools/library/include/mutlass/library/singleton.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #pragma once
34 |
35 | #include "mutlass/library/library.h"
36 | #include "mutlass/library/manifest.h"
37 | #include "mutlass/library/operation_table.h"
38 |
39 | /////////////////////////////////////////////////////////////////////////////////////////////////
40 |
41 | namespace mutlass {
42 | namespace library {
43 |
44 | /////////////////////////////////////////////////////////////////////////////////////////////////
45 |
46 | /// Holds one Manifest together with the OperationTable filled from it; callers obtain the shared instance through get().
47 | class Singleton {
48 | public:
49 |
50 | /// Manifest object (public data member)
51 | Manifest manifest;
52 |
53 | /// Operation table referencing the Manifest (public data member)
54 | OperationTable operation_table;
55 |
56 | public:
57 | /// Constructor; defined outside this header.
58 | Singleton();
59 | /// Returns a const reference to a Singleton; defined outside this header.
60 | static Singleton const &get();
61 | };
62 |
63 | /////////////////////////////////////////////////////////////////////////////////////////////////
64 |
65 | } // namespace library
66 | } // namespace mutlass
67 |
68 | /////////////////////////////////////////////////////////////////////////////////////////////////
69 |
70 |
--------------------------------------------------------------------------------
/tools/library/src/reference/gemm_int8_canonical.mu:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2024 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /* \file
33 | \brief Instantiates GEMM reference implementations.
34 | */
35 |
36 | #include "mutlass/mutlass.h"
37 | #include "mutlass/library/library.h"
38 | #include "mutlass/library/manifest.h"
39 |
40 | #include "gemm_reference_operation.h"
41 |
42 | /////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | namespace mutlass {
45 | namespace library {
46 |
47 | ///////////////////////////////////////////////////////////////////////////////////////////////////
48 |
49 | void initialize_gemm_reference_operations_int8_canonical(Manifest &manifest) { // Forwards `manifest` to one instantiation of make_gemm_real_canonical_layouts (declared outside this file).
50 | make_gemm_real_canonical_layouts<
51 | int8_t, // first two template arguments are int8_t; presumably the A and B operand element types - TODO confirm against the helper's declaration
52 | int8_t,
53 | int32_t, // last three template arguments are int32_t; presumably output/compute/accumulator types - TODO confirm
54 | int32_t,
55 | int32_t
56 | >(manifest);
57 |
58 | }
59 |
60 | ///////////////////////////////////////////////////////////////////////////////////////////////////
61 |
62 | } // namespace library
63 | } // namespace mutlass
64 |
65 | ///////////////////////////////////////////////////////////////////////////////////////////////////
66 |
67 |
--------------------------------------------------------------------------------
/tools/library/src/singleton.mu:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #include
34 | #include "mutlass/library/library.h"
35 | #include "mutlass/library/manifest.h"
36 | #include "mutlass/library/operation_table.h"
37 | #include "mutlass/library/singleton.h"
38 |
39 | /////////////////////////////////////////////////////////////////////////////////////////////////
40 |
41 | namespace mutlass {
42 | namespace library {
43 |
44 | /////////////////////////////////////////////////////////////////////////////////////////////////
45 |
46 | Singleton::Singleton() {
47 | // Initialize the manifest first; any value initialize() returns is discarded here.
48 | manifest.initialize();
49 | // Then append the initialized manifest to the operation table.
50 | operation_table.append(manifest);
51 | }
52 |
53 | Singleton const & Singleton::get() { // Returns the shared instance, constructing it the first time this function runs.
54 | static Singleton instance; // function-local static: constructed once and kept for the rest of the program
55 | return instance;
56 | }
57 |
58 | /////////////////////////////////////////////////////////////////////////////////////////////////
59 |
60 | } // namespace library
61 | } // namespace mutlass
62 |
63 | /////////////////////////////////////////////////////////////////////////////////////////////////
64 |
--------------------------------------------------------------------------------
/tools/profiler/include/mutlass/profiler/debug.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /* \file
33 | \brief Defines the debugprof() debug-print macro, enabled by DEBUG_PROFILER
34 | */
35 |
36 | #pragma once
37 |
38 | #include
39 |
40 | //#define report(x) { std::cout << "\033[31m" << __FILE__ << ":" << __LINE__ << " " << x << "\033[0m" << std::endl; }
41 | //#define report(x) {}
42 |
43 | // Enable/Disable Profiler debug prints
44 | //#define DEBUG_PROFILER
45 |
46 | //RED 31m // profiler prints debug messages in red
47 | //YELLOW 33m // ir prints debug messages in yellow
48 |
49 | #ifndef DEBUG_PROFILER // Debug prints are off unless DEBUG_PROFILER is defined; debugprof(...) then expands to nothing.
50 | #define debugprof(...)
51 | #else // Enabled: prints "[DEBUG PROF] <file>:<line> | ", then the printf-style message, then a colour reset and newline.
52 | #define debugprof(...) do { \
53 | printf("\033[33m[DEBUG PROF] %s:%d | ", __FILE__, __LINE__); \
54 | printf(__VA_ARGS__); \
55 | printf("\033[0m\n"); \
56 | } while (0)
57 | #endif // NOTE(review): the macro emits escape 33m, which the legend above lists as yellow (ir), not the 31m red it assigns to the profiler - confirm which is intended.
58 |
--------------------------------------------------------------------------------
/tools/profiler/include/mutlass/profiler/gpu_timer.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /* \file
33 | \brief Declares GpuTimer, a timer that records start/stop events on a MUSA stream
34 | */
35 |
36 | #pragma once
37 |
38 | #include
39 | #include "mutlass/mutlass.h"
40 |
41 | namespace mutlass {
42 | namespace profiler {
43 |
44 | /////////////////////////////////////////////////////////////////////////////////////////////////
45 |
46 | struct GpuTimer { // Timer built on a pair of MUSA events; all methods are defined outside this header.
47 | /// Event pair used by the timer; presumably [0] is the start event and [1] the stop event - TODO confirm against the implementation
48 | musaEvent_t events[2];
49 |
50 | //
51 | // Methods
52 | //
53 | /// Constructor and destructor; presumably create and destroy the two events - TODO confirm against the implementation
54 | GpuTimer();
55 | ~GpuTimer();
56 |
57 | /// Records a start event in the stream (stream defaults to nullptr)
58 | void start(musaStream_t stream = nullptr);
59 |
60 | /// Records a stop event in the stream (stream defaults to nullptr)
61 | void stop(musaStream_t stream = nullptr);
62 |
63 | /// Records a stop event in the stream and synchronizes on the stream (stream defaults to nullptr)
64 | void stop_and_wait(musaStream_t stream = nullptr);
65 |
66 | /// Returns the duration in milliseconds; presumably divided by `iterations` (default 1) - TODO confirm against the implementation
67 | double duration(int iterations = 1) const;
68 | };
69 |
70 | /////////////////////////////////////////////////////////////////////////////////////////////////
71 |
72 | } // namespace profiler
73 | } // namespace mutlass
74 |
--------------------------------------------------------------------------------
/tools/profiler/src/main.cpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /* \file
33 | \brief
34 | */
35 |
36 | #include
37 |
38 | #include "mutlass/profiler/options.h"
39 |
40 | #include "mutlass/profiler/mutlass_profiler.h"
41 |
42 | ///////////////////////////////////////////////////////////////////////////////////////////////////
43 |
44 | int main(int argc, char const *arg[]) {
45 |
46 | mutlass::CommandLine cmdline(argc, arg);
47 | mutlass::profiler::Options options(cmdline);
48 |
49 | mutlass::profiler::MutlassProfiler profiler(options);
50 |
51 | return profiler();
52 | }
53 |
54 | ///////////////////////////////////////////////////////////////////////////////////////////////////
55 |
--------------------------------------------------------------------------------
/tools/profiler/src/performance_result.mu:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | /* \file
33 | \brief
34 | */
35 |
36 | #pragma once
37 |
38 | #include
39 |
40 | #include "mutlass/mutlass.h"
41 |
42 | // MUTLASS Profiler includes
43 | #include "mutlass/profiler/enumerated_types.h"
44 | #include "mutlass/profiler/performance_result.h"
45 |
46 | // MUTLASS Library includes
47 | #include "mutlass/library/library.h"
48 | #include "mutlass/library/util.h"
49 |
50 | namespace mutlass {
51 | namespace profiler {
52 |
53 | /////////////////////////////////////////////////////////////////////////////////////////////////
54 |
55 |
56 | /////////////////////////////////////////////////////////////////////////////////////////////////
57 |
58 | } // namespace profiler
59 | } // namespace mutlass
60 |
61 | /////////////////////////////////////////////////////////////////////////////////////////////////
62 |
63 |
--------------------------------------------------------------------------------
/tools/util/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
2 | # Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # 1. Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # 2. Redistributions in binary form must reproduce the above copyright notice,
12 | # this list of conditions and the following disclaimer in the documentation
13 | # and/or other materials provided with the distribution.
14 | #
15 | # 3. Neither the name of the copyright holder nor the names of its
16 | # contributors may be used to endorse or promote products derived from
17 | # this software without specific prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Header-only INTERFACE target that exposes the utility headers under ./include.
add_library(mutlass_tools_util_includes INTERFACE)
add_library(mt::mutlass::tools::util ALIAS mutlass_tools_util_includes)
set_target_properties(mutlass_tools_util_includes PROPERTIES EXPORT_NAME tools::util)

# Consumers inside the build tree use the in-source headers; consumers of the
# installed/exported target use the 'include' directory under the install prefix.
# Generator expressions keep build-tree paths out of the exported target.
target_include_directories(
  mutlass_tools_util_includes
  INTERFACE
  $<INSTALL_INTERFACE:include>
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
  )


# Install the headers themselves (trailing slash copies the directory contents).
install(
  DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/
  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/
  )

# Register the interface target in the MtMutlass export set.
install(
  TARGETS mutlass_tools_util_includes
  EXPORT MtMutlass
  )
50 |
--------------------------------------------------------------------------------
/tools/util/include/mutlass/util/GPU_Clock.hpp:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #pragma once
34 |
35 | #include
36 |
37 | struct GPU_Clock
38 | {
39 | GPU_Clock() {
40 | musaEventCreate(&start_);
41 | musaEventCreate(&stop_);
42 | musaEventRecord(start_);
43 | }
44 |
45 | ~GPU_Clock() {
46 | musaEventDestroy(start_);
47 | musaEventDestroy(stop_);
48 | }
49 |
50 | void start() {
51 | musaEventRecord(start_);
52 | }
53 |
54 | float milliseconds() {
55 | musaEventRecord(stop_);
56 | musaEventSynchronize(stop_);
57 | float time;
58 | musaEventElapsedTime(&time, start_, stop_);
59 | return time;
60 | }
61 |
62 | float seconds() {
63 | return milliseconds() * float(1e-3);
64 | }
65 |
66 | private:
67 | musaEvent_t start_, stop_;
68 | };
69 |
--------------------------------------------------------------------------------
/tools/util/include/mutlass/util/exceptions.h:
--------------------------------------------------------------------------------
1 | /******************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | ******************************************************************************/
32 |
33 | #pragma once
34 |
35 | /**
36 | * \file
37 | * \brief C++ exception semantics for MUSA error codes
38 | */
39 |
40 | #include
41 | #include
42 | #include
43 |
44 | #include "mutlass/platform/platform.h"
45 |
46 | namespace mutlass {
47 |
48 | /// C++ exception wrapper for MUSA \p musaError_t
49 | class musa_exception : public std::exception {
50 | public:
51 | /// Constructor
52 | musa_exception(const char* msg = "", musaError_t err = musaErrorUnknown) : msg(msg), err(err) {}
53 |
54 | /// Returns the underlying MUSA \p musaError_t
55 | musaError_t musaError() const { return err; }
56 |
57 | protected:
58 | /// Explanatory string
59 | const char* msg;
60 |
61 | /// Underlying MUSA \p musaError_t
62 | musaError_t err;
63 | };
64 |
65 | /// Writes a musa_exception instance to an output stream
66 | inline std::ostream& operator<<(std::ostream& out, musa_exception const& e) {
67 | return out << e.what() << ": " << musaGetErrorString(e.musaError());
68 | }
69 |
70 | } // namespace mutlass
71 |
--------------------------------------------------------------------------------
/tools/util/include/mutlass/util/index_sequence.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 |
33 | #pragma once
34 |
35 | #include "mutlass/mutlass.h"
36 | #include "mutlass/numeric_types.h"
37 |
38 | // integer_sequence moved to mutlass/numeric_types.h
39 |
40 |
--------------------------------------------------------------------------------
/tools/util/include/mutlass/util/reference/host/error_metrics.h:
--------------------------------------------------------------------------------
1 |
2 | /***************************************************************************************************
3 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
4 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 | * SPDX-License-Identifier: BSD-3-Clause
6 | *
7 | * Redistribution and use in source and binary forms, with or without
8 | * modification, are permitted provided that the following conditions are met:
9 | *
10 | * 1. Redistributions of source code must retain the above copyright notice, this
11 | * list of conditions and the following disclaimer.
12 | *
13 | * 2. Redistributions in binary form must reproduce the above copyright notice,
14 | * this list of conditions and the following disclaimer in the documentation
15 | * and/or other materials provided with the distribution.
16 | *
17 | * 3. Neither the name of the copyright holder nor the names of its
18 | * contributors may be used to endorse or promote products derived from
19 | * this software without specific prior written permission.
20 | *
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | *
32 | **************************************************************************************************/
33 | #pragma once
34 |
35 | #include
36 |
37 | #include "mutlass/mutlass.h"
38 | #include "mutlass/complex.h"
39 | #include "mutlass/util/reference/host/tensor_reduce.h"
40 | #include "mutlass/core_io.h"
41 |
42 | namespace mutlass {
43 | namespace reference {
44 | namespace host {
45 |
46 | /// Helper to compute the relative error metric for tensor A_computed w.r.t. to tensor A_reference
47 | template <
48 | typename Element,
49 | typename Layout,
50 | typename ComputeType = double
51 | >
52 | ComputeType TensorRelativeErrorMetric(
53 | TensorView view_A_computed,
54 | TensorView view_B_reference,
55 | ComputeType identity = ComputeType()
56 | ) {
57 |
58 | return mutlass::reference::host::TensorNormDiff(view_A_computed, view_B_reference, identity) /
59 | mutlass::reference::host::TensorNorm(view_B_reference, identity);
60 | }
61 |
62 |
63 | ///////////////////////////////////////////////////////////////////////////////////////////////////
64 |
65 | } // namespace host
66 | } // namespace reference
67 | } // namespace mutlass
68 |
--------------------------------------------------------------------------------
/tools/util/include/mutlass/util/reference/host/tensor_norm.h:
--------------------------------------------------------------------------------
1 | /***************************************************************************************************
2 | * Copyright (c) 2024 - 2025 Moore Threads Technology Co., Ltd("Moore Threads"). All rights reserved.
3 | * Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4 | * SPDX-License-Identifier: BSD-3-Clause
5 | *
6 | * Redistribution and use in source and binary forms, with or without
7 | * modification, are permitted provided that the following conditions are met:
8 | *
9 | * 1. Redistributions of source code must retain the above copyright notice, this
10 | * list of conditions and the following disclaimer.
11 | *
12 | * 2. Redistributions in binary form must reproduce the above copyright notice,
13 | * this list of conditions and the following disclaimer in the documentation
14 | * and/or other materials provided with the distribution.
15 | *
16 | * 3. Neither the name of the copyright holder nor the names of its
17 | * contributors may be used to endorse or promote products derived from
18 | * this software without specific prior written permission.
19 | *
20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | *
31 | **************************************************************************************************/
32 | #pragma once
33 |
34 |
35 | #include "mutlass/mutlass.h"
36 |
37 | // The contents of this file have been moved to 'tensor_reduce' to cover other types of reductions.
38 |
39 | #include "mutlass/util/reference/host/tensor_reduce.h"
40 |
41 | ///////////////////////////////////////////////////////////////////////////////////////////////////
42 |
43 |
44 |
--------------------------------------------------------------------------------