├── CPPLINT.cfg ├── docs ├── mori_io_arch.png ├── mori_io_workflow.jpg ├── mori_arch_20250819_v0.png ├── MORI-EP-BENCHMARK.md └── MORI-IO-INTRO.md ├── .typos.toml ├── .gitignore ├── tools ├── codestyle │ ├── pyproject.toml │ └── .cmakelintrc └── bnxt_disable_cq_coal.sh ├── requirements-build.txt ├── .gitmodules ├── include └── mori │ ├── utils │ └── data_types.hpp │ ├── ops │ └── ops.hpp │ ├── core │ ├── transport │ │ ├── p2p │ │ │ └── p2p.hpp │ │ └── rdma │ │ │ ├── providers │ │ │ ├── mlx5 │ │ │ │ └── mlx5_defs.hpp │ │ │ ├── ionic │ │ │ │ └── ionic_defs.hpp │ │ │ └── bnxt │ │ │ │ └── bnxt_defs.hpp │ │ │ ├── rdma.hpp │ │ │ └── host_primitives.hpp │ └── core.hpp │ ├── io │ ├── io.hpp │ ├── enum.hpp │ ├── logging.hpp │ └── engine.hpp │ ├── application │ ├── bootstrap │ │ ├── bootstrap.hpp │ │ ├── torch_bootstrap.hpp │ │ ├── mpi_bootstrap.hpp │ │ └── base_bootstrap.hpp │ ├── topology │ │ ├── topology.hpp │ │ ├── node.hpp │ │ ├── net.hpp │ │ ├── system.hpp │ │ └── gpu.hpp │ ├── transport │ │ ├── p2p │ │ │ └── p2p.hpp │ │ ├── transport.hpp │ │ ├── rdma │ │ │ └── providers │ │ │ │ └── ibverbs │ │ │ │ └── ibverbs.hpp │ │ └── tcp │ │ │ └── tcp.hpp │ ├── application.hpp │ ├── memory │ │ ├── memory_region.hpp │ │ └── symmetric_memory.hpp │ ├── utils │ │ └── math.hpp │ └── context │ │ └── context.hpp │ └── shmem │ ├── shmem.hpp │ └── shmem_api.hpp ├── .clang-format ├── src ├── ops │ ├── CMakeLists.txt │ └── dispatch_combine │ │ └── internode_v1.hpp ├── shmem │ ├── CMakeLists.txt │ ├── memory.cpp │ └── internal.hpp ├── io │ ├── CMakeLists.txt │ └── rdma │ │ ├── protocol.cpp │ │ ├── protocol.hpp │ │ └── executor.hpp ├── pybind │ ├── CMakeLists.txt │ ├── pybind.cpp │ ├── mori.hpp │ └── torch_utils.hpp └── application │ ├── topology │ ├── node.cpp │ ├── net.cpp │ ├── gpu.cpp │ └── system.cpp │ ├── CMakeLists.txt │ ├── memory │ └── memory_region.cpp │ ├── transport │ ├── rdma │ │ └── providers │ │ │ └── mlx5 │ │ │ └── mlx5_prm.hpp │ └── tcp │ │ └── tcp.cpp │ └── bootstrap │ ├── 
mpi_bootstrap.cpp │ └── torch_bootstrap.cpp ├── docker └── Dockerfile.dev ├── LICENSE ├── tests ├── __init__.py ├── python │ ├── __init__.py │ └── shmem │ │ └── test_api.py └── cpp │ ├── CMakeLists.txt │ ├── application │ ├── test_transport_tcp.cpp │ ├── test_topology.cpp │ └── test_transport_ibverbs.cpp │ └── io │ ├── test_protocol.cpp │ └── test_engine.cpp ├── python └── mori │ ├── shmem │ ├── __init__.py │ └── api.py │ ├── __init__.py │ ├── ops │ └── __init__.py │ ├── io │ └── __init__.py │ └── cpp │ └── __init__.py ├── examples ├── utils │ ├── common_utils.hpp │ └── args_parser.hpp ├── application │ ├── context.cpp │ └── ibverbs_test.cpp ├── ops │ └── dispatch_combine │ │ ├── correctness_test.sh │ │ └── README.md ├── CMakeLists.txt ├── shmem │ ├── concurrent_put_imm_thread.cpp │ ├── concurrent_put_thread.cpp │ ├── shmem_python_api.py │ └── put_thread_allgather.cpp └── benchmarks │ └── accum_perf.cpp ├── .pre-commit-config.yaml └── CMakeLists.txt /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | linelength=100 2 | -------------------------------------------------------------------------------- /docs/mori_io_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/mori/HEAD/docs/mori_io_arch.png -------------------------------------------------------------------------------- /docs/mori_io_workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/mori/HEAD/docs/mori_io_workflow.jpg -------------------------------------------------------------------------------- /.typos.toml: -------------------------------------------------------------------------------- 1 | [default.extend-words] 2 | ASO = "ASO" 3 | aso = "aso" 4 | endien = "endien" 5 | fre = "fre" 6 | -------------------------------------------------------------------------------- /docs/mori_arch_20250819_v0.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/mori/HEAD/docs/mori_arch_20250819_v0.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build 2 | build/ 3 | build*/ 4 | lib* 5 | 6 | # Python 7 | *.egg-info 8 | .eggs 9 | *.pyc 10 | .pytest_cache 11 | __pycache__ 12 | 13 | .vscode 14 | -------------------------------------------------------------------------------- /tools/codestyle/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.ruff] 2 | [tool.ruff.lint] 3 | ignore = [] 4 | 5 | [tool.ruff.lint.per-file-ignores] 6 | "*/__init__.py" = ["F401", "E402", "F403"] 7 | -------------------------------------------------------------------------------- /tools/codestyle/.cmakelintrc: -------------------------------------------------------------------------------- 1 | filter=-readability/wonkycase,-syntax,-convention/filename,-package/stdargs,-whitespace/indent,-whitespace/extra,-linelength,-readability/mixedcase 2 | -------------------------------------------------------------------------------- /requirements-build.txt: -------------------------------------------------------------------------------- 1 | cmake>=3.19 2 | setuptools>=65 3 | setuptools-scm>=8 4 | prettytable 5 | --extra-index-url https://download.pytorch.org/whl/rocm6.4 6 | torch 7 | pytest-assume 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "3rdparty/msgpack-c"] 2 | path = 3rdparty/msgpack-c 3 | url = https://github.com/msgpack/msgpack-c.git 4 | branch = cpp_master 5 | [submodule "3rdparty/spdlog"] 6 | path = 3rdparty/spdlog 7 | url = https://github.com/gabime/spdlog.git 8 | 
-------------------------------------------------------------------------------- /include/mori/utils/data_types.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #if defined(HIP_FP8_TYPE_FNUZ) && HIP_FP8_TYPE_FNUZ == 1 4 | #define MORI_FP8_TYPE_FNUZ_ENABLED 5 | #endif 6 | 7 | #if defined(HIP_FP8_TYPE_OCP) && HIP_FP8_TYPE_OCP == 1 8 | #define MORI_FP8_TYPE_OCP_ENABLED 9 | #endif 10 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | # Run the following command to reformat a file: 2 | # clang-format -i -style=Google 3 | # Or use clang-format-diff to only reformat the changed lines: 4 | # https://clang.llvm.org/docs/ClangFormat.html 5 | BasedOnStyle: Google 6 | DerivePointerAlignment: false 7 | ColumnLimit: 100 8 | PointerAlignment: Left 9 | NamespaceIndentation: None 10 | -------------------------------------------------------------------------------- /src/ops/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(mori_ops dispatch_combine/dispatch_combine.cpp 2 | dispatch_combine/internode_v1.cpp) 3 | target_include_directories(mori_ops PUBLIC ${CMAKE_SOURCE_DIR}/) 4 | target_link_libraries(mori_ops mori_application mori_shmem mori_logging 5 | hip::host hip::device) 6 | target_include_directories(mori_ops PUBLIC ${CMAKE_SOURCE_DIR}/include) 7 | -------------------------------------------------------------------------------- /docker/Dockerfile.dev: -------------------------------------------------------------------------------- 1 | # FROM rocm/pytorch:rocm6.4.1_ubuntu22.04_py3.12_pytorch_release_2.5.1 2 | FROM rocm/pytorch:rocm6.4.3_ubuntu22.04_py3.10_pytorch_release_2.5.1 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y \ 6 | git \ 7 | cython3 \ 8 | ibverbs-utils \ 9 | openmpi-bin \ 10 | libopenmpi-dev \ 11 | 
libpci-dev \ 12 | cmake \ 13 | libdw1 \ 14 | locales 15 | -------------------------------------------------------------------------------- /src/shmem/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(MPI REQUIRED) 2 | 3 | add_library(mori_shmem init.cpp memory.cpp) 4 | target_include_directories(mori_shmem PUBLIC ${CMAKE_SOURCE_DIR}/include) 5 | target_include_directories(mori_shmem PUBLIC ${CMAKE_SOURCE_DIR}) 6 | target_link_libraries( 7 | mori_shmem 8 | mori_application 9 | mori_logging 10 | MPI::MPI_CXX 11 | ibverbs 12 | hip::host 13 | hip::device 14 | mlx5) 15 | target_compile_options(mori_shmem PUBLIC "-fgpu-rdc") 16 | target_link_options(mori_shmem PUBLIC "-fgpu-rdc") 17 | -------------------------------------------------------------------------------- /src/io/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(mori_io SHARED engine.cpp rdma/protocol.cpp rdma/backend_impl.cpp 2 | rdma/executor.cpp rdma/common.cpp) 3 | 4 | target_include_directories(mori_io PUBLIC ${CMAKE_SOURCE_DIR}/include) 5 | target_include_directories(mori_io PUBLIC ${CMAKE_SOURCE_DIR}) 6 | target_include_directories( 7 | mori_io PUBLIC ${CMAKE_SOURCE_DIR}/3rdparty/msgpack-c/include) 8 | target_link_libraries(mori_io mori_application ibverbs mlx5 hip::host 9 | hip::device mori_logging) 10 | target_compile_definitions(mori_io PUBLIC MSGPACK_NO_BOOST) 11 | -------------------------------------------------------------------------------- /src/pybind/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(Torch REQUIRED) 2 | find_package(PythonLibs REQUIRED) 3 | 4 | add_library(mori_pybinds SHARED mori.cpp pybind.cpp) 5 | 6 | target_include_directories(mori_pybinds PUBLIC ${PYTHON_INCLUDE_DIRS} 7 | ${TORCH_INCLUDE_DIRS}) 8 | target_link_directories(mori_pybinds PUBLIC ${TORCH_INSTALL_PREFIX}/lib) 9 | 
target_link_libraries( 10 | mori_pybinds 11 | mori_ops 12 | mori_io 13 | ${TORCH_LIBRARIES} 14 | torch_python 15 | hip::host 16 | hip::device) 17 | 18 | # For python packages to find dependent libraries 19 | set_target_properties( 20 | mori_pybinds 21 | PROPERTIES BUILD_RPATH "$ORIGIN;$ORIGIN/../torch/lib" 22 | INSTALL_RPATH "$ORIGIN;$ORIGIN/../torch/lib" 23 | BUILD_WITH_INSTALL_RPATH TRUE) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. 
All rights reserved. 2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests/python/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | -------------------------------------------------------------------------------- /python/mori/shmem/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | from .api import * 23 | -------------------------------------------------------------------------------- /src/application/topology/node.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | -------------------------------------------------------------------------------- /tests/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(test_transport_tcp application/test_transport_tcp.cpp) 2 | 3 | target_include_directories(test_transport_tcp 4 | PUBLIC ${CMAKE_SOURCE_DIR}/include) 5 | target_include_directories(test_transport_tcp PUBLIC ${CMAKE_SOURCE_DIR}) 6 | target_link_libraries(test_transport_tcp mori_application) 7 | 8 | add_executable(test_transport_ibverbs application/test_transport_ibverbs.cpp) 9 | 10 | target_include_directories(test_transport_ibverbs 11 | PUBLIC ${CMAKE_SOURCE_DIR}/include) 12 | target_include_directories(test_transport_ibverbs PUBLIC ${CMAKE_SOURCE_DIR}) 13 | target_link_libraries(test_transport_ibverbs mori_application) 14 | 15 | add_executable(test_engine io/test_engine.cpp) 16 | 17 | target_include_directories(test_engine PUBLIC ${CMAKE_SOURCE_DIR}/include) 18 | target_include_directories(test_engine PUBLIC ${CMAKE_SOURCE_DIR}) 19 | target_link_libraries(test_engine mori_application mori_io) 20 | 21 | add_executable(test_topology application/test_topology.cpp) 22 | 23 | target_include_directories(test_topology PUBLIC ${CMAKE_SOURCE_DIR}/include) 24 | target_include_directories(test_topology PUBLIC ${CMAKE_SOURCE_DIR}) 25 | target_link_libraries(test_topology mori_application mori_io) 26 | -------------------------------------------------------------------------------- /python/mori/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | from . import cpp 23 | from . import ops 24 | from . import shmem 25 | from . import io 26 | -------------------------------------------------------------------------------- /include/mori/ops/ops.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/ops/dispatch_combine/dispatch_combine.hpp" 25 | -------------------------------------------------------------------------------- /examples/utils/common_utils.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #define MS_TO_S 1000 25 | #define B_TO_GB (1000 * 1000 * 1000) 26 | -------------------------------------------------------------------------------- /include/mori/core/transport/p2p/p2p.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/core/transport/p2p/device_primitives.hpp" 25 | -------------------------------------------------------------------------------- /python/mori/ops/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | from .dispatch_combine import ( 23 | EpDispatchCombineKernelType, 24 | EpDispatchCombineConfig, 25 | EpDispatchCombineOp, 26 | ) 27 | -------------------------------------------------------------------------------- /include/mori/core/core.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/core/transport/p2p/p2p.hpp" 25 | #include "mori/core/transport/rdma/rdma.hpp" 26 | #include "mori/core/utils.hpp" 27 | -------------------------------------------------------------------------------- /include/mori/io/io.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/io/common.hpp" 25 | #include "mori/io/engine.hpp" 26 | #include "mori/io/enum.hpp" 27 | #include "mori/io/logging.hpp" 28 | -------------------------------------------------------------------------------- /src/pybind/pybind.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #include "src/pybind/mori.hpp" 23 | 24 | PYBIND11_MODULE(libmori_pybinds, m) { 25 | mori::RegisterMoriOps(m); 26 | mori::RegisterMoriShmem(m); 27 | mori::RegisterMoriIo(m); 28 | } 29 | -------------------------------------------------------------------------------- /include/mori/application/bootstrap/bootstrap.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/application/bootstrap/base_bootstrap.hpp" 25 | #include "mori/application/bootstrap/mpi_bootstrap.hpp" 26 | #include "mori/application/bootstrap/torch_bootstrap.hpp" 27 | -------------------------------------------------------------------------------- /include/mori/core/transport/rdma/providers/mlx5/mlx5_defs.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | namespace mori { 25 | namespace core { 26 | 27 | enum { 28 | MLX5_CQ_SET_CI = 0, 29 | MLX5_CQ_ARM_DB = 1, 30 | }; 31 | 32 | } // namespace core 33 | } // namespace mori 34 | -------------------------------------------------------------------------------- /python/mori/io/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | from .engine import * 23 | from mori.cpp import ( 24 | IOEngineConfig, 25 | StatusCode, 26 | BackendType, 27 | EngineDesc, 28 | MemoryDesc, 29 | MemoryLocationType, 30 | PollCqMode, 31 | RdmaBackendConfig, 32 | set_log_level, 33 | ) 34 | -------------------------------------------------------------------------------- /src/pybind/mori.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | 26 | namespace mori { 27 | void RegisterMoriOps(pybind11::module_& m); 28 | void RegisterMoriShmem(pybind11::module_& m); 29 | void RegisterMoriIo(pybind11::module_& m); 30 | } // namespace mori 31 | -------------------------------------------------------------------------------- /include/mori/shmem/shmem.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/shmem/shmem_api.hpp" 25 | #include "mori/shmem/shmem_device_api.hpp" 26 | #include "mori/shmem/shmem_device_kernels.hpp" 27 | #include "mori/shmem/shmem_ibgda_kernels.hpp" 28 | #include "mori/shmem/shmem_p2p_kernels.hpp" 29 | -------------------------------------------------------------------------------- /include/mori/core/transport/rdma/providers/ionic/ionic_defs.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | namespace mori { 25 | namespace core { 26 | 27 | #define QUEUE_SIZE 1 28 | #define MAX_INLINE_SIZE 32 29 | //#define IONIC_CCQE 1 30 | #undef IONIC_CCQE 31 | } // namespace core 32 | } // namespace mori 33 | -------------------------------------------------------------------------------- /include/mori/core/transport/rdma/providers/bnxt/bnxt_defs.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | namespace mori { 25 | namespace core { 26 | 27 | #define BNXT_RE_SLOT_SIZE 16 28 | #define BNXT_RE_CQE_SIZE (2 * BNXT_RE_SLOT_SIZE) 29 | #define BNXT_RE_NUM_SLOT_PER_WQE 3 30 | 31 | } // namespace core 32 | } // namespace mori -------------------------------------------------------------------------------- /include/mori/application/topology/topology.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/application/topology/gpu.hpp" 25 | #include "mori/application/topology/net.hpp" 26 | #include "mori/application/topology/node.hpp" 27 | #include "mori/application/topology/pci.hpp" 28 | #include "mori/application/topology/system.hpp" 29 | -------------------------------------------------------------------------------- /include/mori/application/topology/node.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | namespace mori { 25 | namespace application { 26 | 27 | using NumaNodeId = int32_t; 28 | 29 | class TopoNode { 30 | public: 31 | TopoNode() = default; 32 | virtual ~TopoNode() = default; 33 | }; 34 | } // namespace application 35 | } // namespace mori 36 | -------------------------------------------------------------------------------- /include/mori/application/transport/p2p/p2p.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "hip/hip_runtime.h" 25 | 26 | namespace mori { 27 | namespace application { 28 | 29 | struct P2PMemoryRegion { 30 | uintptr_t addr; 31 | size_t length; 32 | hipIpcMemHandle_t ipcHandle; 33 | }; 34 | 35 | } // namespace application 36 | } // namespace mori 37 | -------------------------------------------------------------------------------- /include/mori/application/transport/transport.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include "mori/application/transport/p2p/p2p.hpp" 25 | #include "mori/application/transport/rdma/rdma.hpp" 26 | 27 | namespace mori { 28 | namespace application { 29 | 30 | enum TransportType { RDMA = 0, P2P = 1 }; 31 | 32 | } // namespace application 33 | } // namespace mori 34 | -------------------------------------------------------------------------------- /docs/MORI-EP-BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # MORI-EP Benchmark 2 | 3 | ## Intra-node 4 | ``` 5 | cd /path/to/mori 6 | export PYTHONPATH=/path/to/mori:$PYTHONPATH 7 | 8 | # Benchmark performance 9 | python3 tests/python/ops/bench_dispatch_combine.py 10 | ``` 11 | 12 | ## Inter-node 13 | 14 | Run the following command on each node and replace node_rank with its actual rank. Note that 'master_addr' should be the IP of rank 0 node. Environment variable 'GLOO_SOCKET_IFNAME' should be set to the tcp socket ifname you want to use. 15 | 16 | ``` 17 | export GLOO_SOCKET_IFNAME=ens14np0 18 | export MORI_RDMA_DEVICES=^mlx5_0,mlx5_1 # Optional: use `^` prefix to exclude specified devices 19 | 20 | torchrun --nnodes=2 --node_rank=0 --nproc_per_node=1 --master_addr="10.194.129.65" --master_port=1234 examples/ops/dispatch_combine/test_dispatch_combine_internode.py --bench 21 | ``` 22 | 23 | The output of this script includes the total number of tokens received, the total number of RDMA tokens received, and the total bandwidth (including XGMI and RDMA). 
To calculate the RDMA bandwidth, multiply the total bandwidth by (total # of RDMA tokens / total # of tokens). 24 | 25 | ## Others 26 | 27 | ### Select NICs by setting environment variable MORI_RDMA_DEVICES 28 | 29 | For RoCE networks, you can specify which RDMA devices to use with the `MORI_RDMA_DEVICES` environment variable: 30 | 31 | - **Include specific devices**: `MORI_RDMA_DEVICES=mlx5_0,mlx5_1` 32 | - **Exclude devices**: `MORI_RDMA_DEVICES=^mlx5_2,mlx5_3` (use `^` prefix to exclude specified devices) 33 | - **Default**: If not set, all available RDMA devices will be used 34 | -------------------------------------------------------------------------------- /python/mori/cpp/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | import os 23 | import sys 24 | import importlib.util 25 | 26 | cur_dir = os.path.dirname(os.path.abspath(__file__)) 27 | lib_path = os.path.abspath(os.path.join(cur_dir, "../libmori_pybinds.so")) 28 | 29 | spec = importlib.util.spec_from_file_location("libmori_pybinds", lib_path) 30 | module = importlib.util.module_from_spec(spec) 31 | sys.modules["libmori_pybinds"] = module 32 | spec.loader.exec_module(module) 33 | 34 | from libmori_pybinds import * 35 | -------------------------------------------------------------------------------- /python/mori/shmem/api.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | from mori import cpp as mori_cpp 23 | 24 | 25 | def shmem_torch_process_group_init(group_name: str): 26 | return mori_cpp.shmem_torch_process_group_init(group_name) 27 | 28 | 29 | def shmem_finalize(): 30 | return mori_cpp.shmem_finalize() 31 | 32 | 33 | def shmem_mype(): 34 | return mori_cpp.shmem_mype() 35 | 36 | 37 | def shmem_npes(): 38 | return mori_cpp.shmem_npes() 39 | 40 | 41 | def shmem_num_qp_per_pe(): 42 | return mori_cpp.shmem_num_qp_per_pe() 43 | -------------------------------------------------------------------------------- /examples/application/context.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #include "mori/application/application.hpp" 23 | 24 | using namespace mori; 25 | using namespace mori::application; 26 | 27 | int main() { 28 | MpiBootstrapNetwork bootNet(MPI_COMM_WORLD); 29 | bootNet.Initialize(); 30 | 31 | Context context(bootNet); 32 | std::cout << "Local rank: " << context.LocalRank() << std::endl; 33 | std::cout << "World size: " << context.WorldSize() << std::endl; 34 | std::cout << "Host Name: " << context.HostName() << std::endl; 35 | 36 | bootNet.Finalize(); 37 | } 38 | -------------------------------------------------------------------------------- /include/mori/core/transport/rdma/rdma.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/core/transport/rdma/device_primitives.hpp" 25 | #include "mori/core/transport/rdma/host_primitives.hpp" 26 | #include "mori/core/transport/rdma/primitives.hpp" 27 | #include "mori/core/transport/rdma/providers/mlx5/mlx5_device_primitives.hpp" 28 | #include "mori/core/transport/rdma/providers/mlx5/mlx5_host_primitives.hpp" 29 | #include "mori/core/transport/rdma/providers/bnxt/bnxt_device_primitives.hpp" 30 | #include "mori/core/transport/rdma/providers/ionic/ionic_device_primitives.hpp" 31 | -------------------------------------------------------------------------------- /include/mori/application/application.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/application/bootstrap/bootstrap.hpp" 25 | #include "mori/application/context/context.hpp" 26 | #include "mori/application/memory/memory_region.hpp" 27 | #include "mori/application/memory/symmetric_memory.hpp" 28 | #include "mori/application/transport/rdma/providers/bnxt/bnxt.hpp" 29 | #include "mori/application/transport/rdma/providers/ionic/ionic.hpp" 30 | #include "mori/application/transport/rdma/providers/mlx5/mlx5.hpp" 31 | #include "mori/application/transport/rdma/rdma.hpp" 32 | #include "mori/application/utils/check.hpp" 33 | -------------------------------------------------------------------------------- /src/ops/dispatch_combine/internode_v1.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include "mori/core/core.hpp" 25 | #include "mori/ops/dispatch_combine/dispatch_combine.hpp" 26 | #include "mori/shmem/shmem.hpp" 27 | 28 | namespace mori { 29 | namespace moe { 30 | 31 | template 32 | __global__ void EpDispatchInterNodeV1Kernel(EpDispatchCombineArgs args); 33 | 34 | template 35 | __global__ void EpDispatchInterNodeV1KernelLowLatency(EpDispatchCombineArgs args); 36 | 37 | template 38 | __global__ void EpCombineInterNodeV1Kernel(EpDispatchCombineArgs args); 39 | 40 | } // namespace moe 41 | } // namespace mori 42 | -------------------------------------------------------------------------------- /include/mori/application/bootstrap/torch_bootstrap.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include "mori/application/bootstrap/base_bootstrap.hpp" 25 | 26 | namespace mori { 27 | namespace application { 28 | 29 | class TorchBootstrapNetwork : public BootstrapNetwork { 30 | public: 31 | TorchBootstrapNetwork(const std::string& groupName); 32 | ~TorchBootstrapNetwork(); 33 | 34 | void Initialize(); 35 | void Finalize(); 36 | 37 | void Allgather(void* sendbuf, void* recvbuf, size_t sendcount); 38 | void AllToAll(void* sendbuf, void* recvbuf, size_t sendcount); 39 | void Barrier(); 40 | 41 | private: 42 | std::string groupName; 43 | }; 44 | 45 | } // namespace application 46 | } // namespace mori 47 | -------------------------------------------------------------------------------- /include/mori/application/bootstrap/mpi_bootstrap.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | 26 | #include "mori/application/bootstrap/base_bootstrap.hpp" 27 | 28 | namespace mori { 29 | namespace application { 30 | 31 | class MpiBootstrapNetwork : public BootstrapNetwork { 32 | public: 33 | MpiBootstrapNetwork(MPI_Comm mpi_comm); 34 | ~MpiBootstrapNetwork(); 35 | 36 | void Initialize(); 37 | void Finalize(); 38 | 39 | void Allgather(void* sendbuf, void* recvbuf, size_t sendcount); 40 | void AllToAll(void* sendbuf, void* recvbuf, size_t sendcount); 41 | void Barrier(); 42 | 43 | private: 44 | MPI_Comm mpi_comm; 45 | }; 46 | 47 | } // namespace application 48 | } // namespace mori 49 | -------------------------------------------------------------------------------- /examples/ops/dispatch_combine/correctness_test.sh: -------------------------------------------------------------------------------- 1 | thisScriptPath=$(dirname $0) 2 | execPath=$thisScriptPath/../../../build/examples/test_dispatch_combine_ops 3 | echo $execPath 4 | # ------------------------------------------------------------------------------------------------ # 5 | # Intra-Node Test # 6 | # ------------------------------------------------------------------------------------------------ # 7 | worldSizeList=(2 4 8) 8 | hiddenStateSizeList=(7168) 9 | scaleDimList=(8 32) 10 | tokenNumList=(1 128) 11 | expertPerRankList=(8 256) 12 | expertPerTokenList=(8) 13 | warpPerBlockList=(4) 14 | blockNumList=(8) 15 | dataTypeList=("fp8" "bf16") 16 | scaleTypeList=("fp8" "fp32") 17 | 18 | for worldSize in "${worldSizeList[@]}"; do 19 | for hiddenStateSize in "${hiddenStateSizeList[@]}"; do 20 | for scaleDim in "${scaleDimList[@]}"; do 21 | for tokenNum in "${tokenNumList[@]}"; do 
22 | for expertPerRank in "${expertPerRankList[@]}"; do 23 | for expertPerToken in "${expertPerTokenList[@]}"; do 24 | for warpPerBlock in "${warpPerBlockList[@]}"; do 25 | for blockNum in "${blockNumList[@]}"; do 26 | for dataType in "${dataTypeList[@]}"; do 27 | for scaleType in "${scaleTypeList[@]}"; do 28 | 29 | cmd="mpirun -np $worldSize --allow-run-as-root $execPath --cmd test --data_type $dataType --hdim=$hiddenStateSize \ 30 | --scale_dim=$scaleDim --max_tokens=$tokenNum --expert_per_rank=$expertPerRank --expert_per_token=$expertPerToken \ 31 | --warp_per_blk=$warpPerBlock --block_num=$blockNum --scale_type=$scaleType --max_token_type_size=4 --num=3" 32 | echo "$cmd" 33 | eval "$cmd" 34 | if [ $? -ne 0 ]; then 35 | echo "Command failed: $cmd" 36 | exit 1 37 | fi 38 | 39 | done # scaleType 40 | done # dataType 41 | done # blockNum 42 | done # warpPerBlock 43 | done # expertPerToken 44 | done # expertPerRank 45 | done # tokenNum 46 | done # scaleDim 47 | done # hiddenStateSize 48 | done # worldSize 49 | -------------------------------------------------------------------------------- /include/mori/application/memory/memory_region.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | 26 | #include 27 | 28 | #include "mori/application/transport/rdma/rdma.hpp" 29 | 30 | namespace mori { 31 | namespace application { 32 | 33 | class RdmaMemoryRegionManager { 34 | public: 35 | RdmaMemoryRegionManager(RdmaDeviceContext& context); 36 | ~RdmaMemoryRegionManager(); 37 | 38 | RdmaMemoryRegion RegisterBuffer(void* ptr, size_t size); 39 | void DeregisterBuffer(void* ptr); 40 | 41 | RdmaMemoryRegion Get(void* ptr) const; 42 | 43 | private: 44 | RdmaDeviceContext& context; 45 | std::unordered_map mrPool; 46 | }; 47 | 48 | } // namespace application 49 | } // namespace mori 50 | -------------------------------------------------------------------------------- /include/mori/application/utils/math.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
#pragma once

#include <cmath>

namespace mori {
namespace application {

// Integer helpers for rounding and alignment.
//
// The helpers are `inline` (rather than `static`) so that including this header from
// several translation units yields one definition instead of a private copy per unit.

// Returns ceil(log2(val)) for val >= 2, and 0 for val <= 1 (including zero and negatives).
//
// Computed with integer shifts: converting `val` to float first keeps only 24 significant
// bits, which silently rounds inputs above 2^24 and can produce a result that is too small.
inline int LogCeil2(int val) {
  if (val <= 1) return 0;
  int bits = 0;
  // The bit width of (val - 1) equals ceil(log2(val)) for every val >= 2.
  for (unsigned int rest = static_cast<unsigned int>(val) - 1u; rest != 0u; rest >>= 1) {
    ++bits;
  }
  return bits;
}

// Returns the smallest power of two that is >= val; returns 0 when val <= 0.
//
// Precondition: val <= 2^30, because the next power of two above that does not fit in int.
inline int RoundUpPowOfTwo(int val) {
  if (val <= 0) return 0;
  return static_cast<int>(1u << LogCeil2(val));
}

// Rounds n up to the next multiple of 768 (3 * 256) and subtracts one.
inline int AlignUpTo3x256Minus1(int n) { return ((n + 767) / 768) * 768 - 1; }

// Rounds n up to the next multiple of `alignment`.
// Precondition: alignment > 0 (the expression divides by it).
inline int AlignUp(int n, int alignment) { return ((n + alignment - 1) / alignment) * alignment; }

// Rounds n up to the next multiple of 256.
inline int AlignUpTo256(int n) { return AlignUp(n, 256); }

// Rounds n up to a multiple of 256 (by masking the low 8 bits of n + 255), then up to the
// next power of two.
inline int RoundUpPowOfTwoAlignUpTo256(int n) { return RoundUpPowOfTwo((n + 255) & ~255); }

}  // namespace application
}  // namespace mori

// --------------------------------------------------------------------------------
// /include/mori/io/enum.hpp:
// --------------------------------------------------------------------------------
// Copyright © Advanced Micro Devices, Inc. All rights reserved.
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | namespace mori { 25 | namespace io { 26 | 27 | enum class BackendType : uint32_t { 28 | Unknown = 0, 29 | XGMI = 1, 30 | RDMA = 2, 31 | TCP = 3, 32 | }; 33 | 34 | using BackendTypeVec = std::vector; 35 | 36 | enum class MemoryLocationType : uint32_t { 37 | Unknown = 0, 38 | CPU = 1, 39 | GPU = 2, 40 | }; 41 | 42 | enum class StatusCode : uint32_t { 43 | SUCCESS = 0, 44 | INIT = 1, 45 | IN_PROGRESS = 2, 46 | 47 | ERR_BEGIN = 10, 48 | ERR_INVALID_ARGS = 11, 49 | ERR_NOT_FOUND = 12, 50 | ERR_RDMA_OP = 13, 51 | ERR_BAD_STATE = 14 52 | }; 53 | 54 | enum class PollCqMode : uint32_t { POLLING = 0, EVENT = 1 }; 55 | 56 | } // namespace io 57 | } // namespace mori 58 | -------------------------------------------------------------------------------- /src/application/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | find_package(MPI REQUIRED) 2 | find_package(hsa-runtime64 REQUIRED) 3 | #find_library(IONIC_LIBRARY 4 | # NAMES ionic 5 | # HINTS /lib/x86_64-linux-gnu 6 | # REQUIRED 7 | #) 8 | execute_process( 9 | COMMAND python -c "import torch; print(torch.utils.cmake_prefix_path)" 10 | WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} 11 | OUTPUT_VARIABLE TORCH_DIR 12 | OUTPUT_STRIP_TRAILING_WHITESPACE) 13 | cmake_path(SET TORCH_CMAKE_DIR NORMALIZE "${TORCH_DIR}/Torch") 14 | list(APPEND CMAKE_PREFIX_PATH ${TORCH_CMAKE_DIR}) 15 | message(STATUS "Found LibTorch CMake Path: ${CMAKE_PREFIX_PATH}") 16 | 17 | find_package(Torch REQUIRED) 18 | 19 | add_library( 20 | mori_application SHARED 21 | bootstrap/mpi_bootstrap.cpp 22 | bootstrap/torch_bootstrap.cpp 23 | transport/rdma/rdma.cpp 24 | 
transport/rdma/providers/mlx5/mlx5.cpp 25 | transport/rdma/providers/bnxt/bnxt.cpp 26 | transport/rdma/providers/ionic/ionic.cpp 27 | transport/rdma/providers/ibverbs/ibverbs.cpp 28 | transport/tcp/tcp.cpp 29 | memory/symmetric_memory.cpp 30 | memory/memory_region.cpp 31 | context/context.cpp 32 | topology/gpu.cpp 33 | topology/net.cpp 34 | topology/node.cpp 35 | topology/pci.cpp 36 | topology/system.cpp) 37 | 38 | target_include_directories(mori_application PUBLIC ${CMAKE_SOURCE_DIR}/include) 39 | target_include_directories(mori_application PUBLIC ${CMAKE_SOURCE_DIR}) 40 | target_link_libraries( 41 | mori_application 42 | MPI::MPI_CXX 43 | ibverbs 44 | hip::host 45 | hip::device 46 | mlx5 47 | rocm_smi64 48 | pci 49 | mori_logging 50 | hsa-runtime64) 51 | # ${IONIC_LIBRARY}) 52 | 53 | if(USE_BNXT) 54 | target_link_libraries(mori_application ${BNXT_RE_LIB}) 55 | endif() 56 | 57 | if(USE_IONIC) 58 | target_link_libraries(mori_application ${IONIC_LIB}) 59 | endif() 60 | 61 | target_include_directories(mori_application PUBLIC ${TORCH_INCLUDE_DIRS}) 62 | target_link_libraries(mori_application c10 torch torch_cpu c10_hip torch_hip) 63 | -------------------------------------------------------------------------------- /tests/python/shmem/test_api.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 
22 | import pytest 23 | import mori 24 | from tests.python.utils import TorchDistContext, get_free_port 25 | import torch 26 | 27 | 28 | def _test_torch_init(rank, world_size, port): 29 | with TorchDistContext(rank=rank, world_size=world_size, master_port=port): 30 | mori.shmem.shmem_torch_process_group_init("default") 31 | assert rank == mori.shmem.shmem_mype() 32 | assert world_size == mori.shmem.shmem_npes() 33 | mori.shmem.shmem_finalize() 34 | 35 | 36 | @pytest.mark.parametrize("world_size", (8,)) 37 | def test_torch_init(world_size): 38 | torch.multiprocessing.spawn( 39 | _test_torch_init, 40 | args=(world_size, get_free_port()), 41 | nprocs=world_size, 42 | join=True, 43 | ) 44 | -------------------------------------------------------------------------------- /include/mori/application/bootstrap/base_bootstrap.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | 26 | namespace mori { 27 | namespace application { 28 | 29 | class BootstrapNetwork { 30 | public: 31 | BootstrapNetwork() = default; 32 | virtual ~BootstrapNetwork() = default; 33 | 34 | virtual void Initialize() = 0; 35 | virtual void Finalize() = 0; 36 | 37 | int GetLocalRank() const { return localRank; } 38 | int GetWorldSize() const { return worldSize; } 39 | 40 | virtual void Allgather(void* sendbuf, void* recvbuf, size_t sendcount) = 0; 41 | virtual void AllToAll(void* sendbuf, void* recvbuf, size_t sendcount) = 0; 42 | 43 | virtual void Barrier() = 0; 44 | 45 | protected: 46 | int localRank{0}; 47 | int worldSize{0}; 48 | }; 49 | 50 | } // namespace application 51 | } // namespace mori 52 | -------------------------------------------------------------------------------- /src/application/memory/memory_region.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #include "mori/application/memory/memory_region.hpp" 23 | 24 | namespace mori { 25 | namespace application { 26 | 27 | RdmaMemoryRegionManager::RdmaMemoryRegionManager(RdmaDeviceContext& context) : context(context) {} 28 | 29 | RdmaMemoryRegionManager::~RdmaMemoryRegionManager() {} 30 | 31 | application::RdmaMemoryRegion RdmaMemoryRegionManager::RegisterBuffer(void* ptr, size_t size) { 32 | application::RdmaMemoryRegion mr = context.RegisterRdmaMemoryRegion(ptr, size); 33 | mrPool.insert({ptr, mr}); 34 | return mr; 35 | } 36 | 37 | void RdmaMemoryRegionManager::DeregisterBuffer(void* ptr) { 38 | if (mrPool.find(ptr) == mrPool.end()) return; 39 | context.DeregisterRdmaMemoryRegion(ptr); 40 | mrPool.erase(ptr); 41 | } 42 | 43 | application::RdmaMemoryRegion RdmaMemoryRegionManager::Get(void* ptr) const { 44 | if (mrPool.find(ptr) == mrPool.end()) return {}; 45 | return mrPool.at(ptr); 46 | } 47 | 48 | } // namespace application 49 | } // namespace mori 50 | -------------------------------------------------------------------------------- /src/application/transport/rdma/providers/mlx5/mlx5_prm.hpp: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: BSD-3-Clause 2 | * Copyright 2016 6WIND S.A. 
3 | * Copyright 2016 Mellanox Technologies, Ltd 4 | */ 5 | 6 | #ifndef RTE_PMD_MLX5_PRM_H_ 7 | #define RTE_PMD_MLX5_PRM_H_ 8 | 9 | #include 10 | 11 | #define u8 uint8_t 12 | 13 | #define MLX5_ADAPTER_PAGE_SHIFT 12 14 | 15 | enum { 16 | MLX5_CQE_SIZE_64B = 0x0, 17 | MLX5_CQE_SIZE_128B = 0x1, 18 | }; 19 | 20 | struct mlx5_ifc_cqc_bits { 21 | u8 status[0x4]; 22 | u8 as_notify[0x1]; 23 | u8 initiator_src_dct[0x1]; 24 | u8 dbr_umem_valid[0x1]; 25 | u8 reserved_at_7[0x1]; 26 | u8 cqe_sz[0x3]; 27 | u8 cc[0x1]; 28 | u8 reserved_at_c[0x1]; 29 | u8 scqe_break_moderation_en[0x1]; 30 | u8 oi[0x1]; 31 | u8 cq_period_mode[0x2]; 32 | u8 cqe_comp_en[0x1]; 33 | u8 mini_cqe_res_format[0x2]; 34 | u8 st[0x4]; 35 | u8 reserved_at_18[0x1]; 36 | u8 cqe_comp_layout[0x7]; 37 | u8 dbr_umem_id[0x20]; 38 | u8 reserved_at_40[0x14]; 39 | u8 page_offset[0x6]; 40 | u8 reserved_at_5a[0x2]; 41 | u8 mini_cqe_res_format_ext[0x2]; 42 | u8 cq_timestamp_format[0x2]; 43 | u8 reserved_at_60[0x3]; 44 | u8 log_cq_size[0x5]; 45 | u8 uar_page[0x18]; 46 | u8 reserved_at_80[0x4]; 47 | u8 cq_period[0xc]; 48 | u8 cq_max_count[0x10]; 49 | u8 reserved_at_a0[0x18]; 50 | u8 c_eqn[0x8]; 51 | u8 reserved_at_c0[0x3]; 52 | u8 log_page_size[0x5]; 53 | u8 reserved_at_c8[0x18]; 54 | u8 reserved_at_e0[0x20]; 55 | u8 reserved_at_100[0x8]; 56 | u8 last_notified_index[0x18]; 57 | u8 reserved_at_120[0x8]; 58 | u8 last_solicit_index[0x18]; 59 | u8 reserved_at_140[0x8]; 60 | u8 consumer_counter[0x18]; 61 | u8 reserved_at_160[0x8]; 62 | u8 producer_counter[0x18]; 63 | u8 local_partition_id[0xc]; 64 | u8 process_id[0x14]; 65 | u8 reserved_at_1A0[0x20]; 66 | u8 dbr_addr[0x40]; 67 | }; 68 | 69 | struct mlx5_ifc_create_cq_in_bits { 70 | u8 opcode[0x10]; 71 | u8 uid[0x10]; 72 | u8 reserved_at_20[0x10]; 73 | u8 op_mod[0x10]; 74 | u8 reserved_at_40[0x40]; 75 | struct mlx5_ifc_cqc_bits cq_context; 76 | u8 cq_umem_offset[0x40]; 77 | u8 cq_umem_id[0x20]; 78 | u8 cq_umem_valid[0x1]; 79 | u8 reserved_at_2e1[0x1f]; 80 | u8 
reserved_at_300[0x580]; 81 | u8 pas[]; 82 | }; 83 | 84 | #endif /* RTE_PMD_MLX5_PRM_H_ */ 85 | -------------------------------------------------------------------------------- /include/mori/application/transport/rdma/providers/ibverbs/ibverbs.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include "infiniband/verbs.h" 25 | #include "mori/application/transport/rdma/rdma.hpp" 26 | 27 | namespace mori { 28 | namespace application { 29 | 30 | class IBVerbsDeviceContext : public RdmaDeviceContext { 31 | public: 32 | IBVerbsDeviceContext(RdmaDevice* rdma_device, ibv_pd* inPd); 33 | ~IBVerbsDeviceContext() override; 34 | 35 | virtual RdmaEndpoint CreateRdmaEndpoint(const RdmaEndpointConfig&) override; 36 | virtual void ConnectEndpoint(const RdmaEndpointHandle& local, 37 | const RdmaEndpointHandle& remote, uint32_t qpId = 0) override; 38 | 39 | private: 40 | std::unordered_map cqPool; 41 | std::unordered_map qpPool; 42 | }; 43 | 44 | class IBVerbsDevice : public RdmaDevice { 45 | public: 46 | IBVerbsDevice(ibv_device* device); 47 | ~IBVerbsDevice() override; 48 | 49 | RdmaDeviceContext* CreateRdmaDeviceContext() override; 50 | }; 51 | 52 | } // namespace application 53 | } // namespace mori 54 | -------------------------------------------------------------------------------- /include/mori/io/logging.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | 26 | // Include the new centralized logging system 27 | #include "mori/utils/mori_log.hpp" 28 | 29 | namespace mori { 30 | namespace io { 31 | 32 | // Legacy SetLogLevel function for backward compatibility 33 | inline void SetLogLevel(const std::string& strLevel) { 34 | try { 35 | InitializeLoggingFromEnv(); 36 | } catch (...) { 37 | } 38 | 39 | ForceSetModuleLogLevel(modules::IO, strLevel); 40 | 41 | auto logger = mori::ModuleLogger::GetInstance().GetLogger(modules::IO); 42 | if (logger) { 43 | logger->info("Set MORI-IO log level to {}", strLevel); 44 | } 45 | } 46 | 47 | // Legacy ScopedTimer - redirect to new implementation 48 | using ScopedTimer = mori::ScopedTimer; 49 | 50 | // Legacy ScopedTimer - redirect to new implementation 51 | using ScopedTimer = mori::ScopedTimer; 52 | 53 | #define MORI_IO_TIMER(message) MORI_TIMER(message, mori::modules::IO) 54 | #define MORI_IO_FUNCTION_TIMER MORI_FUNCTION_TIMER(mori::modules::IO) 55 | 56 | } // namespace io 57 | } // namespace mori -------------------------------------------------------------------------------- /include/mori/application/topology/net.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | #include "mori/application/topology/node.hpp" 28 | #include "mori/application/topology/pci.hpp" 29 | 30 | namespace mori { 31 | namespace application { 32 | 33 | /* ---------------------------------------------------------------------------------------------- */ 34 | /* TopoNodeNic */ 35 | /* ---------------------------------------------------------------------------------------------- */ 36 | class TopoNodeNic : public TopoNode { 37 | public: 38 | TopoNodeNic() = default; 39 | ~TopoNodeNic() = default; 40 | 41 | public: 42 | std::string name{}; 43 | PciBusId busId{0}; 44 | double totalGbps{0}; 45 | }; 46 | 47 | class TopoSystemNet { 48 | public: 49 | TopoSystemNet(); 50 | ~TopoSystemNet(); 51 | 52 | int NumNics() const { return nics.size(); } 53 | std::vector GetNics() const; 54 | 55 | private: 56 | void Load(); 57 | 58 | private: 59 | std::vector> nics; 60 | }; 61 | 62 | } // namespace application 63 | } // namespace mori 64 | -------------------------------------------------------------------------------- /tests/cpp/application/test_transport_tcp.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #include 23 | #include 24 | 25 | #include "mori/application/transport/tcp/tcp.hpp" 26 | 27 | using namespace mori::application; 28 | 29 | void TestTcpContext() { 30 | std::string host = "127.0.0.1"; 31 | 32 | TCPContext context1(host, 0); 33 | TCPContext context2(host, 0); 34 | 35 | context1.Listen(); 36 | context2.Listen(); 37 | printf("port 1 %d port 2 %d\n", context1.GetPort(), context2.GetPort()); 38 | assert((context1.GetPort() > 0) && (context2.GetPort() > 0)); 39 | assert((context1.GetListenFd() >= 0) && (context2.GetListenFd() >= 0)); 40 | 41 | TCPEndpointHandle eph1 = context1.Connect(host, context2.GetPort()); 42 | TCPEndpointHandle eph2 = context2.Accept()[0]; 43 | 44 | TCPEndpoint ep1(eph1); 45 | TCPEndpoint ep2(eph2); 46 | 47 | std::string sendBuf("Hello Mori!"); 48 | std::vector recvBuf(sendBuf.size()); 49 | 50 | assert(ep1.Send(sendBuf.c_str(), sendBuf.size()) == 0); 51 | assert(ep2.Recv(recvBuf.data(), sendBuf.size()) == 0); 52 | assert(std::string(recvBuf.data()) == sendBuf); 53 | 54 | context1.Close(); 55 | context2.Close(); 56 | } 57 | 58 | int main() { TestTcpContext(); } 59 | -------------------------------------------------------------------------------- /examples/ops/dispatch_combine/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## Test under DeepSeek-V3/R1 configuration 5 | Test: 6 | ``` 7 | make -j32 && mpirun -np 8 --allow-run-as-root 
./examples/test_dispatch_combine_ops --data_type fp8 --hdim=7168 --max_tokens=128 --expert_per_rank=32 --expert_per_token=8 --warp_per_blk=4 --block_num=128 --num=1 --cmd test 8 | ``` 9 | 10 | Intra-node Benchmark: 11 | ``` 12 | make -j32 && mpirun -np 8 --allow-run-as-root rocprofv3 --kernel-trace --stats -o dispatch -- ./examples/test_dispatch_combine_ops --data_type bf16 --hdim=7168 --max_tokens=512 --expert_per_rank=32 --expert_per_token=8 --warp_per_blk=4 --block_num=256 --num=10 --cmd bench --kernel_type=intra 13 | ``` 14 | 15 | Inter-node Benchmark: 16 | 17 | Run the following command on each node and replace node_rank with its actual rank. Note that 'master_addr' should be the IP address of the rank 0 node. Environment variable 'GLOO_SOCKET_IFNAME' should be set to the name of the TCP network interface you want to use. 18 | 19 | ``` 20 | export GLOO_SOCKET_IFNAME=ens14np0 21 | export MORI_RDMA_DEVICES=^mlx5_0,mlx5_1 # Optional: use `^` prefix to exclude specified devices 22 | 23 | torchrun --nnodes=2 --node_rank=0 --nproc_per_node=1 --master_addr="10.194.132.29" --master_port=1234 examples/ops/dispatch_combine/test_dispatch_combine_internode.py --max-tokens 128 --cmd bench 24 | ``` 25 | 26 | 27 | GLOO_SOCKET_IFNAME=enp81s0f1 torchrun --nnodes=2 --node_rank=0 --nproc_per_node=1 --master_addr="10.235.192.87" --master_port=1234 examples/ops/dispatch_combine/test_dispatch_combine_internode.py --max-tokens 128 --cmd bench 28 | GLOO_SOCKET_IFNAME=enp81s0f1 torchrun --nnodes=2 --node_rank=1 --nproc_per_node=1 --master_addr="10.235.192.87" --master_port=1234 examples/ops/dispatch_combine/test_dispatch_combine_internode.py --max-tokens 128 --cmd bench 29 | 30 | The output of this script includes the total number of tokens received, the total number of RDMA tokens received and the total bandwidth (including XGMI and RDMA).
To calculate the RDMA bandwidth, multiply the total bandwidth by (total # of RDMA tokens / total # of tokens). 31 | 32 | ## Mori Environment Variables 33 | 34 | ### MORI_RDMA_DEVICES 35 | 36 | For RoCE networks, you can specify which RDMA devices to use with the `MORI_RDMA_DEVICES` environment variable: 37 | 38 | - **Include specific devices**: `MORI_RDMA_DEVICES=mlx5_0,mlx5_1` 39 | - **Exclude devices**: `MORI_RDMA_DEVICES=^mlx5_2,mlx5_3` (use `^` prefix to exclude specified devices) 40 | - **Default**: If not set, all available RDMA devices will be used 41 | -------------------------------------------------------------------------------- /include/mori/application/topology/system.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include "mori/application/topology/gpu.hpp" 29 | #include "mori/application/topology/net.hpp" 30 | #include "mori/application/topology/pci.hpp" 31 | 32 | namespace mori { 33 | namespace application { 34 | 35 | /* ---------------------------------------------------------------------------------------------- */ 36 | /* TopoSystem */ 37 | /* ---------------------------------------------------------------------------------------------- */ 38 | class TopoSystem { 39 | public: 40 | TopoSystem(); 41 | ~TopoSystem(); 42 | 43 | TopoSystemGpu* GetTopoSystemGpu() { return gpu.get(); } 44 | TopoSystemPci* GetTopoSystemPci() { return pci.get(); } 45 | TopoSystemNet* GetTopoSystemNet() { return net.get(); } 46 | 47 | std::string MatchGpuAndNic(int id); 48 | std::vector MatchAllGpusAndNics(); 49 | 50 | private: 51 | void Load(); 52 | 53 | private: 54 | std::unique_ptr gpu; 55 | std::unique_ptr pci; 56 | std::unique_ptr net; 57 | }; 58 | 59 | } // namespace application 60 | } // namespace mori 61 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Exclude all 3rd-party libraries 2 | exclude: | 3 | (?x)^( 4 | third_party/.+ 5 | )$ 6 | repos: 7 | # Common hooks 8 | - repo: https://github.com/pre-commit/pre-commit-hooks 9 | rev: v5.0.0 10 | hooks: 11 | - id: check-added-large-files 12 | - id: check-merge-conflict 13 | - id: check-symlinks 14 | - id: detect-private-key 15 | - id: end-of-file-fixer 16 | - id: trailing-whitespace 17 | - repo: 
https://github.com/Lucas-C/pre-commit-hooks.git 18 | rev: v1.5.5 19 | hooks: 20 | - id: remove-crlf 21 | - id: remove-tabs 22 | name: Tabs remover (C++) 23 | files: \.(c|cc|cxx|cpp|cu|h|cuh|hpp|hxx|xpu|kps)$ 24 | args: [--whitespaces-count, '2'] 25 | - id: remove-tabs 26 | name: Tabs remover (Python) 27 | files: (.*\.py)$ 28 | args: [--whitespaces-count, '4'] 29 | - id: insert-license # Insert a short license disclaimer as a header comment in Python files 30 | name: License inserter (Python) 31 | files: \.py$ 32 | args: 33 | - --license-filepath 34 | - LICENSE 35 | - --comment-style 36 | - "#" 37 | - --use-current-year 38 | - --no-extra-eol 39 | - id: insert-license # Insert a short license disclaimer as a header comment in C++ files 40 | name: License inserter (C++) 41 | files: \.(c|cc|cxx|cpp|cu|h|cuh|hpp|hxx|xpu|kps)$ 42 | args: 43 | - --license-filepath 44 | - LICENSE 45 | - --comment-style 46 | - "//" 47 | - --use-current-year 48 | - --no-extra-eol 49 | - repo: https://github.com/PFCCLab/typos-pre-commit-mirror.git 50 | rev: v1.35.1 51 | hooks: 52 | - id: typos 53 | args: [--force-exclude] 54 | # For Python files 55 | - repo: https://github.com/psf/black-pre-commit-mirror 56 | rev: 25.1.0 57 | hooks: 58 | - id: black 59 | - repo: https://github.com/astral-sh/ruff-pre-commit 60 | rev: v0.12.0 61 | hooks: 62 | - id: ruff-check 63 | args: [--fix, --exit-non-zero-on-fix, --no-cache, --config=tools/codestyle/pyproject.toml] 64 | # For C++ files 65 | - repo: https://github.com/pre-commit/mirrors-clang-format 66 | rev: v20.1.8 67 | hooks: 68 | - id: clang-format 69 | types_or: [c++, cuda] 70 | args: [--style=file, --verbose] 71 | - repo: https://github.com/cheshirekow/cmake-format-precommit 72 | rev: v0.6.13 73 | hooks: 74 | - id: cmake-format 75 | - repo: https://github.com/PFCCLab/cmake-lint-paddle 76 | rev: v1.5.2 77 | hooks: 78 | - id: cmakelint 79 | args: [--config=./tools/codestyle/.cmakelintrc] 80 | 
-------------------------------------------------------------------------------- /src/application/bootstrap/mpi_bootstrap.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include "mori/application/bootstrap/mpi_bootstrap.hpp" 23 | 24 | #include 25 | 26 | #include 27 | 28 | namespace mori { 29 | namespace application { 30 | 31 | MpiBootstrapNetwork::MpiBootstrapNetwork(MPI_Comm mpi_comm) : mpi_comm(mpi_comm) { Initialize(); } 32 | 33 | MpiBootstrapNetwork::~MpiBootstrapNetwork() { Finalize(); } 34 | 35 | void MpiBootstrapNetwork::Initialize() { 36 | int initialized; 37 | int status = MPI_Initialized(&initialized); 38 | assert(!status); 39 | if (!initialized) { 40 | MPI_Init(NULL, NULL); 41 | } 42 | MPI_Comm_size(mpi_comm, &worldSize); 43 | MPI_Comm_rank(mpi_comm, &localRank); 44 | } 45 | 46 | void MpiBootstrapNetwork::Finalize() { 47 | int finalized = false; 48 | int status = MPI_Finalized(&finalized); 49 | assert(!status); 50 | 51 | if (!finalized) MPI_Finalize(); 52 | } 53 | 54 | void MpiBootstrapNetwork::Allgather(void* sendbuf, void* recvbuf, size_t sendcount) { 55 | int status = MPI_Allgather(sendbuf, sendcount, MPI_CHAR, recvbuf, sendcount, MPI_CHAR, mpi_comm); 56 | assert(!status); 57 | } 58 | 59 | void MpiBootstrapNetwork::AllToAll(void* sendbuf, void* recvbuf, size_t sendcount) { 60 | int status = MPI_Alltoall(sendbuf, sendcount, MPI_CHAR, recvbuf, sendcount, MPI_CHAR, mpi_comm); 61 | assert(!status); 62 | } 63 | 64 | void MpiBootstrapNetwork::Barrier() { 65 | int status = MPI_Barrier(mpi_comm); 66 | assert(!status); 67 | } 68 | 69 | } // namespace application 70 | } // namespace mori 71 | -------------------------------------------------------------------------------- /tests/cpp/application/test_topology.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include 25 | 26 | #include "mori/application/topology/topology.hpp" 27 | 28 | int TestTopoNodeGpu() { 29 | mori::application::TopoSystem sys{}; 30 | auto* gpuSys = sys.GetTopoSystemGpu(); 31 | auto* netSys = sys.GetTopoSystemNet(); 32 | auto* pciSys = sys.GetTopoSystemPci(); 33 | 34 | auto gpus = gpuSys->GetGpus(); 35 | auto nics = netSys->GetNics(); 36 | 37 | for (auto* gpu : gpus) { 38 | assert(pciSys->Node(gpu->busId)); 39 | for (auto* nic : nics) { 40 | assert(pciSys->Node(nic->busId)); 41 | auto* path = pciSys->Path(gpu->busId, nic->busId); 42 | auto* gpuPci = pciSys->Node(gpu->busId); 43 | auto* nicPci = pciSys->Node(nic->busId); 44 | if (!path) { 45 | printf("gpu %s nic %s no direct link\n", gpu->busId.String().c_str(), 46 | nic->busId.String().c_str()); 47 | } else { 48 | printf("gpu %s numa %d, nic %s name %s hops %zu speed %f numa %d\n", 49 | gpu->busId.String().c_str(), gpuPci->NumaNode(), nic->busId.String().c_str(), 50 | nic->name.c_str(), path->Hops(), nic->totalGbps, nicPci->NumaNode()); 51 | } 52 | } 53 | } 54 | 55 | std::vector matches = sys.MatchAllGpusAndNics(); 56 | for (int i = 0; i < matches.size(); i++) { 57 | auto* gpu = gpuSys->GetGpuByLogicalId(i); 58 | printf("gpu %d (%s) matches %s\n", i, gpu->busId.String().c_str(), matches[i].c_str()); 59 | } 60 | 61 | return 0; 62 | } 63 | 64 | int main() { return TestTopoNodeGpu(); } 65 | -------------------------------------------------------------------------------- /src/application/topology/net.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include "mori/application/topology/net.hpp" 23 | 24 | #include 25 | #include 26 | 27 | #include "mori/application/transport/rdma/rdma.hpp" 28 | 29 | namespace mori { 30 | namespace application { 31 | 32 | TopoSystemNet::TopoSystemNet() { Load(); } 33 | 34 | TopoSystemNet::~TopoSystemNet() {} 35 | 36 | PciBusId ParseBusIdFromSysfs(std::filesystem::path path) { 37 | // Regex to match PCI BDF like 0000:8c:00.0 38 | std::regex bdf_pattern(R"(^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7]$)"); 39 | 40 | for (auto it = path; !it.empty(); it = it.parent_path()) { 41 | auto comp = it.filename().string(); 42 | if (IsBdfString(comp)) return PciBusId(comp); 43 | } 44 | 45 | return PciBusId(0); 46 | } 47 | 48 | void TopoSystemNet::Load() { 49 | application::RdmaContext rdma(application::RdmaBackendType::IBVerbs); 50 | auto devices = rdma.GetRdmaDeviceList(); 51 | 52 | for (auto& dev : devices) { 53 | // TODO: finish nic plane 54 | TopoNodeNic* nic = new TopoNodeNic(); 55 | auto rPath = std::filesystem::canonical(dev->GetIbvDevice()->ibdev_path); 56 | nic->name = dev->Name(); 57 | nic->busId = ParseBusIdFromSysfs(rPath); 58 | nic->totalGbps = dev->TotalActiveGbps(); 59 | 60 | nics.emplace_back(nic); 61 | } 62 | } 63 | 64 | std::vector TopoSystemNet::GetNics() const { 65 | std::vector v(nics.size()); 66 | for (int i = 0; i < nics.size(); i++) v[i] = nics[i].get(); 67 | return v; 68 | } 69 | 70 | } // namespace application 71 | } // namespace mori 72 | -------------------------------------------------------------------------------- /src/pybind/torch_utils.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | namespace mori { 30 | 31 | template 32 | inline torch::Dtype GetTorchDataType() { 33 | if constexpr (std::is_same_v) { 34 | return torch::kFloat32; 35 | } else if constexpr (std::is_same_v) { 36 | return torch::kUInt32; 37 | } else if constexpr (std::is_same_v) { 38 | return torch::kInt32; 39 | } else if constexpr (std::is_same_v) { 40 | return torch::kUInt64; 41 | } else if constexpr (std::is_same_v) { 42 | return torch::kUInt64; 43 | } else if constexpr (std::is_same_v) { 44 | return torch::kBFloat16; 45 | } else if constexpr (std::is_same_v) { 46 | return torch::kFloat8_e4m3fn; 47 | } else if constexpr (std::is_same_v) { 48 | return torch::kFloat8_e4m3fnuz; 49 | } else { 50 | static_assert(false, "Unsupported data type"); 51 | } 52 | } 53 | 54 | inline hipDataType ScalarTypeToHipDataType(at::ScalarType scalarType) { 55 | switch (scalarType) { 56 | case at::kFloat: 57 | return HIP_R_32F; 58 | case at::kBFloat16: 59 | return HIP_R_16BF; 60 | case at::kFloat8_e4m3fn: 61 | return HIP_R_8F_E4M3; 62 | case at::kFloat8_e4m3fnuz: 63 | return HIP_R_8F_E4M3_FNUZ; 64 | default: 65 | throw std::runtime_error("Unsupported scalar type"); 66 | } 67 | } 68 | 69 | } // namespace mori 70 | -------------------------------------------------------------------------------- /include/mori/application/topology/gpu.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | #include "mori/application/topology/node.hpp" 28 | #include "mori/application/topology/pci.hpp" 29 | #include "rocm_smi/rocm_smi.h" 30 | 31 | namespace mori { 32 | namespace application { 33 | /* ---------------------------------------------------------------------------------------------- */ 34 | /* TopoNodeGpu */ 35 | /* ---------------------------------------------------------------------------------------------- */ 36 | class TopoNodeGpu; 37 | 38 | class TopoNodeGpuP2pLink : public TopoNode { 39 | public: 40 | TopoNodeGpuP2pLink() = default; 41 | ~TopoNodeGpuP2pLink() = default; 42 | 43 | public: 44 | RSMI_IO_LINK_TYPE type; 45 | uint64_t hops{0}; 46 | uint64_t weight{0}; 47 | 48 | TopoNodeGpu* gpu1{nullptr}; 49 | TopoNodeGpu* gpu2{nullptr}; 50 | }; 51 | 52 | class TopoNodeGpu : public TopoNode { 53 | public: 54 | TopoNodeGpu() = default; 55 | ~TopoNodeGpu() = default; 56 | 57 | public: 58 | PciBusId busId{0}; 59 | std::vector p2ps; 60 | }; 61 | 62 | class TopoSystemGpu { 63 | public: 64 | TopoSystemGpu(); 65 | ~TopoSystemGpu(); 66 | 67 | int NumGpus() const { return gpus.size(); } 68 | std::vector GetGpus() const; 69 | TopoNodeGpu* GetGpuByLogicalId(int) const; 70 | 71 | private: 72 | void Load(); 73 | 74 | private: 75 | std::vector> gpus; 76 | std::vector> p2ps; 77 | }; 78 | 79 | } // namespace application 80 | } // namespace mori 81 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(dist_write dist_rdma_ops/dist_write.cpp utils/args_parser.cpp) 2 | target_include_directories(dist_write PRIVATE ${CMAKE_SOURCE_DIR}/examples/utils) 3 | target_link_libraries(dist_write mori_application hip::host hip::device) 4 | 5 | add_executable(send_recv_gpu local_rdma_ops/send_recv_gpu.cpp) 6 | target_link_libraries(send_recv_gpu mori_application hip::host 
hip::device) 7 | 8 | add_executable(write_gpu local_rdma_ops/write_gpu.cpp) 9 | target_link_libraries(write_gpu mori_application hip::host hip::device) 10 | 11 | add_executable(write_inline_gpu local_rdma_ops/write_inline_gpu.cpp) 12 | target_link_libraries(write_inline_gpu mori_application hip::host hip::device) 13 | 14 | add_executable(atomic_gpu local_rdma_ops/atomic_gpu.cpp) 15 | target_link_libraries(atomic_gpu mori_application hip::host hip::device) 16 | 17 | add_executable(put_thread_allgather shmem/put_thread_allgather.cpp) 18 | target_link_libraries(put_thread_allgather mori_shmem hip::host hip::device) 19 | 20 | add_executable(concurrent_put_thread shmem/concurrent_put_thread.cpp) 21 | target_link_libraries(concurrent_put_thread mori_shmem hip::host hip::device) 22 | 23 | add_executable(concurrent_put_imm_thread shmem/concurrent_put_imm_thread.cpp) 24 | target_link_libraries(concurrent_put_imm_thread mori_shmem hip::host 25 | hip::device) 26 | 27 | add_executable(concurrent_put_signal_thread shmem/concurrent_put_signal_thread.cpp) 28 | target_link_libraries(concurrent_put_signal_thread mori_shmem hip::host 29 | hip::device) 30 | 31 | add_executable(atomic_nonfetch_thread shmem/atomic_nonfetch_thread.cpp) 32 | target_link_libraries(atomic_nonfetch_thread mori_shmem hip::host hip::device) 33 | 34 | add_executable(atomic_fetch_thread shmem/atomic_fetch_thread.cpp) 35 | target_link_libraries(atomic_fetch_thread mori_shmem hip::host hip::device) 36 | 37 | add_executable(context application/context.cpp) 38 | target_link_libraries(context mori_application hip::host hip::device) 39 | 40 | add_executable(test_dispatch_combine_ops 41 | ops/dispatch_combine/test_dispatch_combine.cpp) 42 | target_link_libraries(test_dispatch_combine_ops mori_application mori_shmem 43 | mori_ops hip::host hip::device) 44 | 45 | add_executable(atomic_perf benchmarks/atomic_perf.cpp utils/args_parser.cpp) 46 | target_include_directories(atomic_perf 47 | PRIVATE 
${CMAKE_SOURCE_DIR}/examples/utils) 48 | target_link_libraries(atomic_perf mori_application hip::host hip::device) 49 | 50 | add_executable(accum_perf benchmarks/accum_perf.cpp) 51 | target_link_libraries(accum_perf mori_application mori_shmem hip::host 52 | hip::device) 53 | 54 | add_executable(ibverbs_test application/ibverbs_test.cpp) 55 | target_link_libraries(ibverbs_test mori_application mori_shmem hip::host 56 | hip::device) 57 | -------------------------------------------------------------------------------- /include/mori/shmem/shmem_api.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
namespace mori {
namespace shmem {

/* ---------------------------------------------------------------------------------------------- */
/*                                         Initialization                                         */
/* ---------------------------------------------------------------------------------------------- */

// TODO: provide unified initialize / finalize APIs
// Three entry points that differ in where the bootstrap comes from: a
// caller-supplied bootstrap network, an MPI communicator, or a torch process
// group looked up by name.
// NOTE(review): the meaning of the int return values is not visible in this
// header - confirm against the implementation before relying on it.
int ShmemInit(application::BootstrapNetwork* bootNet);
int ShmemMpiInit(MPI_Comm);
int ShmemTorchProcessGroupInit(const std::string& groupName);
int ShmemFinalize();

// Presumably the calling PE's index and the total number of PEs (OpenSHMEM
// style naming) - TODO confirm.
int ShmemMyPe();
int ShmemNPes();

// Team identifiers. This is an unscoped enum, so the enumerators are visible
// directly in namespace mori::shmem.
enum ShmemTeamType {
  INVALID = -1,
  WORLD = 0,
  SHARED = 1,
  TEAM_NODE = 2,
};

int ShmemNumQpPerPe();

// TODO: finish team pe api
// int ShmemTeamMyPe(ShmemTeamType);
// int ShmemTeamNPes(ShmemTeamType);

/* ---------------------------------------------------------------------------------------------- */
/*                                        Symmetric Memory                                        */
/* ---------------------------------------------------------------------------------------------- */

// Allocation and release of symmetric memory.
// NOTE(review): the accepted values of `flags` are not documented here - verify
// against the implementation.
void* ShmemMalloc(size_t size);
void* ShmemExtMallocWithFlags(size_t size, unsigned int flags);
void ShmemFree(void*);

// Note: temporary API for testing
application::SymmMemObjPtr ShmemQueryMemObjPtr(void*);

// Registers / deregisters a buffer given by pointer and size.
// NOTE(review): return value convention not visible in this header.
int ShmemBufferRegister(void* ptr, size_t size);
int ShmemBufferDeregister(void* ptr, size_t size);

}  // namespace shmem
}  // namespace mori
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | #include 26 | 27 | #include "mori/application/bootstrap/bootstrap.hpp" 28 | #include "mori/application/topology/topology.hpp" 29 | #include "mori/application/transport/transport.hpp" 30 | 31 | namespace mori { 32 | namespace application { 33 | 34 | class Context { 35 | public: 36 | Context(BootstrapNetwork& bootNet); 37 | ~Context(); 38 | 39 | int LocalRank() const { return bootNet.GetLocalRank(); } 40 | int WorldSize() const { return bootNet.GetWorldSize(); } 41 | int LocalRankInNode() const { return rankInNode; } 42 | std::string HostName() const; 43 | 44 | TransportType GetTransportType(int destRank) const { return transportTypes[destRank]; } 45 | std::vector GetTransportTypes() const { return transportTypes; } 46 | int GetNumQpPerPe() const { return numQpPerPe; } 47 | 48 | RdmaContext* GetRdmaContext() const { return rdmaContext.get(); } 49 | RdmaDeviceContext* GetRdmaDeviceContext() const { return rdmaDeviceContext.get(); } 50 | bool RdmaTransportEnabled() const { return GetRdmaDeviceContext() != nullptr; } 51 | 52 | const std::vector& GetRdmaEndpoints() const { return rdmaEps; } 53 | 54 | private: 55 | void CollectHostNames(); 56 | void InitializePossibleTransports(); 57 | 58 | private: 59 | BootstrapNetwork& bootNet; 60 | int rankInNode{-1}; 61 | int numQpPerPe{4}; 62 | std::vector hostnames; 63 | std::vector transportTypes; 64 | 65 | std::unique_ptr rdmaContext{nullptr}; 66 | std::unique_ptr rdmaDeviceContext{nullptr}; 67 | 68 | std::vector rdmaEps; 69 | 70 | std::unique_ptr topo{nullptr}; 71 | }; 72 | 73 | } // namespace application 74 | } // namespace mori 75 | -------------------------------------------------------------------------------- /examples/application/ibverbs_test.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | #include 24 | 25 | #include "mori/application/application.hpp" 26 | #include "mori/application/utils/udma_barrier.h" 27 | #include "mori/core/core.hpp" 28 | 29 | using namespace mori; 30 | using namespace mori::application; 31 | using namespace mori::core; 32 | 33 | int main(int argc, char* argv[]) { 34 | MpiBootstrapNetwork bootNet(MPI_COMM_WORLD); 35 | bootNet.Initialize(); 36 | int local_rank = bootNet.GetLocalRank(); 37 | int world_size = bootNet.GetWorldSize(); 38 | 39 | // RDMA initialization 40 | // 1 Create device 41 | RdmaContext rdma_context(RdmaBackendType::IBVerbs); 42 | RdmaDeviceList rdma_devices = rdma_context.GetRdmaDeviceList(); 43 | ActiveDevicePortList activeDevicePortList = GetActiveDevicePortList(rdma_devices); 44 | RdmaDevice* device = activeDevicePortList[0].first; 45 | 46 | RdmaDeviceContext* device_context = device->CreateRdmaDeviceContext(); 47 | 48 | // 2 Create an endpoint 49 | RdmaEndpointConfig config; 50 | config.portId = activeDevicePortList[0].second; 51 | config.gidIdx = 1; 52 | config.maxMsgsNum = 200; 53 | config.maxCqeNum = 1024; 54 | config.alignment = 4096; 55 | config.onGpu = false; 56 | RdmaEndpoint endpoint = device_context->CreateRdmaEndpoint(config); 57 | 58 | // 3 Allgather global endpoint and connect 59 | std::vector global_rdma_ep_handles(world_size); 60 | bootNet.Allgather(&endpoint.handle, global_rdma_ep_handles.data(), sizeof(RdmaEndpointHandle)); 61 | 62 | std::cout << "Local rank " << local_rank << " " << endpoint.handle << std::endl; 63 | 64 | for (int i = 0; i < world_size; i++) { 65 | if (i == local_rank) continue; 66 | device_context->ConnectEndpoint(endpoint.handle, global_rdma_ep_handles[i]); 67 | std::cout << "Local rank " << local_rank << " received " << global_rdma_ep_handles[i] 68 | << std::endl; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /tools/bnxt_disable_cq_coal.sh: 
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Copyright © Advanced Micro Devices, Inc. All rights reserved.
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Discover all bnxt_re RDMA devices
# Automatically create the configfs directory if needed
# Set both cq_coal_normal_maxbuf and cq_coal_during_maxbuf to 0x1
#
# NOTE: the shebang must be the first line and the license must use '#'
# comments; a '//' header is executed by bash as commands and aborts the
# script on the first parenthesis.

set -euo pipefail

CFG_ROOT="/sys/kernel/config/bnxt_re"
IB_ROOT="/sys/class/infiniband"
VALUE="0x1"

if [[ ! -d $IB_ROOT ]]; then
  echo "Error: $IB_ROOT not present, is the RDMA stack loaded?" >&2
  exit 1
fi

# Every entry under $IB_ROOT is a symlink named after an RDMA device.
mapfile -t IB_DEVS < <(find "$IB_ROOT" -maxdepth 1 -mindepth 1 -type l -printf '%f\n' | sort)

if [[ ${#IB_DEVS[@]} -eq 0 ]]; then
  echo "No RDMA devices found under $IB_ROOT"
  exit 0
fi

echo "Detected RDMA devices: ${IB_DEVS[*]}"

for dev in "${IB_DEVS[@]}"; do
  if [[ $dev != bnxt_re* ]]; then
    echo "Skip $dev (non-bnxt device)"
    continue
  fi

  DEV_CFG_DIR="$CFG_ROOT/$dev"
  echo
  echo ">> Processing $dev"

  if [[ ! -d $DEV_CFG_DIR ]]; then
    echo " Creating configfs directory $DEV_CFG_DIR"
    sudo mkdir -p "$DEV_CFG_DIR" || { echo " !! mkdir failed"; continue; }
  fi

  # A missing ports/ directory is tolerated: find's error is discarded and the
  # resulting empty list is reported below.
  mapfile -t PORT_DIRS < <(find "$DEV_CFG_DIR/ports" -maxdepth 1 -type d -name "[0-9]*" 2>/dev/null | sort)

  if [[ ${#PORT_DIRS[@]} -eq 0 ]]; then
    echo " No port directories found under $DEV_CFG_DIR (driver may be inactive)"
    continue
  fi

  for port in "${PORT_DIRS[@]}"; do
    TUNE_DIR="$port/tunables"
    for file in cq_coal_normal_maxbuf cq_coal_during_maxbuf; do
      path="$TUNE_DIR/$file"
      if [[ -e $path ]]; then
        echo " set $(basename "$port")/$file -> $VALUE"
        # tee performs the privileged write, so the path is never re-parsed by
        # a second shell; pipefail propagates a tee failure to the || branch.
        printf '%s\n' "$VALUE" | sudo tee "$path" >/dev/null || echo " !! write failed"
      else
        echo " $path not found"
      fi
    done
  done
done

echo
echo "Done."
--------------------------------------------------------------------------------
/src/io/rdma/protocol.cpp:
--------------------------------------------------------------------------------
1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved.
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include "src/io/rdma/protocol.hpp" 23 | 24 | #include 25 | 26 | #include "mori/application/utils/check.hpp" 27 | 28 | namespace mori { 29 | namespace io { 30 | 31 | Protocol::Protocol(application::TCPEndpointHandle eph) : ep(eph) {} 32 | 33 | Protocol::~Protocol() {} 34 | 35 | MessageHeader Protocol::ReadMessageHeader() { 36 | MessageHeader hdr; 37 | SYSCALL_RETURN_ZERO(ep.Recv(&hdr.type, sizeof(hdr.type))); 38 | SYSCALL_RETURN_ZERO(ep.Recv(&hdr.len, sizeof(hdr.len))); 39 | hdr.len = ntohl(hdr.len); 40 | return hdr; 41 | } 42 | 43 | void Protocol::WriteMessageHeader(const MessageHeader& hdr) { 44 | SYSCALL_RETURN_ZERO(ep.Send(&hdr.type, sizeof(hdr.type))); 45 | uint32_t len = htonl(hdr.len); 46 | SYSCALL_RETURN_ZERO(ep.Send(&len, sizeof(len))); 47 | } 48 | 49 | MessageRegEndpoint Protocol::ReadMessageRegEndpoint(size_t len) { 50 | std::vector buf(len); 51 | SYSCALL_RETURN_ZERO(ep.Recv(buf.data(), len)); 52 | auto out = msgpack::unpack(buf.data(), len); 53 | return out.get().as(); 54 | } 55 | 56 | void Protocol::WriteMessageRegEndpoint(const MessageRegEndpoint& msg) { 57 | msgpack::sbuffer buf; 58 | msgpack::pack(buf, msg); 59 | uint32_t len = static_cast(buf.size()); 60 | WriteMessageHeader({MessageType::RegEndpoint, len}); 61 | SYSCALL_RETURN_ZERO(ep.Send(buf.data(), buf.size())); 62 | } 63 | 64 | MessageAskMemoryRegion Protocol::ReadMessageAskMemoryRegion(size_t len) { 65 | std::vector buf(len); 66 | SYSCALL_RETURN_ZERO(ep.Recv(buf.data(), len)); 67 | auto out = msgpack::unpack(buf.data(), len); 68 | return out.get().as(); 69 | } 70 | 71 | void Protocol::WriteMessageAskMemoryRegion(const MessageAskMemoryRegion& msg) { 72 | msgpack::sbuffer buf; 73 | msgpack::pack(buf, msg); 74 | uint32_t len = static_cast(buf.size()); 75 | WriteMessageHeader({MessageType::AskMemoryRegion, len}); 76 | SYSCALL_RETURN_ZERO(ep.Send(buf.data(), buf.size())); 77 | } 78 | 79 | } // namespace io 80 | } // namespace mori 81 | 
-------------------------------------------------------------------------------- /src/shmem/memory.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include "mori/application/memory/symmetric_memory.hpp" 25 | #include "mori/shmem/shmem_api.hpp" 26 | #include "src/shmem/internal.hpp" 27 | #include "mori/utils/mori_log.hpp" 28 | 29 | namespace mori { 30 | namespace shmem { 31 | 32 | void* ShmemMalloc(size_t size) { 33 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 34 | states->CheckStatusValid(); 35 | application::SymmMemObjPtr obj = states->memoryStates->symmMemMgr->Malloc(size); 36 | MORI_SHMEM_TRACE("Allocated shared memory of size {}", size); 37 | if (obj.IsValid()) { 38 | return obj.cpu->localPtr; 39 | } 40 | return nullptr; 41 | } 42 | 43 | void* ShmemExtMallocWithFlags(size_t size, unsigned int flags) { 44 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 45 | states->CheckStatusValid(); 46 | application::SymmMemObjPtr obj = 47 | states->memoryStates->symmMemMgr->ExtMallocWithFlags(size, flags); 48 | MORI_SHMEM_TRACE("Allocated shared memory of size {} with flags {}", size, flags); 49 | if (obj.IsValid()) { 50 | return obj.cpu->localPtr; 51 | } 52 | return nullptr; 53 | } 54 | 55 | void ShmemFree(void* localPtr) { 56 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 57 | states->CheckStatusValid(); 58 | states->memoryStates->symmMemMgr->Free(localPtr); 59 | } 60 | 61 | application::SymmMemObjPtr ShmemQueryMemObjPtr(void* localPtr) { 62 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 63 | states->CheckStatusValid(); 64 | return states->memoryStates->symmMemMgr->Get(localPtr); 65 | } 66 | 67 | int ShmemBufferRegister(void* ptr, size_t size) { 68 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 69 | states->CheckStatusValid(); 70 | states->memoryStates->mrMgr->RegisterBuffer(ptr, size); 71 | return 0; 72 | } 73 | 74 | int ShmemBufferDeregister(void* ptr, size_t size) { 75 | ShmemStates* states = ShmemStatesSingleton::GetInstance(); 76 | states->CheckStatusValid(); 77 | states->memoryStates->mrMgr->DeregisterBuffer(ptr); 78 | 
return 0; 79 | } 80 | 81 | } // namespace shmem 82 | } // namespace mori 83 | -------------------------------------------------------------------------------- /src/io/rdma/protocol.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | 26 | #include "mori/io/common.hpp" 27 | #include "mori/io/msgpack_adaptor.hpp" 28 | #include "src/io/rdma/backend_impl.hpp" 29 | 30 | namespace mori { 31 | namespace io { 32 | /* ---------------------------------------------------------------------------------------------- */ 33 | /* Message */ 34 | /* ---------------------------------------------------------------------------------------------- */ 35 | enum class MessageType : uint8_t { 36 | RegEndpoint = 0, 37 | AskMemoryRegion = 1, 38 | }; 39 | 40 | struct MessageHeader { 41 | MessageType type; 42 | uint32_t len; 43 | }; 44 | 45 | struct MessageRegEndpoint { 46 | EngineKey ekey; 47 | TopoKeyPair topo; 48 | int devId; 49 | application::RdmaEndpointHandle eph; 50 | MSGPACK_DEFINE(ekey, topo, devId, eph); 51 | }; 52 | 53 | struct MessageAskMemoryRegion { 54 | EngineKey ekey; 55 | int devId; 56 | MemoryUniqueId id; 57 | application::RdmaMemoryRegion mr; 58 | MSGPACK_DEFINE(ekey, devId, id, mr); 59 | }; 60 | 61 | struct MessageBuildConn { 62 | EngineKey key; 63 | MSGPACK_DEFINE(key); 64 | }; 65 | 66 | /* ---------------------------------------------------------------------------------------------- */ 67 | /* Protocol */ 68 | /* ---------------------------------------------------------------------------------------------- */ 69 | class Protocol { 70 | public: 71 | Protocol(application::TCPEndpointHandle); 72 | ~Protocol(); 73 | 74 | MessageHeader ReadMessageHeader(); 75 | void WriteMessageHeader(const MessageHeader&); 76 | 77 | MessageRegEndpoint ReadMessageRegEndpoint(size_t len); 78 | void WriteMessageRegEndpoint(const MessageRegEndpoint&); 79 | 80 | MessageAskMemoryRegion ReadMessageAskMemoryRegion(size_t len); 81 | void WriteMessageAskMemoryRegion(const MessageAskMemoryRegion&); 82 | 83 | private: 84 | application::TCPEndpoint ep; 85 | }; 86 | 87 | } // namespace io 88 | } // namespace mori 89 | 
-------------------------------------------------------------------------------- /examples/shmem/concurrent_put_imm_thread.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include 25 | 26 | #include "mori/application/utils/check.hpp" 27 | #include "mori/shmem/shmem.hpp" 28 | 29 | using namespace mori::core; 30 | using namespace mori::shmem; 31 | using namespace mori::application; 32 | 33 | __global__ void ConcurrentPutImmThreadKernel(int myPe, const SymmMemObjPtr memObj) { 34 | constexpr int sendPe = 0; 35 | constexpr int recvPe = 1; 36 | uint32_t val = 42; 37 | int globalTid = blockIdx.x * blockDim.x + threadIdx.x; 38 | int threadOffset = globalTid * sizeof(uint32_t); 39 | 40 | if (myPe == sendPe) { 41 | RdmaMemoryRegion source = memObj->GetRdmaMemoryRegion(myPe); 42 | 43 | ShmemPutSizeImmNbiThread(memObj, threadOffset, &val, sizeof(uint32_t), recvPe); 44 | __threadfence_system(); 45 | 46 | ShmemQuietThread(); 47 | // __syncthreads(); 48 | } else { 49 | while (atomicAdd(reinterpret_cast(memObj->localPtr) + globalTid, 0) != val) { 50 | } 51 | } 52 | } 53 | 54 | void ConcurrentPutImmThread() { 55 | int status; 56 | MPI_Init(NULL, NULL); 57 | 58 | status = ShmemMpiInit(MPI_COMM_WORLD); 59 | assert(!status); 60 | 61 | // Assume in same node 62 | int myPe = ShmemMyPe(); 63 | int npes = ShmemNPes(); 64 | assert(npes == 2); 65 | 66 | constexpr int threadNum = 128; 67 | constexpr int blockNum = 3; 68 | 69 | // Allocate buffer 70 | int numEle = threadNum * blockNum; 71 | int buffSize = numEle * sizeof(uint32_t); 72 | 73 | void* buff = ShmemMalloc(buffSize); 74 | HIP_RUNTIME_CHECK(hipMemsetD32(reinterpret_cast(buff), myPe, numEle)); 75 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 76 | 77 | SymmMemObjPtr buffObj = ShmemQueryMemObjPtr(buff); 78 | assert(buffObj.IsValid()); 79 | 80 | // Run put 81 | ConcurrentPutImmThreadKernel<<>>(myPe, buffObj); 82 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 83 | MPI_Barrier(MPI_COMM_WORLD); 84 | if (myPe == 0) { 85 | printf("test done!\n"); 86 | } 87 | // Finalize 88 | ShmemFree(buff); 89 | ShmemFinalize(); 90 | } 91 | 92 | int main(int argc, char* argv[]) { 93 | 
ConcurrentPutImmThread(); 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /tests/cpp/io/test_protocol.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | #include 24 | 25 | #include "mori/application/transport/tcp/tcp.hpp" 26 | #include "src/io/rdma/protocol.hpp" 27 | 28 | using namespace mori::application; 29 | using namespace mori::io; 30 | 31 | using TCPInfo = std::pair; 32 | using TCPInfoPair = std::pair; 33 | 34 | TCPInfoPair PrepareTCPEndpoints() { 35 | std::string host = "127.0.0.1"; 36 | 37 | TCPContext* context1 = new TCPContext(host, 0); 38 | TCPContext* context2 = new TCPContext(host, 0); 39 | 40 | context1->Listen(); 41 | context2->Listen(); 42 | assert((context1->GetPort() > 0) && (context2->GetPort() > 0)); 43 | assert((context1->GetListenFd() >= 0) && (context2->GetListenFd() >= 0)); 44 | 45 | TCPEndpointHandle eph1 = context1->Connect(host, context2->GetPort()); 46 | TCPEndpointHandle eph2 = context2->Accept()[0]; 47 | 48 | return {{context1, eph1}, {context2, eph2}}; 49 | } 50 | 51 | void TestProtocol() { 52 | auto tcpInfoPair = PrepareTCPEndpoints(); 53 | Protocol initiator(tcpInfoPair.first.second); 54 | Protocol target(tcpInfoPair.second.second); 55 | 56 | MessageRegEngine msg; 57 | msg.engineDesc.key = "initiator"; 58 | msg.engineDesc.hostname = "test"; 59 | msg.rdmaEph.psn = 22; 60 | msg.rdmaEph.qpn = 35; 61 | msg.rdmaEph.portId = 9999; 62 | msg.rdmaEph.ib.lid = 678; 63 | for (int i = 0; i < sizeof(msg.rdmaEph.eth.gid); i++) msg.rdmaEph.eth.gid[i] = i; 64 | for (int i = 0; i < sizeof(msg.rdmaEph.eth.mac); i++) msg.rdmaEph.eth.mac[i] = i; 65 | 66 | initiator.WriteMessageRegEngine(msg); 67 | 68 | MessageHeader hdr = target.ReadMessageHeader(); 69 | assert(hdr.type == MessageType::RegEngine); 70 | MessageRegEngine recv = target.ReadMessageRegEngine(hdr.len); 71 | 72 | assert(recv.engineDesc.key == msg.engineDesc.key); 73 | assert(recv.engineDesc.hostname == msg.engineDesc.hostname); 74 | assert(recv.rdmaEph == msg.rdmaEph); 75 | 76 | for (int i = 0; i < sizeof(msg.rdmaEph.eth.gid); i++) assert(recv.rdmaEph.eth.gid[i] == i); 77 | for (int i = 0; i < sizeof(msg.rdmaEph.eth.mac); 
i++) assert(recv.rdmaEph.eth.mac[i] == i); 78 | } 79 | 80 | int main() { TestProtocol(); } 81 | -------------------------------------------------------------------------------- /include/mori/application/transport/tcp/tcp.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
#pragma once

#include <arpa/inet.h>
#include <netinet/in.h>

#include <cstddef>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

namespace mori {
namespace application {

/* ---------------------------------------------------------------------------------------------- */
/*                                           TCPEndpoint                                          */
/* ---------------------------------------------------------------------------------------------- */
// Plain description of one connected socket: its descriptor and the peer address.
struct TCPEndpointHandle {
  int fd{-1};          // -1 marks "no socket", matching TCPContext::listenFd
  sockaddr_in peer{};  // zero-initialized until filled in by Connect/Accept
};

using TCPEndpointHandleVec = std::vector<TCPEndpointHandle>;

// Thin send/receive wrapper around a TCPEndpointHandle. The destructor is
// defaulted, so this class does not close the descriptor it wraps.
class TCPEndpoint {
 public:
  TCPEndpoint(TCPEndpointHandle handle) : handle(handle) {}
  ~TCPEndpoint() = default;

  // Both are implemented in tcp.cpp.
  // NOTE(review): callers in src/io/rdma/protocol.cpp treat a return of 0 as
  // success -- confirm the exact return contract in tcp.cpp.
  int Send(const void* buf, size_t len);
  int Recv(void* buf, size_t len);

 public:
  TCPEndpointHandle handle;
};

/* ---------------------------------------------------------------------------------------------- */
/*                                           TCPContext                                           */
/* ---------------------------------------------------------------------------------------------- */
// Host/port pair identifying a TCPContext.
struct TCPContextHandle {
  std::string host{};
  uint16_t port{0};

  // Not constexpr: comparing std::string is not a constant expression before
  // C++20, so a constexpr qualifier here could never be honored.
  bool operator==(const TCPContextHandle& rhs) const noexcept {
    return (host == rhs.host) && (port == rhs.port);
  }
};

// Owns a listening socket and the endpoints created through it.
class TCPContext {
 public:
  // TODO: delete copy ctor
  TCPContext(std::string ip, uint16_t port = 0);
  ~TCPContext();

  std::string GetHost() const { return handle.host; }
  uint16_t GetPort() const { return handle.port; }
  int GetListenFd() const { return listenFd; }

  void Listen();
  void Close();

  TCPEndpointHandle Connect(std::string remote, uint16_t port);
  TCPEndpointHandleVec Accept();
  void CloseEndpoint(TCPEndpointHandle);

 public:
  TCPContextHandle handle;

 private:
  int listenFd{-1};
  // NOTE(review): key/value types reconstructed as "fd -> handle"; confirm
  // against the uses of `endpoints` in tcp.cpp.
  std::unordered_map<int, TCPEndpointHandle> endpoints;
};

}  // namespace application
}  // namespace mori
-------------------------------------------------------------------------------- /examples/shmem/concurrent_put_thread.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include 25 | 26 | #include "mori/application/utils/check.hpp" 27 | #include "mori/shmem/shmem.hpp" 28 | 29 | using namespace mori::core; 30 | using namespace mori::shmem; 31 | using namespace mori::application; 32 | 33 | __global__ void ConcurrentPutThreadKernel(int myPe, const SymmMemObjPtr memObj) { 34 | constexpr int sendPe = 0; 35 | constexpr int recvPe = 1; 36 | 37 | int globalTid = blockIdx.x * blockDim.x + threadIdx.x; 38 | int threadOffset = globalTid * sizeof(uint32_t); 39 | 40 | if (myPe == sendPe) { 41 | RdmaMemoryRegion source = memObj->GetRdmaMemoryRegion(myPe); 42 | 43 | ShmemPutMemNbiThread(memObj, threadOffset, source, threadOffset, sizeof(uint32_t), recvPe, 1); 44 | __threadfence_system(); 45 | 46 | if (blockIdx.x == 0) 47 | { 48 | ShmemQuietThread(); 49 | } 50 | 51 | 52 | // __syncthreads(); 53 | } else { 54 | while (atomicAdd(reinterpret_cast(memObj->localPtr) + globalTid, 0) != sendPe) { 55 | } 56 | } 57 | } 58 | 59 | void ConcurrentPutThread() { 60 | int status; 61 | MPI_Init(NULL, NULL); 62 | 63 | status = ShmemMpiInit(MPI_COMM_WORLD); 64 | assert(!status); 65 | 66 | // Assume in same node 67 | int myPe = ShmemMyPe(); 68 | int npes = ShmemNPes(); 69 | assert(npes == 2); 70 | 71 | constexpr int threadNum = 128; 72 | constexpr int blockNum = 3; 73 | 74 | // Allocate buffer 75 | int numEle = threadNum * blockNum; 76 | int buffSize = numEle * sizeof(uint32_t); 77 | 78 | void* buff = ShmemMalloc(buffSize); 79 | HIP_RUNTIME_CHECK(hipMemsetD32(reinterpret_cast(buff), myPe, numEle)); 80 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 81 | 82 | SymmMemObjPtr buffObj = ShmemQueryMemObjPtr(buff); 83 | assert(buffObj.IsValid()); 84 | 85 | // Run put 86 | ConcurrentPutThreadKernel<<>>(myPe, buffObj); 87 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 88 | MPI_Barrier(MPI_COMM_WORLD); 89 | if (myPe == 0) { 90 | printf("test done!\n"); 91 | } 92 | 93 | // Finalize 94 | ShmemFree(buff); 95 | ShmemFinalize(); 96 | } 97 | 98 | int main(int 
argc, char* argv[]) { 99 | ConcurrentPutThread(); 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /src/application/bootstrap/torch_bootstrap.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include "mori/application/bootstrap/torch_bootstrap.hpp" 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace mori { 31 | namespace application { 32 | // Stores the name of the registered torch process group; the group is resolved on each call. 33 | TorchBootstrapNetwork::TorchBootstrapNetwork(const std::string& name) : groupName(name) {} 34 | 35 | TorchBootstrapNetwork::~TorchBootstrapNetwork() { Finalize(); } 36 | // Resolves the process group named groupName and caches its size and this process's rank. 37 | void TorchBootstrapNetwork::Initialize() { 38 | c10::intrusive_ptr group = c10d::resolve_process_group(groupName); 39 | this->worldSize = group->getSize(); 40 | this->localRank = group->getRank(); 41 | } 42 | // Currently a no-op. 43 | void TorchBootstrapNetwork::Finalize() {} 44 | // Gathers sendcount bytes from each rank into recvbuf, viewed as worldSize x sendcount bytes. 45 | void TorchBootstrapNetwork::Allgather(void* sendbuf, void* recvbuf, size_t sendcount) { 46 | c10::intrusive_ptr group = c10d::resolve_process_group(groupName); 47 | const auto numBytes = static_cast<int64_t>(sendcount); // was (int): avoid 32-bit truncation 48 | std::vector inputTensors = { 49 | at::from_blob(sendbuf, {1, numBytes}, at::TensorOptions().dtype(at::kByte))}; 50 | 51 | std::vector outputTensors = { 52 | at::from_blob(recvbuf, {worldSize, numBytes}, at::TensorOptions().dtype(at::kByte))}; 53 | 54 | c10d::AllgatherOptions opts; 55 | auto work = group->allgather_into_tensor_coalesced(outputTensors, inputTensors, opts); 56 | work->wait(); 57 | } 58 | // Exchanges sendcount bytes with each rank; both buffers are viewed as worldSize x sendcount. 59 | void TorchBootstrapNetwork::AllToAll(void* sendbuf, void* recvbuf, size_t sendcount) { 60 | c10::intrusive_ptr group = c10d::resolve_process_group(groupName); 61 | const auto numBytes = static_cast<int64_t>(sendcount); // was (int): avoid 32-bit truncation 62 | at::Tensor inputTensor = 63 | at::from_blob(sendbuf, {worldSize, numBytes}, at::TensorOptions().dtype(at::kByte)); 64 | 65 | at::Tensor outputTensor = 66 | at::from_blob(recvbuf, {worldSize, numBytes}, at::TensorOptions().dtype(at::kByte)); 67 | // One row of the worldSize x sendcount view goes to, and comes from, every rank. 68 | std::vector counts(worldSize, 1); 69 | 70 | c10d::AllToAllOptions opts; 71 | auto work = group->alltoall_base(outputTensor, inputTensor, counts, counts, opts); 72 | work->wait(); 73 | } 74 | // Blocks until the barrier issued on the resolved process group completes. 75 | void TorchBootstrapNetwork::Barrier() { 76 | c10::intrusive_ptr group = c10d::resolve_process_group(groupName); 77 | 78 | auto work = 
group->barrier(); 79 | work->wait(); 80 | } 81 | 82 | } // namespace application 83 | } // namespace mori 84 | -------------------------------------------------------------------------------- /tests/cpp/application/test_transport_ibverbs.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include 25 | #include 26 | 27 | #include "mori/application/transport/rdma/rdma.hpp" 28 | #include "mori/application/utils/check.hpp" 29 | 30 | using namespace mori::application; 31 | // Posts one signaled RDMA READ between two endpoints and waits for its completion-channel event. 32 | void TestCompChannel() { 33 | RdmaContext context(RdmaBackendType::IBVerbs); 34 | RdmaDeviceList devices = context.GetRdmaDeviceList(); 35 | ActiveDevicePortList activeDevicePortList = GetActiveDevicePortList(devices); 36 | assert(activeDevicePortList.size() >= 2 && "TestCompChannel needs two active device ports"); 37 | // Entries [0] and [1] are both used below, hence the >= 2 check above. 38 | ActiveDevicePort devicePort1 = activeDevicePortList[0]; 39 | RdmaDevice* device1 = devicePort1.first; 40 | ActiveDevicePort devicePort2 = activeDevicePortList[1]; 41 | RdmaDevice* device2 = devicePort2.first; 42 | 43 | RdmaDeviceContext* rdmaDeviceContext1 = device1->CreateRdmaDeviceContext(); 44 | RdmaDeviceContext* rdmaDeviceContext2 = device2->CreateRdmaDeviceContext(); 45 | 46 | RdmaEndpointConfig config; 47 | config.portId = devicePort1.second; 48 | config.withCompChannel = true; 49 | RdmaEndpoint ep1 = rdmaDeviceContext1->CreateRdmaEndpoint(config); 50 | 51 | config.portId = devicePort2.second; 52 | RdmaEndpoint ep2 = rdmaDeviceContext2->CreateRdmaEndpoint(config); 53 | 54 | rdmaDeviceContext1->ConnectEndpoint(ep1, ep2); 55 | rdmaDeviceContext2->ConnectEndpoint(ep2, ep1); 56 | 57 | void *buf1, *buf2; 58 | size_t bufSize = 1024; 59 | HIP_RUNTIME_CHECK(hipMalloc(&buf1, bufSize)); 60 | HIP_RUNTIME_CHECK(hipMalloc(&buf2, bufSize)); 61 | 62 | RdmaMemoryRegion mr1 = rdmaDeviceContext1->RegisterRdmaMemoryRegion(buf1, bufSize); 63 | RdmaMemoryRegion mr2 = rdmaDeviceContext2->RegisterRdmaMemoryRegion(buf2, bufSize); 64 | // Local scatter/gather entry: the READ lands in buf1. 65 | ibv_sge sge{}; 66 | sge.addr = reinterpret_cast(buf1); 67 | sge.length = bufSize; 68 | sge.lkey = mr1.lkey; 69 | // Signaled RDMA READ from buf2 (remote) into buf1 (local). 70 | ibv_send_wr wr{}; 71 | ibv_send_wr* bad_wr = nullptr; 72 | wr.wr_id = 0; 73 | wr.sg_list = &sge; 74 | wr.num_sge = 1; 75 | wr.opcode = IBV_WR_RDMA_READ; 76 | wr.send_flags = IBV_SEND_SIGNALED; 77 | wr.wr.rdma.remote_addr = reinterpret_cast(buf2); 78 | 
wr.wr.rdma.rkey = mr2.rkey; 79 | ibv_req_notify_cq(ep1.ibvHandle.cq, 0); 80 | 81 | // Post outside of assert() so the work request is still issued when NDEBUG is defined. 82 | int postRet = ibv_post_send(ep1.ibvHandle.qp, &wr, &bad_wr); 83 | assert(postRet == 0 && "ibv_post_send RDMA READ"); 84 | (void)postRet; 85 | ibv_cq* ev_cq; 86 | void* ev_ctx; 87 | printf("get cq event %d\n", ibv_get_cq_event(ep1.ibvHandle.cq->channel, &ev_cq, &ev_ctx)); 88 | } 89 | 90 | int main() { TestCompChannel(); } 91 | -------------------------------------------------------------------------------- /src/shmem/internal.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | #include "mori/application/application.hpp" 30 | #include "mori/application/bootstrap/bootstrap.hpp" 31 | 32 | namespace mori { 33 | namespace shmem { 34 | // Rank, world size and bootstrap-network pointer; zero/null until filled in. 35 | struct BootStates { 36 | int rank{0}; 37 | int worldSize{0}; 38 | application::BootstrapNetwork* bootNet{nullptr}; 39 | }; 40 | 41 | using RdmaEndpointList = std::vector; 42 | using RdmaEndpointHandleList = std::vector; 43 | // Pointer to the communication Context; null until filled in. 44 | struct RdmaStates { 45 | application::Context* commContext{nullptr}; 46 | }; 47 | // Pointers to the symmetric-memory manager and the RDMA memory-region manager. 48 | struct MemoryStates { 49 | application::SymmMemManager* symmMemMgr{nullptr}; 50 | application::RdmaMemoryRegionManager* mrMgr{nullptr}; 51 | }; 52 | // Lifecycle states examined by ShmemStates::CheckStatusValid(). 53 | enum ShmemStatesStatus { 54 | New = 0, 55 | Initialized = 1, 56 | Finalized = 2, 57 | }; 58 | // Status flag plus pointers to the boot, RDMA and memory state structs. 59 | struct ShmemStates { 60 | ShmemStatesStatus status{ShmemStatesStatus::New}; 61 | BootStates* bootStates{nullptr}; 62 | RdmaStates* rdmaStates{nullptr}; 63 | MemoryStates* memoryStates{nullptr}; 64 | // Prints a diagnostic and asserts when the state is still New or already Finalized. 65 | // This is a temporary API for debugging only 66 | void CheckStatusValid() { 67 | if (status == ShmemStatesStatus::New) { 68 | std::cout 69 | << "Shmem state is not initialized, initialize it by calling ShmemMpiInit first." 70 | << std::endl; 71 | assert(false); 72 | } 73 | if (status == ShmemStatesStatus::Finalized) { 74 | std::cout << "Shmem state has been finalized." 
<< std::endl; 75 | assert(false); 76 | } 77 | } 78 | }; 79 | // Plain-data state struct; an instance named globalGpuStates is declared in __constant__ memory. 80 | struct GpuStates { 81 | int rank{-1}; 82 | int worldSize{-1}; 83 | int numQpPerPe{4}; // Default to 4 QPs per peer, consistent with Context default 84 | application::TransportType* transportTypes{nullptr}; 85 | application::RdmaEndpoint* rdmaEndpoints{nullptr}; 86 | uint32_t* endpointLock{nullptr}; 87 | }; 88 | // Declaration only; the definition lives in another translation unit. 89 | extern __constant__ GpuStates globalGpuStates; 90 | // Device-side accessor returning the address of globalGpuStates. 91 | static __device__ GpuStates* GetGlobalGpuStatesPtr() { return &globalGpuStates; } 92 | // Accessor for the single function-local static ShmemStates instance. 93 | class ShmemStatesSingleton { 94 | public: 95 | ShmemStatesSingleton(const ShmemStatesSingleton& obj) = delete; 96 | 97 | static ShmemStates* GetInstance() { 98 | static ShmemStates states; 99 | return &states; 100 | } 101 | }; 102 | 103 | } // namespace shmem 104 | } // namespace mori 105 | -------------------------------------------------------------------------------- /examples/shmem/shmem_python_api.py: -------------------------------------------------------------------------------- 1 | # Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | # 3 | # MIT License 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | import mori 23 | import os 24 | 25 | import torch 26 | import torch.distributed as dist 27 | 28 | 29 | def setup(local_rank, num_node, gpu_per_node): 30 | world_size = num_node * gpu_per_node 31 | 32 | torch.cuda.set_device(local_rank) 33 | device = torch.device("cuda", local_rank) 34 | 35 | node_rank = int(os.environ["RANK"]) 36 | global_rank = node_rank * gpu_per_node + local_rank 37 | # print( 38 | # f"before init process group, rank{local_rank}", 39 | # f"env rank {os.environ["RANK"]}", 40 | # f"world_size{world_size},", 41 | # f"env worldsize {os.environ['WORLD_SIZE']}", 42 | # f" global_rank {global_rank}" 43 | # ) 44 | 45 | dist.init_process_group( 46 | backend="cpu:gloo", # ,cuda:nccl", 47 | rank=global_rank, 48 | world_size=world_size, 49 | # device_id=device, 50 | ) 51 | 52 | print("init process group done") 53 | world_group = torch.distributed.group.WORLD 54 | assert world_group is not None 55 | 56 | print("process group ok") 57 | torch._C._distributed_c10d._register_process_group("default", world_group) 58 | print(mori.shmem.shmem_torch_process_group_init("default")) 59 | 60 | print(f"I'm pe {mori.shmem.shmem_mype()} in {mori.shmem.shmem_npes()} pes") 61 | 62 | config = mori.ops.EpDispatchCombineConfig( 63 | data_type=torch.bfloat16, 64 | rank=global_rank, 65 | world_size=world_size, 66 | hidden_dim=7168, 67 | max_num_inp_token_per_rank=512, 68 | num_experts_per_rank=32, 69 | num_experts_per_token=8, 70 | ) 71 | op = mori.ops.EpDispatchCombineOp(config) 72 | op.dispatch_internode( 73 | torch.ones(4, 7168).to(torch.bfloat16).to(device), 74 | torch.ones(4, 1).to(torch.float).to(device), 75 | torch.ones(4, 8).to(torch.uint32).to(device), 76 | ) 77 | 
torch.cuda.synchronize() 78 | 79 | 80 | def cleanup(): 81 | mori.shmem.shmem_finalize() 82 | dist.destroy_process_group() 83 | 84 | 85 | def test_shmem(rank, num_node, gpu_per_node): 86 | setup(rank, num_node, gpu_per_node) 87 | cleanup() 88 | 89 | 90 | if __name__ == "__main__": 91 | gpu_per_node = os.environ.get("GPU_PER_NODE", None) 92 | gpu_per_node = int(gpu_per_node) if gpu_per_node is not None else 8 93 | num_node = int(os.environ["WORLD_SIZE"]) 94 | 95 | world_size = num_node * gpu_per_node 96 | torch.multiprocessing.spawn( 97 | test_shmem, 98 | args=( 99 | num_node, 100 | gpu_per_node, 101 | ), 102 | nprocs=gpu_per_node, 103 | join=True, 104 | ) 105 | -------------------------------------------------------------------------------- /include/mori/application/memory/symmetric_memory.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | #include 29 | #include 30 | 31 | #include "mori/application/bootstrap/bootstrap.hpp" 32 | #include "mori/application/context/context.hpp" 33 | #include "mori/application/transport/transport.hpp" 34 | 35 | namespace mori { 36 | namespace application { 37 | // One symmetric allocation: the local pointer plus per-PE peer addresses and RDMA keys. 38 | struct SymmMemObj { 39 | void* localPtr{nullptr}; 40 | uintptr_t* peerPtrs{nullptr}; 41 | size_t size{0}; 42 | // For Rdma 43 | uint32_t lkey{0}; 44 | uint32_t* peerRkeys{nullptr}; 45 | // For IPC 46 | hipIpcMemHandle_t* ipcMemHandles{nullptr}; // should only be placed on cpu 47 | // Builds the memory-region descriptor for PE `pe` from peerPtrs[pe], lkey, peerRkeys[pe], size. 48 | __device__ __host__ RdmaMemoryRegion GetRdmaMemoryRegion(int pe) const { 49 | RdmaMemoryRegion mr; 50 | mr.addr = peerPtrs[pe]; 51 | mr.lkey = lkey; 52 | mr.rkey = peerRkeys[pe]; 53 | mr.length = size; 54 | return mr; 55 | } 56 | 57 | // Get pointers 58 | inline __device__ __host__ void* Get() const { return localPtr; } 59 | inline __device__ __host__ void* Get(int pe) const { 60 | return reinterpret_cast(peerPtrs[pe]); 61 | } 62 | 63 | template 64 | inline __device__ __host__ T GetAs() const { 65 | return reinterpret_cast(localPtr); 66 | } 67 | template 68 | inline __device__ __host__ T GetAs(int pe) const { 69 | return reinterpret_cast(peerPtrs[pe]); 70 | } 71 | }; 72 | // Pairs the `cpu` and `gpu` SymmMemObj pointers; operator-> yields cpu on host and gpu on device. 73 | struct SymmMemObjPtr { 74 | SymmMemObj* cpu{nullptr}; 75 | SymmMemObj* gpu{nullptr}; 76 | // True only when both pointers are set; const so it can be called on a const SymmMemObjPtr. 77 | bool IsValid() const { return (cpu != nullptr) && (gpu != nullptr); } 78 | 79 | __host__ SymmMemObj* operator->() { return cpu; } 80 | __device__ SymmMemObj* operator->() { return gpu; } 81 | __host__ const SymmMemObj* operator->() const { return cpu; } 82 | __device__ const SymmMemObj* operator->() const { 
return gpu; } 83 | }; 84 | // Manager for symmetric memory objects; holds references to the bootstrap network and Context. 85 | class SymmMemManager { 86 | public: 87 | SymmMemManager(BootstrapNetwork& bootNet, Context& context); 88 | ~SymmMemManager(); 89 | // The default alignment is the system page size reported by sysconf. 90 | SymmMemObjPtr HostMalloc(size_t size, size_t alignment = sysconf(_SC_PAGE_SIZE)); 91 | void HostFree(void* localPtr); 92 | 93 | SymmMemObjPtr Malloc(size_t size); 94 | // See hipExtMallocWithFlags for flags settings 95 | SymmMemObjPtr ExtMallocWithFlags(size_t size, unsigned int flags); 96 | void Free(void* localPtr); 97 | 98 | SymmMemObjPtr RegisterSymmMemObj(void* localPtr, size_t size); 99 | void DeregisterSymmMemObj(void* localPtr); 100 | 101 | SymmMemObjPtr Get(void* localPtr) const; 102 | 103 | private: 104 | BootstrapNetwork& bootNet; 105 | Context& context; 106 | std::unordered_map memObjPool; 107 | }; 108 | 109 | } // namespace application 110 | } // namespace mori 111 | -------------------------------------------------------------------------------- /src/application/topology/gpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #include "mori/application/topology/gpu.hpp" 23 | 24 | #include "mori/application/utils/check.hpp" 25 | 26 | namespace mori { 27 | namespace application { 28 | 29 | /* ---------------------------------------------------------------------------------------------- */ 30 | /* TopoSystemGpu */ 31 | /* ---------------------------------------------------------------------------------------------- */ 32 | TopoSystemGpu::TopoSystemGpu() { Load(); } 33 | 34 | TopoSystemGpu::~TopoSystemGpu() {} 35 | // Unpacks a ROCm SMI PCI id: domain from bit 32 up, bus in bits 8-15, device 3-7, function 0-2. 36 | PciBusId RsmiBusId2PciBusId(uint64_t rsmiBusId) { 37 | uint16_t domain = (rsmiBusId >> 32); 38 | uint8_t bus = (rsmiBusId >> 8); 39 | uint8_t dev = (rsmiBusId >> 3) & 0x1f; 40 | uint8_t func = rsmiBusId & 0x7; 41 | return PciBusId(domain, bus, dev, func); 42 | } 43 | // Enumerates GPUs through ROCm SMI, records their PCI bus ids and one link per accessible pair. 44 | void TopoSystemGpu::Load() { 45 | uint32_t numGpus; 46 | 47 | ROCM_SMI_CHECK(rsmi_init(0)); 48 | ROCM_SMI_CHECK(rsmi_num_monitor_devices(&numGpus)); 49 | 50 | for (uint32_t i = 0; i < numGpus; ++i) { 51 | TopoNodeGpu* gpu = new TopoNodeGpu(); 52 | gpus.emplace_back(gpu); 53 | uint64_t rsmiBusId = 0; 54 | ROCM_SMI_CHECK(rsmi_dev_pci_id_get(i, &rsmiBusId)); 55 | gpu->busId = RsmiBusId2PciBusId(rsmiBusId); 56 | // ROCM_SMI_CHECK(rsmi_topo_numa_affinity_get(reinterpret_cast(i), &gpu->numaNode)); 57 | } 58 | 59 | for (uint32_t i = 0; i < numGpus; ++i) { 60 | for (uint32_t j = i + 1; j < numGpus; ++j) { 61 | // Starting at i + 1 visits every unordered pair once; i == j never occurs. 62 | bool accessible = false; 63 | 
ROCM_SMI_CHECK(rsmi_is_P2P_accessible(i, j, &accessible)); 64 | if (!accessible) continue; 65 | 66 | TopoNodeGpuP2pLink* p2p = new TopoNodeGpuP2pLink(); 67 | ROCM_SMI_CHECK(rsmi_topo_get_link_type(i, j, &p2p->hops, &p2p->type)); 68 | ROCM_SMI_CHECK(rsmi_topo_get_link_weight(i, j, &p2p->weight)); 69 | p2p->gpu1 = gpus[i].get(); 70 | p2p->gpu2 = gpus[j].get(); 71 | p2ps.emplace_back(p2p); 72 | // The same link object is referenced from both endpoint GPUs. 73 | gpus[i]->p2ps.push_back(p2p); 74 | gpus[j]->p2ps.push_back(p2p); 75 | } 76 | } 77 | 78 | ROCM_SMI_CHECK(rsmi_shut_down()); 79 | } 80 | // Returns non-owning pointers to all discovered GPUs, in discovery order. 81 | std::vector TopoSystemGpu::GetGpus() const { 82 | std::vector v(gpus.size()); 83 | for (size_t i = 0; i < gpus.size(); ++i) v[i] = gpus[i].get(); 84 | return v; 85 | } 86 | // Maps a HIP device index to its topology node by comparing PCI bus ids; nullptr if none matches. 87 | TopoNodeGpu* TopoSystemGpu::GetGpuByLogicalId(int id) const { 88 | std::string str; 89 | str.resize(13); 90 | HIP_RUNTIME_CHECK(hipDeviceGetPCIBusId(str.data(), str.size(), id)); 91 | PciBusId target{str}; 92 | for (auto& gpuPtr : gpus) { 93 | TopoNodeGpu* gpu = gpuPtr.get(); 94 | if (gpu->busId == target) return gpu; 95 | } 96 | return nullptr; 97 | } 98 | 99 | } // namespace application 100 | } // namespace mori 101 | -------------------------------------------------------------------------------- /docs/MORI-IO-INTRO.md: -------------------------------------------------------------------------------- 1 | # MORI-IO Introduction 2 | 3 | MORI-IO is AMD's point-to-point communication library that leverages GDR (GPU Direct RDMA) to achieve low-latency and high-bandwidth. Its current main use case is KVCache transfer in LLM inference. 4 | 5 | ## Design & Concepts 6 | - **IOEngine**: The primary interface for interacting with MORI-IO, it abstracts low-level details of p2p communications and provides high-level APIs for engine registration, memory registration, p2p transfer, etc. 7 | - **Backend**: A backend represents and manages a specific transfer medium (e.g., PCIe, xGMI, IB). It must be created before any data transfer can occur over that medium. 
8 | - **Engine Registration**: Before two engines can communicate, the remote engine must be registered with the local engine. This establishes the necessary context for initiating data transfers between them. 9 | - **Memory Registration**: Application memory must be registered with a local engine before it can participate in data transfer. This ensures the engine can access and manage the memory efficiently during communication. 10 | - **Read/Write**: One-sided transfer operations initiated by the initiator engine without active involvement from the target engine. These operations can move data directly between registered memory regions. 11 | - **Batch Read/Write**: A batched form of one-sided operations, where multiple transfers are grouped and launched together. Batching reduces per-operation launch overhead and improves bandwidth utilization. 12 | - **Session**: A pre-established transfer context between a pair of MemoryDesc objects. Sessions eliminate repetitive overheads such as connection setup, metadata exchange, and resource management, providing a lightweight and efficient path for repeated transfers. 13 | 14 | ## Workflow 15 | The image below shows a typical workflow of using MORI-IO. 16 | ![MORI-IO workflow](mori_io_workflow.jpg) 17 | 18 | ## Architecture 19 | The image below shows the architecture of MORI-IO. 20 | ![MORI-IO architecture](mori_io_arch.png) 21 | From the application's perspective, MORI-IO provides three kinds of functionality: engine registration, memory registration, and transfers. 22 | 23 | The application is responsible for passing and registering engine descriptors among the engines where transfers are expected to happen. Once engines are registered with each other, the application registers memory buffers on both the initiator side and the target side. Before initiating transfers from the initiator, the application needs to first pass the memory descriptors from the target side to the initiator side. After that, the application is ready to initiate transfers from the initiator engine. 
The transfer APIs return a TransferStatus that the application can use to query the state of the corresponding transfers. Note that transfers are initiated asynchronously. 24 | 25 | Inside MORI-IO, there are 5 components: Slow Control Plane, Fast Control Plane, Data Plane, Metadata Store and Transport Store. 26 | 27 | - **Slow Control Plane**: this component is used to exchange metadata such as memory descriptors, RDMA QP numbers, custom notification messages (TBD), etc. MORI-IO implements a TCP server and a lightweight protocol (no third-party libraries required) for it. 28 | - **Fast Control Plane**: this component is used to exchange metadata that is required in the critical path of transfers. An example is remote engine completion notifications. For one-sided transfer operations such as READ/WRITE, the memory buffer on the target side can only be reused or released after the target engine has received a completion notification. To reduce the performance penalty of this process, MORI-IO utilizes the RDMA network. 29 | - **Data Plane**: this component is used to transfer bulk data. The currently supported transport is RDMA; TCP and XGMI are work in progress. 30 | - **Metadata Store**: this component is used to manage metadata for each engine. MORI-IO does not have a centralized metadata store; instead, each engine manages its own metadata through this component. 31 | - **Transport Store**: this component is used to manage multiple transfer backends such as RDMA, TCP and XGMI. If one of them fails, MORI-IO switches to another. 32 | 33 | 34 | -------------------------------------------------------------------------------- /include/mori/core/transport/rdma/host_primitives.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include "primitives.hpp" 25 | // Host-side declarations only; no definitions appear in this header. 26 | namespace mori { 27 | namespace core { 28 | /* ---------------------------------------------------------------------------------------------- */ 29 | /* Post Tasks */ 30 | /* ---------------------------------------------------------------------------------------------- */ 31 | // NOTE(review): PostSend orders (post_idx, wqe_num); the other Post* take (wqe_num, post_idx). 32 | template 33 | static __host__ uint64_t PostSend(void* queue_buff_addr, uint32_t& post_idx, uint32_t wqe_num, 34 | uint32_t qpn, uintptr_t laddr, uint64_t lkey, size_t bytes_count); 35 | // Unlike the other Post* functions, PostRecv takes no qpn and returns nothing. 36 | template 37 | static __host__ void PostRecv(void* queue_buff_addr, uint32_t wqe_num, uint32_t& post_idx, 38 | uintptr_t laddr, uint64_t lkey, size_t bytes_count); 39 | // PostWrite and PostRead share one signature: local (laddr, lkey) plus remote (raddr, rkey). 40 | template 41 | static __host__ uint64_t PostWrite(void* queue_buff_addr, uint32_t wqe_num, uint32_t& post_idx, 42 | uint32_t qpn, uintptr_t laddr, uint64_t lkey, uintptr_t raddr, 43 | uint64_t rkey, size_t bytes_count); 44 | 45 | template 46 | static __host__ uint64_t PostRead(void* queue_buff_addr, uint32_t wqe_num, uint32_t& post_idx, 47 | uint32_t qpn, uintptr_t laddr, uint64_t lkey, uintptr_t raddr, 48 | uint64_t rkey, size_t bytes_count); 49 | 50 | /* ---------------------------------------------------------------------------------------------- */ 51 | /* Doorbell */ 52 | /* ---------------------------------------------------------------------------------------------- */ 53 | template 54 | static __host__ void UpdateSendDbrRecord(void* dbrRecAddr, uint32_t wqe_idx); 55 | 56 | template 57 | static __host__ void UpdateRecvDbrRecord(void* dbrRecAddr, uint32_t wqe_idx); 58 | 59 | template 60 | static __host__ void RingDoorbell(void* dbr_addr, uint64_t dbr_val); 61 | 62 | /* ---------------------------------------------------------------------------------------------- */ 63 | /* Completion Queue */ 64 | /* ---------------------------------------------------------------------------------------------- */ 65 | template 66 | static __host__ int PollCqOnce(void* cqAddr, 
uint32_t cqeNum, uint32_t& consIdx); 67 | // PollCq has the same signature as PollCqOnce; consIdx is passed by non-const reference. 68 | template 69 | static __host__ int PollCq(void* cqAddr, uint32_t cqeNum, uint32_t& consIdx); 70 | 71 | template 72 | static __host__ void UpdateCqDbrRecord(CompletionQueueHandle& cq, void* dbrRecAddr, uint32_t cons_idx); 73 | 74 | } // namespace core 75 | } // namespace mori 76 | -------------------------------------------------------------------------------- /examples/shmem/put_thread_allgather.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | 24 | #include 25 | 26 | #include "mori/application/utils/check.hpp" 27 | #include "mori/shmem/shmem.hpp" 28 | 29 | using namespace mori::core; 30 | using namespace mori::shmem; 31 | using namespace mori::application; 32 | 33 | constexpr ProviderType PrvdType = ProviderType::MLX5; 34 | // Ring all-gather kernel: each round puts one chunk to the next PE, then spins on the incoming one. 35 | __global__ void RingAllGatherWithPutMemAPIKernel(int myPe, int npes, const SymmMemObjPtr memObj) { 36 | int nextPeer = (myPe + 1) % npes; 37 | int peChunkSize = memObj->size / npes; 38 | 39 | RdmaMemoryRegion source; 40 | source.addr = reinterpret_cast(memObj->localPtr); 41 | source.lkey = memObj->lkey; 42 | // Round i forwards the chunk that originated at PE (myPe - i) mod npes. 43 | for (int i = 0; i < npes - 1; i++) { 44 | int sendDataRank = ((myPe - i) + npes) % npes; 45 | int sendOffset = sendDataRank * peChunkSize; 46 | ShmemPutMemNbiThread(memObj, sendOffset, source, sendOffset, peChunkSize, nextPeer); 47 | ShmemQuietThread(); 48 | // The chunk expected this round originated one PE further back in the ring. 49 | int recvDataRank = ((sendDataRank - 1) + npes) % npes; 50 | int recvOffset = recvDataRank * peChunkSize; 51 | void* recvAddr = reinterpret_cast(memObj->localPtr) + recvOffset; 52 | 53 | // Wait until received 54 | printf("rank %d round %d recv rank %d sendoff %d recvoff %d\n", myPe, i, recvDataRank, 55 | sendOffset, recvOffset); 56 | // Spin until the first word of that chunk equals its owner's fill value (rank + 1). 57 | while ((atomicAdd(reinterpret_cast(recvAddr), 0)) != (recvDataRank + 1)) { 58 | } 59 | } 60 | } 61 | // Host driver: initialise MPI and shmem, fill the local chunk, run the kernel, verify all chunks. 62 | void RingAllGatherWithPutMemAPI() { 63 | int status; 64 | MPI_Init(NULL, NULL); 65 | 66 | status = ShmemMpiInit(MPI_COMM_WORLD); 67 | assert(!status); 68 | 69 | // Assume in same node 70 | int myPe = ShmemMyPe(); 71 | int npes = ShmemNPes(); 72 | // npes chunks of 1024 uint32_t each; peChunkSize here counts elements, not bytes. 73 | // Allocate buffer 74 | int buffSize = npes * 1024 * sizeof(uint32_t); 75 | int peChunkSize = buffSize / npes / sizeof(uint32_t); 76 | // Only this PE's own chunk is filled, with the value myPe + 1. 77 | void* buff = ShmemMalloc(buffSize); 78 | HIP_RUNTIME_CHECK( 79 | hipMemsetD32(reinterpret_cast(buff) + myPe * peChunkSize, myPe + 1, peChunkSize)); 80 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 81 | 82 | SymmMemObjPtr buffObj = ShmemQueryMemObjPtr(buff); 83 | 
assert(buffObj.IsValid()); 84 | 85 | for (int i = 0; i < npes; i++) { 86 | printf("Before rank %d, got %d on %dth chunk\n", myPe, 87 | reinterpret_cast(buff)[i * peChunkSize], i); 88 | } 89 | // Run put 90 | RingAllGatherWithPutMemAPIKernel<<<1, 1>>>(myPe, npes, buffObj); 91 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 92 | MPI_Barrier(MPI_COMM_WORLD); 93 | // After the barrier, every element of chunk i must equal i + 1. 94 | for (int i = 0; i < npes; i++) { 95 | printf("After rank %d, got %d on %dth chunk\n", myPe, 96 | reinterpret_cast(buff)[i * peChunkSize], i); 97 | for (int j = i * peChunkSize; j < ((i + 1) * peChunkSize); j++) { 98 | assert(reinterpret_cast(buff)[j] == i + 1); 99 | } 100 | } 101 | 102 | // Finalize 103 | ShmemFree(buff); 104 | ShmemFinalize(); 105 | MPI_Finalize(); 106 | } 107 | // argc and argv are unused. 108 | int main(int argc, char* argv[]) { 109 | RingAllGatherWithPutMemAPI(); 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /src/io/rdma/executor.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | #pragma once 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "mori/application/transport/rdma/rdma.hpp" 31 | #include "mori/io/common.hpp" 32 | #include "src/io/rdma/common.hpp" 33 | 34 | namespace mori { 35 | namespace io { 36 | 37 | /* ---------------------------------------------------------------------------------------------- */ 38 | /* Data Structures */ 39 | /* ---------------------------------------------------------------------------------------------- */ 40 | struct ExecutorReq { 41 | const EpPairVec& eps; 42 | const application::RdmaMemoryRegion& local; 43 | const SizeVec& localOffsets; 44 | const application::RdmaMemoryRegion& remote; 45 | const SizeVec& remoteOffsets; 46 | const SizeVec& sizes; 47 | CqCallbackMeta* callbackMeta; 48 | TransferUniqueId id; 49 | int postBatchSize; 50 | bool isRead; 51 | }; 52 | 53 | /* ---------------------------------------------------------------------------------------------- */ 54 | /* Executor */ 55 | /* ---------------------------------------------------------------------------------------------- */ 56 | class Executor { 57 | public: 58 | Executor() = default; 59 | virtual ~Executor() = default; 60 | 61 | virtual void Start() = 0; 62 | virtual void Shutdown() = 0; 63 | virtual RdmaOpRet RdmaBatchReadWrite(const ExecutorReq& req) = 0; 64 | }; 65 | 66 | /* ---------------------------------------------------------------------------------------------- */ 
67 | /* MultithreadExecutor */ 68 | /* ---------------------------------------------------------------------------------------------- */ 69 | class MultithreadExecutor : public Executor { 70 | public: 71 | MultithreadExecutor(int numWorker); 72 | ~MultithreadExecutor(); 73 | 74 | RdmaOpRet RdmaBatchReadWrite(const ExecutorReq& req); 75 | void Start(); 76 | void Shutdown(); 77 | 78 | private: 79 | struct Task { 80 | const ExecutorReq* req; 81 | int epId{-1}; 82 | int begin{-1}; 83 | int end{-1}; 84 | std::promise ret; 85 | 86 | Task(const ExecutorReq* req_, int epId_, int begin_, int end_) 87 | : req(req_), epId(epId_), begin(begin_), end(end_) {} 88 | }; 89 | 90 | std::vector> SplitWork(const ExecutorReq& req); 91 | 92 | class Worker { 93 | public: 94 | Worker(int wid); 95 | ~Worker(); 96 | void MainLoop(); 97 | void Start(); 98 | void Shutdown(); 99 | 100 | void Submit(Task&&); 101 | 102 | private: 103 | int workerId{-1}; 104 | std::atomic running{false}; 105 | mutable std::mutex mu; 106 | std::condition_variable cond; 107 | std::queue q; 108 | std::thread thd; 109 | }; 110 | 111 | public: 112 | int numWorker{1}; 113 | 114 | private: 115 | std::atomic running{false}; 116 | std::vector> pool; 117 | }; 118 | 119 | } // namespace io 120 | } // namespace mori 121 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") 4 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 5 | set(CMAKE_CXX_FLAGS_O3DEBUG "-ggdb -O3") 6 | 7 | set(CMAKE_CXX_STANDARD 17) 8 | set(CMAKE_CXX_STANDARD_REQUIRED ON) 9 | set(CMAKE_CXX_EXTENSIONS OFF) 10 | 11 | if(NOT DEFINED CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "") 12 | message(STATUS "Build type not set - defaulting to Release") 13 | set(CMAKE_BUILD_TYPE 14 | "Release" 15 | CACHE STRING "Choose build type" FORCE) 16 | endif() 17 | 18 | 
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# ---- Build options -----------------------------------------------------------
option(USE_ROCM "Whether to use rocm" ON)
option(USE_BNXT "Whether to use BNXT NIC" OFF)
option(USE_IONIC "Whether to use IONIC" OFF)
option(BUILD_EXAMPLES "Whether to build examples" ON)
option(BUILD_APPLICATION "Whether to build application library" ON)
option(BUILD_SHMEM "Whether to build shmem library" ON)
option(BUILD_OPS "Whether to build mori operation kernels" ON)
option(BUILD_IO "Whether to build mori io library" ON)
option(BUILD_PYBINDS "Whether to build mori python bindings" ON)
option(BUILD_TESTS "Whether to build mori CPP tests" ON)

# ---- Optional NIC provider libraries -----------------------------------------
# A provider is force-disabled when its library is missing, even if the user
# asked for it on the command line.
set(CMAKE_LIBRARY_PATH "/usr/local/lib;${CMAKE_LIBRARY_PATH}")
find_library(BNXT_RE_LIB NAMES bnxt_re)
if(BNXT_RE_LIB)
  message(STATUS "Found bnxt_re library at: ${BNXT_RE_LIB}")
else()
  message(
    STATUS "Could NOT find bnxt_re library. BNXT features will be disabled.")
  set(USE_BNXT
      OFF
      CACHE BOOL "Enable BNXT features if bnxt_re library is found" FORCE)
endif()
message(STATUS "USE_BNXT = ${USE_BNXT}")
if(USE_BNXT)
  add_compile_definitions(ENABLE_BNXT)
endif()

find_library(IONIC_LIB
  NAMES ionic
  HINTS /lib/x86_64-linux-gnu
)
if(IONIC_LIB)
  message(STATUS "Found ionic library at: ${IONIC_LIB}")
else()
  message(
    STATUS "Could NOT find ionic library. IONIC features will be disabled.")
  set(USE_IONIC
      OFF
      CACHE BOOL "Enable IONIC features if ionic library is found" FORCE)
endif()
message(STATUS "USE_IONIC = ${USE_IONIC}")
if(USE_IONIC)
  add_compile_definitions(ENABLE_IONIC)
endif()

# ---- Kernel tuning knobs -----------------------------------------------------
if(NOT DEFINED WARP_ACCUM_UNROLL)
  set(WARP_ACCUM_UNROLL 1)
endif()
message(STATUS "WARP_ACCUM_UNROLL is set to: ${WARP_ACCUM_UNROLL}")
add_definitions(-DWARP_ACCUM_UNROLL=${WARP_ACCUM_UNROLL})

# ---- GPU toolchain -----------------------------------------------------------
if(USE_ROCM)
  # HIP only became a first-class CMake language in 3.21, while this file
  # accepts 3.19. Fail with a clear message instead of an obscure project()
  # error on older CMake versions.
  if(CMAKE_VERSION VERSION_LESS 3.21)
    message(
      FATAL_ERROR
        "USE_ROCM=ON requires CMake >= 3.21 (HIP language support); found ${CMAKE_VERSION}"
    )
  endif()

  list(APPEND CMAKE_PREFIX_PATH "/opt/rocm")
  project(mori LANGUAGES HIP CXX C)
  # set(CMAKE_CXX_COMPILER /opt/rocm/bin/hipcc)
  find_package(hip REQUIRED)

  if(DEFINED GPU_TARGETS)
    list(GET GPU_TARGETS 0 GPU_ARCH)
    message(STATUS "GPU_TARGETS: ${GPU_TARGETS}")
    # TODO: should iterate all archs
    if(GPU_ARCH MATCHES "^gfx8")
      add_definitions(-D__GFX8__=1)
    elseif(GPU_ARCH MATCHES "^gfx9")
      add_definitions(-D__GFX9__=1)
    endif()
    set(HIP_HIPCC_FLAGS "--amdgpu-target=${GPU_TARGETS}")
  endif()
else()
  set(CUDA_HOME /usr/local/cuda)
  message(STATUS "CUDA_HOME: ${CUDA_HOME}")
  # The hint variable read by find_package(CUDAToolkit) is spelled
  # CUDAToolkit_ROOT; variable names are case-sensitive, so the previous
  # "CUDAToolkit_Root" spelling was never consulted.
  set(CUDAToolkit_ROOT
      ${CUDA_HOME}
      CACHE PATH "Root of Cuda Toolkit." FORCE)
  # The compiler is a file, not a directory, hence FILEPATH.
  set(CMAKE_CUDA_COMPILER
      "${CUDA_HOME}/bin/nvcc"
      CACHE FILEPATH "Path to the CUDA compiler." FORCE)
  set(CMAKE_CUDA_FLAGS
      "--generate-code=arch=compute_70,code=[sm_70] --generate-code=arch=compute_80,code=[sm_80]"
  )
  message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")

  project(mori LANGUAGES CUDA CXX C)
  find_package(CUDAToolkit REQUIRED)
  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -expt-relaxed-constexpr")
endif()

# ---- Targets -----------------------------------------------------------------
add_library(mori_logging INTERFACE)
target_include_directories(mori_logging INTERFACE include
                                                  3rdparty/spdlog/include)

if(BUILD_EXAMPLES)
  # Building the examples also turns the application library on.
  set(BUILD_APPLICATION ON)
  add_subdirectory(examples)
endif()

if(BUILD_APPLICATION)
  add_subdirectory(src/application)
endif()

if(BUILD_SHMEM)
  add_subdirectory(src/shmem)
endif()

if(BUILD_OPS)
  add_subdirectory(src/ops)
endif()

if(BUILD_IO)
  add_subdirectory(src/io)
endif()

if(BUILD_PYBINDS)
  add_subdirectory(src/pybind)
endif()

if(BUILD_TESTS)
  add_subdirectory(tests/cpp)
endif()

-------------------------------------------------------------------------------- /examples/benchmarks/accum_perf.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved.
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | #include 24 | 25 | #include 26 | 27 | #include "mori/application/utils/check.hpp" 28 | #include "mori/shmem/shmem.hpp" 29 | 30 | using namespace mori::core; 31 | using namespace mori::shmem; 32 | using namespace mori::application; 33 | 34 | using T = hip_bfloat16; 35 | 36 | __global__ void AccumPerfKernel(int myPe, int npes, const SymmMemObjPtr src, 37 | const SymmMemObjPtr dest, int elementNum, int elementPerWarp) { 38 | int thdId = threadIdx.x; 39 | int laneId = threadIdx.x & (warpSize - 1); 40 | int warpId = thdId / warpSize; 41 | int warpNum = blockDim.x / warpSize; 42 | int globalWarpId = blockIdx.x * warpNum + warpId; 43 | 44 | __shared__ T* sharedMem[8 * 16]; 45 | T** srcPtrs = sharedMem + warpId * npes; 46 | 47 | if (laneId < npes) { 48 | srcPtrs[laneId] = src->template GetAs(laneId) + globalWarpId * elementPerWarp; 49 | } 50 | 51 | mori::core::WarpAccum(dest->template GetAs() + globalWarpId * elementPerWarp, srcPtrs, 52 | nullptr, npes, 53 | std::min(elementPerWarp, elementNum - globalWarpId * elementPerWarp)); 54 | } 55 | 56 | void AccumPerf() { 57 | int status; 58 | MPI_Init(NULL, NULL); 59 | status = ShmemMpiInit(MPI_COMM_WORLD); 60 | assert(!status); 61 | 62 | int myPe = ShmemMyPe(); 63 | int npes = ShmemNPes(); 64 | 65 | size_t elementSize = sizeof(T); 66 | // size_t elementNum = 16 * 1000 * 1024; 67 | size_t elementNum = 4096 * 7168; 68 | size_t bufferSize = elementNum * elementSize; 69 | 70 | // void* srcBuff = ShmemExtMallocWithFlags(bufferSize, hipDeviceMallocUncached); 71 | void* srcBuff = ShmemMalloc(bufferSize); 72 | HIP_RUNTIME_CHECK(hipMemset(reinterpret_cast(srcBuff), 0, bufferSize)); 73 | SymmMemObjPtr srcBuffObj = ShmemQueryMemObjPtr(srcBuff); 74 | assert(srcBuffObj.IsValid()); 75 | 76 | void* destBuff = ShmemExtMallocWithFlags(bufferSize, hipDeviceMallocUncached); 77 | HIP_RUNTIME_CHECK(hipMemset(reinterpret_cast(destBuff), 0, bufferSize)); 78 | SymmMemObjPtr destBuffObj = ShmemQueryMemObjPtr(destBuff); 79 | 
assert(destBuffObj.IsValid()); 80 | 81 | int blockNum = 80; 82 | int warpNum = 8; 83 | int threadNum = warpSize * warpNum; 84 | int totalWarpNum = blockNum * warpNum; 85 | 86 | size_t elementPerWarp = (elementNum + totalWarpNum - 1) / totalWarpNum; 87 | 88 | printf("elementPerWarp %zu\n", elementPerWarp); 89 | 90 | for (int i = 0; i < 3; i++) 91 | AccumPerfKernel<<>>(myPe, npes, srcBuffObj, destBuffObj, elementNum, 92 | elementPerWarp); 93 | 94 | const int iters = 5; 95 | hipEvent_t start, stop; 96 | HIP_RUNTIME_CHECK(hipEventCreate(&start)); 97 | HIP_RUNTIME_CHECK(hipEventCreate(&stop)); 98 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 99 | MPI_Barrier(MPI_COMM_WORLD); 100 | HIP_RUNTIME_CHECK(hipEventRecord(start, 0)); 101 | for (int i = 0; i < iters; i++) { 102 | AccumPerfKernel<<>>(myPe, npes, srcBuffObj, destBuffObj, elementNum, 103 | elementPerWarp); 104 | } 105 | HIP_RUNTIME_CHECK(hipEventRecord(stop, 0)); 106 | HIP_RUNTIME_CHECK(hipDeviceSynchronize()); 107 | float elapsedTime; 108 | HIP_RUNTIME_CHECK(hipEventElapsedTime(&elapsedTime, start, stop)); 109 | printf("rank %d time %f avgtime %f bw %f\n", myPe, elapsedTime, elapsedTime / iters, 110 | (bufferSize / 1.0E9) * npes / (elapsedTime / 1000.0f) / iters); 111 | MPI_Barrier(MPI_COMM_WORLD); 112 | 113 | ShmemFinalize(); 114 | } 115 | 116 | int main() { AccumPerf(); } 117 | -------------------------------------------------------------------------------- /examples/utils/args_parser.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | #include "common_utils.hpp" 35 | #include "mori/core/transport/rdma/primitives.hpp" 36 | 37 | using namespace mori::core; 38 | 39 | // Enums and types would typically be defined in a header file 40 | enum MORIDataType { 41 | MORI_INT, 42 | MORI_LONG, 43 | MORI_LONGLONG, 44 | MORI_ULONGLONG, 45 | MORI_SIZE, 46 | MORI_PTRDIFF, 47 | MORI_FLOAT, 48 | MORI_DOUBLE, 49 | MORI_UINT, 50 | MORI_INT32, 51 | MORI_INT64, 52 | MORI_UINT32, 53 | MORI_UINT64, 54 | MORI_FP16, 55 | MORI_BF16 56 | }; 57 | 58 | enum MORIReduceOp { MORI_MIN, MORI_MAX, MORI_SUM, MORI_PROD, MORI_AND, MORI_OR, MORI_XOR }; 59 | 60 | enum MORIScope { MORI_THREAD, MORI_WARP, MORI_BLOCK, MORI_ALL_SCOPES }; 61 | 62 | enum Direction { READ, WRITE }; 63 | 64 | enum PutGetIssue { ON_STREAM, HOST }; 65 | 66 | struct Datatype { 67 | MORIDataType type; 68 | size_t size; 69 | std::string name; 70 | }; 71 | 72 | struct ReduceOp { 73 | MORIReduceOp type; 74 | std::string name; 75 | }; 76 | 77 | struct ThreadgroupScope { 78 | MORIScope type; 79 | std::string name; 80 | }; 81 | 82 | struct AMO { 83 | atomicType type; 84 | std::string name; 85 | }; 86 | 87 | struct DirectionConfig { 88 | Direction type; 89 | std::string name; 90 | }; 91 | 92 | struct PutGetIssueConfig { 93 | PutGetIssue type; 94 | std::string name; 95 | }; 96 | 97 | class BenchmarkConfig { 98 | public: 99 | // 解析命令行 100 | void readArgs(int argc, char** argv); 101 | 102 | // 简单 getter 103 | size_t getMinSize() const { return min_size; } 104 | size_t getMaxSize() const { return max_size; } 105 | size_t getNumBlocks() const { return num_blocks; } 106 | size_t getThreadsPerBlock() const { return threads_per_block; } 107 | size_t getIters() const { return iters; } 108 | size_t getWarmupIters() const { return warmup_iters; } 109 | size_t getStepFactor() const { return step_factor; } 110 | size_t getMaxSizeLog() 
const { return max_size_log; } 111 | size_t getStride() const { return stride; } 112 | size_t getNumQp() const { return num_qp; } 113 | 114 | bool isBidirectional() const { return bidirectional; } 115 | bool isReportMsgrate() const { return report_msgrate; } 116 | 117 | Datatype getDatatype() const { return datatype; } 118 | ReduceOp getReduceOp() const { return reduce_op; } 119 | ThreadgroupScope getThreadgroupScope() const { return threadgroup_scope; } 120 | AMO getTestAMO() const { return test_amo; } 121 | PutGetIssueConfig getPutGetIssue() const { return putget_issue; } 122 | DirectionConfig getDirection() const { return dir; } 123 | 124 | private: 125 | size_t min_size = 4; 126 | size_t max_size = min_size * 1024 * 1024; 127 | size_t num_blocks = 1; 128 | size_t threads_per_block = 1; 129 | size_t iters = 10; 130 | size_t warmup_iters = 5; 131 | size_t step_factor = 2; 132 | size_t max_size_log = 1; 133 | size_t stride = 1; 134 | size_t num_qp = 1; 135 | 136 | bool bidirectional = false; 137 | bool report_msgrate = false; 138 | 139 | Datatype datatype = {MORI_UINT64, sizeof(uint64_t), "uint64_t"}; 140 | ReduceOp reduce_op = {MORI_SUM, "sum"}; 141 | ThreadgroupScope threadgroup_scope = {MORI_ALL_SCOPES, "all_scopes"}; 142 | AMO test_amo = {AMO_INC, "inc"}; 143 | PutGetIssueConfig putget_issue = {ON_STREAM, "on_stream"}; 144 | DirectionConfig dir = {WRITE, "write"}; 145 | 146 | void datatypeParse(const char* optarg); 147 | void reduceOpParse(const char* str); 148 | void atomicOpParse(const char* str); 149 | int atolScaled(const char* str, size_t* out); 150 | }; 151 | -------------------------------------------------------------------------------- /src/application/topology/system.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include "mori/application/topology/system.hpp" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "mori/application/transport/rdma/rdma.hpp" 33 | #include "mori/application/utils/check.hpp" 34 | 35 | namespace mori { 36 | namespace application { 37 | 38 | /* ---------------------------------------------------------------------------------------------- */ 39 | /* TopoSystem */ 40 | /* ---------------------------------------------------------------------------------------------- */ 41 | TopoSystem::TopoSystem() { Load(); } 42 | 43 | TopoSystem::~TopoSystem() {} 44 | 45 | void TopoSystem::Load() { 46 | gpu.reset(new TopoSystemGpu()); 47 | pci.reset(new TopoSystemPci()); 48 | net.reset(new TopoSystemNet()); 49 | } 50 | 51 | struct Candidate { 52 | TopoPathPci* path{nullptr}; 53 | TopoNodePci* node{nullptr}; 54 | TopoNodeNic* nic{nullptr}; 55 | }; 56 | 57 | std::vector CollectAndSortCandidates(TopoSystem* sys, int id) { 58 | assert(sys != nullptr); 59 | 60 | TopoSystemGpu* gpu = sys->GetTopoSystemGpu(); 61 | TopoSystemPci* pci = sys->GetTopoSystemPci(); 62 | TopoSystemNet* net = sys->GetTopoSystemNet(); 63 | 64 | TopoNodeGpu* dev = gpu->GetGpuByLogicalId(id); 65 | NumaNodeId gpuNumaNodeId = pci->Node(dev->busId)->NumaNode(); 66 | 67 | // Collect nic candidates 68 | auto nics = net->GetNics(); 69 | std::vector candidates; 70 | for (auto* nic : nics) { 71 | TopoPathPci* path = pci->Path(dev->busId, nic->busId); 72 | TopoNodePci* nicPci = pci->Node(nic->busId); 73 | if (!path) continue; 74 | candidates.push_back({path, nicPci, nic}); 75 | } 76 | 77 | // Sort by 1) speed 2) numa 3) hops 4) name 78 | std::sort(candidates.begin(), candidates.end(), 79 | [&gpuNumaNodeId](Candidate a, Candidate b) -> bool { 80 | bool tie = (a.nic->totalGbps == b.nic->totalGbps); 81 | if (!tie) return a.nic->totalGbps > b.nic->totalGbps; 82 | 83 | if ((a.node->NumaNode() == gpuNumaNodeId) && (b.node->NumaNode() != 
gpuNumaNodeId)) 84 | return true; 85 | if ((a.node->NumaNode() != gpuNumaNodeId) && (b.node->NumaNode() == gpuNumaNodeId)) 86 | return false; 87 | 88 | tie = (a.path->Hops() == b.path->Hops()); 89 | if (!tie) return a.path->Hops() <= b.path->Hops(); 90 | 91 | return a.nic->name <= b.nic->name; 92 | }); 93 | 94 | return candidates; 95 | } 96 | 97 | std::string TopoSystem::MatchGpuAndNic(int id) { 98 | std::vector matches = MatchAllGpusAndNics(); 99 | assert(id < matches.size()); 100 | return matches[id]; 101 | } 102 | 103 | std::vector TopoSystem::MatchAllGpusAndNics() { 104 | int count; 105 | HIP_RUNTIME_CHECK(hipGetDeviceCount(&count)); 106 | 107 | std::vector matches; 108 | std::unordered_set matched; 109 | 110 | for (int i = 0; i < count; i++) { 111 | std::vector candidates = CollectAndSortCandidates(this, i); 112 | if (candidates.empty()) { 113 | matches.push_back(""); 114 | continue; 115 | } 116 | 117 | bool found = false; 118 | for (auto& cand : candidates) { 119 | std::string name = cand.nic->name; 120 | if (matched.find(name) == matched.end()) { 121 | matches.push_back(name); 122 | matched.insert(name); 123 | found = true; 124 | break; 125 | } 126 | } 127 | 128 | if (!found) matches.push_back(candidates[i % candidates.size()].nic->name); 129 | } 130 | 131 | return matches; 132 | } 133 | 134 | } // namespace application 135 | } // namespace mori 136 | -------------------------------------------------------------------------------- /include/mori/io/engine.hpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #pragma once 23 | 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #include "mori/application/transport/tcp/tcp.hpp" 33 | #include "mori/io/backend.hpp" 34 | #include "mori/io/common.hpp" 35 | 36 | namespace mori { 37 | namespace io { 38 | 39 | struct IOEngineConfig { 40 | // Out of band TCP network configuration 41 | std::string host; 42 | uint16_t port; 43 | }; 44 | 45 | class IOEngine; 46 | 47 | // This is a low latency session between a pair of memory descriptor, it caches 48 | // necessary meta data to avoid the overhead of 49 | class IOEngineSession { 50 | public: 51 | ~IOEngineSession() = default; 52 | 53 | TransferUniqueId AllocateTransferUniqueId(); 54 | void Read(size_t localOffset, size_t remoteOffset, size_t size, TransferStatus* status, 55 | TransferUniqueId id); 56 | void Write(size_t localOffset, size_t remoteOffset, size_t size, TransferStatus* status, 57 | TransferUniqueId id); 58 | 59 | void BatchRead(const SizeVec& localOffsets, const SizeVec& remoteOffsets, const SizeVec& sizes, 60 | TransferStatus* status, TransferUniqueId id); 61 | void BatchWrite(const SizeVec& localOffsets, const SizeVec& remoteOffsets, const SizeVec& sizes, 62 | TransferStatus* status, TransferUniqueId id); 63 | bool Alive(); 64 | 65 | friend class IOEngine; 66 | 67 | protected: 68 | IOEngineSession() = default; 69 | 70 | IOEngine* engine{nullptr}; 71 | std::shared_ptr backendSess{nullptr}; 72 | }; 73 | 74 | class IOEngine { 75 | public: 76 | IOEngine(EngineKey, IOEngineConfig); 77 | ~IOEngine(); 78 | 79 | void CreateBackend(BackendType, const BackendConfig&); 80 | void RemoveBackend(BackendType); 81 | 82 | EngineDesc GetEngineDesc() const { return desc; } 83 | 84 | void RegisterRemoteEngine(const EngineDesc&); 85 | void DeregisterRemoteEngine(const EngineDesc&); 86 | 87 | MemoryDesc RegisterMemory(void* data, size_t size, int device, MemoryLocationType loc); 88 | void DeregisterMemory(const MemoryDesc& desc); 89 | 90 | 
TransferUniqueId AllocateTransferUniqueId(); 91 | void Read(const MemoryDesc& localDest, size_t localOffset, const MemoryDesc& remoteSrc, 92 | size_t remoteOffset, size_t size, TransferStatus* status, TransferUniqueId id); 93 | void Write(const MemoryDesc& localSrc, size_t localOffset, const MemoryDesc& remoteDest, 94 | size_t remoteOffset, size_t size, TransferStatus* status, TransferUniqueId id); 95 | 96 | void BatchRead(const MemDescVec& localDest, const BatchSizeVec& localOffsets, 97 | const MemDescVec& remoteSrc, const BatchSizeVec& remoteOffsets, 98 | const BatchSizeVec& sizes, TransferStatusPtrVec& status, TransferUniqueIdVec& ids); 99 | void BatchWrite(const MemDescVec& localSrc, const BatchSizeVec& localOffsets, 100 | const MemDescVec& remoteDest, const BatchSizeVec& remoteOffsets, 101 | const BatchSizeVec& sizes, TransferStatusPtrVec& status, 102 | TransferUniqueIdVec& ids); 103 | // Take the transfer status of an inbound op 104 | bool PopInboundTransferStatus(EngineKey remote, TransferUniqueId id, TransferStatus* status); 105 | 106 | std::optional CreateSession(const MemoryDesc& local, const MemoryDesc& remote); 107 | 108 | private: 109 | Backend* SelectBackend(const MemoryDesc& local, const MemoryDesc& remote); 110 | 111 | public: 112 | // Config and descriptors 113 | IOEngineConfig config; 114 | EngineDesc desc; 115 | 116 | private: 117 | std::atomic nextTransferUid{0}; 118 | std::atomic nextMemUid{0}; 119 | std::unordered_map memPool; 120 | std::unordered_map> backends; 121 | }; 122 | 123 | } // namespace io 124 | } // namespace mori 125 | -------------------------------------------------------------------------------- /src/application/transport/tcp/tcp.cpp: -------------------------------------------------------------------------------- 1 | 2 | // Copyright © Advanced Micro Devices, Inc. All rights reserved. 
3 | // 4 | // MIT License 5 | // 6 | // Permission is hereby granted, free of charge, to any person obtaining a copy 7 | // of this software and associated documentation files (the "Software"), to deal 8 | // in the Software without restriction, including without limitation the rights 9 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | // copies of the Software, and to permit persons to whom the Software is 11 | // furnished to do so, subject to the following conditions: 12 | // 13 | // The above copyright notice and this permission notice shall be included in all 14 | // copies or substantial portions of the Software. 15 | // 16 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | // SOFTWARE. 
23 | #include "mori/application/transport/tcp/tcp.hpp" 24 | 25 | #include 26 | 27 | #include 28 | 29 | #include "mori/application/utils/check.hpp" 30 | 31 | namespace mori { 32 | namespace application { 33 | 34 | #define DEFAULT_LISTEN_BACKLOG 128 35 | 36 | /* ---------------------------------------------------------------------------------------------- */ 37 | /* TCPEndpoint */ 38 | /* ---------------------------------------------------------------------------------------------- */ 39 | 40 | int TCPEndpoint::Send(const void* buf, size_t len) { 41 | const char* p = static_cast(buf); 42 | while (len > 0) { 43 | size_t n = send(handle.fd, p, len, 0); 44 | if (n < 0) return n; 45 | p += n; 46 | len -= n; 47 | } 48 | return 0; 49 | } 50 | 51 | int TCPEndpoint::Recv(void* buf, size_t len) { 52 | char* p = static_cast(buf); 53 | while (len > 0) { 54 | size_t n = ::recv(handle.fd, p, len, 0); 55 | if (n <= 0) return n; 56 | p += n; 57 | len -= n; 58 | } 59 | return 0; 60 | } 61 | 62 | /* ---------------------------------------------------------------------------------------------- */ 63 | /* TCPContext */ 64 | /* ---------------------------------------------------------------------------------------------- */ 65 | TCPContext::TCPContext(std::string host, uint16_t port) { 66 | handle.host = host; 67 | handle.port = port; 68 | } 69 | 70 | TCPContext::~TCPContext() { Close(); } 71 | 72 | void TCPContext::Listen() { 73 | listenFd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0); 74 | assert(listenFd >= 0); 75 | 76 | int opt = 1; 77 | setsockopt(listenFd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); 78 | 79 | sockaddr_in addr{}; 80 | addr.sin_family = AF_INET; 81 | addr.sin_port = htons(handle.port); 82 | addr.sin_addr.s_addr = inet_addr(handle.host.c_str()); 83 | 84 | SYSCALL_RETURN_ZERO(bind(listenFd, reinterpret_cast(&addr), sizeof(addr))); 85 | 86 | socklen_t len = sizeof(addr); 87 | getsockname(listenFd, reinterpret_cast(&addr), &len); 88 | handle.port = 
ntohs(addr.sin_port); 89 | 90 | SYSCALL_RETURN_ZERO(listen(listenFd, DEFAULT_LISTEN_BACKLOG)); 91 | } 92 | 93 | void TCPContext::Close() { 94 | if (listenFd >= 0) { 95 | SYSCALL_RETURN_ZERO(close(listenFd)); 96 | listenFd = -1; 97 | } 98 | while (!endpoints.empty()) { 99 | auto it = endpoints.begin(); 100 | CloseEndpoint(it->second); 101 | } 102 | } 103 | 104 | TCPEndpointHandle TCPContext::Connect(std::string remote, uint16_t port) { 105 | int sock = socket(AF_INET, SOCK_STREAM, 0); 106 | assert(sock >= 0); 107 | 108 | sockaddr_in peer{}; 109 | peer.sin_family = AF_INET; 110 | peer.sin_port = htons(port); 111 | peer.sin_addr.s_addr = inet_addr(remote.c_str()); 112 | 113 | SYSCALL_RETURN_ZERO(connect(sock, reinterpret_cast(&peer), sizeof(peer))); 114 | 115 | TCPEndpointHandle ep{sock, peer}; 116 | endpoints.insert({sock, ep}); 117 | return ep; 118 | } 119 | 120 | TCPEndpointHandleVec TCPContext::Accept() { 121 | sockaddr_in peer{}; 122 | socklen_t len = sizeof(peer); 123 | 124 | TCPEndpointHandleVec newEps; 125 | 126 | while (true) { 127 | int sock = accept(listenFd, reinterpret_cast(&peer), &len); 128 | if (sock >= 0) { 129 | TCPEndpointHandle ep{sock, peer}; 130 | newEps.push_back(ep); 131 | endpoints.insert({sock, ep}); 132 | } 133 | if ((sock == -1) && ((errno == EAGAIN) || (errno == EWOULDBLOCK))) { 134 | break; 135 | } 136 | } 137 | 138 | return newEps; 139 | } 140 | 141 | void TCPContext::CloseEndpoint(TCPEndpointHandle ep) { 142 | if (endpoints.find(ep.fd) == endpoints.end()) return; 143 | SYSCALL_RETURN_ZERO_IGNORE_ERROR(shutdown(ep.fd, SHUT_WR), ENOTCONN); 144 | SYSCALL_RETURN_ZERO(close(ep.fd)); 145 | endpoints.erase(ep.fd); 146 | } 147 | 148 | } // namespace application 149 | } // namespace mori 150 | -------------------------------------------------------------------------------- /tests/cpp/io/test_engine.cpp: -------------------------------------------------------------------------------- 1 | // Copyright © Advanced Micro Devices, Inc. 
All rights reserved. 2 | // 3 | // MIT License 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include 28 | #include 29 | 30 | #include "mori/application/utils/check.hpp" 31 | #include "mori/io/io.hpp" 32 | 33 | using namespace mori::io; 34 | 35 | int GetFreePort() { 36 | int fd = socket(AF_INET, SOCK_STREAM, 0); 37 | if (fd < 0) return -1; 38 | 39 | int opt = 1; 40 | setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); 41 | 42 | sockaddr_in addr{}; 43 | addr.sin_family = AF_INET; 44 | addr.sin_port = 0; 45 | addr.sin_addr.s_addr = INADDR_ANY; 46 | 47 | if (bind(fd, reinterpret_cast(&addr), sizeof(addr)) != 0) { 48 | close(fd); 49 | return -1; 50 | } 51 | 52 | socklen_t len = sizeof(addr); 53 | if (getsockname(fd, reinterpret_cast(&addr), &len) != 0) { 54 | close(fd); 55 | return -1; 56 | } 57 | 58 | int port = ntohs(addr.sin_port); 59 | 60 | close(fd); 61 | return port; 62 | } 63 | 64 | void TestMoriIOEngine() { 65 | SetLogLevel("trace"); 66 | 67 | IOEngineConfig config; 68 | config.host = "127.0.0.1"; 69 | config.port = GetFreePort(); 70 | IOEngine initiator("initiator", config); 71 | 72 | RdmaBackendConfig rdmaConfig{}; 73 | initiator.CreateBackend(BackendType::RDMA, rdmaConfig); 74 | 75 | int newPort = GetFreePort(); 76 | assert(newPort != config.port); 77 | config.port = newPort; 78 | IOEngine target("target", config); 79 | target.CreateBackend(BackendType::RDMA, rdmaConfig); 80 | 81 | EngineDesc initiatorEngineDesc = initiator.GetEngineDesc(); 82 | EngineDesc targetEngineDesc = target.GetEngineDesc(); 83 | 84 | initiator.RegisterRemoteEngine(targetEngineDesc); 85 | target.RegisterRemoteEngine(initiatorEngineDesc); 86 | 87 | void *initiatorBuf, *targetBuf; 88 | size_t bufSize = 1024 * 1024 * 4; 89 | HIP_RUNTIME_CHECK(hipMalloc(&initiatorBuf, bufSize)); 90 | HIP_RUNTIME_CHECK(hipMalloc(&targetBuf, bufSize)); 91 | HIP_RUNTIME_CHECK(hipMemset(targetBuf, 1, bufSize)); 92 | 93 | MemoryDesc initiatorMem = 94 | initiator.RegisterMemory(initiatorBuf, bufSize, 0, MemoryLocationType::GPU); 
95 | MemoryDesc targetMem = target.RegisterMemory(targetBuf, bufSize, 0, MemoryLocationType::GPU); 96 | 97 | int transferCnt = 64; 98 | 99 | for (int i = 0; i < transferCnt; i++) { 100 | TransferStatus initiatorStatus, targetStatus; 101 | TransferUniqueId id = initiator.AllocateTransferUniqueId(); 102 | initiator.Read(initiatorMem, 0, targetMem, 0, bufSize, &initiatorStatus, id); 103 | printf("read %d id %lu\n", i, id); 104 | while (initiatorStatus.Code() == StatusCode::INIT) { 105 | } 106 | while (targetStatus.Code() == StatusCode::INIT) { 107 | target.PopInboundTransferStatus(initiator.GetEngineDesc().key, id, &targetStatus); 108 | } 109 | printf("Status message initiator %s target %s read value %d\n", 110 | initiatorStatus.Message().c_str(), targetStatus.Message().c_str(), 111 | reinterpret_cast(initiatorBuf)[511]); 112 | } 113 | 114 | std::vector initiatorStatusVec(transferCnt); 115 | std::vector targetStatusVec(transferCnt); 116 | std::vector trsfIds(transferCnt); 117 | 118 | for (int i = 0; i < transferCnt; i++) { 119 | TransferUniqueId id = initiator.AllocateTransferUniqueId(); 120 | trsfIds[i] = id; 121 | initiator.Read(initiatorMem, 0, targetMem, 0, bufSize, &initiatorStatusVec[i], id); 122 | } 123 | 124 | for (int i = 0; i < transferCnt; i++) { 125 | while (initiatorStatusVec[i].Code() == StatusCode::INIT) { 126 | } 127 | while (targetStatusVec[i].Code() == StatusCode::INIT) { 128 | target.PopInboundTransferStatus(initiator.GetEngineDesc().key, trsfIds[i], 129 | &targetStatusVec[i]); 130 | } 131 | printf("Status message initiator %s target %s read value %d\n", 132 | initiatorStatusVec[i].Message().c_str(), targetStatusVec[i].Message().c_str(), 133 | reinterpret_cast(initiatorBuf)[511]); 134 | } 135 | 136 | initiator.DeregisterMemory(initiatorMem); 137 | target.DeregisterMemory(targetMem); 138 | } 139 | 140 | int main() { TestMoriIOEngine(); } 141 | --------------------------------------------------------------------------------