├── VERSION ├── src ├── .clang-format ├── CMakeLists.txt └── firmware │ └── riscv │ └── wormhole │ └── host_mem_address_map.h ├── docs ├── install_docs_requirements.sh ├── images │ ├── tensix_grid.png │ ├── tensix_harvested_rows.png │ ├── tt_logo_stacked_color.png │ ├── tensix_logical_coordinates.png │ ├── tensix_physical_coordinates.png │ ├── tensix_translated_coordinates.png │ ├── tensix_logical_coordinates_harvested.png │ ├── tensix_virtual_coordinates_harvested.png │ ├── tensix_phyiscal_coordinates_harvested.png │ └── tensix_translated_coordinates_harvested.png ├── build_docs.sh └── BUILDING_DOXYGEN.md ├── device ├── libs │ └── lite_fabric.bin ├── tt_device │ ├── tt_sim_tt_device.cpp │ ├── rtl_simulation_tt_device.cpp │ ├── remote_communication_lite_fabric.cpp │ └── remote_blackhole_tt_device.cpp ├── api │ └── umd │ │ └── device │ │ ├── tt_device │ │ ├── tt_sim_tt_device.hpp │ │ ├── rtl_simulation_tt_device.hpp │ │ ├── remote_communication_legacy_firmware.hpp │ │ ├── remote_blackhole_tt_device.hpp │ │ ├── remote_communication_lite_fabric.hpp │ │ └── blackhole_tt_device.hpp │ │ ├── types │ │ ├── wormhole_dram.hpp │ │ ├── communication_protocol.hpp │ │ ├── telemetry.hpp │ │ ├── tlb.hpp │ │ ├── wormhole_telemetry.hpp │ │ ├── blackhole_arc.hpp │ │ └── arch.hpp │ │ ├── arc │ │ ├── blackhole_arc_telemetry_reader.hpp │ │ ├── wormhole_arc_telemetry_reader.hpp │ │ ├── smbus_arc_telemetry_reader.hpp │ │ ├── wormhole_arc_messenger.hpp │ │ └── blackhole_arc_messenger.hpp │ │ ├── firmware │ │ ├── wormhole_18_7_firmware_info_provider.hpp │ │ ├── blackhole_18_7_firmware_info_provider.hpp │ │ ├── firmware_utils.hpp │ │ ├── README.md │ │ └── wormhole_18_3_firmware_info_provider.hpp │ │ ├── simulation │ │ ├── simulation_host.hpp │ │ ├── rtl_simulation_chip.hpp │ │ └── tt_sim_chip.hpp │ │ ├── utils │ │ ├── common.hpp │ │ ├── timeouts.hpp │ │ └── kmd_versions.hpp │ │ ├── lite_fabric │ │ ├── lite_fabric_constants.hpp │ │ ├── lite_fabric_host_utils.hpp │ │ ├── fabric_edm_types.hpp │ 
│ └── lf_dev_mem_map.hpp │ │ ├── warm_reset.hpp │ │ ├── logging │ │ └── config.hpp │ │ ├── coordinates │ │ └── wormhole_coordinate_manager.hpp │ │ ├── tt_io.hpp │ │ ├── driver_atomics.hpp │ │ ├── topology │ │ └── topology_utils.hpp │ │ ├── chip_helpers │ │ └── tlb_manager.hpp │ │ ├── jtag │ │ └── jtag.hpp │ │ └── pcie │ │ ├── tlb_handle.hpp │ │ └── tlb_window.hpp ├── types │ ├── xy_pair.cpp │ └── tensix_soft_reset_options.cpp ├── firmware │ ├── blackhole_18_7_firmware_info_provider.cpp │ └── wormhole_18_7_firmware_info_provider.cpp ├── simulation │ └── simulation_device.fbs ├── arch │ └── architecture_implementation.cpp ├── arc │ ├── blackhole_arc_messenger.cpp │ ├── blackhole_arc_telemetry_reader.cpp │ ├── wormhole_arc_telemetry_reader.cpp │ ├── smbus_arc_telemetry_reader.cpp │ └── arc_messenger.cpp ├── hugepage.hpp ├── logging │ └── config.cpp └── pcie │ └── tlb_handle.cpp ├── .git-blame-ignore-revs ├── .github ├── CODEOWNERS ├── fedora-39.Dockerfile ├── ubuntu-20.04.Dockerfile ├── ubuntu-22.04.Dockerfile ├── ubuntu-24.04.Dockerfile ├── workflows │ ├── community-issue-tagging.yml │ ├── pre-commit.yml │ ├── build-and-run-all-benchmarks.yml │ └── build-image.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md ├── manylinux.Dockerfile ├── docker_rhel_install_common.sh └── docker_install_common.sh ├── .gitignore ├── tests ├── microbenchmark │ ├── benchmarks │ │ ├── ethernet_io │ │ │ └── README.md │ │ ├── open_cluster │ │ │ ├── README.md │ │ │ └── test_open_cluster.cpp │ │ └── iommu │ │ │ └── README.md │ ├── CMakeLists.txt │ └── common │ │ └── microbenchmark_utils.hpp ├── unit_test_main.cpp ├── pcie │ ├── CMakeLists.txt │ └── test_pcie_device.cpp ├── unified │ └── CMakeLists.txt ├── misc │ └── CMakeLists.txt ├── galaxy │ ├── CMakeLists.txt │ ├── test_galaxy_common.hpp │ └── test_galaxy_common.cpp ├── blackhole │ ├── CMakeLists.txt │ └── test_chip_info_bh.cpp ├── wormhole │ └── CMakeLists.txt ├── baremetal │ └── 
CMakeLists.txt ├── cluster_descriptor_examples │ ├── wormhole_N150_unique_ids.yaml │ ├── blackhole_P150.yaml │ ├── wormhole_N150.yaml │ ├── blackhole_P100.yaml │ ├── wormhole_N300.yaml │ ├── wormhole_N300_routing_info.yaml │ ├── wormhole_N300_with_bus_id.yaml │ ├── wormhole_N300_board_info.yaml │ ├── blackhole_P300_first_mmio.yaml │ ├── blackhole_P300_second_mmio.yaml │ ├── blackhole_P300_both_mmio.yaml │ ├── wormhole_2xN300_unconnected.yaml │ ├── wormhole_N300_with_remote_connections.yaml │ └── 2x2_n300_cluster_desc.yaml ├── soc_descs │ ├── quasar_simulation_1x1.yaml │ ├── wormhole_b0_one_dram_one_tensix_no_eth.yaml │ ├── blackhole_simulation_1x2.yaml │ ├── wormhole_b0_1x1.yaml │ ├── wormhole_b0_8x10.yaml │ └── blackhole_140_arch_no_noc1.yaml ├── api │ ├── CMakeLists.txt │ ├── test_software_harvesting.cpp │ ├── test_tlb_manager.cpp │ ├── test_arc_telemetry.cpp │ └── GENERATE_ASSEMBLY_FOR_TESTS.md ├── simulation │ ├── CMakeLists.txt │ └── device_fixture.hpp ├── CMakeLists.txt └── test_utils │ ├── setup_risc_cores.hpp │ └── device_test_utils.hpp ├── cmake ├── stubs │ ├── README.md │ └── cpm-stubs.cmake ├── x86-linux-clang-17-toolchain.cmake ├── ttexalens_private_check.cmake ├── umdConfig.cmake.in ├── check_libcpp.cmake ├── CPM.cmake ├── compilers.cmake ├── sanitizers.cmake └── example_client.cmake ├── LICENSE_understanding.txt ├── .yamllint ├── .gersemirc ├── .clang-tidy ├── common ├── CMakeLists.txt ├── disjoint_set.hpp └── timestamp.hpp ├── examples ├── tt_device_example │ ├── CMakeLists.txt │ └── README.md ├── CMakeLists.txt └── README.md ├── nanobind ├── tests │ ├── test_py_cluster.py │ ├── test_py_basic_types.py │ ├── test_py_warm_reset.py │ └── test_py_telemetry.py ├── py_api_cluster.cpp ├── py_api_module.cpp └── py_api_warm_reset.cpp ├── .vscode ├── default.settings.json └── default.launch.json ├── tools ├── common.hpp ├── README.md ├── CMakeLists.txt └── topology.cpp ├── .pre-commit-config.yaml ├── .clangd ├── .clang-format ├── CHANGELOG ├── 
.pre-commit-hooks └── check-copyright-config.yaml ├── scripts └── iommu_detect.sh └── pyproject.toml /VERSION: -------------------------------------------------------------------------------- 1 | 0.7.0 2 | -------------------------------------------------------------------------------- /src/.clang-format: -------------------------------------------------------------------------------- 1 | DisableFormat: true 2 | SortIncludes: false 3 | -------------------------------------------------------------------------------- /docs/install_docs_requirements.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install -y doxygen -------------------------------------------------------------------------------- /device/libs/lite_fabric.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/device/libs/lite_fabric.bin -------------------------------------------------------------------------------- /docs/images/tensix_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_grid.png -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # CMake && C++ linting with Gersemi and Clang-tidy 2 | 51d19190b6890929b2a4833d0fd75221a1c23d4f 3 | -------------------------------------------------------------------------------- /docs/images/tensix_harvested_rows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_harvested_rows.png -------------------------------------------------------------------------------- /docs/images/tt_logo_stacked_color.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tt_logo_stacked_color.png -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # For now, every member is the owner of the whole repo 2 | * @broskoTT @pjanevskiTT @nbuncicTT @aleksamarkovicTT 3 | -------------------------------------------------------------------------------- /docs/images/tensix_logical_coordinates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_logical_coordinates.png -------------------------------------------------------------------------------- /docs/images/tensix_physical_coordinates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_physical_coordinates.png -------------------------------------------------------------------------------- /docs/images/tensix_translated_coordinates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_translated_coordinates.png -------------------------------------------------------------------------------- /docs/images/tensix_logical_coordinates_harvested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_logical_coordinates_harvested.png -------------------------------------------------------------------------------- /docs/images/tensix_virtual_coordinates_harvested.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_virtual_coordinates_harvested.png -------------------------------------------------------------------------------- /docs/images/tensix_phyiscal_coordinates_harvested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_phyiscal_coordinates_harvested.png -------------------------------------------------------------------------------- /docs/images/tensix_translated_coordinates_harvested.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tenstorrent/tt-umd/HEAD/docs/images/tensix_translated_coordinates_harvested.png -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(firmware INTERFACE) 2 | add_library(${PROJECT_NAME}::Firmware ALIAS firmware) 3 | 4 | target_include_directories(firmware INTERFACE firmware/riscv) 5 | -------------------------------------------------------------------------------- /.github/fedora-39.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM fedora:39 2 | 3 | COPY docker_rhel_install_common.sh /docker_rhel_install_common.sh 4 | RUN chmod +x /docker_rhel_install_common.sh && /docker_rhel_install_common.sh 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | *.o 3 | *.d/ 4 | .umd/ 5 | compile_commands.json 6 | .cache/** 7 | .cpmcache/ 8 | .envrc 9 | .vscode/ 10 | *.log 11 | *.csv 12 | core 13 | __pycache__ 14 | tt_umd.egg-info 15 | .venv 16 | ucli.key -------------------------------------------------------------------------------- 
/tests/microbenchmark/benchmarks/ethernet_io/README.md: -------------------------------------------------------------------------------- 1 | # ETH IO benchmark 2 | 3 | This benchmark contains tests that measure the performance of IO to devices that are not connected to the host directly over PCIe, but are instead reached over ETH through chips that are connected via PCIe. 4 | -------------------------------------------------------------------------------- /.github/ubuntu-20.04.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV OS_CODENAME=focal 5 | 6 | COPY docker_install_common.sh /docker_install_common.sh 7 | RUN chmod +x /docker_install_common.sh && /docker_install_common.sh 8 | -------------------------------------------------------------------------------- /cmake/stubs/README.md: -------------------------------------------------------------------------------- 1 | This directory contains stubs of functions that we call directly, but whose definitions we cannot easily 2 | point the formatter (Gersemi) to. See https://github.com/BlankSpruce/gersemi?tab=readme-ov-file#lets-make-a-deal for more details. 
-------------------------------------------------------------------------------- /.github/ubuntu-22.04.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV OS_CODENAME=jammy 5 | 6 | COPY docker_install_common.sh /docker_install_common.sh 7 | RUN chmod +x /docker_install_common.sh && /docker_install_common.sh 8 | 9 | -------------------------------------------------------------------------------- /.github/ubuntu-24.04.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:24.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | ENV OS_CODENAME=noble 5 | 6 | COPY docker_install_common.sh /docker_install_common.sh 7 | RUN chmod +x /docker_install_common.sh && /docker_install_common.sh 8 | 9 | -------------------------------------------------------------------------------- /LICENSE_understanding.txt: -------------------------------------------------------------------------------- 1 | For the avoidance of doubt, this software assists in programming Tenstorrent products. 2 | 3 | However, making, using, or selling hardware, models, or IP may require the license of rights (such as patent rights) from Tenstorrent or others. 
4 | -------------------------------------------------------------------------------- /.yamllint: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | extends: default 4 | 5 | rules: 6 | # yaml documents should start optionally with --- 7 | document-start: disable 8 | line-length: 9 | max: 120 10 | # the "on:" is detected as truthy value and spawns a false warning 11 | truthy: disable -------------------------------------------------------------------------------- /.gersemirc: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=https://raw.githubusercontent.com/BlankSpruce/gersemi/master/gersemi/configuration.schema.json 2 | 3 | definitions: [cmake, cmake/stubs] 4 | indent: 4 5 | line_length: 120 6 | list_expansion: favour-expansion 7 | warn_about_unknown_commands: false 8 | -------------------------------------------------------------------------------- /cmake/x86-linux-clang-17-toolchain.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_SYSTEM_NAME Linux) 2 | 3 | set(CMAKE_CXX_COMPILER clang++-17 CACHE STRING "C++ compiler") 4 | set(CMAKE_C_COMPILER clang-17 CACHE STRING "C compiler") 5 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++" CACHE STRING "CXX FLAGS for clang") 6 | -------------------------------------------------------------------------------- /.github/workflows/community-issue-tagging.yml: -------------------------------------------------------------------------------- 1 | name: "Community Issue / PR Labeling Workflow" 2 | 3 | on: 4 | issues: 5 | types: [opened] 6 | pull_request: 7 | types: [opened] 8 | 9 | jobs: 10 | call-central-workflow: 11 | uses: tenstorrent/tt-github-actions/.github/workflows/on-community-issue.yml@main 12 | -------------------------------------------------------------------------------- /tests/unit_test_main.cpp: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include 6 | 7 | #include "gtest_initializer.hpp" 8 | 9 | int main(int argc, char **argv) { 10 | initialize_gtest(argc, argv); 11 | return RUN_ALL_TESTS(); 12 | } 13 | -------------------------------------------------------------------------------- /docs/build_docs.sh: -------------------------------------------------------------------------------- 1 | if [[ -z "$TT_UMD_HOME" ]]; then 2 | echo "Must provide TT_UMD_HOME in environment" 1>&2 3 | exit 1 4 | fi 5 | 6 | echo "Building tt-umd docs..." 7 | 8 | pushd $TT_UMD_HOME 9 | DOCS_BUILD_DIR="build" 10 | if [ ! -d "$DOCS_BUILD_DIR" ]; then 11 | mkdir $DOCS_BUILD_DIR 12 | fi 13 | doxygen Doxyfile 14 | popd -------------------------------------------------------------------------------- /tests/pcie/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(test_pcie_device test_pcie_device.cpp) 2 | target_link_libraries(test_pcie_device PRIVATE test_common) 3 | set_target_properties( 4 | test_pcie_device 5 | PROPERTIES 6 | RUNTIME_OUTPUT_DIRECTORY 7 | ${CMAKE_BINARY_DIR}/test/umd/test_pcie_device 8 | OUTPUT_NAME 9 | test_pcie_device 10 | ) 11 | -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | --- 2 | Checks: > 3 | -*, 4 | modernize-use-override, 5 | -clang-diagnostic-unknown-warning-option, 6 | bugprone-use-after-move, 7 | bugprone-redundant-branch-condition, 8 | cppcoreguidelines-use-enum-class, 9 | cppcoreguidelines-pro-type-cstyle-cast 10 | 11 | HeaderFilterRegex: '^(?!.*\.cpmcache).*$' 12 | WarningsAsErrors: '*' 13 | FormatStyle: 'file' -------------------------------------------------------------------------------- 
/tests/microbenchmark/benchmarks/open_cluster/README.md: -------------------------------------------------------------------------------- 1 | # Cluster open benchmark 2 | 3 | This benchmark contains tests that measure the performance of opening/constructing a Cluster object. Since the work done in Cluster objects is non-trivial, it is important to measure how long it takes to open the cluster and how long it takes to construct the Cluster object. This benchmark should run on all our configurations. -------------------------------------------------------------------------------- /device/tt_device/tt_sim_tt_device.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include "umd/device/tt_device/tt_sim_tt_device.hpp" 5 | 6 | namespace tt::umd { 7 | 8 | TTSimTTDevice::TTSimTTDevice() { 9 | throw std::runtime_error("Creating TTSimTTDevice without an underlying communication device is not supported."); 10 | } 11 | 12 | } // namespace tt::umd 13 | -------------------------------------------------------------------------------- /cmake/ttexalens_private_check.cmake: -------------------------------------------------------------------------------- 1 | include(ExternalProject) 2 | 3 | ExternalProject_Add( 4 | ttexalens_private 5 | GIT_REPOSITORY ${TTEXALENS_PRIVATE_GIT_REPOSITORY} 6 | GIT_TAG ${TTEXALENS_PRIVATE_GIT_TAG} 7 | GIT_SHALLOW TRUE 8 | PREFIX ${CMAKE_CURRENT_BINARY_DIR}/ttexalens_private 9 | CONFIGURE_COMMAND 10 | "" 11 | BUILD_COMMAND 12 | "" 13 | INSTALL_COMMAND 14 | "" 15 | ) 16 | -------------------------------------------------------------------------------- /tests/unified/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UNIFIED_TESTS_SRCS 2 | multiprocess.cpp 3 | test_tlb.cpp 4 | ) 5 | 6 | add_executable(unified_tests ${UNIFIED_TESTS_SRCS}) 7 | 
target_link_libraries(unified_tests PRIVATE test_common) 8 | set_target_properties( 9 | unified_tests 10 | PROPERTIES 11 | RUNTIME_OUTPUT_DIRECTORY 12 | ${CMAKE_BINARY_DIR}/test/umd/unified 13 | OUTPUT_NAME 14 | unified_tests 15 | ) 16 | -------------------------------------------------------------------------------- /tests/misc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UMD_MISC_TESTS_SRCS 2 | test_semver.cpp 3 | test_assert.cpp 4 | ) 5 | 6 | add_executable(umd_misc_tests ${UMD_MISC_TESTS_SRCS}) 7 | target_link_libraries(umd_misc_tests PRIVATE test_common) 8 | set_target_properties( 9 | umd_misc_tests 10 | PROPERTIES 11 | RUNTIME_OUTPUT_DIRECTORY 12 | ${CMAKE_BINARY_DIR}/test/umd/misc 13 | OUTPUT_NAME 14 | umd_misc_tests 15 | ) 16 | -------------------------------------------------------------------------------- /common/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(umd_common INTERFACE) 2 | add_library(${PROJECT_NAME}::Common ALIAS umd_common) 3 | 4 | target_sources( 5 | umd_common 6 | INTERFACE 7 | assert.hpp 8 | backtrace.hpp 9 | disjoint_set.hpp 10 | gtest_initializer.hpp # FIXME: this should be tucked away with the tests 11 | timestamp.hpp 12 | utils.hpp 13 | ) 14 | 15 | target_include_directories(umd_common INTERFACE .) 16 | -------------------------------------------------------------------------------- /device/api/umd/device/tt_device/tt_sim_tt_device.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "umd/device/tt_device/tt_device.hpp" 14 | 15 | namespace tt::umd { 16 | class TTSimTTDevice : public TTDevice { 17 | public: 18 | TTSimTTDevice(); 19 | }; 20 | } // namespace tt::umd 21 | -------------------------------------------------------------------------------- /device/types/xy_pair.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #include "umd/device/types/xy_pair.hpp" 8 | 9 | #include 10 | 11 | namespace tt { 12 | 13 | std::string xy_pair::str() const { return fmt::format("(x={},y={})", x, y); } 14 | 15 | std::string cxy_pair::str() const { return fmt::format("(chip={},x={},y={})", chip, x, y); } 16 | 17 | } // namespace tt 18 | -------------------------------------------------------------------------------- /examples/tt_device_example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(tt_device_example tt_device_example.cpp) 2 | 3 | target_link_libraries( 4 | tt_device_example 5 | PRIVATE 6 | umd::device 7 | tt-logger::tt-logger 8 | ) 9 | 10 | set_target_properties( 11 | tt_device_example 12 | PROPERTIES 13 | RUNTIME_OUTPUT_DIRECTORY 14 | ${CMAKE_BINARY_DIR}/examples/tt_device_example/ 15 | OUTPUT_NAME 16 | tt_device_example 17 | ) 18 | -------------------------------------------------------------------------------- /device/tt_device/rtl_simulation_tt_device.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include "umd/device/tt_device/rtl_simulation_tt_device.hpp" 5 | 6 | namespace tt::umd { 7 | 8 | RtlSimulationTTDevice::RtlSimulationTTDevice() { 9 | throw std::runtime_error( 10 | "Creating RtlSimulationTTDevice without an underlying communication device is not supported."); 11 | } 12 | 13 | } // namespace tt::umd 14 | -------------------------------------------------------------------------------- /cmake/umdConfig.cmake.in: -------------------------------------------------------------------------------- 1 | 2 | # @PROJECT_NAME@Config.cmake.in 3 | @PACKAGE_INIT@ 4 | 5 | # Set package as found 6 | set(@PROJECT_NAME@_FOUND TRUE) 7 | 8 | # Include the exported targets 9 | include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake") 10 | 11 | # Set the directory containing the CMake files for the project 12 | get_filename_component(@PROJECT_NAME@_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 13 | 14 | message(STATUS "Found @PROJECT_NAME@ at ${@PROJECT_NAME@_CMAKE_DIR}") 15 | -------------------------------------------------------------------------------- /device/api/umd/device/tt_device/rtl_simulation_tt_device.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "umd/device/tt_device/tt_device.hpp" 14 | 15 | namespace tt::umd { 16 | class RtlSimulationTTDevice : public TTDevice { 17 | public: 18 | RtlSimulationTTDevice(); 19 | }; 20 | } // namespace tt::umd 21 | -------------------------------------------------------------------------------- /tests/galaxy/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UNIT_TESTS_GLX_SRCS 2 | test_galaxy_common.cpp 3 | test_umd_concurrent_threads.cpp 4 | test_umd_remote_api_stability.cpp 5 | test_umd_remote_api.cpp 6 | ) 7 | 8 | add_executable(unit_tests_glx ${UNIT_TESTS_GLX_SRCS}) 9 | target_link_libraries(unit_tests_glx PRIVATE test_common) 10 | set_target_properties( 11 | unit_tests_glx 12 | PROPERTIES 13 | RUNTIME_OUTPUT_DIRECTORY 14 | ${CMAKE_BINARY_DIR}/test/umd/galaxy 15 | ) 16 | -------------------------------------------------------------------------------- /nanobind/tests/test_py_cluster.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 Tenstorrent Inc. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | import unittest 4 | import tt_umd 5 | 6 | class TestCluster(unittest.TestCase): 7 | def test_cluster_functionality(self): 8 | cluster = tt_umd.Cluster() 9 | target_device_ids = cluster.get_target_device_ids() 10 | print("Cluster device IDs:", target_device_ids) 11 | clocks = cluster.get_clocks() 12 | print("Cluster clocks:", clocks) 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: broskoTT 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Stack trace** 14 | Please paste the stack trace as this can speed up triaging. 15 | 16 | **To reproduce** 17 | Steps to reproduce the behavior. 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expect to happen. 
21 | -------------------------------------------------------------------------------- /.vscode/default.settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "[cpp]": { 3 | "editor.formatOnSave": true, 4 | }, 5 | "C_Cpp.clang_format_fallbackStyle": "Google", 6 | "C_Cpp.clang_format_style": "file", 7 | "clangd.path": "clangd-17", 8 | "clangd.arguments": [ 9 | "--compile-commands-dir=${workspaceFolder}/build", 10 | "--header-insertion=never", 11 | "--enable-config" 12 | ], 13 | "C_Cpp.default.intelliSenseMode": "clang-x64", 14 | "C_Cpp.intelliSenseEngine": "disabled" 15 | } -------------------------------------------------------------------------------- /tests/blackhole/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UNIT_TESTS_BH_SRCS 2 | test_cluster_bh.cpp 3 | test_arc_messages_bh.cpp 4 | test_chip_info_bh.cpp 5 | ) 6 | 7 | add_executable(unit_tests_blackhole ${UNIT_TESTS_BH_SRCS}) 8 | target_link_libraries(unit_tests_blackhole PRIVATE test_common) 9 | set_target_properties( 10 | unit_tests_blackhole 11 | PROPERTIES 12 | RUNTIME_OUTPUT_DIRECTORY 13 | ${CMAKE_BINARY_DIR}/test/umd/blackhole 14 | OUTPUT_NAME 15 | unit_tests 16 | ) 17 | -------------------------------------------------------------------------------- /device/api/umd/device/types/wormhole_dram.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | namespace tt::umd { 10 | 11 | namespace wormhole { 12 | 13 | enum WormholeDramTrainingStatus : uint8_t { 14 | TrainingNone, 15 | TrainingFail, 16 | TrainingPass, 17 | TrainingSkip, 18 | PhyOff, 19 | ReadEye, 20 | BistEye, 21 | CaDebug, 22 | }; 23 | 24 | } // namespace wormhole 25 | 26 | } // namespace tt::umd 27 | -------------------------------------------------------------------------------- /tests/wormhole/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UNIT_TESTS_WH_SRCS 2 | test_cluster_wh.cpp 3 | test_umd_remote_api_stability.cpp 4 | test_arc_messages_wh.cpp 5 | test_remote_communication_wh.cpp 6 | ) 7 | 8 | add_executable(unit_tests_wormhole ${UNIT_TESTS_WH_SRCS}) 9 | target_link_libraries(unit_tests_wormhole PRIVATE test_common) 10 | set_target_properties( 11 | unit_tests_wormhole 12 | PROPERTIES 13 | RUNTIME_OUTPUT_DIRECTORY 14 | ${CMAKE_BINARY_DIR}/test/umd/wormhole_b0 15 | OUTPUT_NAME 16 | unit_tests 17 | ) 18 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # Check if this is being built as a standalone project by running cmake from this dir 2 | if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 3 | # This is a root project build from install artifacts 4 | cmake_minimum_required(VERSION 3.16) 5 | project(tt_umd_examples) 6 | 7 | message(STATUS "Building examples as standalone project from UMD install artifacts") 8 | 9 | include(${PROJECT_SOURCE_DIR}/../cmake/example_client.cmake) 10 | endif() 11 | 12 | # Add all example subdirectories 13 | add_subdirectory(tt_device_example) 14 | -------------------------------------------------------------------------------- /tests/baremetal/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | set(BAREMETAL_TESTS_SRCS 2 | test_cluster_descriptor_offline.cpp 3 | test_core_coord_translation_wh.cpp 4 | test_core_coord_translation_bh.cpp 5 | test_soc_descriptor.cpp 6 | ) 7 | 8 | add_executable(baremetal_tests ${BAREMETAL_TESTS_SRCS}) 9 | target_link_libraries(baremetal_tests PRIVATE test_common) 10 | set_target_properties( 11 | baremetal_tests 12 | PROPERTIES 13 | RUNTIME_OUTPUT_DIRECTORY 14 | ${CMAKE_BINARY_DIR}/test/umd/baremetal 15 | OUTPUT_NAME 16 | baremetal_tests 17 | ) 18 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: broskoTT 7 | 8 | --- 9 | 10 | **Timeline** 11 | Is the feature tied to a deadline, or is there a timeline otherwise which would increase the impact of the feature? 12 | 13 | **Is your feature request related to a problem? Please describe.** 14 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 15 | 16 | **Proposed solution** 17 | A clear and concise description of what you want to happen. 18 | -------------------------------------------------------------------------------- /cmake/check_libcpp.cmake: -------------------------------------------------------------------------------- 1 | # Only perform the check if Clang is the compiler 2 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 3 | include(CheckCXXCompilerFlag) 4 | 5 | check_cxx_compiler_flag( 6 | "-stdlib=libc++" 7 | HAS_LIBCPP 8 | ) 9 | 10 | if(HAS_LIBCPP) 11 | message(STATUS "libc++ is available") 12 | else() 13 | message( 14 | WARNING 15 | "libc++ was not detected! 
If you are intending to use Clang's implementation of the c++ library, please ensure that libc++ is installed and available." 16 | ) 17 | endif() 18 | endif() 19 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N150_unique_ids.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: wormhole_b0 3 | chips: 4 | 0: 5 | - 0 6 | - 0 7 | - 0 8 | - 0 9 | chip_unique_ids: 10 | 0: 72059269368820857 11 | ethernet_connections: 12 | [] 13 | chips_with_mmio: 14 | - 0: 0 15 | harvesting: 16 | 0: 17 | noc_translation: true 18 | harvest_mask: 64 19 | dram_harvesting_mask: 0 20 | eth_harvesting_mask: 0 21 | pcie_harvesting_mask: 0 22 | chip_to_boardtype: 23 | 0: n150 24 | boards: 25 | - 26 | - board_id: 72059269368820857 27 | - board_type: n150 28 | - chips: 29 | - 0 -------------------------------------------------------------------------------- /device/api/umd/device/arc/blackhole_arc_telemetry_reader.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include "umd/device/arc/arc_telemetry_reader.hpp" 9 | #include "umd/device/arch/blackhole_implementation.hpp" 10 | 11 | extern bool umd_use_noc1; 12 | 13 | namespace tt::umd { 14 | 15 | class BlackholeArcTelemetryReader : public ArcTelemetryReader { 16 | public: 17 | BlackholeArcTelemetryReader(TTDevice* tt_device); 18 | 19 | protected: 20 | void get_telemetry_address() override; 21 | }; 22 | 23 | } // namespace tt::umd 24 | -------------------------------------------------------------------------------- /docs/BUILDING_DOXYGEN.md: -------------------------------------------------------------------------------- 1 | # TT-UMD Docs 2 | 3 | ## Requirements 4 | 5 | To build the docs, the required packages must be installed first. To install them, run [`install_docs_requirements.sh`](install_docs_requirements.sh). 6 | 7 | ## Build docs 8 | 9 | To build the docs, the environment variable `TT_UMD_HOME` must be set to the root of the tt-umd project. 10 | 11 | After that, you can run [`build_docs.sh`](build_docs.sh). 12 | 13 | In the `build/docs` directory you will find the docs in multiple formats. 
14 | 15 | ## Adding documentation 16 | 17 | Augment the `INPUT` line in [`Doxyfile`](../Doxyfile) 18 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/blackhole_P150.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: blackhole 3 | chips: 4 | {} 5 | chip_unique_ids: 6 | 0: 4476187513037 7 | ethernet_connections: 8 | [] 9 | ethernet_connections_to_remote_devices: 10 | [] 11 | chips_with_mmio: 12 | - 0: 0 13 | harvesting: 14 | 0: 15 | noc_translation: true 16 | harvest_mask: 0 17 | dram_harvesting_mask: 0 18 | eth_harvesting_mask: 288 19 | pcie_harvesting_mask: 2 20 | l2cpu_harvesting_mask: 0 21 | chip_to_boardtype: 22 | 0: p150 23 | boards: 24 | - 25 | - board_id: 4476187513037 26 | - board_type: p150 27 | - chips: 28 | - 0 29 | -------------------------------------------------------------------------------- /device/api/umd/device/types/communication_protocol.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | #include 9 | #include 10 | 11 | namespace tt::umd { 12 | 13 | enum class IODeviceType { 14 | PCIe, 15 | JTAG, 16 | }; 17 | 18 | // Const map of Device type names for each of the types listed in the enum. 
19 | static const std::unordered_map DeviceTypeToString = { 20 | {IODeviceType::PCIe, "PCIe"}, 21 | {IODeviceType::JTAG, "JTAG"}, 22 | }; 23 | 24 | } // namespace tt::umd 25 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N150.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Wormhole, 3 | } 4 | 5 | chips: { 6 | 0: [0,0,0,0], 7 | } 8 | 9 | ethernet_connections: [ 10 | ] 11 | 12 | chips_with_mmio: [ 13 | 0: 0, 14 | ] 15 | 16 | # harvest_mask is a bitmask indicating which tensix rows are harvested: bit 0 = first tensix row, bit 1 = second tensix row, etc. 17 | harvesting: { 18 | 0: {noc_translation: true, harvest_mask: 32}, 19 | } 20 | 21 | # This value will be null if the boardtype is unknown. That should never happen in practice, but to be defensive it would be useful to throw an error in this case. 22 | boardtype: { 23 | 0: n150, 24 | } -------------------------------------------------------------------------------- /nanobind/tests/test_py_basic_types.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 Tenstorrent Inc. 
2 | # SPDX-License-Identifier: Apache-2.0 3 | import unittest 4 | import tt_umd 5 | 6 | class TestBasicTypes(unittest.TestCase): 7 | def test_eth_coord(self): 8 | eth_coord = tt_umd.EthCoord(0, 1, 2, 3, 4) 9 | 10 | def test_tt_xy_pair(self): 11 | xy_pair = tt_umd.tt_xy_pair(1, 2) 12 | self.assertEqual(str(xy_pair), "(1, 2)") 13 | 14 | def test_arch(self): 15 | for arch in tt_umd.ARCH: 16 | self.assertEqual(arch, tt_umd.ARCH.from_str(str(arch))) 17 | self.assertEqual(str(tt_umd.ARCH.WORMHOLE_B0), "wormhole_b0") 18 | -------------------------------------------------------------------------------- /nanobind/py_api_cluster.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include 7 | #include 8 | #include 9 | 10 | #include "umd/device/cluster.hpp" 11 | #include "umd/device/topology/topology_discovery.hpp" 12 | 13 | namespace nb = nanobind; 14 | 15 | using namespace tt::umd; 16 | 17 | void bind_cluster(nb::module_ &m) { 18 | nb::class_(m, "Cluster") 19 | .def(nb::init<>()) 20 | .def("get_target_device_ids", &Cluster::get_target_device_ids) 21 | .def("get_clocks", &Cluster::get_clocks); 22 | } 23 | -------------------------------------------------------------------------------- /device/api/umd/device/arc/wormhole_arc_telemetry_reader.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include "umd/device/arc/arc_messenger.hpp" 9 | #include "umd/device/arc/arc_telemetry_reader.hpp" 10 | #include "umd/device/arch/wormhole_implementation.hpp" 11 | 12 | extern bool umd_use_noc1; 13 | 14 | namespace tt::umd { 15 | 16 | class WormholeArcTelemetryReader : public ArcTelemetryReader { 17 | public: 18 | WormholeArcTelemetryReader(TTDevice* tt_device); 19 | 20 | protected: 21 | void get_telemetry_address() override; 22 | }; 23 | 24 | } // namespace tt::umd 25 | -------------------------------------------------------------------------------- /device/firmware/blackhole_18_7_firmware_info_provider.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include "umd/device/firmware/blackhole_18_7_firmware_info_provider.hpp" 5 | 6 | #include "umd/device/arch/blackhole_implementation.hpp" 7 | #include "umd/device/tt_device/tt_device.hpp" 8 | 9 | namespace tt::umd { 10 | 11 | Blackhole_18_7_FirmwareInfoProvider::Blackhole_18_7_FirmwareInfoProvider(TTDevice* tt_device) : 12 | FirmwareInfoProvider(tt_device) {} 13 | 14 | uint32_t Blackhole_18_7_FirmwareInfoProvider::get_max_clock_freq() const { return blackhole::AICLK_BUSY_VAL; } 15 | 16 | } // namespace tt::umd 17 | -------------------------------------------------------------------------------- /tests/soc_descs/quasar_simulation_1x1.yaml: -------------------------------------------------------------------------------- 1 | grid: 2 | x_size: 1 3 | y_size: 3 4 | 5 | arc: 6 | [] 7 | 8 | pcie: 9 | [] 10 | 11 | dram: 12 | [[0-0]] 13 | 14 | eth: 15 | [] 16 | 17 | functional_workers: 18 | [0-1] 19 | 20 | harvested_workers: 21 | [] 22 | 23 | router_only: 24 | [0-2] 25 | 26 | worker_l1_size: 27 | 4194304 28 | 29 | dram_bank_size: 30 | 1073741824 31 | 32 | eth_l1_size: 33 | 0 34 | 35 | arch_name: QUASAR 
36 | 37 | features: 38 | unpacker: 39 | version: 1 40 | inline_srca_trans_without_srca_trans_instr: False 41 | math: 42 | dst_size_alignment: 32768 43 | packer: 44 | version: 1 45 | overlay: 46 | version: 1 -------------------------------------------------------------------------------- /tests/api/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(API_TESTS_SRCS 2 | test_chip.cpp 3 | test_cluster_descriptor.cpp 4 | test_cluster.cpp 5 | test_tlb_manager.cpp 6 | test_software_harvesting.cpp 7 | test_sysmem_manager.cpp 8 | test_tt_device.cpp 9 | test_noc.cpp 10 | test_jtag.cpp 11 | test_arc_telemetry.cpp 12 | test_lite_fabric.cpp 13 | ) 14 | 15 | add_executable(api_tests ${API_TESTS_SRCS}) 16 | target_link_libraries(api_tests PRIVATE test_common) 17 | set_target_properties( 18 | api_tests 19 | PROPERTIES 20 | RUNTIME_OUTPUT_DIRECTORY 21 | ${CMAKE_BINARY_DIR}/test/umd/api 22 | OUTPUT_NAME 23 | api_tests 24 | ) 25 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/blackhole_P100.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Blackhole, 3 | } 4 | 5 | chips: { 6 | } 7 | 8 | ethernet_connections: [ 9 | ] 10 | 11 | chips_with_mmio: [ 12 | 0: 0, 13 | ] 14 | 15 | # harvest_mask is the bit indicating which tensix row is harvested. So bit 0 = first tensix row; bit 1 = second tensix row etc... 16 | # on P100 we harvest all ETH cores 17 | harvesting: { 18 | 0: {noc_translation: false, harvest_mask: 0, eth_harvesting_mask: 16383}, 19 | } 20 | 21 | # This value will be null if the boardtype is unknown, should never happen in practice but to be defensive it would be useful to throw an error on this case. 
22 | boardtype: { 23 | 0: p100, 24 | } -------------------------------------------------------------------------------- /device/simulation/simulation_device.fbs: -------------------------------------------------------------------------------- 1 | // Schema for simulation_device 2 | 3 | enum DEVICE_COMMAND : byte { 4 | WRITE = 0, 5 | READ = 1, 6 | ALL_TENSIX_RESET_DEASSERT = 2, 7 | ALL_TENSIX_RESET_ASSERT = 3, 8 | START = 4, 9 | EXIT = 5, 10 | ALL_NEO_DMS_RESET_DEASSERT = 6, 11 | ALL_NEO_DMS_RESET_ASSERT = 7, 12 | NEO_DM_RESET_ASSERT = 8, 13 | NEO_DM_RESET_DEASSERT = 9, 14 | } 15 | 16 | struct tt_vcs_core { 17 | x : uint64; 18 | y : uint64; 19 | } 20 | 21 | table DeviceRequestResponse { 22 | command : DEVICE_COMMAND; 23 | data : [uint32]; 24 | core : tt_vcs_core; 25 | address : uint64; 26 | size : uint32; 27 | } 28 | 29 | root_type DeviceRequestResponse; -------------------------------------------------------------------------------- /tests/simulation/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(SIMULATION_TEST_SRCS test_simulation_device.cpp) 2 | 3 | foreach(TEST ${SIMULATION_TEST_SRCS}) 4 | get_filename_component(TEST_NAME ${TEST} NAME_WE) 5 | add_executable(${TEST_NAME} ${TEST}) 6 | target_link_libraries(${TEST_NAME} PRIVATE test_common) 7 | target_include_directories(${TEST_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/device/simulation) 8 | set_target_properties( 9 | ${TEST_NAME} 10 | PROPERTIES 11 | RUNTIME_OUTPUT_DIRECTORY 12 | ${CMAKE_BINARY_DIR}/test/simulation 13 | ) 14 | 15 | list(APPEND SIM_TESTS ${TEST_NAME}) 16 | endforeach() 17 | 18 | add_custom_target(simulation_tests DEPENDS ${SIM_TESTS}) 19 | -------------------------------------------------------------------------------- /tests/soc_descs/wormhole_b0_one_dram_one_tensix_no_eth.yaml: -------------------------------------------------------------------------------- 1 | grid: 2 | x_size: 2 3 | y_size: 2 4 | 5 | arc: 6 | [ ] 7 | 8 | pcie: 9 | [ ] 10 | 11 
| dram: 12 | [[0-0]] 13 | 14 | eth: 15 | [ ] 16 | 17 | functional_workers: 18 | [ 19 | 1-1, 20 | ] 21 | 22 | harvested_workers: 23 | [] 24 | 25 | router_only: 26 | [ 27 | 1-0, 0-1 28 | ] 29 | 30 | worker_l1_size: 31 | 1499136 32 | 33 | dram_bank_size: 34 | 1073741824 35 | 36 | eth_l1_size: 37 | 262144 38 | 39 | arch_name: WORMHOLE_B0 40 | 41 | features: 42 | unpacker: 43 | version: 1 44 | inline_srca_trans_without_srca_trans_instr: False 45 | math: 46 | dst_size_alignment: 32768 47 | packer: 48 | version: 1 49 | overlay: 50 | version: 1 51 | -------------------------------------------------------------------------------- /tests/microbenchmark/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(UBENCH_SRC 2 | benchmarks/tlb/test_tlb.cpp 3 | benchmarks/pcie_dma/test_pcie_dma.cpp 4 | benchmarks/iommu/test_iommu.cpp 5 | benchmarks/open_cluster/test_open_cluster.cpp 6 | benchmarks/ethernet_io/test_ethernet_io.cpp 7 | common/microbenchmark_utils.cpp 8 | ) 9 | add_executable(ubench ${UBENCH_SRC}) 10 | target_link_libraries( 11 | ubench 12 | PRIVATE 13 | test_common 14 | nanobench 15 | ) 16 | target_include_directories( 17 | ubench 18 | PRIVATE 19 | ${nanobench_SOURCE_DIR}/src/include 20 | . 21 | ) 22 | set_target_properties( 23 | ubench 24 | PROPERTIES 25 | RUNTIME_OUTPUT_DIRECTORY 26 | ${CMAKE_BINARY_DIR}/test/umd/ubenchmarks 27 | ) 28 | -------------------------------------------------------------------------------- /tools/common.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include 6 | #include 7 | 8 | inline std::vector extract_int_vector(const cxxopts::OptionValue& cxxoption) { 9 | std::vector int_vector; 10 | for (std::string item : cxxoption.as>()) { 11 | int_vector.push_back(std::stoi(item)); 12 | } 13 | return int_vector; 14 | } 15 | 16 | inline std::unordered_set extract_int_set(const cxxopts::OptionValue& cxxoption) { 17 | std::unordered_set int_set; 18 | for (std::string item : cxxoption.as>()) { 19 | int_set.insert(std::stoi(item)); 20 | } 21 | return int_set; 22 | } 23 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # Run Pre-commit Hooks 2 | name: Run Pre-commit Hooks 3 | 4 | on: 5 | workflow_dispatch: 6 | pull_request: 7 | push: 8 | branches: ["main"] 9 | 10 | jobs: 11 | pre-commit: 12 | name: Run Pre-commit Hooks 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout code 16 | uses: actions/checkout@v4 17 | with: 18 | fetch-depth: 0 # Fetch all history so 'origin/main' is available 19 | 20 | - name: Set up Python 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: 3.11 24 | 25 | - name: Run Pre-commit and Fix Issues 26 | uses: pre-commit/action@v3.0.1 27 | with: 28 | extra_args: "--from-ref origin/main --to-ref HEAD" 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ### Issue 2 | (Link to Github issue(s)) 3 | 4 | ### Description 5 | (Add freeform description.) 6 | 7 | ### List of the changes 8 | (Itemized list of all the changes.) 9 | 10 | ### Testing 11 | (Comment on CI testing or Manual testing touching this change.) 
12 | 13 | ### API Changes 14 | (When making API changes, don't merge this PR until tt_metal and tt_debuda PRs are approved.) 15 | (Then merge this PR, change the client PRs to point to UMD main, and then merge them.) 16 | (Remove this line if untrue) There are no API changes in this PR. 17 | (Remove following lines if untrue) This PR has API changes: 18 | - [ ] (If breaking change) tt_metal approved PR pointing to this branch: link 19 | - [ ] (If breaking change) tt_debuda approved PR pointing to this branch: link 20 | -------------------------------------------------------------------------------- /tests/soc_descs/blackhole_simulation_1x2.yaml: -------------------------------------------------------------------------------- 1 | grid: 2 | x_size: 2 3 | y_size: 2 4 | 5 | arc: 6 | [] 7 | 8 | pcie: 9 | [] 10 | 11 | dram: 12 | [[1-0]] 13 | 14 | eth: 15 | [] 16 | 17 | functional_workers: 18 | [0-1, 1-1] 19 | 20 | harvested_workers: 21 | [] 22 | 23 | router_only: 24 | [0-0] 25 | 26 | noc0_x_to_noc1_x: 27 | [ 28 | 1, 0 29 | ] 30 | 31 | noc0_y_to_noc1_y: 32 | [ 33 | 1, 0 34 | ] 35 | 36 | worker_l1_size: 37 | 1572864 38 | 39 | dram_bank_size: 40 | 1073741824 41 | 42 | eth_l1_size: 43 | 0 44 | 45 | arch_name: BLACKHOLE 46 | 47 | features: 48 | unpacker: 49 | version: 1 50 | inline_srca_trans_without_srca_trans_instr: False 51 | math: 52 | dst_size_alignment: 32768 53 | packer: 54 | version: 1 55 | overlay: 56 | version: 1 57 | -------------------------------------------------------------------------------- /device/api/umd/device/firmware/wormhole_18_7_firmware_info_provider.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include "umd/device/firmware/firmware_info_provider.hpp" 12 | 13 | namespace tt::umd { 14 | 15 | /* This class captures Wormhole firmware up to version 18.7.0. 16 | * Firmware releases with this and older versions don't have max AICLK inside 17 | * new telemetry for Wormhole so that has to be read from SM bus telemetry. 18 | */ 19 | class Wormhole_18_7_FirmwareInfoProvider : public FirmwareInfoProvider { 20 | public: 21 | Wormhole_18_7_FirmwareInfoProvider(TTDevice* tt_device); 22 | 23 | uint32_t get_max_clock_freq() const override; 24 | }; 25 | 26 | } // namespace tt::umd 27 | -------------------------------------------------------------------------------- /device/api/umd/device/simulation/simulation_host.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #pragma once 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "umd/device/types/xy_pair.hpp" 11 | 12 | typedef struct nng_socket_s nng_socket; 13 | typedef struct nng_listener_s nng_listener; 14 | 15 | namespace tt::umd { 16 | 17 | class SimulationHost { 18 | public: 19 | SimulationHost(); 20 | ~SimulationHost(); 21 | 22 | void init(); 23 | void start_host(); 24 | void send_to_device(uint8_t *buf, size_t buf_size); 25 | size_t recv_from_device(void **data_ptr); 26 | 27 | private: 28 | std::unique_ptr host_socket; 29 | std::unique_ptr host_listener; 30 | }; 31 | 32 | } // namespace tt::umd 33 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N300.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Wormhole, 3 | 1: Wormhole, 4 | } 5 | 6 | chips: { 7 | 0: [0,0,0,0], 8 | 1: [1,0,0,0], 9 | } 10 | 11 | ethernet_connections: [ 12 | 
[{chip: 0, chan: 8}, {chip: 1, chan: 0}], 13 | [{chip: 0, chan: 9}, {chip: 1, chan: 1}], 14 | ] 15 | 16 | chips_with_mmio: [ 17 | 0: 0, 18 | ] 19 | 20 | # harvest_mask is a bitmask indicating which tensix rows are harvested: bit 0 = first tensix row, bit 1 = second tensix row, etc. 21 | harvesting: { 22 | 0: {noc_translation: true, harvest_mask: 65}, 23 | 1: {noc_translation: true, harvest_mask: 5}, 24 | } 25 | 26 | # This value will be null if the boardtype is unknown. That should never happen in practice, but to be defensive it would be useful to throw an error in this case. 27 | boardtype: { 28 | 0: n300, 29 | 1: n300, 30 | } -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/BlankSpruce/gersemi 3 | rev: 0.16.2 4 | hooks: 5 | - id: gersemi 6 | - repo: https://github.com/pre-commit/mirrors-clang-format 7 | rev: v17.0.6 8 | hooks: 9 | - id: clang-format 10 | - repo: https://github.com/adrienverge/yamllint 11 | rev: v1.35.1 12 | hooks: 13 | - id: yamllint 14 | exclude: ^(?!\.github/).* # Only include files in .github/ 15 | - repo: https://github.com/espressif/check-copyright/ 16 | rev: v1.0.3 17 | hooks: 18 | - id: check-copyright 19 | args: ['--config', '.pre-commit-hooks/check-copyright-config.yaml', "--ignore", ""] 20 | - repo: local 21 | hooks: 22 | - id: check-cpp-comment-periods 23 | name: Check C++ comments end with period 24 | entry: .pre-commit-hooks/check_cpp_comments.py 25 | language: script 26 | types: [c++] 27 | -------------------------------------------------------------------------------- /nanobind/py_api_module.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include 7 | 8 | namespace nb = nanobind; 9 | 10 | // Forward declarations for binding functions from each module. 11 | void bind_basic_types(nb::module_ &m); 12 | void bind_cluster(nb::module_ &m); 13 | void bind_tt_device(nb::module_ &m); 14 | void bind_telemetry(nb::module_ &m); 15 | void bind_topology_discovery(nb::module_ &m); 16 | void bind_warm_reset(nb::module_ &m); 17 | void bind_soc_descriptor(nb::module_ &m); 18 | 19 | // Main module entry point. 20 | NB_MODULE(tt_umd, m) { 21 | bind_basic_types(m); 22 | bind_cluster(m); 23 | bind_tt_device(m); 24 | bind_telemetry(m); 25 | bind_topology_discovery(m); 26 | bind_warm_reset(m); 27 | bind_soc_descriptor(m); 28 | } 29 | -------------------------------------------------------------------------------- /device/api/umd/device/utils/common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "umd/device/types/xy_pair.hpp" 15 | 16 | static inline std::vector flatten_vector(const std::vector>& vector_of_vectors) { 17 | std::vector flat_vector; 18 | for (const auto& single_vector : vector_of_vectors) { 19 | flat_vector.insert(flat_vector.end(), single_vector.begin(), single_vector.end()); 20 | } 21 | return flat_vector; 22 | } 23 | 24 | static inline std::string to_lower(const std::string& str) { 25 | std::string res = str; 26 | std::transform(res.begin(), res.end(), res.begin(), ::tolower); 27 | return res; 28 | } 29 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N300_routing_info.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Wormhole, 3 | 1: Wormhole, 4 | } 5 | 6 | chips: { 7 | 0: [0,0,0,0], 8 | 1: [1,0,0,0], 9 | } 10 | 11 | ethernet_connections: [ 12 | [{chip: 0, chan: 8}, {chip: 1, chan: 0}, {routing_enabled: true}], 13 | [{chip: 0, chan: 9}, {chip: 1, chan: 1}, {routing_enabled: true}], 14 | ] 15 | 16 | chips_with_mmio: [ 17 | 0: 0, 18 | ] 19 | 20 | # harvest_mask is the bit indicating which tensix row is harvested. So bit 0 = first tensix row; bit 1 = second tensix row etc... 21 | harvesting: { 22 | 0: {noc_translation: true, harvest_mask: 65}, 23 | 1: {noc_translation: true, harvest_mask: 5}, 24 | } 25 | 26 | # This value will be null if the boardtype is unknown, should never happen in practice but to be defensive it would be useful to throw an error on this case. 
27 | boardtype: { 28 | 0: n300, 29 | 1: n300, 30 | } -------------------------------------------------------------------------------- /device/api/umd/device/arc/smbus_arc_telemetry_reader.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "umd/device/arc/arc_telemetry_reader.hpp" 12 | #include "umd/device/tt_device/tt_device.hpp" 13 | #include "umd/device/types/telemetry.hpp" 14 | #include "umd/device/types/xy_pair.hpp" 15 | 16 | namespace tt::umd { 17 | 18 | class SmBusArcTelemetryReader : public ArcTelemetryReader { 19 | public: 20 | SmBusArcTelemetryReader(TTDevice* tt_device); 21 | 22 | uint32_t read_entry(const uint8_t telemetry_tag) override; 23 | 24 | bool is_entry_available(const uint8_t telemetry_tag) override; 25 | 26 | protected: 27 | void get_telemetry_address() override; 28 | 29 | private: 30 | uint64_t telemetry_base_noc_addr; 31 | }; 32 | 33 | } // namespace tt::umd 34 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N300_with_bus_id.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Wormhole, 3 | 1: Wormhole, 4 | } 5 | 6 | chips: { 7 | 0: [0,0,0,0], 8 | 1: [1,0,0,0], 9 | } 10 | 11 | ethernet_connections: [ 12 | [{chip: 0, chan: 8}, {chip: 1, chan: 0}], 13 | [{chip: 0, chan: 9}, {chip: 1, chan: 1}], 14 | ] 15 | 16 | chips_with_mmio: [ 17 | 0: 0, 18 | ] 19 | 20 | # harvest_mask is the bit indicating which tensix row is harvested. So bit 0 = first tensix row; bit 1 = second tensix row etc... 
21 | harvesting: { 22 | 0: {noc_translation: true, harvest_mask: 65}, 23 | 1: {noc_translation: true, harvest_mask: 5}, 24 | } 25 | 26 | # This value will be null if the boardtype is unknown. That should never happen in practice, but to be defensive it would be useful to throw an error in this case. 27 | boardtype: { 28 | 0: n300, 29 | 1: n300, 30 | } 31 | 32 | chip_to_bus_id: { 33 | 0: "0x1a", 34 | 1: "0x1b", 35 | } 36 | -------------------------------------------------------------------------------- /device/api/umd/device/firmware/blackhole_18_7_firmware_info_provider.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include "umd/device/firmware/firmware_info_provider.hpp" 12 | 13 | namespace tt::umd { 14 | 15 | /* This class captures Blackhole firmware up to version 18.7.0. 16 | * In this firmware release neither ASIC ID information nor the maximum 17 | * possible AICLK of the device was available, so these functions return 18 | * placeholder values in this class. 
19 | * Release: https://github.com/tenstorrent/tt-firmware/releases/tag/v18.8.0 20 | */ 21 | class Blackhole_18_7_FirmwareInfoProvider : public FirmwareInfoProvider { 22 | public: 23 | Blackhole_18_7_FirmwareInfoProvider(TTDevice* tt_device); 24 | 25 | uint32_t get_max_clock_freq() const override; 26 | }; 27 | 28 | } // namespace tt::umd 29 | -------------------------------------------------------------------------------- /cmake/CPM.cmake: -------------------------------------------------------------------------------- 1 | # SPDX-License-Identifier: MIT 2 | # 3 | # SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors 4 | 5 | set(CPM_DOWNLOAD_VERSION 0.40.2) 6 | set(CPM_HASH_SUM "c8cdc32c03816538ce22781ed72964dc864b2a34a310d3b7104812a5ca2d835d") 7 | 8 | # Always Require the CMake option, but provide default 9 | set(CPM_SOURCE_CACHE "${CMAKE_SOURCE_DIR}/.cpmcache" CACHE STRING "Path to CPM source cache") 10 | 11 | set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") 12 | 13 | # Expand relative path. This is important if the provided path contains a tilde (~) 14 | get_filename_component(CPM_DOWNLOAD_LOCATION ${CPM_DOWNLOAD_LOCATION} ABSOLUTE) 15 | 16 | file( 17 | DOWNLOAD 18 | https://github.com/cpm-cmake/CPM.cmake/releases/download/v${CPM_DOWNLOAD_VERSION}/CPM.cmake 19 | ${CPM_DOWNLOAD_LOCATION} 20 | EXPECTED_HASH SHA256=${CPM_HASH_SUM} 21 | ) 22 | 23 | include(${CPM_DOWNLOAD_LOCATION}) 24 | -------------------------------------------------------------------------------- /.clangd: -------------------------------------------------------------------------------- 1 | Diagnostics: 2 | ClangTidy: 3 | # 'Add:' is the correct key. 4 | # To replace all default checks (which is what your 'Checks:' did), 5 | # we first add '-*' to disable everything, then add the check you want. 6 | Add: ['readability-identifier-naming'] 7 | 8 | # If those extra checks need specific options, define them here. 
9 | # Note: This merges with options from .clang-tidy. 10 | CheckOptions: 11 | readability-identifier-naming.ClassCase: CamelCase 12 | # readability-identifier-naming.ConstantCase: UPPER_CASE 13 | readability-identifier-naming.FunctionCase: lower_case 14 | readability-identifier-naming.ParameterCase: lower_case 15 | readability-identifier-naming.ConstantParameterCase: lower_case 16 | readability-identifier-naming.GlobalConstantCase: UPPER_CASE 17 | # readability-identifier-naming.LocalConstantCase: UPPER_CASE 18 | readability-identifier-naming.ClassStaticConstantCase: UPPER_CASE -------------------------------------------------------------------------------- /device/arch/architecture_implementation.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include "umd/device/arch/architecture_implementation.hpp" 6 | 7 | #include "umd/device/arch/blackhole_implementation.hpp" 8 | #include "umd/device/arch/grendel_implementation.hpp" 9 | #include "umd/device/arch/wormhole_implementation.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | std::unique_ptr architecture_implementation::create(tt::ARCH architecture) { 14 | switch (architecture) { 15 | case tt::ARCH::QUASAR: 16 | return std::make_unique(); 17 | case tt::ARCH::BLACKHOLE: 18 | return std::make_unique(); 19 | case tt::ARCH::WORMHOLE_B0: 20 | return std::make_unique(); 21 | default: 22 | return nullptr; 23 | } 24 | } 25 | 26 | } // namespace tt::umd 27 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N300_board_info.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: wormhole_b0 3 | 1: wormhole_b0 4 | chips: 5 | 0: 6 | - 0 7 | - 0 8 | - 0 9 | - 0 10 | 1: 11 | - 1 12 | - 0 13 | - 0 14 | - 0 15 | ethernet_connections: 16 | - 17 | - chip: 0 18 | chan: 
9 19 | - chip: 1 20 | chan: 1 21 | - 22 | - chip: 0 23 | chan: 8 24 | - chip: 1 25 | chan: 0 26 | chips_with_mmio: 27 | - 0: 1 28 | harvesting: 29 | 0: 30 | noc_translation: true 31 | harvest_mask: 33 32 | dram_harvesting_mask: 0 33 | eth_harvesting_mask: 0 34 | pcie_harvesting_mask: 0 35 | 1: 36 | noc_translation: true 37 | harvest_mask: 65 38 | dram_harvesting_mask: 0 39 | eth_harvesting_mask: 0 40 | pcie_harvesting_mask: 0 41 | chip_to_boardtype: 42 | 0: n300 43 | 1: n300 44 | boards: 45 | - 46 | - board_id: 72058994491072686 47 | - board_type: n300 48 | - chips: 49 | - 0 50 | - 1 -------------------------------------------------------------------------------- /device/firmware/wormhole_18_7_firmware_info_provider.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include "umd/device/firmware/wormhole_18_7_firmware_info_provider.hpp" 5 | 6 | #include "umd/device/arc/smbus_arc_telemetry_reader.hpp" 7 | #include "umd/device/tt_device/tt_device.hpp" 8 | #include "umd/device/types/wormhole_dram.hpp" 9 | #include "umd/device/types/wormhole_telemetry.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | Wormhole_18_7_FirmwareInfoProvider::Wormhole_18_7_FirmwareInfoProvider(TTDevice* tt_device) : 14 | FirmwareInfoProvider(tt_device) {} 15 | 16 | uint32_t Wormhole_18_7_FirmwareInfoProvider::get_max_clock_freq() const { 17 | const std::unique_ptr sm_bus_telemetry = 18 | std::make_unique(tt_device); 19 | uint32_t aiclk_telemetry = sm_bus_telemetry->read_entry(wormhole::AICLK); 20 | return (aiclk_telemetry >> 16) & 0xFFFF; 21 | } 22 | 23 | } // namespace tt::umd 24 | -------------------------------------------------------------------------------- /nanobind/py_api_warm_reset.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include 7 | #include 8 | 9 | #include "umd/device/warm_reset.hpp" 10 | 11 | namespace nb = nanobind; 12 | 13 | using namespace tt::umd; 14 | 15 | void bind_warm_reset(nb::module_ &m) { 16 | // WarmReset class binding. 17 | nb::class_(m, "WarmReset") 18 | .def_static( 19 | "warm_reset", 20 | &WarmReset::warm_reset, 21 | nb::arg("pci_device_ids"), 22 | nb::arg("reset_m3") = false, 23 | "Perform a warm reset of the device. reset_m3 flag sends specific ARC message to do a M3 board level " 24 | "reset.") 25 | .def_static( 26 | "ubb_warm_reset", 27 | &WarmReset::ubb_warm_reset, 28 | nb::arg("timeout_s") = 100, 29 | "Perform a UBB warm reset with specified timeout in seconds."); 30 | } 31 | -------------------------------------------------------------------------------- /.github/manylinux.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM quay.io/pypa/manylinux_2_34_x86_64 2 | # (AlmaLinux 9 based) 3 | # Built wheels are also expected to be compatible with other distros using glibc 2.34 or later, including: 4 | # Debian 12+ 5 | # Ubuntu 21.10+ 6 | # Fedora 35+ 7 | # CentOS/RHEL 9+ 8 | 9 | # Hack for CIv2 - fix mirror URLs 10 | RUN FILES=(/etc/yum.repos.d/*.repo) && \ 11 | sed --in-place -e 's/^mirrorlist=/# mirrorlist=/g' -e 's/^# baseurl=/baseurl=/' "${FILES[@]}" || true 12 | 13 | # Install system dependencies matching docker_install_common.sh but using DNF 14 | # This includes all the dependencies needed for building tt-umd 15 | RUN dnf install -y \ 16 | ninja-build \ 17 | hwloc-devel \ 18 | vim-common \ 19 | && dnf clean all 20 | 21 | # Set up environment variables for building 22 | ENV CC=/opt/rh/gcc-toolset-14/root/bin/gcc 23 | ENV CMAKE_C_COMPILER=/opt/rh/gcc-toolset-14/root/bin/gcc 24 | ENV CXX=/opt/rh/gcc-toolset-14/root/bin/g++ 25 | ENV CMAKE_CXX_COMPILER=/opt/rh/gcc-toolset-14/root/bin/g++ 26 | 
-------------------------------------------------------------------------------- /device/arc/blackhole_arc_messenger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/arc/blackhole_arc_messenger.hpp" 7 | 8 | #include "umd/device/tt_device/tt_device.hpp" 9 | 10 | namespace tt::umd { 11 | 12 | BlackholeArcMessenger::BlackholeArcMessenger(TTDevice* tt_device) : ArcMessenger(tt_device) { 13 | blackhole_arc_msg_queue = BlackholeArcMessageQueue::get_blackhole_arc_message_queue( 14 | tt_device, BlackholeArcMessageQueueIndex::APPLICATION); 15 | } 16 | 17 | uint32_t BlackholeArcMessenger::send_message( 18 | const uint32_t msg_code, 19 | std::vector& return_values, 20 | const std::vector& args, 21 | const std::chrono::milliseconds timeout_ms) { 22 | auto lock = lock_manager.acquire_mutex(MutexType::ARC_MSG, tt_device->get_pci_device()->get_device_num()); 23 | return blackhole_arc_msg_queue->send_message((ArcMessageType)msg_code, args, timeout_ms); 24 | } 25 | 26 | } // namespace tt::umd 27 | -------------------------------------------------------------------------------- /cmake/stubs/cpm-stubs.cmake: -------------------------------------------------------------------------------- 1 | function(CPMAddPackage) 2 | list(LENGTH ARGN argnLength) 3 | if(argnLength EQUAL 1) 4 | cpm_parse_add_package_single_arg("${ARGN}" ARGN) 5 | 6 | # The shorthand syntax implies EXCLUDE_FROM_ALL and SYSTEM 7 | set(ARGN "${ARGN};EXCLUDE_FROM_ALL;YES;SYSTEM;YES;") 8 | endif() 9 | 10 | set(oneValueArgs 11 | NAME 12 | FORCE 13 | VERSION 14 | GIT_TAG 15 | DOWNLOAD_ONLY 16 | GITHUB_REPOSITORY 17 | GITLAB_REPOSITORY 18 | BITBUCKET_REPOSITORY 19 | GIT_REPOSITORY 20 | SOURCE_DIR 21 | FIND_PACKAGE_ARGUMENTS 22 | NO_CACHE 23 | SYSTEM 24 | GIT_SHALLOW 25 | EXCLUDE_FROM_ALL 26 | SOURCE_SUBDIR 27 | CUSTOM_CACHE_KEY 28 | ) 29 | 30 | 
set(multiValueArgs 31 | URL 32 | OPTIONS 33 | DOWNLOAD_COMMAND 34 | PATCHES 35 | ) 36 | 37 | cmake_parse_arguments(CPM_ARGS "" "${oneValueArgs}" "${multiValueArgs}" "${ARGN}") 38 | endfunction() 39 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # UMD Examples 2 | 3 | This directory contains examples demonstrating how to use various software components in the Tenstorrent Unified Memory Driver (UMD). 4 | 5 | ## Available Examples 6 | 7 | ### `tt_device_example/` 8 | Demonstrates TTDevice usage, showcasing basic device operations and the difference between functionality available before and after calling `init_tt_device()`. 9 | 10 | ## Building Examples 11 | 12 | Examples are not built by default. To build them: 13 | 14 | ```bash 15 | # Configure with examples enabled 16 | cmake -B build -DTT_UMD_BUILD_EXAMPLES=ON 17 | 18 | # Build 19 | cmake --build build 20 | ``` 21 | 22 | Each example directory contains its own README with specific usage instructions. 23 | 24 | ## Adding New Examples 25 | 26 | When adding new examples: 27 | 1. Create a new subdirectory with a descriptive name 28 | 2. Include a README.md explaining the example's purpose and usage 29 | 3. Add your example to the main `CMakeLists.txt` in this directory 30 | 4. Update this README to list the new example 31 | -------------------------------------------------------------------------------- /tests/blackhole/test_chip_info_bh.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include 5 | 6 | #include "umd/device/tt_device/tt_device.hpp" 7 | 8 | using namespace tt; 9 | using namespace tt::umd; 10 | 11 | TEST(BlackholeChipInfo, BasicChipInfo) { 12 | std::vector pci_device_ids = PCIDevice::enumerate_devices(); 13 | 14 | for (int pci_device_id : pci_device_ids) { 15 | std::unique_ptr tt_device = TTDevice::create(pci_device_id); 16 | tt_device->init_tt_device(); 17 | 18 | const ChipInfo chip_info = tt_device->get_chip_info(); 19 | 20 | EXPECT_TRUE( 21 | chip_info.board_type == BoardType::P100 || chip_info.board_type == BoardType::P150 || 22 | chip_info.board_type == BoardType::P300); 23 | 24 | // TODO: uncomment this when we can read asic location properly from telemetry. 25 | // EXPECT_TRUE(chip_info.asic_location == 0 || chip_info.asic_location == 1);. 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/blackhole_P300_first_mmio.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: blackhole 3 | 1: blackhole 4 | chips: 5 | {} 6 | chip_unique_ids: 7 | 1: 9493540888584 8 | 0: 9493540888585 9 | ethernet_connections: 10 | - 11 | - chip: 0 12 | chan: 8 13 | - chip: 1 14 | chan: 3 15 | - 16 | - chip: 0 17 | chan: 9 18 | - chip: 1 19 | chan: 2 20 | ethernet_connections_to_remote_devices: 21 | [] 22 | chips_with_mmio: 23 | - 0: 0 24 | harvesting: 25 | 0: 26 | noc_translation: true 27 | harvest_mask: 272 28 | dram_harvesting_mask: 0 29 | eth_harvesting_mask: 288 30 | pcie_harvesting_mask: 1 31 | l2cpu_harvesting_mask: 0 32 | 1: 33 | noc_translation: true 34 | harvest_mask: 8320 35 | dram_harvesting_mask: 0 36 | eth_harvesting_mask: 288 37 | pcie_harvesting_mask: 2 38 | l2cpu_harvesting_mask: 0 39 | chip_to_boardtype: 40 | 0: p300 41 | 1: p300 42 | boards: 43 | - 44 | - board_id: 4746770444292 45 | - board_type: p300 46 | - chips: 47 | - 0 48 | - 1 49 | 
asic_locations: 50 | 0: 1 51 | 1: 0 -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/blackhole_P300_second_mmio.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: blackhole 3 | 1: blackhole 4 | chips: 5 | {} 6 | chip_unique_ids: 7 | 1: 9493540888585 8 | 0: 9493540888584 9 | ethernet_connections: 10 | - 11 | - chip: 0 12 | chan: 2 13 | - chip: 1 14 | chan: 9 15 | - 16 | - chip: 0 17 | chan: 3 18 | - chip: 1 19 | chan: 8 20 | ethernet_connections_to_remote_devices: 21 | [] 22 | chips_with_mmio: 23 | - 0: 1 24 | harvesting: 25 | 0: 26 | noc_translation: true 27 | harvest_mask: 8320 28 | dram_harvesting_mask: 0 29 | eth_harvesting_mask: 288 30 | pcie_harvesting_mask: 2 31 | l2cpu_harvesting_mask: 0 32 | 1: 33 | noc_translation: true 34 | harvest_mask: 272 35 | dram_harvesting_mask: 0 36 | eth_harvesting_mask: 288 37 | pcie_harvesting_mask: 1 38 | l2cpu_harvesting_mask: 0 39 | chip_to_boardtype: 40 | 0: p300 41 | 1: p300 42 | boards: 43 | - 44 | - board_id: 4746770444292 45 | - board_type: p300 46 | - chips: 47 | - 1 48 | - 0 49 | asic_locations: 50 | 0: 0 51 | 1: 1 -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/blackhole_P300_both_mmio.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: blackhole 3 | 1: blackhole 4 | chips: 5 | {} 6 | chip_unique_ids: 7 | 1: 9493540888585 8 | 0: 9493540888584 9 | ethernet_connections: 10 | - 11 | - chip: 0 12 | chan: 2 13 | - chip: 1 14 | chan: 9 15 | - 16 | - chip: 0 17 | chan: 3 18 | - chip: 1 19 | chan: 8 20 | ethernet_connections_to_remote_devices: 21 | [] 22 | chips_with_mmio: 23 | - 0: 1 24 | - 1: 0 25 | harvesting: 26 | 0: 27 | noc_translation: true 28 | harvest_mask: 8320 29 | dram_harvesting_mask: 0 30 | eth_harvesting_mask: 288 31 | pcie_harvesting_mask: 2 32 | l2cpu_harvesting_mask: 0 33 
| 1: 34 | noc_translation: true 35 | harvest_mask: 272 36 | dram_harvesting_mask: 0 37 | eth_harvesting_mask: 288 38 | pcie_harvesting_mask: 1 39 | l2cpu_harvesting_mask: 0 40 | chip_to_boardtype: 41 | 0: p300 42 | 1: p300 43 | boards: 44 | - 45 | - board_id: 4746770444292 46 | - board_type: p300 47 | - chips: 48 | - 1 49 | - 0 50 | asic_locations: 51 | 0: 0 52 | 1: 1 -------------------------------------------------------------------------------- /common/disjoint_set.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | // A standard disjoint set (union-find) data structure to track connected components. get_set follows parent links and returns the representative item (of type T) of the set containing the given item; merge makes the smaller of the two representatives the parent of both. 12 | template 13 | class DisjointSet { 14 | public: 15 | void add_item(T item) { parent[item] = item; } 16 | 17 | T get_set(T item) { 18 | while (parent[item] != item) { 19 | item = parent[item]; 20 | } 21 | return item; 22 | } 23 | 24 | void merge(T item1, T item2) { 25 | T set1 = get_set(item1); 26 | T set2 = get_set(item2); 27 | parent[set1] = parent[set2] = std::min(set1, set2); 28 | } 29 | 30 | bool are_same_set(T item1, T item2) { return get_set(item1) == get_set(item2); } 31 | 32 | int get_num_sets() { 33 | std::unordered_set sets; 34 | for (auto [item, _] : parent) { 35 | sets.insert(get_set(item)); 36 | } 37 | return sets.size(); 38 | } 39 | 40 | private: 41 | std::unordered_map parent; 42 | }; 43 | -------------------------------------------------------------------------------- /tests/microbenchmark/benchmarks/open_cluster/test_open_cluster.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include 7 | #include 8 | 9 | #include "common/microbenchmark_utils.hpp" 10 | #include "umd/device/cluster.hpp" 11 | 12 | using namespace tt::umd; 13 | 14 | /** 15 | * Measure the time it takes to open/construct a Cluster object with default ClusterOptions. 16 | */ 17 | TEST(MicrobenchmarkOpenCluster, ClusterConstructor) { 18 | const std::vector headers = {"Opening cluster of devices (ms)"}; 19 | 20 | auto now = std::chrono::steady_clock::now(); 21 | std::unique_ptr cluster = std::make_unique(); 22 | auto end = std::chrono::steady_clock::now(); 23 | 24 | std::vector> rows; 25 | std::vector row; 26 | row.push_back(test::utils::convert_double_to_string( 27 | std::chrono::duration_cast(end - now).count() / (double)1e6)); 28 | rows.push_back(row); 29 | 30 | test::utils::print_markdown_table_format(headers, rows); 31 | } 32 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_2xN300_unconnected.yaml: -------------------------------------------------------------------------------- 1 | arch: { 2 | 0: Wormhole, 3 | 1: Wormhole, 4 | 2: Wormhole, 5 | 3: Wormhole, 6 | } 7 | 8 | chips: { 9 | 0: [0,0,0,0], 10 | 1: [0,0,0,0], 11 | 2: [1,0,0,0], 12 | 3: [1,0,0,0], 13 | } 14 | 15 | ethernet_connections: [ 16 | [{chip: 0, chan: 8}, {chip: 2, chan: 0}], 17 | [{chip: 0, chan: 9}, {chip: 2, chan: 1}], 18 | [{chip: 1, chan: 8}, {chip: 3, chan: 0}], 19 | [{chip: 1, chan: 9}, {chip: 3, chan: 1}], 20 | ] 21 | 22 | chips_with_mmio: [ 23 | 0: 0, 24 | 1: 1, 25 | ] 26 | 27 | # harvest_mask is the bit indicating which tensix row is harvested. So bit 0 = first tensix row; bit 1 = second tensix row etc... 
28 | harvesting: { 29 | 0: {noc_translation: true, harvest_mask: 65}, 30 | 1: {noc_translation: true, harvest_mask: 3}, 31 | 2: {noc_translation: true, harvest_mask: 5}, 32 | 3: {noc_translation: true, harvest_mask: 33}, 33 | } 34 | 35 | # This value will be null if the boardtype is unknown, should never happen in practice but to be defensive it would be useful to throw an error on this case. 36 | boardtype: { 37 | 0: n300, 38 | 1: n300, 39 | 2: n300, 40 | 3: n300, 41 | } -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | ColumnLimit: 120 3 | IndentWidth: 4 4 | 5 | # This will make access modifiers (public/protected/private) sit on the same indentation as `class` keyword 6 | AccessModifierOffset: -4 7 | 8 | # Arguments, parameters and construction initializer are broken as following: 9 | # - Try to fit everything into single line (controlled by ColumnLimit). 10 | # - If it doesn't fit, break immediately after open bracket (in case of arguments and parameters) 11 | # or after colon in case of constructor initializers. 12 | # - Try to fit everything else into the second line. 13 | # - If it doesn't fit in second line, then each argument, parameter or initializer will sit in its own line. 14 | AlignAfterOpenBracket: AlwaysBreak 15 | BinPackArguments: false 16 | BinPackParameters: false 17 | 18 | # When constructor initializers exist in the constructor definition, leave the colon as last thing on the original 19 | # line instead of putting it on the next line. 20 | BreakConstructorInitializers: AfterColon 21 | 22 | # Disallow single statements after if/else/for/while/do without curly braces. 23 | InsertBraces: true 24 | 25 | # Separate definition blocks, including classes, structs, enums, and functions. 
26 | SeparateDefinitionBlocks: Always 27 | -------------------------------------------------------------------------------- /device/arc/blackhole_arc_telemetry_reader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/arc/blackhole_arc_telemetry_reader.hpp" 7 | 8 | #include 9 | 10 | #include "umd/device/arch/blackhole_implementation.hpp" 11 | #include "umd/device/types/telemetry.hpp" 12 | 13 | extern bool umd_use_noc1; 14 | 15 | namespace tt::umd { 16 | 17 | BlackholeArcTelemetryReader::BlackholeArcTelemetryReader(TTDevice* tt_device) : ArcTelemetryReader(tt_device) { 18 | arc_core = blackhole::get_arc_core(tt_device->get_noc_translation_enabled(), umd_use_noc1); 19 | get_telemetry_address(); 20 | initialize_telemetry(); 21 | } 22 | 23 | void BlackholeArcTelemetryReader::get_telemetry_address() { 24 | uint32_t telemetry_table_addr_u32; 25 | tt_device->read_from_arc_apb(&telemetry_table_addr_u32, blackhole::SCRATCH_RAM_13, sizeof(uint32_t)); 26 | telemetry_table_addr = telemetry_table_addr_u32; 27 | uint32_t telemetry_values_addr_u32; 28 | tt_device->read_from_arc_apb(&telemetry_values_addr_u32, blackhole::SCRATCH_RAM_12, sizeof(uint32_t)); 29 | telemetry_values_addr = telemetry_values_addr_u32; 30 | } 31 | 32 | } // namespace tt::umd 33 | -------------------------------------------------------------------------------- /device/api/umd/device/lite_fabric/lite_fabric_constants.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | // Substitute for 1d_fabric_constants.hpp. 
6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "umd/device/lite_fabric/lite_fabric_header.hpp" 14 | 15 | namespace tt::umd { 16 | 17 | namespace lite_fabric { 18 | 19 | // Only 1 receiver because 1 erisc. 20 | constexpr uint32_t NUM_RECEIVER_CHANNELS = 1; 21 | 22 | // Only 1 sender because no upstream edm. 23 | constexpr uint32_t NUM_SENDER_CHANNELS = 1; 24 | 25 | constexpr std::array SENDER_NUM_BUFFERS_ARRAY = {2}; 26 | 27 | constexpr std::array RECEIVER_NUM_BUFFERS_ARRAY = {2}; 28 | 29 | static_assert(NUM_SENDER_CHANNELS == 1); 30 | 31 | // Alignment for read and write to work on all core types. 32 | constexpr uint32_t GLOBAL_ALIGNMENT = 64; 33 | 34 | // Additional 64B reserved for data alignment. 35 | constexpr uint32_t ALIGNMENT_BUFFER_SIZE = GLOBAL_ALIGNMENT; 36 | 37 | constexpr uint32_t CHANNEL_BUFFER_SIZE = 2048 + ALIGNMENT_BUFFER_SIZE + sizeof(lite_fabric::FabricLiteHeader); 38 | 39 | } // namespace lite_fabric 40 | } // namespace tt::umd 41 | -------------------------------------------------------------------------------- /device/api/umd/device/lite_fabric/lite_fabric_host_utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | 10 | #include "umd/device/lite_fabric/lite_fabric.hpp" 11 | #include "umd/device/types/xy_pair.hpp" 12 | #include "umd/device/utils/timeouts.hpp" 13 | 14 | namespace tt::umd { 15 | 16 | namespace lite_fabric { 17 | 18 | std::vector read_binary_file(const std::string& file_name); 19 | 20 | uint32_t get_eth_channel_mask(Chip* chip, const std::vector& eth_cores); 21 | 22 | uint32_t get_local_init_addr(); 23 | 24 | void set_reset_state(Chip* chip, tt_cxy_pair translated_core, bool assert_reset); 25 | 26 | void set_pc(Chip* chip, tt_cxy_pair translated_core, uint32_t pc_addr, uint32_t pc_val); 27 | 28 | 
void wait_for_state( 29 | Chip* chip, 30 | tt_cxy_pair translated_core, 31 | uint32_t addr, 32 | uint32_t state, 33 | std::chrono::milliseconds timeout_ms = timeout::BH_LITE_FABRIC_STATE_CHANGE_TIMEOUT); 34 | 35 | void launch_lite_fabric(Chip* chip, const std::vector& eth_cores); 36 | 37 | void terminate_lite_fabric(Chip* chip, const std::vector& eth_cores); 38 | 39 | } // namespace lite_fabric 40 | 41 | } // namespace tt::umd 42 | -------------------------------------------------------------------------------- /device/api/umd/device/arc/wormhole_arc_messenger.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include "umd/device/arc/arc_messenger.hpp" 9 | #include "umd/device/utils/timeouts.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | class WormholeArcMessenger : public ArcMessenger { 14 | public: 15 | /** 16 | * Constructor for WormholeArcMessenger. 17 | * 18 | * @param tt_device TTDevice object used to communicate with the ARC of the device. 19 | */ 20 | WormholeArcMessenger(TTDevice* tt_device); 21 | 22 | /** 23 | * Send ARC message. The call of send_message is blocking, timeout is to be implemented. 24 | * 25 | * @param msg_code ARC message type. 26 | * @param return_values Return values from the ARC message. 27 | * @param args Arguments for the message. For Wormhole, only 2 args are allowed, each <= uint16_t max. 28 | * @param timeout_ms Timeout in milliseconds; 0 to wait indefinitely. 
29 | */ 30 | uint32_t send_message( 31 | const uint32_t msg_code, 32 | std::vector& return_values, 33 | const std::vector& args = {}, 34 | const std::chrono::milliseconds timeout_ms = timeout::ARC_MESSAGE_TIMEOUT) override; 35 | }; 36 | 37 | } // namespace tt::umd 38 | -------------------------------------------------------------------------------- /examples/tt_device_example/README.md: -------------------------------------------------------------------------------- 1 | # TTDevice Example 2 | 3 | This example demonstrates TTDevice usage and shows which capabilities are available before and after calling `init_tt_device()`. 4 | 5 | ## Building and Running 6 | 7 | ```bash 8 | # Configure with examples enabled 9 | cmake -B build -DTT_UMD_BUILD_EXAMPLES=ON 10 | 11 | # Build 12 | cmake --build ./build 13 | 14 | # Run 15 | ./build/examples/tt_device_example/tt_device_example 16 | ``` 17 | 18 | ## What it demonstrates 19 | 20 | TTDevice provides two levels of functionality: 21 | 22 | ### Before `init_tt_device()` (Basic Access) 23 | - Device architecture and PCI info 24 | - Register access via BAR operations 25 | - Basic memory operations 26 | 27 | ### After `init_tt_device()` (Full Features) 28 | - All other TTDevice functions become available 29 | - Clock and temperature monitoring 30 | - ARC communication and telemetry 31 | 32 | ## Usage Pattern 33 | 34 | ```cpp 35 | // Create device 36 | std::unique_ptr device = TTDevice::create(device_id); 37 | 38 | // Basic operations work immediately 39 | tt::ARCH arch = device->get_arch(); 40 | uint32_t value = device->bar_read32(address); 41 | 42 | // Initialize for full features 43 | device->init_tt_device(); 44 | 45 | // Advanced operations now available 46 | uint32_t clock = device->get_clock(); 47 | ArcMessenger* messenger = device->get_arc_messenger(); 48 | ``` 49 | -------------------------------------------------------------------------------- /device/api/umd/device/warm_reset.hpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "umd/device/utils/timeouts.hpp" 14 | 15 | namespace tt::umd { 16 | 17 | class WarmReset { 18 | public: 19 | static void warm_reset(std::vector pci_device_ids = {}, bool reset_m3 = false); 20 | 21 | static void ubb_warm_reset(const std::chrono::milliseconds timeout_ms = timeout::UBB_WARM_RESET_TIMEOUT); 22 | 23 | private: 24 | static constexpr auto POST_RESET_WAIT = std::chrono::milliseconds(2'000); 25 | static constexpr auto UBB_POST_RESET_WAIT = std::chrono::milliseconds(30'000); 26 | 27 | static void warm_reset_blackhole_legacy(std::vector pci_device_ids); 28 | 29 | static void warm_reset_wormhole_legacy(std::vector pci_device_ids, bool reset_m3); 30 | 31 | static void warm_reset_arch_agnostic( 32 | std::vector pci_device_ids, 33 | bool reset_m3, 34 | std::chrono::milliseconds reset_m3_timeout = timeout::WARM_RESET_M3_TIMEOUT); 35 | 36 | static void wormhole_ubb_ipmi_reset(int ubb_num, int dev_num, int op_mode, int reset_time); 37 | 38 | static void ubb_wait_for_driver_load(const std::chrono::milliseconds timeout_ms); 39 | }; 40 | 41 | } // namespace tt::umd 42 | -------------------------------------------------------------------------------- /cmake/compilers.cmake: -------------------------------------------------------------------------------- 1 | function(FIND_AND_SET_CLANG17) 2 | find_program(CLANGPP_17 clang++-17) 3 | find_program(CLANG_17 clang-17) 4 | 5 | if(NOT CLANGPP_17 OR NOT CLANG_17) 6 | message(FATAL_ERROR "Clang-17 not found. 
Make sure you have clang-17 and clang++-17 installed and in your PATH") 7 | endif() 8 | 9 | set(CMAKE_CXX_COMPILER "${CLANGPP_17}" PARENT_SCOPE) 10 | set(CMAKE_C_COMPILER "${CLANG_17}" PARENT_SCOPE) 11 | endfunction() 12 | 13 | function(CHECK_COMPILERS) 14 | message(STATUS "Checking compilers") 15 | 16 | if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 17 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++" CACHE STRING "CXX FLAGS for clang") 18 | if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "17.0.0" OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "18.0.0") 19 | message(WARNING "Only Clang-17 is tested right now") 20 | endif() 21 | elseif(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") 22 | if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "12.0.0") 23 | message(WARNING "GCC-12 or higher is suggested") 24 | elseif(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "13.0.0") 25 | message(WARNING "Only GCC-12 is tested right now") 26 | endif() 27 | else() 28 | message(FATAL_ERROR "Unsupported compiler: ${CMAKE_CXX_COMPILER_ID} ! Only Clang and GCC are supported") 29 | endif() 30 | endfunction() 31 | -------------------------------------------------------------------------------- /.github/docker_rhel_install_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Define DNF command with the common flags 4 | DNFC="dnf install -y --setopt=tsflags=nodocs" 5 | 6 | echo "Updating system packages and installing prerequisites..." 7 | dnf update -y 8 | 9 | # Install tools group and core dependencies. 10 | # '@Development Tools' provides the equivalent of 'build-essential' (gcc, g++, make). 11 | $DNFC \ 12 | @Development\ Tools \ 13 | git \ 14 | git-lfs \ 15 | wget \ 16 | dnf-plugins-core \ 17 | rpm-build \ 18 | python3-pip \ 19 | yamllint \ 20 | xxd \ 21 | cpio \ 22 | libnsl2 \ 23 | libnsl2-devel 24 | 25 | ## Fedora ships a recent CMake; no external repo needed. 
26 | 27 | # Install C++ development dependencies (using -devel suffix) 28 | echo "Installing C++ development dependencies..." 29 | $DNFC \ 30 | cmake \ 31 | ninja-build \ 32 | hwloc-devel \ 33 | gtest-devel \ 34 | yaml-cpp-devel \ 35 | boost-devel \ 36 | python3-devel 37 | 38 | # --- Install Clang toolchain (Fedora) --- 39 | echo "Installing Clang toolchain from Fedora repositories..." 40 | $DNFC \ 41 | clang \ 42 | clang-tools-extra \ 43 | llvm-libs \ 44 | libcxx-devel 45 | 46 | 47 | # --- Install Python dependencies --- 48 | echo "Installing Python dependencies..." 49 | python3 -m pip install --no-cache-dir pytest 50 | 51 | echo "Cleanup DNF cache..." 52 | dnf clean all 53 | -------------------------------------------------------------------------------- /tests/pcie/test_pcie_device.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #include "umd/device/pcie/pci_device.hpp" 16 | 17 | using namespace tt::umd; 18 | 19 | TEST(PcieDeviceTest, Numa) { 20 | std::vector nodes; 21 | 22 | for (auto device_id : PCIDevice::enumerate_devices()) { 23 | PCIDevice device(device_id); 24 | nodes.push_back(device.get_numa_node()); 25 | } 26 | 27 | // Acceptable outcomes: 28 | // 1. all of them are -1 (not a NUMA system) 29 | // 2. all of them are >= 0 (NUMA system) 30 | // 3. 
empty vector (no devices enumerated) 31 | 32 | if (!nodes.empty()) { 33 | bool all_negative_one = std::all_of(nodes.begin(), nodes.end(), [](int node) { return node == -1; }); 34 | bool all_non_negative = std::all_of(nodes.begin(), nodes.end(), [](int node) { return node >= 0; }); 35 | 36 | EXPECT_TRUE(all_negative_one || all_non_negative) 37 | << "NUMA nodes should either all be -1 (non-NUMA system) or all be non-negative (NUMA system)" 38 | << " but got: " << fmt::format("{}", fmt::join(nodes, ", ")); 39 | } else { 40 | SUCCEED() << "No PCIe devices were enumerated"; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /device/api/umd/device/firmware/firmware_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include "umd/device/tt_device/tt_device.hpp" 12 | #include "umd/device/types/arch.hpp" 13 | #include "umd/device/types/xy_pair.hpp" 14 | #include "umd/device/utils/semver.hpp" 15 | 16 | namespace tt::umd { 17 | static semver_t fw_version_from_telemetry(const uint32_t telemetry_data); 18 | 19 | semver_t get_firmware_version_util(TTDevice* tt_device); 20 | 21 | std::optional get_expected_eth_firmware_version_from_firmware_bundle( 22 | semver_t fw_bundle_version, tt::ARCH arch); 23 | 24 | semver_t get_eth_fw_version_from_telemetry(const uint32_t telemetry_data, tt::ARCH arch); 25 | 26 | semver_t get_tt_flash_version_from_telemetry(const uint32_t telemetry_data); 27 | 28 | semver_t get_cm_fw_version_from_telemetry(const uint32_t telemetry_data, tt::ARCH arch); 29 | 30 | semver_t get_dm_app_fw_version_from_telemetry(const uint32_t telemetry_data, tt::ARCH arch); 31 | 32 | semver_t get_dm_bl_fw_version_from_telemetry(const uint32_t telemetry_data, tt::ARCH arch); 33 | 34 | semver_t 
get_gddr_fw_version_from_telemetry(const uint32_t telemetry_data, tt::ARCH arch); 35 | 36 | std::optional verify_eth_fw_integrity(TTDevice* tt_device, tt_xy_pair eth_core, semver_t eth_fw_version); 37 | 38 | } // namespace tt::umd 39 | -------------------------------------------------------------------------------- /device/api/umd/device/tt_device/remote_communication_legacy_firmware.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "umd/device/tt_device/remote_communication.hpp" 12 | 13 | namespace tt::umd { 14 | 15 | class SysmemManager; 16 | 17 | class RemoteCommunicationLegacyFirmware : public RemoteCommunication { 18 | public: 19 | RemoteCommunicationLegacyFirmware( 20 | TTDevice* local_tt_device, EthCoord target_chip, SysmemManager* sysmem_manager = nullptr); 21 | 22 | void read_non_mmio( 23 | tt_xy_pair target_core, 24 | void* dest, 25 | uint64_t core_src, 26 | uint32_t size_in_bytes, 27 | const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 28 | 29 | void write_to_non_mmio( 30 | tt_xy_pair target_core, 31 | const void* src, 32 | uint64_t core_dest, 33 | uint32_t size_in_bytes, 34 | bool broadcast = false, 35 | std::vector broadcast_header = {}, 36 | const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 37 | 38 | void wait_for_non_mmio_flush(const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 39 | 40 | private: 41 | EthCoord target_chip; 42 | }; 43 | 44 | } // namespace tt::umd 45 | -------------------------------------------------------------------------------- /device/hugepage.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include "umd/device/types/cluster_descriptor_types.hpp" 13 | 14 | namespace tt::umd { 15 | 16 | // Hugepages must be 1GB in size. 17 | // It's important that this is 64 bits, so that it doesn't overflow when multiplied by 4. 18 | const uint64_t HUGEPAGE_REGION_SIZE = 1ULL << 30; // 1GB 19 | 20 | // Get number of 1GB host hugepages installed. 21 | uint32_t get_num_hugepages(); 22 | 23 | // Dynamically figure out how many host memory channels (based on hugepages installed) for each device, based on arch. 24 | uint32_t get_available_num_host_mem_channels( 25 | const uint32_t num_channels_per_device_target, const uint16_t device_id, const uint16_t revision_id); 26 | 27 | // Looks for hugetlbfs inside /proc/mounts matching desired pagesize (typically 1G). 28 | std::string find_hugepage_dir(std::size_t pagesize); 29 | 30 | // Open a file for the hugepage mapping. 31 | // All processes operating on the same pipeline must agree on the file name. 32 | // Today we assume there's only one pipeline running within the system. 33 | // One hugepage per device such that each device gets unique memory. 34 | int open_hugepage_file(const std::string &dir, ChipId physical_device_id, uint16_t channel); 35 | 36 | } // namespace tt::umd 37 | -------------------------------------------------------------------------------- /device/api/umd/device/logging/config.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | /** 8 | * @brief UMD logger configuration and utilities. 9 | * 10 | * This namespace contains functionality for configuring the logging system 11 | * used by the UMD (User Mode Driver). 
12 | */ 13 | namespace tt::umd::logging { 14 | 15 | /** 16 | * @brief Logging severity levels 17 | * 18 | * Defines the different severity levels for logging messages, from most 19 | * verbose (trace) to most severe (critical), with an option to disable 20 | * logging completely (off). 21 | */ 22 | enum class level { 23 | trace, ///< Most detailed logging level, for tracing program execution 24 | debug, ///< Debugging information, useful during development 25 | info, ///< General informational messages about program operation 26 | warn, ///< Warning messages for potentially harmful situations 27 | error, ///< Error messages for serious problems 28 | critical, ///< Critical errors that may lead to program termination 29 | off ///< Disables all logging 30 | }; 31 | 32 | /** 33 | * @brief Sets the global logging level 34 | * 35 | * @param lvl The new logging level to set. Messages with severity levels 36 | * lower than this level will not be logged. 37 | */ 38 | void set_level(level lvl); 39 | 40 | } // namespace tt::umd::logging 41 | -------------------------------------------------------------------------------- /device/logging/config.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | /** 6 | * @file config.cpp 7 | * @brief Implementation of UMD (User Mode Driver) logging initialization 8 | * 9 | * This file contains the initialization code for the UMD logging system. 10 | * It creates a static instance of the TT LoggerInitializer with specific 11 | * environment variable names for UMD logging configuration. 12 | */ 13 | 14 | #include "umd/device/logging/config.hpp" 15 | 16 | #include 17 | 18 | namespace tt::umd::logging { 19 | 20 | /// Map our internal enum to spdlog's level enum. 
21 | spdlog::level::level_enum to_spdlog_level(level lvl) { 22 | switch (lvl) { 23 | case level::trace: 24 | return spdlog::level::trace; 25 | case level::debug: 26 | return spdlog::level::debug; 27 | case level::info: 28 | return spdlog::level::info; 29 | case level::warn: 30 | return spdlog::level::warn; 31 | case level::error: 32 | return spdlog::level::err; 33 | case level::critical: 34 | return spdlog::level::critical; 35 | case level::off: 36 | return spdlog::level::off; 37 | } 38 | return spdlog::level::info; // fallback 39 | } 40 | 41 | void set_level(level lvl) { ::tt::LoggerRegistry::instance().set_level(to_spdlog_level(lvl)); } 42 | 43 | } // namespace tt::umd::logging 44 | -------------------------------------------------------------------------------- /nanobind/tests/test_py_warm_reset.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 Tenstorrent Inc. 2 | # SPDX-License-Identifier: Apache-2.0 3 | import unittest 4 | import tt_umd 5 | 6 | class TestWarmReset(unittest.TestCase): 7 | @unittest.skip("Skipping warm reset test to avoid resetting cards during unit tests") 8 | def test_warm_reset(self): 9 | """Test warm reset functionality - SKIPPED to avoid resetting cards""" 10 | pci_ids = tt_umd.PCIDevice.enumerate_devices() 11 | if len(pci_ids) == 0: 12 | print("No PCI devices found.") 13 | return 14 | 15 | # Create TTDevice for PCI ID 0 16 | dev = tt_umd.TTDevice.create(0) 17 | dev.init_tt_device() 18 | 19 | # Get board type and architecture 20 | board_type = dev.get_board_type() 21 | arch = dev.get_arch() 22 | print(f"Device board type: {board_type}") 23 | print(f"Device architecture: {arch}") 24 | 25 | # Check if it's UBB (Unified Board Bundle) and call appropriate warm reset 26 | if board_type == tt_umd.BoardType.UBB_WORMHOLE: 27 | print("UBB_WORMHOLE board detected, executing UBB warm reset...") 28 | tt_umd.WarmReset.ubb_warm_reset(timeout_s=60) # Uncomment to actually reset 29 | 
else: 30 | print(f"Non-UBB board detected (type: {board_type}), executing standard warm reset...") 31 | tt_umd.WarmReset.warm_reset(pci_ids) # Uncomment to actually reset 32 | -------------------------------------------------------------------------------- /device/api/umd/device/arc/blackhole_arc_messenger.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include "umd/device/arc/arc_messenger.hpp" 9 | #include "umd/device/arc/blackhole_arc_message_queue.hpp" 10 | #include "umd/device/utils/timeouts.hpp" 11 | 12 | namespace tt::umd { 13 | 14 | class BlackholeArcMessenger : public ArcMessenger { 15 | public: 16 | /** 17 | * Constructor for BlackholeArcMessenger. 18 | * 19 | * @param tt_device TTDevice object used to communicate with the ARC of the device. 20 | */ 21 | BlackholeArcMessenger(TTDevice* tt_device); 22 | 23 | /** 24 | * Send ARC message. The call of send_message is blocking, timeout is to be implemented. 25 | * 26 | * @param msg_code ARC message type. 27 | * @param return_values Return values from the ARC message. 28 | * @param args Arguments for the message. For Blackhole, up to 7 args are allowed. 29 | * @param timeout_ms Timeout in milliseconds; 0 to wait indefinitely.
30 | */ 31 | uint32_t send_message( 32 | const uint32_t msg_code, 33 | std::vector& return_values, 34 | const std::vector& args = {}, 35 | const std::chrono::milliseconds timeout_ms = timeout::ARC_MESSAGE_TIMEOUT) override; 36 | 37 | private: 38 | std::unique_ptr blackhole_arc_msg_queue = nullptr; 39 | }; 40 | 41 | } // namespace tt::umd 42 | -------------------------------------------------------------------------------- /device/api/umd/device/simulation/rtl_simulation_chip.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include "umd/device/simulation/simulation_chip.hpp" 13 | #include "umd/device/simulation/simulation_host.hpp" 14 | 15 | namespace tt::umd { 16 | 17 | // RTL simulation implementation using subprocess and flatbuffer communication. 18 | class RtlSimulationChip : public SimulationChip { 19 | public: 20 | RtlSimulationChip(const std::filesystem::path& simulator_directory, SocDescriptor soc_descriptor, ChipId chip_id); 21 | ~RtlSimulationChip() override = default; 22 | 23 | void start_device() override; 24 | void close_device() override; 25 | 26 | void write_to_device(CoreCoord core, const void* src, uint64_t l1_dest, uint32_t size) override; 27 | void read_from_device(CoreCoord core, void* dest, uint64_t l1_src, uint32_t size) override; 28 | 29 | void send_tensix_risc_reset(tt_xy_pair translated_core, const TensixSoftResetOptions& soft_resets) override; 30 | void send_tensix_risc_reset(const TensixSoftResetOptions& soft_resets) override; 31 | void assert_risc_reset(CoreCoord core, const RiscType selected_riscs) override; 32 | void deassert_risc_reset(CoreCoord core, const RiscType selected_riscs, bool staggered_start) override; 33 | 34 | private: 35 | SimulationHost host; 36 | }; 37 | 38 | } // namespace tt::umd 39 | 
-------------------------------------------------------------------------------- /device/api/umd/device/tt_device/remote_blackhole_tt_device.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include "umd/device/chip/local_chip.hpp" 9 | #include "umd/device/tt_device/blackhole_tt_device.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | class RemoteBlackholeTTDevice : public BlackholeTTDevice { 14 | public: 15 | void read_from_device(void* mem_ptr, tt_xy_pair core, uint64_t addr, uint32_t size) override; 16 | 17 | void write_to_device(const void* mem_ptr, tt_xy_pair core, uint64_t addr, uint32_t size) override; 18 | 19 | void read_from_arc_apb(void* mem_ptr, uint64_t arc_addr_offset, size_t size) override; 20 | 21 | void noc_multicast_write( 22 | void* dst, size_t size, tt_xy_pair core_start, tt_xy_pair core_end, uint64_t addr) override; 23 | 24 | void write_to_arc_apb(const void* mem_ptr, uint64_t arc_addr_offset, size_t size) override; 25 | 26 | void wait_for_non_mmio_flush() override; 27 | 28 | RemoteCommunication* get_remote_communication(); 29 | 30 | protected: 31 | bool is_arc_available_over_axi() override; 32 | 33 | private: 34 | RemoteBlackholeTTDevice(std::unique_ptr remote_communication); 35 | 36 | friend std::unique_ptr TTDevice::create(std::unique_ptr remote_communication); 37 | 38 | std::unique_ptr remote_communication_; 39 | }; 40 | 41 | } // namespace tt::umd 42 | -------------------------------------------------------------------------------- /tests/galaxy/test_galaxy_common.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | #include "umd/device/cluster.hpp" 18 | #include "umd/device/types/xy_pair.hpp" 19 | 20 | // static const std::string SOC_DESC_PATH = "./tests/soc_descs/wormhole_b0_8x10.yaml"; 21 | 22 | using namespace tt; 23 | using namespace tt::umd; 24 | 25 | struct tt_multichip_core_addr { 26 | tt_multichip_core_addr() : core{}, chip{}, addr{} {} 27 | 28 | tt_multichip_core_addr(ChipId chip, CoreCoord core, std::uint64_t addr) : core(core), chip(chip), addr(addr) {} 29 | 30 | CoreCoord core; 31 | ChipId chip; 32 | std::uint64_t addr; 33 | 34 | std::string str() const { return fmt::format("(chip={},core={},addr=0x{:x})", chip, core.str(), addr); } 35 | }; 36 | 37 | // SIMPLE DATAMOVEMENT API BASED ON UMD 38 | // send one contiguous chunk of data from one sender core to a receiver core 39 | void move_data( 40 | Cluster& device, tt_multichip_core_addr sender_core, tt_multichip_core_addr receiver_core, uint32_t size); 41 | 42 | // send one contiguous chunk of data to a vector of receiver cores 43 | void broadcast_data( 44 | Cluster& device, 45 | tt_multichip_core_addr sender_core, 46 | std::vector receiver_cores, 47 | uint32_t size); 48 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.7.0] - 2025-11-29 4 | 5 | ### Changed 6 | 7 | Changed to a more generic arc_msg API. 8 | 9 | ## [0.6.0] - 2025-11-24 10 | 11 | ### Changed 12 | 13 | Change the usage of TLBs such that KMD is in control of TLB allocation instead of UMD. 14 | TLBs are now allocated using KMD's dedicated API. 15 | 16 | ## [0.5.3] - 2025-11-14 17 | 18 | ### Changed 19 | 20 | Added generation of .deb and .rpm packages. 21 | Added three separate packages (runtime, development and python). 
22 | 23 | ## [0.5.1] - 2025-11-12 24 | 25 | ### Changed 26 | 27 | Manylinux builds and Pypi test publishing. 28 | Many smaller fixes and improvements. 29 | 30 | ## [0.4.0] - 2025-10-18 31 | 32 | ### Changed 33 | 34 | Removed old type names. 35 | 36 | ## [0.3.0] - 2025-10-17 37 | 38 | ### Changed 39 | 40 | Many smaller fixes and improvements. 41 | TTsim support improvements. 42 | JTAG support improvement. 43 | Fixing CMake install path. 44 | Further work on integrating new KMD TLBs. 45 | 46 | ## [0.2.0] - 2025-09-15 47 | 48 | ### Changed 49 | 50 | A couple of smaller fixes and improvements, including L2CPU harvesting, fixes for new FW. Better TTSim support. Further JTAG support. 51 | Introduced new soft reset API. 52 | Introduced lite fabric initial version. 53 | 54 | ## [0.1.1] - 2025-08-25 55 | 56 | ### Changed 57 | 58 | - Created an automated release workflow. 59 | 60 | 61 | ## [0.1.0] - 2024-08-25 62 | 63 | ### Changed 64 | 65 | - Initial pre-release. The changes were not tracked properly through changelog up to this version. 66 | -------------------------------------------------------------------------------- /device/api/umd/device/types/telemetry.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace tt::umd { 12 | 13 | enum TelemetryTag : uint8_t { 14 | BOARD_ID_HIGH = 1, 15 | BOARD_ID_LOW = 2, 16 | ASIC_ID = 3, 17 | HARVESTING_STATE = 4, 18 | UPDATE_TELEM_SPEED = 5, 19 | VCORE = 6, 20 | TDP = 7, 21 | TDC = 8, 22 | VDD_LIMITS = 9, 23 | THM_LIMITS = 10, 24 | ASIC_TEMPERATURE = 11, 25 | VREG_TEMPERATURE = 12, 26 | BOARD_TEMPERATURE = 13, 27 | AICLK = 14, 28 | AXICLK = 15, 29 | ARCCLK = 16, 30 | L2CPUCLK0 = 17, 31 | L2CPUCLK1 = 18, 32 | L2CPUCLK2 = 19, 33 | L2CPUCLK3 = 20, 34 | ETH_LIVE_STATUS = 21, 35 | DDR_STATUS = 22, 36 | DDR_SPEED = 23, 37 | ETH_FW_VERSION = 24, 38 | GDDR_FW_VERSION = 25, 39 | DM_APP_FW_VERSION = 26, 40 | DM_BL_FW_VERSION = 27, 41 | FLASH_BUNDLE_VERSION = 28, 42 | CM_FW_VERSION = 29, 43 | L2CPU_FW_VERSION = 30, 44 | FAN_SPEED = 31, 45 | TIMER_HEARTBEAT = 32, 46 | TELEMETRY_ENUM_COUNT = 33, 47 | ENABLED_TENSIX_COL = 34, 48 | ENABLED_ETH = 35, 49 | ENABLED_GDDR = 36, 50 | ENABLED_L2CPU = 37, 51 | PCIE_USAGE = 38, 52 | NUMBER_OF_TAGS = 39, 53 | NOC_TRANSLATION = 40, 54 | ASIC_LOCATION = 52, 55 | TT_FLASH_VERSION = 58, 56 | ASIC_ID_HIGH = 61, 57 | ASIC_ID_LOW = 62, 58 | AICLK_LIMIT_MAX = 63, 59 | }; 60 | 61 | } // namespace tt::umd 62 | -------------------------------------------------------------------------------- /.github/workflows/build-and-run-all-benchmarks.yml: -------------------------------------------------------------------------------- 1 | # Build and then run all benchmarks, on all supported archs. 
2 | name: Build and run all benchmarks 3 | 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | build-type: 8 | required: false 9 | default: Release 10 | type: choice 11 | options: 12 | - Release 13 | - RelWithDebInfo 14 | - Debug 15 | - ASan 16 | - TSan 17 | 18 | jobs: 19 | build-tests: 20 | secrets: inherit 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | ubuntu-docker-version: [ 25 | 'ubuntu-22.04', 26 | 'ubuntu-24.04', 27 | ] 28 | uses: ./.github/workflows/build-tests.yml 29 | with: 30 | ubuntu-docker-version: ${{ matrix.ubuntu-docker-version}} 31 | timeout: 10 32 | build-type: ${{ inputs.build-type || 'Release' }} 33 | 34 | run-benchmarks: 35 | secrets: inherit 36 | needs: build-tests 37 | strategy: 38 | fail-fast: false 39 | matrix: 40 | test-group: [ 41 | {arch: wormhole_b0, card: tt-ubuntu-2204-n150-viommu-stable, timeout: 45}, 42 | ] 43 | ubuntu-docker-version: [ 44 | 'ubuntu-22.04', 45 | ] 46 | uses: ./.github/workflows/run-benchmarks.yml 47 | with: 48 | arch: ${{ matrix.test-group.arch}} 49 | ubuntu-docker-version: ${{ matrix.ubuntu-docker-version}} 50 | card: ${{ matrix.test-group.card}} 51 | timeout: ${{ matrix.test-group.timeout}} 52 | build-type: ${{ inputs.build-type }} 53 | -------------------------------------------------------------------------------- /device/api/umd/device/tt_device/remote_communication_lite_fabric.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "umd/device/lite_fabric/lite_fabric.hpp" 12 | #include "umd/device/tt_device/remote_communication.hpp" 13 | 14 | namespace tt::umd { 15 | 16 | class SysmemManager; 17 | 18 | class RemoteCommunicationLiteFabric : public RemoteCommunication { 19 | public: 20 | RemoteCommunicationLiteFabric(TTDevice* local_tt_device, SysmemManager* sysmem_manager = nullptr); 21 | 22 | void read_non_mmio( 23 | tt_xy_pair target_core, 24 | void* dest, 25 | uint64_t core_src, 26 | uint32_t size_in_bytes, 27 | const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 28 | 29 | void write_to_non_mmio( 30 | tt_xy_pair target_core, 31 | const void* src, 32 | uint64_t core_dest, 33 | uint32_t size_in_bytes, 34 | bool broadcast = false, 35 | std::vector broadcast_header = {}, 36 | const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 37 | 38 | void wait_for_non_mmio_flush(const std::chrono::milliseconds timeout_ms = timeout::NON_MMIO_RW_TIMEOUT) override; 39 | 40 | private: 41 | lite_fabric::HostToLiteFabricInterface 42 | host_interface; 43 | }; 44 | 45 | } // namespace tt::umd 46 | -------------------------------------------------------------------------------- /device/api/umd/device/utils/timeouts.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace tt::umd::timeout { 12 | inline constexpr auto NON_MMIO_RW_TIMEOUT = std::chrono::milliseconds(5'000); 13 | 14 | inline constexpr auto ARC_MESSAGE_TIMEOUT = std::chrono::milliseconds(1'000); 15 | inline constexpr auto ARC_STARTUP_TIMEOUT = std::chrono::milliseconds(5'000); 16 | inline constexpr auto ARC_POST_RESET_TIMEOUT = std::chrono::milliseconds(1'000); 17 | inline constexpr auto ARC_LONG_POST_RESET_TIMEOUT = std::chrono::milliseconds(300'000); 18 | 19 | inline constexpr auto DRAM_TRAINING_TIMEOUT = std::chrono::milliseconds(60'000); 20 | inline constexpr auto ETH_QUEUE_ENABLE_TIMEOUT = std::chrono::milliseconds(30'000); 21 | inline constexpr auto ETH_TRAINING_TIMEOUT = std::chrono::milliseconds(60'000); 22 | 23 | inline constexpr auto AICLK_TIMEOUT = std::chrono::milliseconds(100); 24 | 25 | inline constexpr auto WARM_RESET_M3_TIMEOUT = std::chrono::milliseconds(20'000); 26 | inline constexpr auto WARM_RESET_REAPPEAR_POLL_INTERVAL = std::chrono::milliseconds(100); 27 | inline constexpr auto WARM_RESET_DEVICES_REAPPEAR_TIMEOUT = std::chrono::milliseconds(10'000); 28 | 29 | inline constexpr auto UBB_WARM_RESET_TIMEOUT = std::chrono::milliseconds(100'000); 30 | inline constexpr auto BH_WARM_RESET_TIMEOUT = std::chrono::milliseconds(2'000); 31 | 32 | inline constexpr auto BH_LITE_FABRIC_STATE_CHANGE_TIMEOUT = std::chrono::milliseconds(10'000); 33 | } // namespace tt::umd::timeout 34 | -------------------------------------------------------------------------------- /tests/simulation/device_fixture.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2023 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include "tests/test_utils/fetch_local_files.hpp" 16 | #include "umd/device/simulation/rtl_simulation_chip.hpp" 17 | #include "umd/device/simulation/simulation_chip.hpp" 18 | 19 | namespace tt::umd { 20 | 21 | class SimulationDeviceFixture : public ::testing::Test { 22 | protected: 23 | static void SetUpTestSuite() { 24 | // yaml path is dummy and won't change test behavior 25 | const char* simulator_path = getenv("TT_UMD_SIMULATOR"); 26 | if (simulator_path == nullptr) { 27 | throw std::runtime_error( 28 | "You need to define TT_UMD_SIMULATOR that will point to simulator path. eg. build/versim-wormhole-b0"); 29 | } 30 | auto soc_descriptor_path = SimulationChip::get_soc_descriptor_path_from_simulator_path(simulator_path); 31 | auto soc_descriptor = SocDescriptor(soc_descriptor_path); 32 | device = SimulationChip::create(simulator_path, soc_descriptor, 0, 1); 33 | device->start_device(); 34 | } 35 | 36 | static void TearDownTestSuite() { device->close_device(); } 37 | 38 | static std::unique_ptr device; 39 | }; 40 | 41 | std::unique_ptr SimulationDeviceFixture::device = nullptr; 42 | 43 | } // namespace tt::umd 44 | -------------------------------------------------------------------------------- /device/api/umd/device/types/tlb.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | namespace tt::umd { 15 | 16 | struct tlb_offsets { 17 | uint32_t local_offset; 18 | uint32_t x_end; 19 | uint32_t y_end; 20 | uint32_t x_start; 21 | uint32_t y_start; 22 | uint32_t noc_sel; 23 | uint32_t mcast; 24 | uint32_t ordering; 25 | uint32_t linked; 26 | uint32_t static_vc; 27 | uint32_t static_vc_end; 28 | }; 29 | 30 | struct tlb_data { 31 | uint64_t local_offset = 0; 32 | uint64_t x_end = 0; 33 | uint64_t y_end = 0; 34 | uint64_t x_start = 0; 35 | uint64_t y_start = 0; 36 | uint64_t noc_sel = 0; 37 | uint64_t mcast = 0; 38 | uint64_t ordering = 0; 39 | uint64_t linked = 0; 40 | uint64_t static_vc = 0; 41 | 42 | // Orderings. 43 | static constexpr uint64_t Relaxed = 0; 44 | static constexpr uint64_t Strict = 1; 45 | static constexpr uint64_t Posted = 2; 46 | 47 | bool check(const tlb_offsets &offset) const; 48 | std::pair apply_offset(const tlb_offsets &offset) const; 49 | }; 50 | 51 | struct tlb_configuration { 52 | uint64_t size; 53 | uint64_t base; 54 | uint64_t cfg_addr; 55 | uint64_t index_offset; 56 | uint64_t tlb_offset; 57 | tlb_offsets offset; 58 | }; 59 | 60 | enum TlbMapping : uint8_t { 61 | UC = 0, // Uncached 62 | WC = 1, // Write-combined 63 | }; 64 | 65 | } // namespace tt::umd 66 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/wormhole_N300_with_remote_connections.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: wormhole_b0 3 | 1: wormhole_b0 4 | chips: 5 | 0: 6 | - 1 7 | - 0 8 | - 0 9 | - 0 10 | 1: 11 | - 0 12 | - 0 13 | - 0 14 | - 0 15 | ethernet_connections: 16 | - 17 | - chip: 0 18 | chan: 9 19 | - chip: 1 20 | chan: 1 21 | - 22 | - chip: 0 23 | chan: 8 24 | - chip: 1 25 | chan: 0 26 | ethernet_connections_to_remote_devices: 27 | - 28 | - chip: 1 29 | chan: 7 30 | - 
remote_chip_id: 14251335820 31 | chan: 7 32 | - 33 | - chip: 1 34 | chan: 6 35 | - remote_chip_id: 14251335820 36 | chan: 6 37 | - 38 | - chip: 0 39 | chan: 15 40 | - remote_chip_id: 9956368524 41 | chan: 15 42 | - 43 | - chip: 0 44 | chan: 14 45 | - remote_chip_id: 9956368524 46 | chan: 14 47 | - 48 | - chip: 0 49 | chan: 1 50 | - remote_chip_id: 9956368575 51 | chan: 1 52 | - 53 | - chip: 0 54 | chan: 0 55 | - remote_chip_id: 9956368575 56 | chan: 0 57 | chips_with_mmio: 58 | - 0: 0 59 | harvesting: 60 | 0: 61 | noc_translation: true 62 | harvest_mask: 136 63 | dram_harvesting_mask: 0 64 | eth_harvesting_mask: 0 65 | pcie_harvesting_mask: 0 66 | 1: 67 | noc_translation: true 68 | harvest_mask: 576 69 | dram_harvesting_mask: 0 70 | eth_harvesting_mask: 0 71 | pcie_harvesting_mask: 0 72 | chip_to_boardtype: 73 | 0: n300 74 | 1: n300 75 | boards: 76 | - 77 | - board_id: 72058990194991233 78 | - board_type: n300 79 | - chips: 80 | - 0 81 | - 1 -------------------------------------------------------------------------------- /tests/api/test_software_harvesting.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include "tests/test_utils/device_test_utils.hpp" 11 | #include "umd/device/arch/blackhole_implementation.hpp" 12 | #include "umd/device/arch/grendel_implementation.hpp" 13 | #include "umd/device/arch/wormhole_implementation.hpp" 14 | #include "umd/device/cluster.hpp" 15 | #include "umd/device/cluster_descriptor.hpp" 16 | 17 | using namespace tt::umd; 18 | 19 | TEST(SoftwareHarvesting, TensixSoftwareHarvestingAllChips) { 20 | std::unique_ptr cluster = std::make_unique(ClusterOptions{ 21 | .simulated_harvesting_masks = {0x3, 0, 0}, 22 | }); 23 | 24 | for (const ChipId& chip : cluster->get_target_device_ids()) { 25 | tt::ARCH arch = cluster->get_cluster_description()->get_arch(chip); 26 | 27 | uint32_t upper_limit_num_cores; 28 | if (arch == tt::ARCH::WORMHOLE_B0) { 29 | // At least 2 rows are expected to be harvested. 30 | upper_limit_num_cores = 64; 31 | } else if (arch == tt::ARCH::BLACKHOLE) { 32 | // At least 2 columns are expected to be harvested. 
33 | upper_limit_num_cores = 120; 34 | } 35 | ASSERT_LE(cluster->get_soc_descriptor(chip).get_cores(CoreType::TENSIX).size(), upper_limit_num_cores); 36 | } 37 | 38 | for (const ChipId& chip : cluster->get_target_device_ids()) { 39 | EXPECT_TRUE((0x3 & cluster->get_soc_descriptor(chip).harvesting_masks.tensix_harvesting_mask) == 0x3); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # Tools 2 | 3 | ## Build flow 4 | 5 | In general, see the common build instructions in the main [README](../README.md) 6 | 7 | Short instructions for building tools: 8 | ``` 9 | cmake -B build -G Ninja 10 | cmake --build build --target umd_tools 11 | ``` 12 | 13 | ## Topology tool 14 | 15 | The topology tool can be used to generate a cluster descriptor, which describes the system topology of Tenstorrent devices. 16 | It shows information such as PCI-connected chips, remote chips, ethernet connections, harvesting, etc. 17 | 18 | You can run the following for more information: 19 | ``` 20 | ./build/tools/umd/topology --help 21 | ``` 22 | 23 | Example output: 24 | ``` 25 | ... 26 | ethernet_connections: 27 | - 28 | - chip: 5 29 | chan: 1 30 | - chip: 2 31 | chan: 9 32 | - 33 | - chip: 5 34 | chan: 0 35 | - chip: 2 36 | chan: 8 37 | ... 38 | ``` 39 | 40 | ## Telemetry tool 41 | 42 | The telemetry tool can be used to read telemetry from ARC. You can specify which PCI chips should be polled, the polling frequency, and which telemetry to read. 43 | It has a special mode in which it reads some important factors for Wormhole devices. 44 | 45 | If you want to save the values, you can also pass an output file to write to. 46 | 47 | You can run the following for more information: 48 | ``` 49 | ./build/tools/umd/telemetry --help 50 | ``` 51 | 52 | Example output: 53 | ``` 54 | ...
55 | Device id 0 - AICLK: 1350 VCore: 844 Power: 60 Temp: 64.12027 56 | Device id 0 - AICLK: 1350 VCore: 844 Power: 60 Temp: 64.632965 57 | Device id 0 - AICLK: 1350 VCore: 844 Power: 60 Temp: 64.632965 58 | ... 59 | ``` -------------------------------------------------------------------------------- /device/api/umd/device/coordinates/wormhole_coordinate_manager.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include "umd/device/arch/wormhole_implementation.hpp" 10 | #include "umd/device/coordinates/coordinate_manager.hpp" 11 | 12 | namespace tt::umd { 13 | 14 | class WormholeCoordinateManager : public CoordinateManager { 15 | public: 16 | WormholeCoordinateManager( 17 | const bool noc_translation_enabled, 18 | HarvestingMasks harvesting_masks, 19 | const tt_xy_pair& tensix_grid_size, 20 | const std::vector& tensix_cores, 21 | const tt_xy_pair& dram_grid_size, 22 | const std::vector& dram_cores, 23 | const std::vector& eth_cores, 24 | const tt_xy_pair& arc_grid_size, 25 | const std::vector& arc_cores, 26 | const tt_xy_pair& pcie_grid_size, 27 | const std::vector& pcie_cores, 28 | const std::vector& router_cores, 29 | const std::vector& security_cores, 30 | const std::vector& l2cpu_cores, 31 | const std::vector& noc0_x_to_noc1_x = {}, 32 | const std::vector& noc0_y_to_noc1_y = {}); 33 | 34 | protected: 35 | void fill_tensix_noc0_translated_mapping() override; 36 | void fill_dram_noc0_translated_mapping() override; 37 | void fill_eth_noc0_translated_mapping() override; 38 | void fill_pcie_noc0_translated_mapping() override; 39 | void fill_arc_noc0_translated_mapping() override; 40 | 41 | tt_xy_pair get_tensix_grid_size() const override; 42 | }; 43 | 44 | } // namespace tt::umd 45 | -------------------------------------------------------------------------------- 
/device/api/umd/device/types/wormhole_telemetry.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | namespace tt::umd { 10 | 11 | namespace wormhole { 12 | 13 | enum TelemetryTag : uint8_t { 14 | ENUM_VERSION = 0, 15 | DEVICE_ID = 1, 16 | ASIC_RO = 2, 17 | ASIC_IDD = 3, 18 | BOARD_ID_HIGH = 4, 19 | BOARD_ID_LOW = 5, 20 | ARC0_FW_VERSION = 6, 21 | ARC1_FW_VERSION = 7, 22 | ARC2_FW_VERSION = 8, 23 | ARC3_FW_VERSION = 9, 24 | SPIBOOTROM_FW_VERSION = 10, 25 | ETH_FW_VERSION = 11, 26 | DM_BL_FW_VERSION = 12, 27 | DM_APP_FW_VERSION = 13, 28 | DDR_STATUS = 14, 29 | ETH_STATUS0 = 15, 30 | ETH_STATUS1 = 16, 31 | PCIE_STATUS = 17, 32 | FAULTS = 18, 33 | ARC0_HEALTH = 19, 34 | ARC1_HEALTH = 20, 35 | ARC2_HEALTH = 21, 36 | ARC3_HEALTH = 22, 37 | FAN_SPEED = 23, 38 | AICLK = 24, 39 | AXICLK = 25, 40 | ARCCLK = 26, 41 | THROTTLER = 27, 42 | VCORE = 28, 43 | ASIC_TEMPERATURE = 29, 44 | VREG_TEMPERATURE = 30, 45 | BOARD_TEMPERATURE = 31, 46 | TDP = 32, 47 | TDC = 33, 48 | VDD_LIMITS = 34, 49 | THM_LIMITS = 35, 50 | WH_FW_DATE = 36, 51 | ASIC_TMON0 = 37, 52 | ASIC_TMON1 = 38, 53 | MVDDQ_POWER = 39, 54 | GDDR_TRAIN_TEMP0 = 40, 55 | GDDR_TRAIN_TEMP1 = 41, 56 | BOOT_DATE = 42, 57 | RT_SECONDS = 43, 58 | ETH_DEBUG_STATUS0 = 44, 59 | ETH_DEBUG_STATUS1 = 45, 60 | TT_FLASH_VERSION = 46, 61 | ETH_LOOPBACK_STATUS = 47, 62 | ETH_LIVE_STATUS = 48, 63 | FW_BUNDLE_VERSION = 49, 64 | NUMBER_OF_TAGS = 50 65 | }; 66 | 67 | } // namespace wormhole 68 | 69 | } // namespace tt::umd 70 | -------------------------------------------------------------------------------- /device/api/umd/device/lite_fabric/fabric_edm_types.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include 8 | 9 | namespace tt::umd { 10 | 11 | namespace lite_fabric { 12 | 13 | struct WorkerXY { 14 | uint16_t x; 15 | uint16_t y; 16 | 17 | constexpr WorkerXY() : x(0), y(0) {} 18 | 19 | constexpr WorkerXY(uint16_t x, uint16_t y) : x(x), y(y) {} 20 | 21 | constexpr uint32_t to_uint32() const { return (y << 16) | x; } 22 | 23 | static constexpr WorkerXY from_uint32(uint32_t v) { return WorkerXY(v & 0xFFFF, (v >> 16) & 0xFFFF); } 24 | 25 | constexpr bool operator==(const WorkerXY& rhs) const { return x == rhs.x && y == rhs.y; } 26 | 27 | constexpr bool operator!=(const WorkerXY& rhs) const { return !(*this == rhs); } 28 | }; 29 | 30 | struct EDMChannelWorkerLocationInfo { 31 | uint32_t worker_semaphore_address; 32 | uint32_t align_pad_0; // Padding added for safe reading over noc 33 | uint32_t align_pad_1; 34 | uint32_t align_pad_2; 35 | 36 | uint32_t worker_teardown_semaphore_address; 37 | uint32_t align_pad_3; // Padding added for safe reading over noc 38 | uint32_t align_pad_4; 39 | uint32_t align_pad_5; 40 | 41 | WorkerXY worker_xy; 42 | uint32_t align_pad_6; // Padding added for safe reading over noc 43 | uint32_t align_pad_7; 44 | uint32_t align_pad_8; 45 | 46 | uint32_t edm_read_counter = 0; 47 | uint32_t align_pad_9; // Padding added for safe reading over noc 48 | uint32_t align_pad_10; 49 | uint32_t align_pad_11; 50 | }; 51 | 52 | static_assert(sizeof(EDMChannelWorkerLocationInfo) <= 64); 53 | 54 | } // namespace lite_fabric 55 | } // namespace tt::umd 56 | -------------------------------------------------------------------------------- /device/arc/wormhole_arc_telemetry_reader.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/arc/wormhole_arc_telemetry_reader.hpp" 7 | 8 | #include "umd/device/arch/wormhole_implementation.hpp" 9 | #include "umd/device/types/telemetry.hpp" 10 | 11 | extern bool umd_use_noc1; 12 | 13 | namespace tt::umd { 14 | 15 | WormholeArcTelemetryReader::WormholeArcTelemetryReader(TTDevice* tt_device) : ArcTelemetryReader(tt_device) { 16 | arc_core = !umd_use_noc1 ? wormhole::ARC_CORES_NOC0[0] 17 | : tt_xy_pair( 18 | wormhole::NOC0_X_TO_NOC1_X[wormhole::ARC_CORES_NOC0[0].x], 19 | wormhole::NOC0_Y_TO_NOC1_Y[wormhole::ARC_CORES_NOC0[0].y]); 20 | get_telemetry_address(); 21 | initialize_telemetry(); 22 | } 23 | 24 | void WormholeArcTelemetryReader::get_telemetry_address() { 25 | static constexpr uint64_t noc_telemetry_offset = 0x810000000; 26 | uint32_t telemetry_table_addr_offset; 27 | tt_device->read_from_device( 28 | &telemetry_table_addr_offset, 29 | arc_core, 30 | wormhole::ARC_NOC_RESET_UNIT_BASE_ADDR + wormhole::NOC_NODEID_X_0, 31 | sizeof(uint32_t)); 32 | 33 | telemetry_table_addr = telemetry_table_addr_offset + noc_telemetry_offset; 34 | 35 | uint32_t telemetry_values_addr_offset; 36 | tt_device->read_from_device( 37 | &telemetry_values_addr_offset, 38 | arc_core, 39 | wormhole::ARC_NOC_RESET_UNIT_BASE_ADDR + wormhole::NOC_NODEID_Y_0, 40 | sizeof(uint32_t)); 41 | 42 | telemetry_values_addr = telemetry_values_addr_offset + noc_telemetry_offset; 43 | } 44 | 45 | } // namespace tt::umd 46 | -------------------------------------------------------------------------------- /device/api/umd/device/types/blackhole_arc.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | namespace tt::umd { 12 | 13 | namespace blackhole { 14 | 15 | // Note, this only includes message IDs that have actually been implemented in CMFW. 16 | enum class ArcMessageType : uint8_t { 17 | RESERVED_01 = 0x01, // reserved to avoid conflict with initial SCRATCH[5] value 18 | NOP = 0x11, // Do nothing 19 | SET_VOLTAGE = 0x12, 20 | GET_VOLTAGE = 0x13, 21 | SWITCH_CLK_SCHEME = 0x14, 22 | REPORT_SCRATCH_ONLY = 0x16, 23 | SEND_PCIE_MSI = 0x17, 24 | SWITCH_VOUT_CONTROL = 0x18, 25 | READ_EEPROM = 0x19, 26 | WRITE_EEPROM = 0x1A, 27 | READ_TS = 0x1B, 28 | READ_PD = 0x1C, 29 | READ_VM = 0x1D, 30 | I2C_MESSAGE = 0x1E, 31 | EFUSE_BURN_BITS = 0x1F, 32 | FORCE_AICLK = 0x33, 33 | FORCE_VDD = 0x39, 34 | AICLK_GO_BUSY = 0x52, 35 | AICLK_GO_LONG_IDLE = 0x54, 36 | TRIGGER_RESET = 0x56, // arg: 3 = ASIC + M3 reset, other values = ASIC-only reset 37 | RESERVED_60 = 0x60, // reserved to avoid conflict with boot-time SCRATCH[5] value 38 | TEST = 0x90, 39 | PCIE_DMA_CHIP_TO_HOST_TRANSFER = 0x9B, 40 | PCIE_DMA_HOST_TO_CHIP_TRANSFER = 0x9C, 41 | ASIC_STATE0 = 0xA0, 42 | ASIC_STATE1 = 0xA1, 43 | ASIC_STATE3 = 0xA3, 44 | ASIC_STATE5 = 0xA5, 45 | SET_LAST_SERIAL = 0xBE, 46 | EFUSE_BURN = 0xBF, 47 | }; 48 | 49 | // Usage of queues proposed by Syseng. 50 | enum BlackholeArcMessageQueueIndex : uint8_t { 51 | KMD = 0, 52 | MONITORING = 1, 53 | TOOLS = 2, 54 | APPLICATION = 3, 55 | }; 56 | 57 | } // namespace blackhole 58 | 59 | } // namespace tt::umd 60 | -------------------------------------------------------------------------------- /device/api/umd/device/utils/kmd_versions.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include "umd/device/utils/semver.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | /** 14 | * KMD version 1.29.0 introduced IOMMU support. UMD requires at least this version to run with IOMMU enabled. 15 | * With never versions of KMD, UMD will still work when IOMMU is disabled on the system. 16 | */ 17 | inline constexpr semver_t KMD_IOMMU = semver_t(1, 29, 0); 18 | 19 | /** 20 | * KMD version 2.0.0 introduced support for mapping buffers to NOC by using IOCTL. Before 2.0.0, UMD used to access 21 | * iATU configuration registers directly to perform such mappings. KMD exposed this functionality via IOCTL which brings 22 | * the ability to map buffers from multiple processes safely. While it's still possible to use direct register access 23 | * for mapping buffers to NOC on KMD versions older than 2.0.0, it's discouraged to do so. 24 | */ 25 | inline constexpr semver_t KMD_MAP_TO_NOC = semver_t(2, 0, 0); 26 | 27 | /** 28 | * KMD version 2.4.1 introduced architecture agnostic reset support. With the new IOCTL in KMD 2.4.1, by using the same 29 | * IOCTL UMD can now reset different architectures without needing to have architecture specific reset IOCTLs. 30 | */ 31 | inline constexpr semver_t KMD_ARCH_AGNOSTIC_RESET = semver_t{2, 4, 1}; 32 | 33 | /** 34 | * KMD version 1.34.0 introduced support for configuring and using PCIe TLBs for buffer mappings. This feature enables 35 | * calls into KMD to reserve TLB by size, in order to enable multiple user processes to use the device safely at the 36 | * same time. 37 | */ 38 | inline constexpr semver_t KMD_TLBS = semver_t(1, 34, 0); 39 | } // namespace tt::umd 40 | -------------------------------------------------------------------------------- /device/types/tensix_soft_reset_options.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #include "umd/device/types/tensix_soft_reset_options.hpp" 6 | 7 | #include "umd/device/cluster.hpp" 8 | #include "umd/device/types/xy_pair.hpp" 9 | 10 | namespace tt::umd { 11 | 12 | std::string TensixSoftResetOptionsToString(TensixSoftResetOptions value) { 13 | std::string output; 14 | 15 | if ((value & TensixSoftResetOptions::BRISC) != TensixSoftResetOptions::NONE) { 16 | output += "BRISC | "; 17 | } 18 | if ((value & TensixSoftResetOptions::TRISC0) != TensixSoftResetOptions::NONE) { 19 | output += "TRISC0 | "; 20 | } 21 | if ((value & TensixSoftResetOptions::TRISC1) != TensixSoftResetOptions::NONE) { 22 | output += "TRISC1 | "; 23 | } 24 | if ((value & TensixSoftResetOptions::TRISC2) != TensixSoftResetOptions::NONE) { 25 | output += "TRISC2 | "; 26 | } 27 | if ((value & TensixSoftResetOptions::NCRISC) != TensixSoftResetOptions::NONE) { 28 | output += "NCRISC | "; 29 | } 30 | if ((value & TensixSoftResetOptions::STAGGERED_START) != TensixSoftResetOptions::NONE) { 31 | output += "STAGGERED_START | "; 32 | } 33 | 34 | if (output.empty()) { 35 | output = "UNKNOWN"; 36 | } else { 37 | output.erase(output.end() - 3, output.end()); 38 | } 39 | 40 | return output; 41 | } 42 | 43 | TensixSoftResetOptions invert_selected_options(TensixSoftResetOptions selected) { 44 | uint32_t selected_bits = static_cast(selected); 45 | uint32_t inverted = (~selected_bits) & static_cast(ALL_TENSIX_SOFT_RESET); 46 | return static_cast(inverted); 47 | } 48 | 49 | } // namespace tt::umd 50 | -------------------------------------------------------------------------------- /src/firmware/riscv/wormhole/host_mem_address_map.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | #include 9 | #include 10 | 11 | // Remove inline ASAP. 
12 | inline namespace wormhole { 13 | 14 | namespace host_mem { 15 | 16 | struct address_map { 17 | 18 | // SYSMEM accessible via DEVICE-to-HOST MMIO. 19 | 20 | static constexpr std::int32_t DEVICE_TO_HOST_MMIO_SIZE_BYTES = 1024 * 1024 * 1024; // 1GB 21 | static constexpr std::int32_t DEVICE_TO_HOST_SCRATCH_SIZE_BYTES = 128 * 1024 * 1024; 22 | static constexpr std::int32_t DEVICE_TO_HOST_SCRATCH_START = DEVICE_TO_HOST_MMIO_SIZE_BYTES - DEVICE_TO_HOST_SCRATCH_SIZE_BYTES; 23 | static constexpr std::int32_t DEVICE_TO_HOST_REGION_SIZE_BYTES = DEVICE_TO_HOST_MMIO_SIZE_BYTES - DEVICE_TO_HOST_SCRATCH_SIZE_BYTES; 24 | static constexpr std::int32_t DEVICE_TO_HOST_REGION_START = 0; 25 | 26 | static constexpr std::int32_t ETH_ROUTING_BLOCK_SIZE = 32 * 1024; 27 | static constexpr std::int32_t ETH_ROUTING_BUFFERS_START = DEVICE_TO_HOST_SCRATCH_START; 28 | static constexpr std::int32_t ETH_ROUTING_BUFFERS_SIZE = ETH_ROUTING_BLOCK_SIZE * 16 * 4;// 16 ethernet cores x 4 buffers/core 29 | 30 | // Concurrent perf trace parameters. 31 | static constexpr std::int32_t HOST_PERF_SCRATCH_BUF_START = DEVICE_TO_HOST_SCRATCH_START + ETH_ROUTING_BUFFERS_SIZE; 32 | static constexpr std::int32_t HOST_PERF_SCRATCH_BUF_SIZE = 64 * 1024 * 1024; 33 | static constexpr std::int32_t NUM_THREADS_IN_EACH_DEVICE_DUMP = 1; 34 | static constexpr std::int32_t NUM_HOST_PERF_QUEUES = 6 * 64; 35 | static constexpr std::int32_t HOST_PERF_QUEUE_SLOT_SIZE = HOST_PERF_SCRATCH_BUF_SIZE / NUM_HOST_PERF_QUEUES / 32 * 32; 36 | }; 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /device/api/umd/device/firmware/README.md: -------------------------------------------------------------------------------- 1 | ## Firmware Compatibility Layer 2 | 3 | This module provides a structured way to handle different firmware versions while keeping the codebase clean and maintainable. 
4 | 5 | ### Design Overview 6 | 7 | - FirmwareInfoProvider (Base Class) 8 | 9 | - Represents the implementation for the latest firmware version. 10 | 11 | - Always contains the most up-to-date behavior. 12 | 13 | - New firmware features should be added here. 14 | 15 | - Derived Classes (Older Firmware Versions) 16 | 17 | - For each older firmware version, a class is derived from FirmwareInfoProvider. 18 | 19 | - These derived classes override only the functions whose behavior differs from the latest firmware. 20 | 21 | - This minimizes code duplication and isolates version-specific logic. 22 | 23 | ### Version Selection 24 | 25 | The correct implementation is chosen at runtime based on the device’s firmware version and architecture. 26 | This ensures that the application interacts with a consistent interface regardless of the firmware version. 27 | 28 | ### Deprecation Policy 29 | 30 | - Older firmware versions may eventually be deprecated. 31 | 32 | - When a firmware version is no longer supported, its derived class should be removed. 33 | 34 | - Over time, the goal is to have only the FirmwareInfoProvider base class remain. 35 | 36 | - This ensures the code reflects only the currently supported firmware set. 37 | 38 | ### Benefits 39 | 40 | - Clarity: The base class always reflects the latest firmware specification. 41 | 42 | - Maintainability: Only differences are overridden, reducing code duplication. 43 | 44 | - Flexibility: Supports multiple firmware versions without complex conditionals. 45 | 46 | - Future-Proofing: Easy to remove deprecated versions without breaking newer ones. -------------------------------------------------------------------------------- /device/api/umd/device/tt_io.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | namespace tt::umd { 12 | 13 | /** 14 | * @brief Provides write access to a SoC core via a statically-mapped TLB. 15 | * 16 | * TLB refers to the aperture within the device BAR that is mapped to a NOC 17 | * endpoint (i.e. an (X, Y) location + address) within the chip. 18 | * 19 | * It is the caller's responsibility to manage the lifetime of Writer objects. 20 | */ 21 | class Writer { 22 | friend class TLBManager; 23 | 24 | public: 25 | /** 26 | * @brief Write to a SoC core. 27 | * 28 | * @param address must be aligned to the size of T 29 | * @param value 30 | */ 31 | template 32 | void write(uint32_t address, T value) const { 33 | auto dst = reinterpret_cast(base) + address; 34 | 35 | if (address >= tlb_size) { 36 | throw std::runtime_error("Address out of bounds for TLB"); 37 | } 38 | 39 | if (alignof(T) > 1 && (dst & (alignof(T) - 1))) { 40 | throw std::runtime_error("Unaligned write"); 41 | } 42 | 43 | *reinterpret_cast(dst) = value; 44 | } 45 | 46 | private: 47 | /** 48 | * @brief Cluster interface to construct a new Writer object. 49 | * 50 | * @param base pointer to the base address of a mapped TLB. 51 | * @param tlb_size size of the mapped TLB. 52 | */ 53 | Writer(void *base, size_t tlb_size) : base(base), tlb_size(tlb_size) { 54 | assert(base); 55 | assert(tlb_size > 0); 56 | } 57 | 58 | void *base{nullptr}; 59 | size_t tlb_size{0}; 60 | }; 61 | 62 | } // namespace tt::umd 63 | -------------------------------------------------------------------------------- /tests/soc_descs/wormhole_b0_1x1.yaml: -------------------------------------------------------------------------------- 1 | # Note taken from software repo - may need updates. 
2 | grid: 3 | x_size: 10 4 | y_size: 12 5 | 6 | arc: 7 | [ 0-10 ] 8 | 9 | pcie: 10 | [ 0-3 ] 11 | 12 | dram: 13 | [ 14 | [0-0, 0-1, 0-11], 15 | [0-5, 0-6, 0-7], 16 | [5-0, 5-1, 5-11], 17 | [5-2, 5-9, 5-10], 18 | [5-3, 5-4, 5-8], 19 | [5-5, 5-6, 5-7], 20 | ] 21 | 22 | eth: 23 | [ 24 | 9-0, 1-0, 8-0, 2-0, 7-0, 3-0, 6-0, 4-0, 25 | 9-6, 1-6, 8-6, 2-6, 7-6, 3-6, 6-6, 4-6, 26 | ] 27 | 28 | functional_workers: 29 | [ 1-1 ] 30 | 31 | harvested_workers: 32 | [] 33 | 34 | router_only: 35 | [ 36 | 0-2, 0-4, 0-8, 0-9, 37 | 2-1, 3-1, 4-1, 6-1, 7-1, 8-1, 9-1, 38 | 1-2, 2-2, 3-2, 4-2, 6-2, 7-2, 8-2, 9-2, 39 | 1-3, 2-3, 3-3, 4-3, 6-3, 7-3, 8-3, 9-3, 40 | 1-4, 2-4, 3-4, 4-4, 6-4, 7-4, 8-4, 9-4, 41 | 1-5, 2-5, 3-5, 4-5, 6-5, 7-5, 8-5, 9-5, 42 | 1-7, 2-7, 3-7, 4-7, 6-7, 7-7, 8-7, 9-7, 43 | 1-8, 2-8, 3-8, 4-8, 6-8, 7-8, 8-8, 9-8, 44 | 1-9, 2-9, 3-9, 4-9, 6-9, 7-9, 8-9, 9-9, 45 | 1-10, 2-10, 3-10, 4-10, 6-10, 7-10, 8-10, 9-10, 46 | 1-11, 2-11, 3-11, 4-11, 6-11, 7-11, 8-11, 9-11, 47 | ] 48 | 49 | noc0_x_to_noc1_x: 50 | [ 51 | 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 52 | ] 53 | 54 | noc0_y_to_noc1_y: 55 | [ 56 | 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 57 | ] 58 | 59 | worker_l1_size: 60 | 1499136 61 | 62 | dram_bank_size: 63 | 2147483648 64 | 65 | eth_l1_size: 66 | 262144 67 | 68 | arch_name: WORMHOLE_B0 69 | 70 | features: 71 | unpacker: 72 | version: 2 73 | inline_srca_trans_without_srca_trans_instr: True 74 | math: 75 | dst_size_alignment: 32768 76 | packer: 77 | version: 2 78 | overlay: 79 | version: 2 80 | -------------------------------------------------------------------------------- /cmake/sanitizers.cmake: -------------------------------------------------------------------------------- 1 | get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) 2 | if(isMultiConfig) 3 | if(NOT "ASan" IN_LIST CMAKE_CONFIGURATION_TYPES) 4 | list(APPEND CMAKE_CONFIGURATION_TYPES ASan) 5 | endif() 6 | if(NOT "TSan" IN_LIST CMAKE_CONFIGURATION_TYPES) 7 | list(APPEND CMAKE_CONFIGURATION_TYPES TSan) 8 
| endif() 9 | endif() 10 | 11 | set_property( 12 | GLOBAL 13 | APPEND 14 | PROPERTY 15 | DEBUG_CONFIGURATIONS 16 | ASan 17 | TSan 18 | ) 19 | 20 | # ASan, LSan and UBSan do not conflict with each other and are each fast enough that we can combine them. 21 | # Saves us from an explosion of pipelines to test our code. 22 | set(asan_flags "-fsanitize=address -fsanitize=leak -fsanitize=undefined") 23 | set(asan_compile_flags "${asan_flags} -fno-omit-frame-pointer") 24 | set(CMAKE_C_FLAGS_ASAN "${CMAKE_C_FLAGS_RELWITHDEBINFO} ${asan_compile_flags}") 25 | set(CMAKE_CXX_FLAGS_ASAN "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${asan_compile_flags}") 26 | set(CMAKE_EXE_LINKER_FLAGS_ASAN "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} ${asan_compile_flags}") 27 | set(CMAKE_SHARED_LINKER_FLAGS_ASAN "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} ${asan_compile_flags}") 28 | set(CMAKE_MODULE_LINKER_FLAGS_ASAN "${CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO} ${asan_compile_flags}") 29 | 30 | set(tsan_flags "-fsanitize=thread -fno-omit-frame-pointer") 31 | set(CMAKE_C_FLAGS_TSAN "${CMAKE_C_FLAGS_RELWITHDEBINFO} ${tsan_flags}") 32 | set(CMAKE_CXX_FLAGS_TSAN "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${tsan_flags}") 33 | set(CMAKE_EXE_LINKER_FLAGS_TSAN "${CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO} ${tsan_flags}") 34 | set(CMAKE_SHARED_LINKER_FLAGS_TSAN "${CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO} ${tsan_flags}") 35 | set(CMAKE_MODULE_LINKER_FLAGS_TSAN "${CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO} ${tsan_flags}") 36 | -------------------------------------------------------------------------------- /tests/soc_descs/wormhole_b0_8x10.yaml: -------------------------------------------------------------------------------- 1 | # Note taken from software repo - may need updates. 
2 | grid: 3 | x_size: 10 4 | y_size: 12 5 | 6 | arc: 7 | [ 0-10 ] 8 | 9 | pcie: 10 | [ 0-3 ] 11 | 12 | dram: 13 | [ 14 | [0-0, 0-1, 0-11], 15 | [0-5, 0-6, 0-7], 16 | [5-0, 5-1, 5-11], 17 | [5-2, 5-9, 5-10], 18 | [5-3, 5-4, 5-8], 19 | [5-5, 5-6, 5-7], 20 | ] 21 | 22 | eth: 23 | [ 24 | 9-0, 1-0, 8-0, 2-0, 7-0, 3-0, 6-0, 4-0, 25 | 9-6, 1-6, 8-6, 2-6, 7-6, 3-6, 6-6, 4-6, 26 | ] 27 | 28 | functional_workers: 29 | [ 30 | 1-1, 2-1, 3-1, 4-1, 6-1, 7-1, 8-1, 9-1, 31 | 1-2, 2-2, 3-2, 4-2, 6-2, 7-2, 8-2, 9-2, 32 | 1-3, 2-3, 3-3, 4-3, 6-3, 7-3, 8-3, 9-3, 33 | 1-4, 2-4, 3-4, 4-4, 6-4, 7-4, 8-4, 9-4, 34 | 1-5, 2-5, 3-5, 4-5, 6-5, 7-5, 8-5, 9-5, 35 | 1-7, 2-7, 3-7, 4-7, 6-7, 7-7, 8-7, 9-7, 36 | 1-8, 2-8, 3-8, 4-8, 6-8, 7-8, 8-8, 9-8, 37 | 1-9, 2-9, 3-9, 4-9, 6-9, 7-9, 8-9, 9-9, 38 | 1-10, 2-10, 3-10, 4-10, 6-10, 7-10, 8-10, 9-10, 39 | 1-11, 2-11, 3-11, 4-11, 6-11, 7-11, 8-11, 9-11, 40 | ] 41 | 42 | harvested_workers: 43 | [] 44 | 45 | router_only: 46 | [ 47 | 0-2, 0-4, 0-8, 0-9 48 | ] 49 | 50 | noc0_x_to_noc1_x: 51 | [ 52 | 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 53 | ] 54 | 55 | noc0_y_to_noc1_y: 56 | [ 57 | 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 58 | ] 59 | 60 | worker_l1_size: 61 | 1499136 62 | 63 | dram_bank_size: 64 | 2147483648 65 | 66 | eth_l1_size: 67 | 262144 68 | 69 | arch_name: WORMHOLE_B0 70 | 71 | features: 72 | unpacker: 73 | version: 2 74 | inline_srca_trans_without_srca_trans_instr: True 75 | math: 76 | dst_size_alignment: 32768 77 | packer: 78 | version: 2 79 | overlay: 80 | version: 2 81 | -------------------------------------------------------------------------------- /tests/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(test_common INTERFACE) 2 | target_link_libraries( 3 | test_common 4 | INTERFACE 5 | umd::device 6 | umd::Firmware 7 | gtest_main 8 | gtest 9 | pthread 10 | spdlog::spdlog_header_only 11 | fmt::fmt-header-only 12 | tt-logger::tt-logger 13 | $<$:nng> 14 | ) 15 | target_include_directories( 16 | 
test_common 17 | INTERFACE 18 | ${PROJECT_SOURCE_DIR} 19 | ${CMAKE_CURRENT_SOURCE_DIR} 20 | "$" 21 | ) 22 | target_compile_definitions(test_common INTERFACE UMD_TESTS_ROOT_PATH="${CMAKE_CURRENT_SOURCE_DIR}") 23 | 24 | if(MASTER_PROJECT) 25 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/microbenchmark) 26 | endif() 27 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/api) 28 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/baremetal) 29 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/blackhole) 30 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/misc) 31 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/pcie) 32 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/unified) 33 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/wormhole) 34 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/galaxy) 35 | 36 | if(TT_UMD_BUILD_SIMULATION) 37 | message(STATUS "Building ${PROJECT_NAME} with Simulation Tests") 38 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/simulation) 39 | endif() 40 | 41 | add_custom_target( 42 | umd_tests 43 | DEPENDS 44 | unit_tests_blackhole 45 | unit_tests_wormhole 46 | unit_tests_glx 47 | test_pcie_device 48 | api_tests 49 | baremetal_tests 50 | umd_misc_tests 51 | unified_tests 52 | ) 53 | 54 | if(TT_UMD_BUILD_SIMULATION) 55 | add_dependencies(umd_tests simulation_tests) 56 | endif() 57 | -------------------------------------------------------------------------------- /tests/microbenchmark/benchmarks/iommu/README.md: -------------------------------------------------------------------------------- 1 | # IOMMU benchmark 2 | 3 | This benchmark contains tests that measure the performance of different IOMMU operations through UMD and KMD. 4 | 5 | IOMMU operations that UMD does are mapping the buffer through IOMMU and unmapping it when it is not needed anymore. Both operations are done by making an ioctl call to KMD. 
6 | 7 | ## Results 8 | 9 | ### Mapping different sizes with IOMMU 10 | 11 | | Number of pages | Mapping size (MB) | Average map time (ns) | Bandwidth map (MB/s) | Average unmap time (ns) | Bandwidth unmap (MB/s) | 12 | |---|---|---|---|---|---| 13 | | 1.00 | 0.00 | 20418.00 | 191.31 | 21301.00 | 183.38 | 14 | | 2.00 | 0.00 | 21548.00 | 362.56 | 30844.00 | 253.29 | 15 | | 4.00 | 0.00 | 28985.00 | 539.07 | 48018.00 | 325.40 | 16 | | 8.00 | 0.00 | 54813.00 | 570.12 | 101095.00 | 309.12 | 17 | | 16.00 | 0.00 | 85164.00 | 733.88 | 153440.00 | 407.33 | 18 | | 32.00 | 0.00 | 116143.00 | 1076.26 | 407850.00 | 306.49 | 19 | | 64.00 | 0.00 | 261064.00 | 957.62 | 557351.00 | 448.55 | 20 | | 128.00 | 0.00 | 462981.00 | 1079.96 | 1081240.00 | 462.43 | 21 | | 256.00 | 1.00 | 769940.00 | 1298.80 | 2141362.00 | 466.99 | 22 | | 512.00 | 2.00 | 1597899.00 | 1251.64 | 3774593.00 | 529.86 | 23 | | 1024.00 | 4.00 | 3064844.00 | 1305.12 | 8126527.00 | 492.22 | 24 | | 2048.00 | 8.00 | 6625268.00 | 1207.50 | 15175334.00 | 527.17 | 25 | | 4096.00 | 16.00 | 12288348.00 | 1302.05 | 29370120.00 | 544.77 | 26 | | 8192.00 | 32.00 | 24616148.00 | 1299.96 | 60247223.00 | 531.14 | 27 | | 16384.00 | 64.00 | 48471598.00 | 1320.36 | 119874332.00 | 533.89 | 28 | | 32768.00 | 128.00 | 98764664.00 | 1296.01 | 241551922.00 | 529.91 | 29 | | 65536.00 | 256.00 | 199796772.00 | 1281.30 | 481864442.00 | 531.27 | 30 | | 131072.00 | 512.00 | 407482019.00 | 1256.50 | 970738770.00 | 527.43 | 31 | | 262144.00 | 1024.00 | 806153725.00 | 1270.23 | 1895624261.00 | 540.19 | -------------------------------------------------------------------------------- /common/timestamp.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | namespace tt::umd::util { 15 | 16 | class Timestamp { 17 | std::chrono::steady_clock::time_point start; 18 | 19 | public: 20 | Timestamp() : start(std::chrono::steady_clock::now()) {} 21 | 22 | void reset() { start = std::chrono::steady_clock::now(); } 23 | 24 | uint64_t nanoseconds() const { 25 | auto now = std::chrono::steady_clock::now(); 26 | return std::chrono::duration_cast(now - start).count(); 27 | } 28 | 29 | uint64_t microseconds() const { 30 | auto now = std::chrono::steady_clock::now(); 31 | return std::chrono::duration_cast(now - start).count(); 32 | } 33 | 34 | uint64_t milliseconds() const { 35 | auto now = std::chrono::steady_clock::now(); 36 | return std::chrono::duration_cast(now - start).count(); 37 | } 38 | 39 | uint64_t seconds() const { 40 | auto now = std::chrono::steady_clock::now(); 41 | return std::chrono::duration_cast(now - start).count(); 42 | } 43 | 44 | std::string to_string() const { 45 | auto ns = nanoseconds(); 46 | if (ns < 1000) { 47 | return fmt::format("{} ns", ns); 48 | } 49 | auto us = microseconds(); 50 | if (us < 1000) { 51 | return fmt::format("{} μs", us); 52 | } 53 | auto ms = milliseconds(); 54 | if (ms < 1000) { 55 | return fmt::format("{} ms", ms); 56 | } 57 | return fmt::format("{} s", seconds()); 58 | } 59 | }; 60 | 61 | } // namespace tt::umd::util 62 | -------------------------------------------------------------------------------- /.pre-commit-hooks/check-copyright-config.yaml: -------------------------------------------------------------------------------- 1 | DEFAULT: 2 | perform_check: yes # should the check be performed? 3 | # Sections setting this to 'no' don't need to include any other options as they are ignored 4 | # When a file is using a section with the option set to 'no', no checks are performed. 
5 | 6 | # what licenses (or license expressions) are allowed for files in this section 7 | # when setting this option in a section, you need to list all the allowed licenses 8 | allowed_licenses: 9 | - Apache-2.0 10 | license_for_new_files: Apache-2.0 # license to be used when inserting a new copyright notice 11 | new_notice_c: | # notice for new C, CPP, H, HPP and LD files 12 | /* 13 | * SPDX-FileCopyrightText: (c) {years} Tenstorrent Inc. 14 | * 15 | * SPDX-License-Identifier: {license} 16 | */ 17 | new_notice_python: | # notice for new python files 18 | # SPDX-FileCopyrightText: {years} Tenstorrent Inc. 19 | # SPDX-License-Identifier: {license} 20 | 21 | # comment lines matching: 22 | # SPDX-FileCopyrightText: year[-year] Espressif Systems 23 | # or 24 | # SPDX-FileContributor: year[-year] Espressif Systems 25 | # are replaced with this template prefixed with the correct comment notation (# or // or *) and SPDX- notation 26 | espressif_copyright: '{years} Tenstorrent Inc.' 27 | 28 | # You can create your own rules for files or group of files 29 | # examples_and_unit_tests: 30 | # include: 31 | # - 'examples/' 32 | # - 'components/**/test/**' 33 | # - 'components/**/test_apps/**' 34 | # - 'tools/test_apps/**' 35 | # allowed_licenses: 36 | # - Apache-2.0 37 | # - Unlicense 38 | # - CC0-1.0 39 | # license_for_new_files: Unlicense OR CC0-1.0 40 | 41 | # ignore: # You can also select ignoring files here 42 | # perform_check: no # Don't check files from that block 43 | # include: 44 | # - example/file/path -------------------------------------------------------------------------------- /device/api/umd/device/lite_fabric/lf_dev_mem_map.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #define MEM_LOCAL_SIZE (8 * 1024) /* Local memory size -- on erisc1. 
not being shared with base firmware*/ 8 | #define MEM_LOCAL_BASE 0xFFB00000 /* Local memory base address */ 9 | #define MEM_LITE_FABRIC_NOC_ATOMIC_RET_VAL_ADDR 4 10 | 11 | #define LITE_FABRIC_BARRIER 12 12 | 13 | // NOTE: Base firmware data is starting at 0x70000. 14 | // We need to ensure that the Lite Fabric memory does not overlap with it or Metal. 15 | #define MEM_LITE_FABRIC_MEMORY_BASE 0x6A000 16 | #define MEM_LITE_FABRIC_MEMORY_SIZE (24 * 1024) 17 | #define MEM_LITE_FABRIC_MEMORY_END (MEM_LITE_FABRIC_MEMORY_BASE + MEM_LITE_FABRIC_MEMORY_SIZE) 18 | 19 | /* Lite Fabric Memory Layout */ 20 | /* Text (firmware code) section */ 21 | #define LITE_FABRIC_TEXT_START MEM_LITE_FABRIC_MEMORY_BASE 22 | #define LITE_FABRIC_TEXT_SIZE 0x2000 23 | 24 | /* Data section (in L1) */ 25 | #define LITE_FABRIC_DATA_START (LITE_FABRIC_TEXT_START + LITE_FABRIC_TEXT_SIZE) 26 | #define LITE_FABRIC_DATA_SIZE 0x1000 27 | 28 | /* Scratch space for init. Not used. Data is in L1 at this time */ 29 | #define LITE_FABRIC_INIT_SCRATCH (LITE_FABRIC_DATA_START + LITE_FABRIC_DATA_SIZE) 30 | #define LITE_FABRIC_INIT_SCRATCH_SIZE 1024 31 | 32 | /* Configuration area */ 33 | #define LITE_FABRIC_CONFIG_START (LITE_FABRIC_DATA_START + LITE_FABRIC_DATA_SIZE) 34 | #define LITE_FABRIC_CONFIG_SIZE 0x2400 35 | 36 | /* Stack configuration */ 37 | #define LITE_FABRIC_STACK_START (MEM_LOCAL_BASE) 38 | #define LITE_FABRIC_STACK_SIZE 1024 39 | 40 | /* Reset PC for ERISC1 (running lite fabric) */ 41 | #define LITE_FABRIC_RESET_PC (MEM_LOCAL_BASE | 0x14008) 42 | 43 | /* Static assert in bh_hal_eth_asserts.hpp */ 44 | #define MEMORY_LAYOUT_END (LITE_FABRIC_CONFIG_START + LITE_FABRIC_CONFIG_SIZE) 45 | -------------------------------------------------------------------------------- /device/tt_device/remote_communication_lite_fabric.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/tt_device/remote_communication_lite_fabric.hpp" 7 | 8 | namespace tt::umd { 9 | 10 | RemoteCommunicationLiteFabric::RemoteCommunicationLiteFabric(TTDevice* local_tt_device, SysmemManager* sysmem_manager) : 11 | RemoteCommunication(local_tt_device, sysmem_manager) { 12 | host_interface = lite_fabric::LiteFabricMemoryMap::make_host_interface(local_tt_device); 13 | } 14 | 15 | void RemoteCommunicationLiteFabric::read_non_mmio( 16 | tt_xy_pair target_core, 17 | void* dest, 18 | uint64_t core_src, 19 | uint32_t size_in_bytes, 20 | const std::chrono::milliseconds timeout_ms) { 21 | tt_xy_pair eth_core = get_remote_transfer_ethernet_core(); 22 | CoreCoord core_coord = CoreCoord(eth_core.x, eth_core.y, CoreType::ETH, CoordSystem::NOC0); 23 | host_interface.read(dest, size_in_bytes, core_coord, target_core, core_src); 24 | } 25 | 26 | void RemoteCommunicationLiteFabric::write_to_non_mmio( 27 | tt_xy_pair target_core, 28 | const void* src, 29 | uint64_t core_dest, 30 | uint32_t size_in_bytes, 31 | bool broadcast, 32 | std::vector broadcast_header, 33 | const std::chrono::milliseconds timeout_ms) { 34 | // hacking this to be void* from const void* 35 | // TODO: support const void* properly. 36 | tt_xy_pair eth_core = get_remote_transfer_ethernet_core(); 37 | CoreCoord core_coord = CoreCoord(eth_core.x, eth_core.y, CoreType::ETH, CoordSystem::NOC0); 38 | host_interface.write(const_cast(src), size_in_bytes, core_coord, target_core, core_dest); 39 | } 40 | 41 | void RemoteCommunicationLiteFabric::wait_for_non_mmio_flush(const std::chrono::milliseconds timeout_ms) { 42 | // TODO(pjanevski): implement this. 
43 | } 44 | 45 | } // namespace tt::umd 46 | -------------------------------------------------------------------------------- /.vscode/default.launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "umd_api_test", 9 | "type": "cppdbg", 10 | "request": "launch", 11 | "program": "${workspaceFolder}/build/test/umd/api/api_tests", 12 | "args": [ 13 | "--gtest_filter=\"*OpenAllSiliconChips\"", 14 | ], 15 | "stopAtEntry": false, 16 | "cwd": "${workspaceFolder}", 17 | "environment": [], 18 | "externalConsole": false, 19 | "MIMode": "gdb", 20 | "setupCommands": [ 21 | { 22 | "description": "Enable pretty-printing for gdb", 23 | "text": "-enable-pretty-printing", 24 | "ignoreFailures": true 25 | }, 26 | { 27 | "description": "Catch std::out_of_range exceptions", 28 | "text": "catch throw std::out_of_range" 29 | }, 30 | { 31 | "description": "Enable libstdc++ pretty printers", 32 | "text": "python import sys; sys.path.insert(0, '/usr/share/gcc/python')" 33 | }, 34 | { 35 | "description": "Register libstdc++ pretty printers", 36 | "text": 37 | "python from libstdcxx.v6 import register_libstdcxx_printers; register_libstdcxx_printers(gdb)" 38 | } 39 | ], 40 | "miDebuggerPath": "/usr/local/bin/gdb", 41 | "logging": { 42 | "engineLogging": true 43 | } 44 | }, 45 | ] 46 | } 47 | -------------------------------------------------------------------------------- /.github/docker_install_common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install essential packages first (required for HTTPS and GPG operations) 4 | apt-get update && apt-get install -y \ 5 | ca-certificates \ 6 | gnupg \ 7 | wget 8 | 9 | # Add Kitware 
repository for latest CMake 10 | wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null 11 | echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $OS_CODENAME main" | tee /etc/apt/sources.list.d/kitware.list >/dev/null 12 | 13 | # Install build and runtime deps 14 | apt-get update && apt-get install -y \ 15 | software-properties-common \ 16 | build-essential \ 17 | cmake \ 18 | ninja-build \ 19 | git \ 20 | git-lfs \ 21 | libhwloc-dev \ 22 | libgtest-dev \ 23 | libyaml-cpp-dev \ 24 | libboost-all-dev \ 25 | wget \ 26 | yamllint \ 27 | python3-dev \ 28 | python3-pip \ 29 | xxd \ 30 | rpm \ 31 | dpkg-dev \ 32 | fakeroot 33 | 34 | # Install Python dependencies 35 | python3 -m pip install --no-cache-dir pytest 36 | 37 | # gcc-12 should be available only for ubuntu 22 and not 20 38 | if apt-cache show gcc-12 > /dev/null 2>&1; then 39 | echo "gcc-12 is available. Installing..." 40 | apt-get install -y gcc-12 g++-12 41 | else 42 | echo "gcc-12 is not available in the repository." 43 | fi 44 | 45 | # Install clang 17 46 | wget https://apt.llvm.org/llvm.sh && \ 47 | chmod u+x llvm.sh && \ 48 | ./llvm.sh 17 && \ 49 | apt install -y libc++-17-dev libc++abi-17-dev && \ 50 | ln -s /usr/bin/clang-17 /usr/bin/clang && \ 51 | ln -s /usr/bin/clang++-17 /usr/bin/clang++ 52 | 53 | # Install clang-format 54 | apt install -y clang-format-17 && \ 55 | ln -s /usr/bin/clang-format-17 /usr/bin/clang-format 56 | 57 | # Install clang-tidy-17 58 | apt-get install -y clang-tidy-17 59 | -------------------------------------------------------------------------------- /device/api/umd/device/driver_atomics.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2023 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #if defined(__x86_64__) || defined(__i386__) 9 | #include 10 | #endif 11 | 12 | namespace tt_driver_atomics { 13 | 14 | #if defined(__x86_64__) || defined(__i386__) 15 | // Store-Any barrier. 16 | static inline __attribute__((always_inline)) void sfence() { _mm_sfence(); } 17 | 18 | // Load-Any barrier. 19 | static inline __attribute__((always_inline)) void lfence() { _mm_lfence(); } 20 | 21 | // Any-Any barrier. 22 | static inline __attribute__((always_inline)) void mfence() { _mm_mfence(); } 23 | 24 | #elif defined(__ARM_ARCH) 25 | 26 | static inline __attribute__((always_inline)) void sfence() { 27 | // Full memory barrier (full system). ARM does not have a Store-Any barrier. 28 | // https://developer.arm.com/documentation/100941/0101/Barriers 29 | asm volatile("DMB SY" : : : "memory"); 30 | } 31 | 32 | static inline __attribute__((always_inline)) void lfence() { 33 | // Load-Any barrier (full system) 34 | // https://developer.arm.com/documentation/100941/0101/Barriers 35 | asm volatile("DMB LD" : : : "memory"); 36 | } 37 | 38 | static inline __attribute__((always_inline)) void mfence() { 39 | // Full memory barrier (full system). 
40 | // https://developer.arm.com/documentation/100941/0101/Barriers 41 | asm volatile("DMB SY" : : : "memory"); 42 | } 43 | 44 | #elif defined(__riscv) 45 | 46 | static inline __attribute__((always_inline)) void sfence() { asm volatile("fence ow, ow" : : : "memory"); } 47 | 48 | static inline __attribute__((always_inline)) void lfence() { asm volatile("fence ir, ir" : : : "memory"); } 49 | 50 | static inline __attribute__((always_inline)) void mfence() { asm volatile("fence iorw, iorw" : : : "memory"); } 51 | 52 | #else 53 | #error "Unsupported architecture" 54 | #endif 55 | 56 | } // namespace tt_driver_atomics 57 | -------------------------------------------------------------------------------- /device/api/umd/device/topology/topology_utils.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include "umd/device/types/cluster_types.hpp" 10 | 11 | namespace tt::umd { 12 | 13 | template 14 | void size_buffer_to_capacity(std::vector& data_buf, std::size_t size_in_bytes) { 15 | std::size_t target_size = 0; 16 | if (size_in_bytes > 0) { 17 | target_size = ((size_in_bytes - 1) / sizeof(T)) + 1; 18 | } 19 | data_buf.resize(target_size); 20 | } 21 | 22 | static inline uint64_t get_sys_addr( 23 | const DriverNocParams& noc_params, 24 | uint32_t chip_x, 25 | uint32_t chip_y, 26 | uint32_t noc_x, 27 | uint32_t noc_y, 28 | uint64_t offset) { 29 | uint64_t result = chip_y; 30 | uint64_t noc_addr_local_bits_mask = (1UL << noc_params.noc_addr_local_bits) - 1; 31 | result <<= noc_params.noc_addr_node_id_bits; 32 | result |= chip_x; 33 | result <<= noc_params.noc_addr_node_id_bits; 34 | result |= noc_y; 35 | result <<= noc_params.noc_addr_node_id_bits; 36 | result |= noc_x; 37 | result <<= noc_params.noc_addr_local_bits; 38 | result |= (noc_addr_local_bits_mask & offset); 39 | return result; 40 | } 41 
| 42 | static inline uint16_t get_sys_rack( 43 | const DriverEthInterfaceParams& eth_interface_params, uint32_t rack_x, uint32_t rack_y) { 44 | uint32_t result = rack_y; 45 | result <<= eth_interface_params.eth_rack_coord_width; 46 | result |= rack_x; 47 | 48 | return result; 49 | } 50 | 51 | static inline bool is_non_mmio_cmd_q_full( 52 | const DriverEthInterfaceParams& eth_interface_params, uint32_t curr_wptr, uint32_t curr_rptr) { 53 | return (curr_wptr != curr_rptr) && ((curr_wptr & eth_interface_params.cmd_buf_size_mask) == 54 | (curr_rptr & eth_interface_params.cmd_buf_size_mask)); 55 | } 56 | 57 | } // namespace tt::umd 58 | -------------------------------------------------------------------------------- /device/api/umd/device/chip_helpers/tlb_manager.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "umd/device/pcie/tlb_window.hpp" 12 | #include "umd/device/types/arch.hpp" 13 | #include "umd/device/types/tlb.hpp" 14 | #include "umd/device/types/xy_pair.hpp" 15 | 16 | namespace tt::umd { 17 | 18 | class Writer; 19 | class TTDevice; 20 | 21 | class TLBManager { 22 | public: 23 | TLBManager(TTDevice* tt_device); 24 | 25 | // All tt_xy_pairs should be in TRANSLATED coords. 26 | void configure_tlb(tt_xy_pair core, size_t tlb_size, uint64_t address, uint64_t ordering); 27 | void configure_tlb_kmd(tt_xy_pair core, size_t tlb_size, uint64_t address, uint64_t ordering); 28 | bool is_tlb_mapped(tt_xy_pair core); 29 | bool is_tlb_mapped(tt_xy_pair core, uint64_t address, uint32_t size_in_bytes); 30 | 31 | Writer get_static_tlb_writer(tt_xy_pair core); 32 | tlb_configuration get_tlb_configuration(tt_xy_pair core); 33 | 34 | // TODO: the following members will be moved to private once enough stuff is moved out of cluster. 
35 | std::unordered_map tlb_config_map_; 36 | std::unordered_map map_core_to_tlb_; 37 | std::unordered_map> tlb_windows_; 38 | 39 | TTDevice* get_tt_device() { return tt_device_; } 40 | 41 | TlbWindow* get_tlb_window(const tt_xy_pair core); 42 | 43 | std::unique_ptr allocate_tlb_window( 44 | tlb_data config, const TlbMapping mapping = TlbMapping::WC, const size_t tlb_size = 0); 45 | 46 | private: 47 | // TODO: move these functions to the layer below, or make separate functions 48 | // to handle getting TLBs per architecture. 49 | static const std::vector get_tlb_arch_sizes(const tt::ARCH arch); 50 | 51 | TTDevice* tt_device_; 52 | }; 53 | 54 | } // namespace tt::umd 55 | -------------------------------------------------------------------------------- /device/api/umd/device/jtag/jtag.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent AI ULC 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #pragma once 5 | 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | struct DlCloser { 16 | void operator()(void* handle) const; 17 | }; 18 | 19 | using DlHandle = std::unique_ptr; 20 | 21 | class Jtag { 22 | public: 23 | explicit Jtag(const char* lib_path); 24 | 25 | int open_jlink_by_serial_wrapper(unsigned int serial_number); 26 | int open_jlink_wrapper(); 27 | uint32_t read_tdr(const char* client, uint32_t reg_offset); 28 | uint32_t readmon_tdr(const char* client, uint32_t id, uint32_t reg_offset); 29 | void writemon_tdr(const char* client, uint32_t id, uint32_t reg_offset, uint32_t data); 30 | void write_tdr(const char* client, uint32_t reg_offset, uint32_t data); 31 | void dbus_memdump( 32 | const char* client_name, 33 | const char* mem, 34 | const char* thread_id_name, 35 | const char* start_addr, 36 | const char* end_addr); 37 | void dbus_sigdump( 38 | const char* client_name, uint32_t dbg_client_id, uint32_t 
dbg_signal_sel_start, uint32_t dbg_signal_sel_end); 39 | void write_axi(uint32_t reg_addr, uint32_t data); 40 | void write_noc_xy(uint32_t node_x_id, uint32_t node_y_id, uint64_t noc_addr, uint32_t noc_data, uint8_t noc_id); 41 | uint32_t read_axi(uint32_t reg_addr); 42 | uint32_t read_noc_xy(uint32_t node_x_id, uint32_t node_y_id, uint64_t noc_addr, uint8_t noc_id); 43 | std::vector enumerate_jlink(); 44 | void close_jlink(); 45 | uint32_t read_id_raw(); 46 | uint32_t read_id(); 47 | uint32_t get_device_family(); 48 | 49 | private: 50 | static DlHandle handle; 51 | std::unordered_map func_map; 52 | 53 | void* load_function(const char* name); 54 | void openLibrary(const std::string& filePath, int flags = RTLD_LAZY); 55 | }; 56 | -------------------------------------------------------------------------------- /device/api/umd/device/pcie/tlb_handle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | #include 10 | 11 | #include "umd/device/tt_kmd_lib/tt_kmd_lib.h" 12 | #include "umd/device/types/tlb.hpp" 13 | 14 | namespace tt::umd { 15 | 16 | class TlbHandle { 17 | public: 18 | /** 19 | * Constructor for TlbHandle. 20 | * Allocates a TLB from KMD of the specified size and maps it to the user space. 21 | * 22 | * @param tt_device Pointer to the tt_device structure representing the PCI device. 23 | * @param size Size of the TLB to allocate. 24 | * @param tlb_mapping Type of TLB mapping (UC or WC). The first mapping of TLB determines its caching behavior. 25 | */ 26 | TlbHandle(tt_device_t* tt_device, size_t size, const TlbMapping tlb_mapping = TlbMapping::UC); 27 | 28 | ~TlbHandle() noexcept; 29 | 30 | /** 31 | * Configures the TLB with the provided configuration. 32 | * 33 | * @param new_config The new configuration for the TLB. 
# When cmake is invoked directly on this directory, the tools are built as a standalone
# project against an installed UMD package instead of as part of the main UMD build.
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    cmake_minimum_required(VERSION 3.16)
    project(tt_umd_tools)

    message(STATUS "Building tools as standalone project from UMD install artifacts")

    include(${PROJECT_SOURCE_DIR}/../cmake/example_client.cmake)
endif()

# Interface target carrying the link dependencies shared by every tool.
add_library(tools_common INTERFACE)
target_link_libraries(
    tools_common
    INTERFACE
        umd::device
        tt-logger::tt-logger
        cxxopts::cxxopts
)

# Every tool is a single-source executable named after its .cpp file and placed in tools/umd/.
set(UMD_TOOL_NAMES
    telemetry
    topology
    harvesting
    system_health
)

foreach(tool_name IN LISTS UMD_TOOL_NAMES)
    add_executable(${tool_name} ${tool_name}.cpp)
    target_link_libraries(${tool_name} PRIVATE tools_common)
    set_target_properties(
        ${tool_name}
        PROPERTIES
            RUNTIME_OUTPUT_DIRECTORY
                ${CMAKE_BINARY_DIR}/tools/umd/
            OUTPUT_NAME
                ${tool_name}
    )
endforeach()

# Convenience target that builds all tools.
add_custom_target(
    umd_tools
    DEPENDS
        ${UMD_TOOL_NAMES}
)
22 | */ 23 | enum class ARCH { 24 | GRAYSKULL = 1, 25 | WORMHOLE_B0 = 2, 26 | BLACKHOLE = 3, 27 | QUASAR = 4, 28 | Invalid = 0xFF, 29 | }; 30 | 31 | static inline tt::ARCH arch_from_str(const std::string &arch_str) { 32 | std::string arch_str_lower = to_lower(arch_str); 33 | 34 | if ((arch_str_lower == "wormhole") || (arch_str_lower == "wormhole_b0")) { 35 | return tt::ARCH::WORMHOLE_B0; 36 | } else if (arch_str_lower == "blackhole") { 37 | return tt::ARCH::BLACKHOLE; 38 | } else if (arch_str_lower == "quasar") { 39 | return tt::ARCH::QUASAR; 40 | } else { 41 | return tt::ARCH::Invalid; 42 | } 43 | } 44 | 45 | static inline std::string arch_to_str(const tt::ARCH arch) { 46 | switch (arch) { 47 | case tt::ARCH::WORMHOLE_B0: 48 | return "wormhole_b0"; 49 | case tt::ARCH::BLACKHOLE: 50 | return "blackhole"; 51 | case tt::ARCH::QUASAR: 52 | return "quasar"; 53 | case tt::ARCH::Invalid: 54 | default: 55 | return "Invalid"; 56 | } 57 | } 58 | 59 | static inline std::ostream &operator<<(std::ostream &out, const tt::ARCH &arch) { return out << arch_to_str(arch); } 60 | 61 | } // namespace tt 62 | 63 | namespace fmt { 64 | template <> 65 | struct formatter { 66 | constexpr auto parse(fmt::format_parse_context &ctx) { return ctx.begin(); } 67 | 68 | template 69 | constexpr auto format(tt::ARCH const &arch, Context &ctx) const { 70 | return format_to(ctx.out(), "{}", tt::arch_to_str(arch)); 71 | } 72 | }; 73 | } // namespace fmt 74 | -------------------------------------------------------------------------------- /cmake/example_client.cmake: -------------------------------------------------------------------------------- 1 | # This file is meant as an example CMakeLists.txt of how to build a client from the installed UMD artifacts. 2 | # It also contains some default third party dependencies that might be needed by the client, but are needed for some UMD 'clients' such as tools and examples. 
3 | # You can copy some parts or whole CMake configuration from this file to your client's CMakeLists.txt file. 4 | 5 | # We have to manually add all third_party dependencies here since we are building from install artifacts 6 | include(${CMAKE_CURRENT_LIST_DIR}/CPM.cmake) 7 | CPMAddPackage(NAME fmt GITHUB_REPOSITORY fmtlib/fmt GIT_TAG 11.1.4) 8 | CPMAddPackage( 9 | NAME spdlog 10 | GITHUB_REPOSITORY gabime/spdlog 11 | VERSION 1.15.2 12 | OPTIONS 13 | "CMAKE_MESSAGE_LOG_LEVEL NOTICE" 14 | "SPDLOG_FMT_EXTERNAL_HO ON" 15 | "SPDLOG_INSTALL ON" 16 | ) 17 | CPMAddPackage(NAME tt-logger GITHUB_REPOSITORY tenstorrent/tt-logger VERSION 1.1.6) 18 | CPMAddPackage( 19 | NAME cxxopts 20 | GITHUB_REPOSITORY jarro2783/cxxopts 21 | GIT_TAG 22 | dbf4c6a66816f6c3872b46cc6af119ad227e04e1 #version 3.2.1 + patches 23 | OPTIONS 24 | "CMAKE_MESSAGE_LOG_LEVEL NOTICE" 25 | ) 26 | CPMAddPackage(NAME nanobind GITHUB_REPOSITORY wjakob/nanobind VERSION 2.7.0 OPTIONS "CMAKE_MESSAGE_LOG_LEVEL NOTICE") 27 | 28 | # Find the installed UMD package 29 | find_package(umd QUIET) 30 | if(NOT umd_FOUND) 31 | message( 32 | FATAL_ERROR 33 | "UMD package not found! When building from install artifacts, you need to tell CMake where to find the installed UMD package.\n" 34 | "\n" 35 | "Solutions:\n" 36 | " 1. Use CMAKE_PREFIX_PATH:\n" 37 | " cmake . -DCMAKE_PREFIX_PATH=/path/to/umd/install\n" 38 | "\n" 39 | " 2. Use umd_DIR directly:\n" 40 | " cmake . -Dumd_DIR=/path/to/umd/install/lib/cmake/umd\n" 41 | "\n" 42 | " 3. Set environment variable:\n" 43 | " export CMAKE_PREFIX_PATH=/path/to/umd/install\n" 44 | "\n" 45 | "The install path is where you ran: cmake --install --prefix " 46 | ) 47 | endif() 48 | -------------------------------------------------------------------------------- /tests/test_utils/setup_risc_cores.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: © 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include "assembly_programs_for_tests.hpp" 8 | #include "umd/device/cluster.hpp" 9 | 10 | using namespace tt; 11 | using namespace tt::umd; 12 | 13 | namespace test_utils { 14 | 15 | void setup_risc_cores_on_cluster(Cluster* cluster) { 16 | auto architecture = cluster->get_chip(0)->get_tt_device()->get_arch(); 17 | std::array brisc_program_default{}; 18 | std::copy( 19 | brisc_configuration_program_default.cbegin(), 20 | brisc_configuration_program_default.cend(), 21 | std::next(brisc_program_default.begin(), 1)); 22 | 23 | switch (architecture) { 24 | case tt::ARCH::WORMHOLE_B0: 25 | brisc_program_default[0] = WORMHOLE_BRISC_BASE_INSTRUCTION; 26 | break; 27 | case tt::ARCH::BLACKHOLE: 28 | brisc_program_default[0] = BLACKHOLE_BRISC_BASE_INSTRUCTION; 29 | break; 30 | default: 31 | return; 32 | } 33 | 34 | for (auto& chip_id : cluster->get_target_device_ids()) { 35 | for (const CoreCoord& tensix_core : cluster->get_soc_descriptor(chip_id).get_cores(CoreType::TENSIX)) { 36 | auto chip = cluster->get_chip(chip_id); 37 | auto core = cluster->get_soc_descriptor(chip_id).translate_coord_to(tensix_core, CoordSystem::TRANSLATED); 38 | 39 | cluster->assert_risc_reset(chip_id, core, RiscType::ALL_TENSIX); 40 | 41 | cluster->l1_membar(chip_id, {core}); 42 | 43 | cluster->write_to_device( 44 | brisc_program_default.data(), brisc_program_default.size() * sizeof(std::uint32_t), chip_id, core, 0); 45 | 46 | cluster->l1_membar(chip_id, {core}); 47 | 48 | cluster->deassert_risc_reset(chip_id, core, RiscType::BRISC); 49 | 50 | cluster->l1_membar(chip_id, {core}); 51 | 52 | cluster->assert_risc_reset(chip_id, core, RiscType::ALL_TENSIX); 53 | 54 | cluster->l1_membar(chip_id, {core}); 55 | } 56 | } 57 | } 58 | 59 | } // namespace test_utils 60 | -------------------------------------------------------------------------------- /device/arc/smbus_arc_telemetry_reader.cpp: 
-------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/arc/smbus_arc_telemetry_reader.hpp" 7 | 8 | #include "umd/device/arch/wormhole_implementation.hpp" 9 | #include "umd/device/types/wormhole_telemetry.hpp" 10 | 11 | extern bool umd_use_noc1; 12 | 13 | namespace tt::umd { 14 | 15 | SmBusArcTelemetryReader::SmBusArcTelemetryReader(TTDevice* tt_device) : ArcTelemetryReader(tt_device) { 16 | arc_core = !umd_use_noc1 ? wormhole::ARC_CORES_NOC0[0] 17 | : tt_xy_pair( 18 | wormhole::NOC0_X_TO_NOC1_X[wormhole::ARC_CORES_NOC0[0].x], 19 | wormhole::NOC0_Y_TO_NOC1_Y[wormhole::ARC_CORES_NOC0[0].y]); 20 | get_telemetry_address(); 21 | } 22 | 23 | void SmBusArcTelemetryReader::get_telemetry_address() { 24 | std::vector arc_msg_return_values = {0}; 25 | uint32_t exit_code = tt_device->get_arc_messenger()->send_message( 26 | wormhole::ARC_MSG_COMMON_PREFIX | (uint32_t)wormhole::arc_message_type::GET_SMBUS_TELEMETRY_ADDR, 27 | arc_msg_return_values, 28 | {0, 0}); 29 | 30 | static constexpr uint64_t noc_telemetry_offset = 0x810000000; 31 | telemetry_base_noc_addr = arc_msg_return_values[0] + noc_telemetry_offset; 32 | } 33 | 34 | uint32_t SmBusArcTelemetryReader::read_entry(const uint8_t telemetry_tag) { 35 | if (!is_entry_available(telemetry_tag)) { 36 | throw std::runtime_error(fmt::format( 37 | "Telemetry entry {} not available. 
// Fills the first n bytes of data with pseudo-random values.
//
// The generator is a function-local static std::mt19937_64 seeded once from std::random_device.
// Random 64-bit words are copied into the buffer byte by byte, so data may have any alignment;
// storing through a uint64_t* obtained by casting data would be a misaligned access and a
// strict-aliasing violation. A call with n == 0 writes nothing.
inline void fill_with_random_bytes(uint8_t* data, size_t n) {
    static std::random_device rd;
    static std::mt19937_64 gen(rd());

    constexpr size_t word_size = sizeof(uint64_t);
    const size_t full_words = n / word_size;

    for (size_t word_index = 0; word_index < full_words; ++word_index) {
        const uint64_t word = gen();
        // Reading an object's bytes through unsigned char is always permitted.
        std::copy_n(reinterpret_cast<const unsigned char*>(&word), word_size, data + word_index * word_size);
    }

    // Handle remaining bytes.
    for (size_t i = full_words * word_size; i < n; ++i) {
        data[i] = static_cast<uint8_t>(gen());
    }
}
31 | Cluster& device, 32 | tt_multichip_core_addr sender_core, 33 | std::vector receiver_cores, 34 | uint32_t size) { 35 | std::vector readback_vec = {}; 36 | test_utils::read_data_from_device( 37 | device, 38 | readback_vec, 39 | sender_core.chip, 40 | device.get_soc_descriptor(sender_core.chip).get_coord_at(sender_core.core, sender_core.core.coord_system), 41 | sender_core.addr, 42 | size); 43 | for (const auto& receiver_core : receiver_cores) { 44 | device.write_to_device( 45 | readback_vec.data(), 46 | readback_vec.size() * sizeof(std::uint32_t), 47 | receiver_core.chip, 48 | device.get_soc_descriptor(receiver_core.chip) 49 | .get_coord_at(receiver_core.core, receiver_core.core.coord_system), 50 | receiver_core.addr); 51 | } 52 | device.wait_for_non_mmio_flush(); // Barrier to ensure that all writes over ethernet were commited 53 | 54 | return; 55 | } 56 | -------------------------------------------------------------------------------- /device/api/umd/device/pcie/tlb_window.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #pragma once 7 | 8 | #include 9 | 10 | #include "umd/device/pcie/tlb_handle.hpp" 11 | 12 | namespace tt::umd { 13 | 14 | class TlbWindow { 15 | public: 16 | TlbWindow(std::unique_ptr handle, const tlb_data config); 17 | 18 | void write32(uint64_t offset, uint32_t value); 19 | 20 | uint32_t read32(uint64_t offset); 21 | 22 | void write_register(uint64_t offset, const void* data, size_t size); 23 | 24 | void read_register(uint64_t offset, void* data, size_t size); 25 | 26 | void write_block(uint64_t offset, const void* data, size_t size); 27 | 28 | void read_block(uint64_t offset, void* data, size_t size); 29 | 30 | TlbHandle& handle_ref() const; 31 | 32 | size_t get_size() const; 33 | 34 | void configure(const tlb_data& new_config); 35 | 36 | uint64_t get_base_address() const; 37 | 38 | private: 39 | void validate(uint64_t offset, size_t size) const; 40 | 41 | uint64_t get_total_offset(uint64_t offset) const; 42 | 43 | // Custom device memcpy. This is only safe for memory-like regions on the device (Tensix L1, DRAM, ARC CSM). 44 | // Both routines assume that misaligned accesses are permitted on host memory. 45 | // 46 | // 1. AARCH64 device memory does not allow unaligned accesses (including pair loads/stores), 47 | // which glibc's memcpy may perform when unrolling. This affects from and to device. 48 | // 2. syseng#3487 WH GDDR5 controller has a bug when 1-byte writes are temporarily adjacent 49 | // to 2-byte writes. We avoid ever performing a 1-byte write to the device. This only affects to device. 
50 | static void memcpy_from_device(void* dest, const void* src, std::size_t num_bytes); 51 | static void memcpy_to_device(void* dest, const void* src, std::size_t num_bytes); 52 | 53 | void write_regs(volatile uint32_t* dest, const uint32_t* src, uint32_t word_len); 54 | 55 | void read_regs(void* src_reg, uint32_t word_len, void* data); 56 | 57 | std::unique_ptr tlb_handle; 58 | uint64_t offset_from_aligned_addr = 0; 59 | }; 60 | 61 | } // namespace tt::umd 62 | -------------------------------------------------------------------------------- /.github/workflows/build-image.yml: -------------------------------------------------------------------------------- 1 | # This workflow is intended to be called manually when a new Docker image is needed. 2 | name: Build and Publish Docker Image 3 | 4 | on: 5 | workflow_dispatch: 6 | inputs: 7 | timeout: 8 | required: true 9 | description: 'The timeout for the job in minutes' 10 | type: number 11 | default: 15 12 | 13 | jobs: 14 | build: 15 | # Add permissions for writing packages 16 | permissions: 17 | packages: write 18 | 19 | # Due to parsing bug, fromJSON is used to convert string to number 20 | timeout-minutes: ${{ fromJSON(inputs.timeout) }} 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | build: [ 25 | {name: ubuntu-22.04, runner: ubuntu-22.04}, 26 | {name: ubuntu-24.04, runner: ubuntu-24.04}, 27 | {name: fedora-39, runner: ubuntu-22.04}, 28 | {name: manylinux, runner: ubuntu-22.04}, 29 | ] 30 | 31 | name: Building docker image ${{ matrix.build.name }} 32 | runs-on: ${{ matrix.build.runner }} 33 | 34 | steps: 35 | - name: Set environment variable 36 | run: echo "CI_IMAGE_NAME=ghcr.io/${{ github.repository }}/tt-umd-ci-${{ matrix.build.name }}" >> $GITHUB_ENV 37 | 38 | - name: Fix permissions 39 | run: sudo chmod 777 -R $GITHUB_WORKSPACE 40 | 41 | - name: Checkout repository 42 | uses: actions/checkout@v4 43 | with: 44 | submodules: recursive 45 | 46 | - name: Set up Docker Buildx 47 | uses: 
docker/setup-buildx-action@v3 48 | 49 | - name: Log in to GitHub Container Registry 50 | uses: docker/login-action@v3 51 | with: 52 | registry: ghcr.io 53 | username: ${{ github.repository_owner }} 54 | password: ${{ secrets.GITHUB_TOKEN }} 55 | 56 | - name: Build and export base Docker image 57 | uses: docker/build-push-action@v6 58 | with: 59 | context: .github 60 | file: .github/${{ matrix.build.name }}.Dockerfile 61 | push: true 62 | build-args: | 63 | GIT_SHA=${{ github.sha }} 64 | tags: | 65 | ${{ env.CI_IMAGE_NAME}}:${{ github.sha }} 66 | ${{ env.CI_IMAGE_NAME}}:latest 67 | -------------------------------------------------------------------------------- /device/arc/arc_messenger.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/arc/arc_messenger.hpp" 7 | 8 | #include "umd/device/arc/blackhole_arc_messenger.hpp" 9 | #include "umd/device/arc/wormhole_arc_messenger.hpp" 10 | #include "umd/device/tt_device/tt_device.hpp" 11 | #include "umd/device/utils/common.hpp" 12 | 13 | namespace tt::umd { 14 | 15 | std::unique_ptr ArcMessenger::create_arc_messenger(TTDevice* tt_device) { 16 | tt::ARCH arch = tt_device->get_arch(); 17 | 18 | switch (arch) { 19 | case tt::ARCH::WORMHOLE_B0: 20 | return std::make_unique(tt_device); 21 | break; 22 | case tt::ARCH::BLACKHOLE: 23 | return std::make_unique(tt_device); 24 | break; 25 | default: 26 | throw std::runtime_error("Unsupported architecture for creating ArcMessenger."); 27 | } 28 | } 29 | 30 | ArcMessenger::ArcMessenger(TTDevice* tt_device) : tt_device(tt_device) { 31 | lock_manager.initialize_mutex( 32 | MutexType::ARC_MSG, tt_device->get_communication_device_id(), tt_device->get_communication_device_type()); 33 | lock_manager.initialize_mutex( 34 | MutexType::REMOTE_ARC_MSG, 35 | tt_device->get_communication_device_id(), 36 | 
tt_device->get_communication_device_type()); 37 | // TODO: Remove this once we have proper mutex usage. 38 | lock_manager.initialize_mutex(MutexType::ARC_MSG); 39 | } 40 | 41 | uint32_t ArcMessenger::send_message( 42 | const uint32_t msg_code, const std::vector& args, const std::chrono::milliseconds timeout_ms) { 43 | std::vector return_values; 44 | return send_message(msg_code, return_values, args, timeout_ms); 45 | } 46 | 47 | ArcMessenger::~ArcMessenger() { 48 | lock_manager.clear_mutex( 49 | MutexType::ARC_MSG, tt_device->get_communication_device_id(), tt_device->get_communication_device_type()); 50 | lock_manager.clear_mutex( 51 | MutexType::REMOTE_ARC_MSG, 52 | tt_device->get_communication_device_id(), 53 | tt_device->get_communication_device_type()); 54 | } 55 | 56 | } // namespace tt::umd 57 | -------------------------------------------------------------------------------- /tests/api/test_tlb_manager.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | // This file holds Chip specific API examples. 6 | 7 | #include 8 | 9 | #include "tests/test_utils/device_test_utils.hpp" 10 | #include "umd/device/soc_descriptor.hpp" 11 | #include "umd/device/tt_device/tt_device.hpp" 12 | #include "umd/device/tt_io.hpp" 13 | 14 | using namespace tt::umd; 15 | 16 | // TODO: Once default auto TLB setup is in, check it is setup properly. 17 | TEST(ApiTLBManager, ManualTLBConfiguration) { 18 | std::vector pci_device_ids = PCIDevice::enumerate_devices(); 19 | 20 | for (int pci_device_id : pci_device_ids) { 21 | std::unique_ptr tt_device = TTDevice::create(pci_device_id); 22 | const size_t tlb_tensix_size = tt_device->get_arch() == tt::ARCH::WORMHOLE_B0 ? 
(1 << 20) : (1 << 21); 23 | tt_device->init_tt_device(); 24 | 25 | std::unique_ptr tlb_manager = std::make_unique(tt_device.get()); 26 | ChipInfo chip_info = tt_device->get_chip_info(); 27 | 28 | SocDescriptor soc_desc(tt_device->get_arch(), chip_info); 29 | 30 | std::int32_t c_zero_address = 0; 31 | 32 | for (CoreCoord translated_core : soc_desc.get_cores(CoreType::TENSIX, CoordSystem::TRANSLATED)) { 33 | tlb_manager->configure_tlb(translated_core, tlb_tensix_size, c_zero_address, tlb_data::Relaxed); 34 | } 35 | 36 | // So now that we have configured TLBs we can use it to interface with the TTDevice. 37 | auto any_worker_translated_core = soc_desc.get_cores(CoreType::TENSIX, CoordSystem::TRANSLATED)[0]; 38 | tlb_configuration tlb_description = tlb_manager->get_tlb_configuration(any_worker_translated_core); 39 | 40 | // TODO: Maybe accept tlb_index only? 41 | uint64_t address_l1_to_write = 0; 42 | std::vector buffer_to_write = {0x01, 0x02, 0x03, 0x04}; 43 | // Writing to TLB over Writer class. 44 | // TODO: This should be converted to AbstractIO writer. 45 | Writer writer = tlb_manager->get_static_tlb_writer(any_worker_translated_core); 46 | writer.write(address_l1_to_write, buffer_to_write[0]); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /device/api/umd/device/firmware/wormhole_18_3_firmware_info_provider.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include "umd/device/firmware/wormhole_18_7_firmware_info_provider.hpp" 12 | 13 | namespace tt::umd { 14 | 15 | /* This class captures Wormhole firmware versions up to version 18.3.0. 
16 | * Wormhole devices before this version use SM bus telemetry so all the data 17 | * is read from that telemetry which is completely different from the newer telemetry 18 | * used in 18.4.0 and later versions. 19 | */ 20 | class Wormhole_18_3_FirmwareInfoProvider : public Wormhole_18_7_FirmwareInfoProvider { 21 | public: 22 | Wormhole_18_3_FirmwareInfoProvider(TTDevice* tt_device); 23 | 24 | uint64_t get_board_id() const override; 25 | 26 | uint32_t get_eth_fw_version() const override; 27 | 28 | std::optional get_eth_fw_version_semver() const override; 29 | 30 | std::optional get_gddr_fw_version() const override; 31 | 32 | std::optional get_cm_fw_version() const override; 33 | 34 | std::optional get_dm_app_fw_version() const override; 35 | 36 | std::optional get_dm_bl_fw_version() const override; 37 | 38 | std::optional get_tt_flash_version() const override; 39 | 40 | double get_asic_temperature() const override; 41 | 42 | std::vector get_dram_training_status(uint32_t num_dram_channels) const override; 43 | 44 | uint32_t get_max_clock_freq() const override; 45 | 46 | uint8_t get_asic_location() const override; 47 | 48 | std::optional get_aiclk() const override; 49 | 50 | std::optional get_axiclk() const override; 51 | 52 | std::optional get_arcclk() const override; 53 | 54 | std::optional get_fan_speed() const override; 55 | 56 | std::optional get_tdp() const override; 57 | 58 | std::optional get_tdc() const override; 59 | 60 | std::optional get_vcore() const override; 61 | 62 | std::optional get_board_temperature() const override; 63 | 64 | uint32_t get_heartbeat() const override; 65 | }; 66 | 67 | } // namespace tt::umd 68 | -------------------------------------------------------------------------------- /tests/api/test_arc_telemetry.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include 5 | 6 | #include 7 | 8 | #include "umd/device/arc/arc_telemetry_reader.hpp" 9 | #include "umd/device/types/telemetry.hpp" 10 | 11 | using namespace tt; 12 | using namespace tt::umd; 13 | 14 | TEST(TestTelemetry, BasicTelemetry) { 15 | std::vector pci_device_ids = PCIDevice::enumerate_devices(); 16 | 17 | for (int pci_device_id : pci_device_ids) { 18 | std::unique_ptr tt_device = TTDevice::create(pci_device_id); 19 | tt_device->init_tt_device(); 20 | if (tt_device->get_firmware_version() < semver_t(18, 4, 0)) { 21 | log_warning( 22 | tt::LogUMD, 23 | "Skipping telemetry test on device {} with firmware version {} < 18.4.0", 24 | pci_device_id, 25 | tt_device->get_firmware_version().to_string()); 26 | continue; 27 | } 28 | 29 | ArcTelemetryReader* arc_telemetry_reader = tt_device->get_arc_telemetry_reader(); 30 | 31 | uint32_t board_id_high = arc_telemetry_reader->read_entry(TelemetryTag::BOARD_ID_HIGH); 32 | uint32_t board_id_low = arc_telemetry_reader->read_entry(TelemetryTag::BOARD_ID_LOW); 33 | 34 | const uint64_t board_id = ((uint64_t)board_id_high << 32) | (board_id_low); 35 | EXPECT_NO_THROW(get_board_type_from_board_id(board_id)); 36 | } 37 | } 38 | 39 | TEST(TestTelemetry, TelemetryEntryAvailable) { 40 | std::vector pci_device_ids = PCIDevice::enumerate_devices(); 41 | 42 | for (int pci_device_id : pci_device_ids) { 43 | std::unique_ptr tt_device = TTDevice::create(pci_device_id); 44 | tt_device->init_tt_device(); 45 | ArcTelemetryReader* arc_telemetry_reader = tt_device->get_arc_telemetry_reader(); 46 | 47 | EXPECT_TRUE(arc_telemetry_reader->is_entry_available(TelemetryTag::BOARD_ID_HIGH)); 48 | EXPECT_TRUE(arc_telemetry_reader->is_entry_available(TelemetryTag::BOARD_ID_LOW)); 49 | 50 | // Blackhole tag table is still not finalized, but we are probably never going to have 200 tags. 
51 | EXPECT_FALSE(arc_telemetry_reader->is_entry_available(200)); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /scripts/iommu_detect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Device IDs 4 | TT_VID="1e52" 5 | GS_PID="faca" 6 | WH_PID="401e" 7 | BH_PID="b140" 8 | 9 | tt_devices=$(lspci -D -d ${TT_VID}: | cut -d' ' -f1) 10 | 11 | if [ -z "$tt_devices" ]; then 12 | echo "No Tenstorrent devices found" 13 | exit 1 14 | fi 15 | 16 | found_gs_wh=false 17 | found_bh=false 18 | 19 | # First identify devices 20 | for dev in $tt_devices; do 21 | device_id=$(lspci -D -n -s "$dev" | cut -d' ' -f3 | cut -d: -f2) 22 | case $device_id in 23 | $GS_PID|$WH_PID) found_gs_wh=true ;; 24 | $BH_PID) found_bh=true ;; 25 | esac 26 | done 27 | 28 | # Check and output for each device 29 | for dev in $tt_devices; do 30 | device_id=$(lspci -D -n -s "$dev" | cut -d' ' -f3 | cut -d: -f2) 31 | echo "Checking device $dev (ID: $device_id):" 32 | 33 | # Check IOMMU status for this device 34 | iommu_enabled=false 35 | iommu_type="none" 36 | if [ -f "/sys/bus/pci/devices/${dev}/iommu_group/type" ]; then 37 | iommu_type=$(cat "/sys/bus/pci/devices/${dev}/iommu_group/type") 38 | [[ "$iommu_type" == *"DMA"* ]] && iommu_enabled=true 39 | fi 40 | 41 | if [[ "$device_id" == "$GS_PID" || "$device_id" == "$WH_PID" ]]; then 42 | if [ "$iommu_enabled" = true ]; then 43 | echo " WARNING: Grayskull/Wormhole device with IOMMU enabled (type: $iommu_type) - this configuration is not supported" 44 | else 45 | echo " Grayskull/Wormhole device detected - hugepages required" 46 | fi 47 | elif [[ "$device_id" == "$BH_PID" ]]; then 48 | if [ "$iommu_enabled" = true ]; then 49 | echo " Blackhole device with IOMMU enabled (type: $iommu_type) - hugepages optional" 50 | else 51 | echo " Blackhole device with no IOMMU/passthrough (type: $iommu_type) - hugepages required" 52 | fi 53 | else 54 | echo " 
Unknown device ID: $device_id" 55 | fi 56 | done 57 | 58 | echo -e "\nSummary:" 59 | if [ "$found_gs_wh" = true ]; then 60 | echo "- System has Grayskull/Wormhole devices - hugepages required" 61 | echo "- IOMMU must be disabled or in passthrough mode" 62 | elif [ "$found_bh" = true ]; then 63 | echo "- System has Blackhole devices - check IOMMU status above to determine if hugepages are needed" 64 | fi 65 | -------------------------------------------------------------------------------- /nanobind/tests/test_py_telemetry.py: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2025 Tenstorrent Inc. 2 | # SPDX-License-Identifier: Apache-2.0 3 | import unittest 4 | import tt_umd 5 | 6 | class TestTelemetry(unittest.TestCase): 7 | def test_telemetry(self): 8 | pci_ids = tt_umd.PCIDevice.enumerate_devices() 9 | print("Devices found: ", pci_ids) 10 | if (len(pci_ids) == 0): 11 | print("No PCI devices found.") 12 | return 13 | 14 | # Test telemetry for all available devices 15 | for pci_id in pci_ids: 16 | dev = tt_umd.TTDevice.create(pci_id) 17 | dev.init_tt_device() 18 | tel_reader = dev.get_arc_telemetry_reader() 19 | tag = int(tt_umd.TelemetryTag.ASIC_TEMPERATURE) 20 | print(f"Device {pci_id} - Telemetry reading for asic temperature: ", tel_reader.read_entry(tag)) 21 | 22 | def test_remote_telemetry(self): 23 | cluster_descriptor, umd_tt_devices = tt_umd.TopologyDiscovery.discover() 24 | tag = int(tt_umd.TelemetryTag.ASIC_TEMPERATURE) 25 | for chip, dev in umd_tt_devices.items(): 26 | tel_reader = umd_tt_devices[chip].get_arc_telemetry_reader() 27 | print(f"Telemetry reading for {'local' if dev.is_remote() else 'remote'} chip {chip} ASIC temperature: ", tel_reader.read_entry(tag)) 28 | 29 | def test_smbus_telemetry(self): 30 | """Test SMBUS telemetry reader on wormhole devices""" 31 | pci_ids = tt_umd.PCIDevice.enumerate_devices() 32 | if (len(pci_ids) == 0): 33 | print("No PCI devices found.") 34 | return 35 
| 36 | # Test SMBUS telemetry for all available devices 37 | for pci_id in pci_ids: 38 | dev = tt_umd.TTDevice.create(pci_id) 39 | dev.init_tt_device() 40 | 41 | # Only test SMBUS telemetry on wormhole devices 42 | if dev.get_arch() == tt_umd.ARCH.WORMHOLE_B0: 43 | smbus_reader = tt_umd.SmBusArcTelemetryReader(dev) 44 | if smbus_reader.is_entry_available(tt_umd.wormhole.TelemetryTag.ASIC_TEMPERATURE): 45 | temp = smbus_reader.read_entry(tt_umd.wormhole.TelemetryTag.ASIC_TEMPERATURE) 46 | print(f"Device {pci_id} - SMBUS telemetry ASIC temperature: {temp}") 47 | 48 | -------------------------------------------------------------------------------- /tests/api/GENERATE_ASSEMBLY_FOR_TESTS.md: -------------------------------------------------------------------------------- 1 | # How to Generate Assembly from C++ Code Using Godbolt (Compiler Explorer) for Tensix Cores 2 | 3 | ## Overview 4 | 5 | This guide explains how to use [Compiler Explorer (Godbolt)](https://godbolt.org/) to view assembly output generated from C++ source code. It applies to all five Tensix cores: 6 | 7 | - BRISC 8 | - TRISC0, TRISC1, TRISC2 9 | - NCRISC 10 | 11 | --- 12 | ## Step-by-Step Instructions 13 | 14 | ### 1. Open Godbolt 15 | 16 | - Go to: [https://godbolt.org/](https://godbolt.org/) 17 | 18 | ### 2. Select the Language 19 | 20 | - Choose **C++** as the source language in the editor pane. 21 | 22 | ### 3. Configure Compiler and Flags 23 | 24 | - Select a suitable compiler, e.g., `RISC-V (64-bit) gcc 15.1.0` (used for the `DeassertResetBrisc` example). 25 | - Set the compiler flags: 26 | `-Ox -march=rv32i -mabi=ilp32` 27 | (replace `-Ox` with the desired optimization flag: `-O0`, `-O1`, `-O2`, `-O3`, etc.) 28 | - In the **compiler output options**, enable: 29 | - Compile to binary object 30 | - Intel assembly syntax 31 | - Demangle identifiers 32 | 33 | **Note:** Optimization levels may reorder instructions. 
Be cautious if certain operations must occur in a specific sequence (e.g., enabling a write before accessing a register). 34 | 35 | ### 4. Write or Paste C++ Code 36 | 37 | ```cpp 38 | // Example C++ code 39 | int main() { 40 | unsigned int* a = (unsigned int*)0x10000; 41 | *a = 0x87654000; 42 | while (true); 43 | } 44 | ``` 45 | 46 | ### 5. Copy the assembly from the output: 47 | 48 | For the code in section 4, the instructions look like this: 49 | ```asm 50 | main: 51 | lui a5,0x10 52 | lui a4,0x87654 53 | sw a4,0(a5) 54 | .L2: 55 | j 8 <.L2> 56 | R_RISCV_RVC_JUMP .L2 57 | ``` 58 | and the machine instructions in hex look like this: 59 | ```cpp 60 | 0x000107b7 // lui a5,0x10 61 | 0x87654737 // lui a4,0x87654 62 | 0x00e7a023 // sw a4,0(a5) 63 | 0x0000006f // jal x0, 0 64 | ``` 65 | 66 | Here is a link to the example explained above: https://godbolt.org/z/hs4oKMznv 67 | 68 | **Note:** The BRISC core always starts running code at address `0x00000000`, while the other cores can start at different, configurable addresses. Because of this, make sure to set the starting addresses for the other cores before running the program. -------------------------------------------------------------------------------- /device/pcie/tlb_handle.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | #include "umd/device/pcie/tlb_handle.hpp" 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "assert.hpp" 15 | #include "ioctl.h" 16 | 17 | namespace tt::umd { 18 | 19 | TlbHandle::TlbHandle(tt_device_t* tt_device, size_t size, const TlbMapping tlb_mapping) : 20 | tlb_size(size), tt_device_(tt_device), tlb_mapping(tlb_mapping) { 21 | int ret_code = tt_tlb_alloc( 22 | tt_device_, size, tlb_mapping == TlbMapping::UC ? 
TT_MMIO_CACHE_MODE_UC : TT_MMIO_CACHE_MODE_WC, &tlb_handle_); 23 | 24 | if (ret_code != 0) { 25 | TT_THROW("tt_tlb_alloc failed with error code {} for TLB size {}.", ret_code, size); 26 | } 27 | 28 | tt_tlb_get_id(tlb_handle_, reinterpret_cast(&tlb_id)); 29 | 30 | tt_tlb_get_mmio(tlb_handle_, reinterpret_cast(&tlb_base)); 31 | } 32 | 33 | TlbHandle::~TlbHandle() noexcept { free_tlb(); } 34 | 35 | void TlbHandle::configure(const tlb_data& new_config) { 36 | tt_noc_addr_config_t config{}; 37 | config.addr = new_config.local_offset; 38 | config.x_end = new_config.x_end; 39 | config.y_end = new_config.y_end; 40 | config.x_start = new_config.x_start; 41 | config.y_start = new_config.y_start; 42 | config.noc = new_config.noc_sel; 43 | config.mcast = new_config.mcast; 44 | config.ordering = new_config.ordering; 45 | config.static_vc = new_config.static_vc; 46 | 47 | int ret_code = tt_tlb_map(tt_device_, tlb_handle_, &config); 48 | 49 | if (ret_code != 0) { 50 | TT_THROW("tt_tlb_map failed with error code {} for TLB size {}.", ret_code, tlb_size); 51 | } 52 | 53 | tlb_config = new_config; 54 | } 55 | 56 | uint8_t* TlbHandle::get_base() { return tlb_base; } 57 | 58 | size_t TlbHandle::get_size() const { return tlb_size; } 59 | 60 | const tlb_data& TlbHandle::get_config() const { return tlb_config; } 61 | 62 | const TlbMapping TlbHandle::get_tlb_mapping() const { return tlb_mapping; } 63 | 64 | void TlbHandle::free_tlb() noexcept { tt_tlb_free(tt_device_, tlb_handle_); } 65 | 66 | int TlbHandle::get_tlb_id() const { return tlb_id; } 67 | 68 | } // namespace tt::umd 69 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["scikit-build-core>=0.11", "nanobind", "setuptools"] 3 | build-backend = "scikit_build_core.build" 4 | 5 | [project] 6 | name = "tt-umd" 7 | dynamic = ["version"] 8 | authors = [ 9 | { name = 
"Tenstorrent", email = "foundationsw-umd@tenstorrent.com" } 10 | ] 11 | description = "User Mode Driver for tenstorrent" 12 | readme = "README.md" 13 | license = { file = "LICENSE" } 14 | classifiers = [ 15 | "Development Status :: 4 - Beta", 16 | "Intended Audience :: Developers", 17 | "License :: OSI Approved :: Apache Software License", 18 | "Operating System :: POSIX :: Linux", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3.8", 21 | "Programming Language :: Python :: 3.9", 22 | "Programming Language :: Python :: 3.10", 23 | "Programming Language :: Python :: 3.11", 24 | "Programming Language :: Python :: 3.12", 25 | "Programming Language :: C++", 26 | "Topic :: Software Development :: Libraries", 27 | "Topic :: System :: Hardware", 28 | ] 29 | 30 | [project.urls] 31 | "Homepage" = "https://tenstorrent.com" 32 | "Bug Reports" = "https://github.com/tenstorrent/tt-umd/issues" 33 | "Source" = "https://github.com/tenstorrent/tt-umd" 34 | "Documentation" = "https://github.com/tenstorrent/tt-umd/blob/main/README.md" 35 | 36 | [tool.scikit-build] 37 | cmake.build-type = "Release" 38 | build-dir = "build/" 39 | # Only build the Python-related targets (new key in scikit-build-core >= 0.10) 40 | build.targets = ["nanobind_tt_umd", "device"] 41 | # Exclude development files from the wheel 42 | wheel.exclude = ["CMakeCache.txt", "*.cmake", "CMakeFiles/", "Makefile"] 43 | # Install only the 'pip_wheel' component defined in CMake (new key path) 44 | install.components = ["pip_wheel"] 45 | 46 | [tool.scikit-build.cmake.define] 47 | TT_UMD_BUILD_TOOLS = "OFF" 48 | TT_UMD_BUILD_EXAMPLES = "OFF" 49 | TT_UMD_BUILD_TESTS = "OFF" 50 | TT_UMD_BUILD_SIMULATION = "OFF" 51 | TT_UMD_BUILD_PIP = "ON" 52 | TT_UMD_BUILD_PYTHON = "ON" 53 | TT_UMD_BUILD_STATIC = "ON" 54 | 55 | [tool.scikit-build.metadata.version] 56 | provider = "scikit_build_core.metadata.regex" 57 | input = "VERSION" 58 | regex = "(?P.*)" 59 | 60 | [tool.cibuildwheel] 61 | build = 
"cp38-* cp39-* cp310-* cp311-* cp312-* cp313-*" 62 | archs = "x86_64" 63 | skip = "*-musllinux_*" 64 | build-verbosity = 1 65 | -------------------------------------------------------------------------------- /tests/soc_descs/blackhole_140_arch_no_noc1.yaml: -------------------------------------------------------------------------------- 1 | # Note taken from software repo - may need updates. 2 | grid: 3 | x_size: 17 4 | y_size: 12 5 | 6 | arc: 7 | [ 8-0 ] 8 | 9 | pcie: 10 | [ 2-0, 11-0 ] 11 | 12 | dram: 13 | [ 14 | [0-0, 0-1, 0-11], 15 | [0-2, 0-10, 0-3], 16 | [0-9, 0-4, 0-8], 17 | [0-5, 0-7, 0-6], 18 | [9-0, 9-1, 9-11], 19 | [9-2, 9-10, 9-3], 20 | [9-9, 9-4, 9-8], 21 | [9-5, 9-7, 9-6], 22 | ] 23 | 24 | eth: 25 | [ 26 | 1-1, 16-1, 2-1, 15-1, 3-1, 14-1, 4-1, 13-1, 5-1, 12-1, 6-1, 11-1, 7-1, 10-1, 27 | ] 28 | 29 | functional_workers: 30 | [ 31 | 1-2, 2-2, 3-2, 4-2, 5-2, 6-2, 7-2, 10-2, 11-2, 12-2, 13-2, 14-2, 15-2, 16-2, 32 | 1-3, 2-3, 3-3, 4-3, 5-3, 6-3, 7-3, 10-3, 11-3, 12-3, 13-3, 14-3, 15-3, 16-3, 33 | 1-4, 2-4, 3-4, 4-4, 5-4, 6-4, 7-4, 10-4, 11-4, 12-4, 13-4, 14-4, 15-4, 16-4, 34 | 1-5, 2-5, 3-5, 4-5, 5-5, 6-5, 7-5, 10-5, 11-5, 12-5, 13-5, 14-5, 15-5, 16-5, 35 | 1-6, 2-6, 3-6, 4-6, 5-6, 6-6, 7-6, 10-6, 11-6, 12-6, 13-6, 14-6, 15-6, 16-6, 36 | 1-7, 2-7, 3-7, 4-7, 5-7, 6-7, 7-7, 10-7, 11-7, 12-7, 13-7, 14-7, 15-7, 16-7, 37 | 1-8, 2-8, 3-8, 4-8, 5-8, 6-8, 7-8, 10-8, 11-8, 12-8, 13-8, 14-8, 15-8, 16-8, 38 | 1-9, 2-9, 3-9, 4-9, 5-9, 6-9, 7-9, 10-9, 11-9, 12-9, 13-9, 14-9, 15-9, 16-9, 39 | 1-10, 2-10, 3-10, 4-10, 5-10, 6-10, 7-10, 10-10, 11-10, 12-10, 13-10, 14-10, 15-10, 16-10, 40 | 1-11, 2-11, 3-11, 4-11, 5-11, 6-11, 7-11, 10-11, 11-11, 12-11, 13-11, 14-11, 15-11, 16-11, 41 | ] 42 | 43 | harvested_workers: 44 | [] 45 | 46 | router_only: 47 | [ 48 | 1-0, 3-0, 4-0, 5-0, 6-0, 7-0, 10-0, 12-0, 13-0, 14-0, 15-0, 16-0, 49 | 8-1, 8-10, 8-8, 8-6, 8-4, 8-11 50 | ] 51 | 52 | security: 53 | [ 54 | 8-2 55 | ] 56 | 57 | l2cpu: 58 | [ 59 | 8-3, 8-9, 8-5, 8-7 60 | ] 61 | 62 | 
worker_l1_size: 63 | 1572864 64 | 65 | dram_bank_size: 66 | 4294967296 67 | 68 | eth_l1_size: 69 | 262144 70 | 71 | arch_name: BLACKHOLE 72 | 73 | features: 74 | unpacker: 75 | version: 2 76 | inline_srca_trans_without_srca_trans_instr: True 77 | math: 78 | dst_size_alignment: 32768 79 | packer: 80 | version: 2 81 | overlay: 82 | version: 2 83 | -------------------------------------------------------------------------------- /tools/topology.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include 5 | #include 6 | 7 | #include "common.hpp" 8 | #include "umd/device/cluster.hpp" 9 | #include "umd/device/cluster_descriptor.hpp" 10 | 11 | using namespace tt::umd; 12 | 13 | int main(int argc, char *argv[]) { 14 | cxxopts::Options options("topology", "Extract system topology and save it to a yaml file."); 15 | 16 | options.add_options()("f,path", "File path to save cluster descriptor to.", cxxopts::value())( 17 | "l,logical_devices", 18 | "List of logical device ids to filter cluster descriptor for.", 19 | cxxopts::value>())( 20 | "j,jtag", 21 | "Use JTAG mode for device communication. If not provided, PCIe will be used by default.", 22 | cxxopts::value()->default_value("false"))("h,help", "Print usage"); 23 | 24 | auto result = options.parse(argc, argv); 25 | 26 | if (result.count("help")) { 27 | std::cout << options.help() << std::endl; 28 | return 0; 29 | } 30 | 31 | if (result.count("logical_devices") && result.count("devices")) { 32 | std::cerr << "Error: Using both 'devices' and 'logical_devices' options is not allowed." 
<< std::endl; 33 | return 1; 34 | } 35 | 36 | std::string cluster_descriptor_path = ""; 37 | if (result.count("path")) { 38 | cluster_descriptor_path = result["path"].as(); 39 | } 40 | 41 | std::unordered_set device_ids = {}; 42 | IODeviceType device_type = IODeviceType::PCIe; 43 | 44 | if (result["jtag"].as()) { 45 | device_type = IODeviceType::JTAG; 46 | } 47 | 48 | std::unique_ptr cluster_descriptor = Cluster::create_cluster_descriptor("", device_type); 49 | 50 | if (result.count("logical_devices")) { 51 | std::unordered_set logical_device_ids = extract_int_set(result["logical_devices"]); 52 | 53 | cluster_descriptor = 54 | ClusterDescriptor::create_constrained_cluster_descriptor(cluster_descriptor.get(), logical_device_ids); 55 | } 56 | 57 | std::string output_path = cluster_descriptor->serialize_to_file(cluster_descriptor_path); 58 | log_info(tt::LogUMD, "Cluster descriptor serialized to {}", output_path); 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /tests/cluster_descriptor_examples/2x2_n300_cluster_desc.yaml: -------------------------------------------------------------------------------- 1 | arch: 2 | 0: wormhole_b0 3 | 1: wormhole_b0 4 | 2: wormhole_b0 5 | 3: wormhole_b0 6 | chips: 7 | 0: 8 | - 1 9 | - 0 10 | - 0 11 | - 0 12 | 1: 13 | - 1 14 | - 1 15 | - 0 16 | - 0 17 | 2: 18 | - 2 19 | - 1 20 | - 0 21 | - 0 22 | 3: 23 | - 2 24 | - 0 25 | - 0 26 | - 0 27 | chip_unique_ids: 28 | 3: 10226782261 29 | 2: 10226774114 30 | 1: 10226774041 31 | 0: 9957417446 32 | ethernet_connections: 33 | - 34 | - chip: 0 35 | chan: 6 36 | - chip: 3 37 | chan: 6 38 | - 39 | - chip: 0 40 | chan: 7 41 | - chip: 3 42 | chan: 7 43 | - 44 | - chip: 0 45 | chan: 14 46 | - chip: 1 47 | chan: 14 48 | - 49 | - chip: 0 50 | chan: 15 51 | - chip: 1 52 | chan: 15 53 | - 54 | - chip: 1 55 | chan: 0 56 | - chip: 2 57 | chan: 0 58 | - 59 | - chip: 1 60 | chan: 1 61 | - chip: 2 62 | chan: 1 63 | - 64 | - chip: 2 65 | chan: 14 66 | - 
chip: 3 67 | chan: 14 68 | - 69 | - chip: 2 70 | chan: 15 71 | - chip: 3 72 | chan: 15 73 | ethernet_connections_to_remote_devices: 74 | [] 75 | chips_with_mmio: 76 | - 0: 0 77 | - 2: 2 78 | harvesting: 79 | 0: 80 | noc_translation: true 81 | harvest_mask: 65 82 | dram_harvesting_mask: 0 83 | eth_harvesting_mask: 0 84 | pcie_harvesting_mask: 0 85 | 1: 86 | noc_translation: true 87 | harvest_mask: 528 88 | dram_harvesting_mask: 0 89 | eth_harvesting_mask: 0 90 | pcie_harvesting_mask: 0 91 | 2: 92 | noc_translation: true 93 | harvest_mask: 520 94 | dram_harvesting_mask: 0 95 | eth_harvesting_mask: 0 96 | pcie_harvesting_mask: 0 97 | 3: 98 | noc_translation: true 99 | harvest_mask: 513 100 | dram_harvesting_mask: 0 101 | eth_harvesting_mask: 0 102 | pcie_harvesting_mask: 0 103 | chip_to_boardtype: 104 | 0: n300 105 | 1: n300 106 | 2: n300 107 | 3: n300 108 | boards: 109 | - 110 | - board_id: 72058990196040166 111 | - board_type: n300 112 | - chips: 113 | - 1 114 | - 0 115 | - 116 | - board_id: 72058994491928601 117 | - board_type: n300 118 | - chips: 119 | - 2 120 | - 3 121 | -------------------------------------------------------------------------------- /device/api/umd/device/simulation/tt_sim_chip.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include "umd/device/simulation/simulation_chip.hpp" 15 | 16 | namespace tt::umd { 17 | 18 | // TTSIM implementation using dynamic library (.so files). 
19 | class TTSimChip : public SimulationChip { 20 | public: 21 | TTSimChip( 22 | const std::filesystem::path& simulator_directory, 23 | SocDescriptor soc_descriptor, 24 | ChipId chip_id, 25 | bool copy_sim_binary = false); 26 | ~TTSimChip() override; 27 | 28 | void start_device() override; 29 | void close_device() override; 30 | 31 | void write_to_device(CoreCoord core, const void* src, uint64_t l1_dest, uint32_t size) override; 32 | void read_from_device(CoreCoord core, void* dest, uint64_t l1_src, uint32_t size) override; 33 | 34 | void send_tensix_risc_reset(tt_xy_pair translated_core, const TensixSoftResetOptions& soft_resets) override; 35 | void send_tensix_risc_reset(const TensixSoftResetOptions& soft_resets) override; 36 | void assert_risc_reset(CoreCoord core, const RiscType selected_riscs) override; 37 | void deassert_risc_reset(CoreCoord core, const RiscType selected_riscs, bool staggered_start) override; 38 | 39 | private: 40 | void create_simulator_binary(); 41 | off_t resize_simulator_binary(int src_fd); 42 | void copy_simulator_binary(); 43 | void secure_simulator_binary(); 44 | void close_simulator_binary(); 45 | void load_simulator_library(const std::filesystem::path& path); 46 | std::unique_ptr architecture_impl_; 47 | int copied_simulator_fd_ = -1; 48 | 49 | void* libttsim_handle = nullptr; 50 | uint32_t libttsim_pci_device_id = 0; 51 | void (*pfn_libttsim_init)() = nullptr; 52 | void (*pfn_libttsim_exit)() = nullptr; 53 | uint32_t (*pfn_libttsim_pci_config_rd32)(uint32_t bus_device_function, uint32_t offset) = nullptr; 54 | void (*pfn_libttsim_tile_rd_bytes)(uint32_t x, uint32_t y, uint64_t addr, void* p, uint32_t size) = nullptr; 55 | void (*pfn_libttsim_tile_wr_bytes)(uint32_t x, uint32_t y, uint64_t addr, const void* p, uint32_t size) = nullptr; 56 | void (*pfn_libttsim_clock)(uint32_t n_clocks) = nullptr; 57 | }; 58 | 59 | } // namespace tt::umd 60 | -------------------------------------------------------------------------------- 
/tests/microbenchmark/common/microbenchmark_utils.hpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | #pragma once 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "umd/device/cluster.hpp" 15 | 16 | namespace tt::umd::test::utils { 17 | 18 | /** 19 | * Returns the performance of read and write operations to a specific chip and core in MB/s. 20 | * 21 | * @param buf_size Size of the buffer in bytes. 22 | * @param num_iterations Number of iterations to perform for read and write operations. 23 | * @param cluster The cluster to perform the operations on. 24 | * @param chip The logical chip ID to perform the operations on. 25 | * @param core The core coordinates to perform the operations on. 26 | * @return A pair containing the write bandwidth and read bandwidth in MB/s. 27 | */ 28 | std::pair perf_read_write( 29 | const size_t buf_size, 30 | const uint32_t num_iterations, 31 | Cluster* cluster, 32 | const ChipId chip, 33 | const CoreCoord core, 34 | const uint32_t address = 0); 35 | 36 | /** 37 | * Prints a table in Markdown format. Headers are printed as the first row, followed by a separator row, 38 | * and then the data rows. Headers length must match the length of each row. Example: 39 | * | Size (MB) | Host -> Device Tensix L1 (MB/s) | Device Tensix L1 -> Host (MB/s) | 40 | * |---|---|---| 41 | * | 1.00 | 13157.70 | 2493.65 | 42 | * 43 | * @param headers The headers of the table. 44 | * @param rows The rows of the table, where each row is a vector of strings. 45 | */ 46 | void print_markdown_table_format( 47 | const std::vector& headers, const std::vector>& rows); 48 | 49 | /** 50 | * Calculates the speed in MB/s given the number of bytes and the time in nanoseconds. 51 | * 52 | * @param bytes The number of bytes processed. 
53 | * @param ns The time taken in nanoseconds. 54 | * @return The speed in MB/s. 55 | */ 56 | double calc_speed(size_t bytes, uint64_t ns); 57 | 58 | /** 59 | * Converts a double value to a string with fixed-point notation and two decimal places. 60 | * 61 | * @param value The double value to convert. 62 | * @return A string representation of the double value. 63 | */ 64 | std::string convert_double_to_string(double value); 65 | 66 | } // namespace tt::umd::test::utils 67 | -------------------------------------------------------------------------------- /device/tt_device/remote_blackhole_tt_device.cpp: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: (c) 2025 Tenstorrent Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | #include "umd/device/tt_device/remote_blackhole_tt_device.hpp" 5 | 6 | #include "umd/device/arch/blackhole_implementation.hpp" 7 | 8 | namespace tt::umd { 9 | // Initializes the BlackholeTTDevice base with the PCI device of the local device reported by `remote_communication`, then moves `remote_communication` into this object and sets is_remote_tt_device to true. 10 | RemoteBlackholeTTDevice::RemoteBlackholeTTDevice(std::unique_ptr remote_communication) : 11 | BlackholeTTDevice(remote_communication->get_local_device()->get_pci_device()), 12 | remote_communication_(std::move(remote_communication)) { 13 | is_remote_tt_device = true; 14 | } 15 | // Forwards the request to the remote communication object (read_non_mmio) with the same core, buffer, address and size. 16 | void RemoteBlackholeTTDevice::read_from_device(void* mem_ptr, tt_xy_pair core, uint64_t addr, uint32_t size) { 17 | remote_communication_->read_non_mmio(core, mem_ptr, addr, size); 18 | } 19 | // Forwards the request to the remote communication object (write_to_non_mmio) with the same core, buffer, address and size. 20 | void RemoteBlackholeTTDevice::write_to_device(const void* mem_ptr, tt_xy_pair core, uint64_t addr, uint32_t size) { 21 | remote_communication_->write_to_non_mmio(core, mem_ptr, addr, size); 22 | } 23 | // Calls read_from_device() on get_arc_core() at get_arc_apb_noc_base_address() + `arc_addr_offset`. NOTE(review): `size` is size_t here but read_from_device() takes uint32_t, so it is narrowed implicitly. 24 | void RemoteBlackholeTTDevice::read_from_arc_apb(void* mem_ptr, uint64_t arc_addr_offset, size_t size) { 25 | read_from_device( 26 | mem_ptr, get_arc_core(), architecture_impl_->get_arc_apb_noc_base_address() + arc_addr_offset, size); 27 | } 28 | // Calls write_to_device() on get_arc_core() at get_arc_apb_noc_base_address() + `arc_addr_offset`. NOTE(review): `size` is size_t here but write_to_device() takes uint32_t, so it is narrowed implicitly. 29 | void RemoteBlackholeTTDevice::write_to_arc_apb(const void* mem_ptr, uint64_t 
arc_addr_offset, size_t size) { 30 | write_to_device( 31 | mem_ptr, get_arc_core(), architecture_impl_->get_arc_apb_noc_base_address() + arc_addr_offset, size); 32 | } 33 | // Delegates to the remote communication object's wait_for_non_mmio_flush(). 34 | void RemoteBlackholeTTDevice::wait_for_non_mmio_flush() { remote_communication_->wait_for_non_mmio_flush(); } 35 | // Returns a raw pointer to the remote communication object; this device keeps holding it in remote_communication_. 36 | RemoteCommunication* RemoteBlackholeTTDevice::get_remote_communication() { return remote_communication_.get(); } 37 | 38 | // ARC tile access over AXI is not supported for remote devices. 39 | bool RemoteBlackholeTTDevice::is_arc_available_over_axi() { return false; } 40 | // Issues one write_to_device() call per core for every (x, y) with core_start.x <= x <= core_end.x and core_start.y <= y <= core_end.y, each with the same buffer, address and size. NOTE(review): `size` is size_t here but write_to_device() takes uint32_t, so it is narrowed implicitly. 41 | void RemoteBlackholeTTDevice::noc_multicast_write( 42 | void* dst, size_t size, tt_xy_pair core_start, tt_xy_pair core_end, uint64_t addr) { 43 | // TODO: implement multicast over remote communication. 44 | // For now, we fall back to a unicast write to each core in the range. 45 | for (uint32_t x = core_start.x; x <= core_end.x; ++x) { 46 | for (uint32_t y = core_start.y; y <= core_end.y; ++y) { 47 | write_to_device(dst, tt_xy_pair(x, y), addr, size); 48 | } 49 | } 50 | } 51 | 52 | } // namespace tt::umd 53 | -------------------------------------------------------------------------------- /device/api/umd/device/tt_device/blackhole_tt_device.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-FileCopyrightText: (c) 2024 Tenstorrent Inc. 
3 | * 4 | * SPDX-License-Identifier: Apache-2.0 5 | */ 6 | 7 | #pragma once 8 | 9 | #include 10 | #include 11 | 12 | #include "umd/device/arc/blackhole_arc_telemetry_reader.hpp" 13 | #include "umd/device/tt_device/tt_device.hpp" 14 | #include "umd/device/utils/timeouts.hpp" 15 | 16 | namespace tt::umd { 17 | // TTDevice subclass for Blackhole. Its constructors are protected and TTDevice::create() is declared a friend below, presumably so that only the factory (and subclasses) can construct it. 18 | class BlackholeTTDevice : public TTDevice { 19 | public: 20 | ~BlackholeTTDevice(); 21 | 22 | void configure_iatu_region(size_t region, uint64_t target, size_t region_size) override; 23 | // Timeout defaults to timeout::ARC_STARTUP_TIMEOUT. NOTE(review): the meaning of the bool result is not visible in this header. 24 | bool wait_arc_core_start(const std::chrono::milliseconds timeout_ms = timeout::ARC_STARTUP_TIMEOUT) override; 25 | 26 | uint32_t get_clock() override; 27 | 28 | uint32_t get_min_clock_freq() override; 29 | 30 | bool get_noc_translation_enabled() override; 31 | // DMA transfers between a host pointer and a 32-bit device-side address. NOTE(review): presumably d2h = device-to-host and h2d = host-to-device - confirm. 32 | void dma_d2h(void *dst, uint32_t src, size_t size) override; 33 | 34 | void dma_h2d(uint32_t dst, const void *src, size_t size) override; 35 | 36 | void dma_h2d_zero_copy(uint32_t dst, const void *src, size_t size) override; 37 | 38 | void dma_d2h_zero_copy(void *dst, uint32_t src, size_t size) override; 39 | // ARC accessors take an offset (`arc_addr_offset`) rather than an absolute address; the base each offset is applied to is not visible in this header. 40 | void read_from_arc_apb(void *mem_ptr, uint64_t arc_addr_offset, size_t size) override; 41 | 42 | void write_to_arc_apb(const void *mem_ptr, uint64_t arc_addr_offset, size_t size) override; 43 | 44 | void read_from_arc_csm(void *mem_ptr, uint64_t arc_addr_offset, size_t size) override; 45 | 46 | void write_to_arc_csm(const void *mem_ptr, uint64_t arc_addr_offset, size_t size) override; 47 | 48 | ChipInfo get_chip_info() override; 49 | // Timeout defaults to timeout::ETH_TRAINING_TIMEOUT. NOTE(review): presumably the returned duration is the time spent waiting - confirm against the implementation. 50 | std::chrono::milliseconds wait_eth_core_training( 51 | const tt_xy_pair eth_core, const std::chrono::milliseconds timeout_ms = timeout::ETH_TRAINING_TIMEOUT) override; 52 | 53 | protected: 54 | BlackholeTTDevice(std::shared_ptr pci_device); 55 | BlackholeTTDevice(std::shared_ptr jtag_device, uint8_t jlink_id); 56 | 57 | bool is_hardware_hung() override; 58 | // Declared virtual without `override`, i.e. introduced here as a hook for subclasses; RemoteBlackholeTTDevice replaces it with a version that returns false. 59 | virtual bool is_arc_available_over_axi(); 60 | 61 | private: 62 | int get_pcie_x_coordinate(); 63 | // Friend declaration for the TTDevice::create() factory, presumably so it can call the protected constructors above. 
64 | friend std::unique_ptr TTDevice::create(int device_number, IODeviceType device_type); 65 | // NOTE(review): presumably the byte offset of the ATU registers within BAR2, and the set of regions passed to configure_iatu_region() - confirm against the implementation. 66 | static constexpr uint64_t ATU_OFFSET_IN_BH_BAR2 = 0x1000; 67 | std::set iatu_regions_; 68 | }; 69 | 70 | } // namespace tt::umd 71 | --------------------------------------------------------------------------------