├── .azuredevops └── rocm-ci.yml ├── .clang-format ├── .cmake-format ├── .editorconfig ├── .git-blame-ignore-revs ├── .github ├── CODEOWNERS ├── CONTRIBUTING.md ├── dependabot.yml ├── palamida.yml └── workflows │ ├── cmake_format.yml │ ├── kws-caller.yml │ ├── label_cherrypicks.yml │ ├── main.yml │ └── rocm_ci_caller.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CHANGELOG.md ├── CMakeLists.txt ├── CPPLINT.cfg ├── DEBIAN └── control ├── Docker ├── Dockerfile └── README.md ├── LICENSE ├── README.md ├── authentication ├── 01gen_root_cert.sh ├── 02gen_ssl_artifacts.sh ├── install_client.sh ├── install_server.sh ├── openssl.cnf └── readme.txt ├── cmake_modules ├── Findrocprofiler.cmake ├── Findrvs.cmake ├── rdc-config-version.cmake.in ├── rdc-config.cmake.in ├── utils.cmake └── version_util.sh ├── common ├── rdc_capabilities.cc ├── rdc_capabilities.h ├── rdc_field.data ├── rdc_fields_supported.cc ├── rdc_fields_supported.h ├── rdc_utils.cc └── rdc_utils.h ├── docs ├── Conceptual │ └── components.rst ├── conf.py ├── data │ ├── api_libs.png │ ├── features.png │ ├── features_jobs.png │ ├── handbook_openssl.png │ ├── install_components.png │ ├── integration_config1.png │ ├── integration_config2.png │ ├── integration_config3.png │ ├── integration_config4.png │ ├── integration_config5.png │ ├── integration_config6.png │ ├── integration_gpu_clock.png │ └── integration_login.png ├── doxygen │ ├── .gitignore │ └── Doxyfile ├── how-to │ ├── integration.rst │ ├── user_guide.rst │ ├── using_RDC.rst │ └── using_RDC_features.rst ├── index.rst ├── install │ ├── handbook.rst │ └── install.rst ├── license.md ├── reference │ ├── api_intro.rst │ └── api_ref.rst ├── sphinx │ ├── _toc.yml.in │ ├── requirements.in │ └── requirements.txt └── tutorial │ └── job_stats_sample.rst ├── example ├── CMakeLists.txt ├── README.md ├── config_example.cc ├── diagnostic_example.cc ├── field_value_example.cc ├── health_example.cc ├── job_stats_example.cc ├── policy_example.cc ├── 
rocprofiler_example.cc └── topologylink_example.cc ├── include ├── rdc │ ├── rdc.h │ └── rdc_private.h ├── rdc_lib │ ├── RdcCacheManager.h │ ├── RdcConfigSettings.h │ ├── RdcDiagnostic.h │ ├── RdcDiagnosticLibInterface.h │ ├── RdcEntityCodec.h │ ├── RdcException.h │ ├── RdcGroupSettings.h │ ├── RdcHandler.h │ ├── RdcLibraryLoader.h │ ├── RdcLogger.h │ ├── RdcMetricFetcher.h │ ├── RdcMetricsUpdater.h │ ├── RdcModuleMgr.h │ ├── RdcNotification.h │ ├── RdcPartition.h │ ├── RdcPerfTimer.h │ ├── RdcPolicy.h │ ├── RdcTelemetry.h │ ├── RdcTelemetryLibInterface.h │ ├── RdcTopologyLink.h │ ├── RdcWatchTable.h │ ├── impl │ │ ├── RdcCacheManagerImpl.h │ │ ├── RdcConfigSettingsImpl.h │ │ ├── RdcDiagnosticModule.h │ │ ├── RdcEmbeddedHandler.h │ │ ├── RdcGroupSettingsImpl.h │ │ ├── RdcMetricFetcherImpl.h │ │ ├── RdcMetricsUpdaterImpl.h │ │ ├── RdcModuleMgrImpl.h │ │ ├── RdcNotificationImpl.h │ │ ├── RdcPartitionImpl.h │ │ ├── RdcPolicyImpl.h │ │ ├── RdcRVSLib.h │ │ ├── RdcRocpLib.h │ │ ├── RdcRocrLib.h │ │ ├── RdcSmiDiagnosticImpl.h │ │ ├── RdcSmiLib.h │ │ ├── RdcStandaloneHandler.h │ │ ├── RdcTelemetryModule.h │ │ ├── RdcTopologyLinkImpl.h │ │ ├── RdcWatchTableImpl.h │ │ └── SmiUtils.h │ └── rdc_common.h └── rdc_modules │ ├── kernels │ └── binary_search_kernels.cl │ ├── rdc_rocp │ ├── RdcRocpBase.h │ └── RdcRocpCounterSampler.h │ ├── rdc_rocr │ ├── ComputeQueueTest.h │ ├── MemoryAccess.h │ ├── MemoryTest.h │ ├── RdcRocrBase.h │ ├── TestBase.h │ ├── base_rocr_utils.h │ └── common.h │ └── rdc_rvs │ └── RvsBase.h ├── lychee.toml ├── protos └── rdc.proto ├── python_binding ├── README.md ├── README_rdc_rest_api.txt ├── RdcReader.py ├── RdcUtil.py ├── prometheus_targets.json ├── rdc_bootstrap.py ├── rdc_collectd.conf ├── rdc_collectd.py ├── rdc_grafana_dashboard_example.json ├── rdc_prometheus.py ├── rdc_prometheus_example.yml └── rdc_rest_api.py ├── rdc_libs ├── CMakeLists.txt ├── bootstrap │ ├── CMakeLists.txt │ └── src │ │ ├── RdcBootStrap.cc │ │ ├── RdcEntityCodec.cc │ │ ├── 
RdcLibraryLoader.cc │ │ └── RdcLogger.cc ├── rdc │ ├── CMakeLists.txt │ └── src │ │ ├── RdcCacheManagerImpl.cc │ │ ├── RdcConfigSettingsImpl.cc │ │ ├── RdcDiagnosticModule.cc │ │ ├── RdcEmbeddedHandler.cc │ │ ├── RdcGroupSettingsImpl.cc │ │ ├── RdcMetricFetcherImpl.cc │ │ ├── RdcMetricsUpdaterImpl.cc │ │ ├── RdcModuleMgrImpl.cc │ │ ├── RdcNotificationImpl.cc │ │ ├── RdcPartitionImpl.cc │ │ ├── RdcPerfTimer.cc │ │ ├── RdcPolicyImpl.cc │ │ ├── RdcRVSLib.cc │ │ ├── RdcRocpLib.cc │ │ ├── RdcRocrLib.cc │ │ ├── RdcSmiDiagnosticImpl.cc │ │ ├── RdcSmiLib.cc │ │ ├── RdcTelemetryModule.cc │ │ ├── RdcTopologyLinkImpl.cc │ │ ├── RdcWatchTableImpl.cc │ │ └── SmiUtils.cc ├── rdc_client │ ├── CMakeLists.txt │ └── src │ │ └── RdcStandaloneHandler.cc └── rdc_modules │ ├── kernels │ ├── binary_search_kernels.cl │ ├── gpuReadWrite_kernels.cl │ └── hsaco │ │ ├── gfx1010 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1011 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1012 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1030 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1031 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1032 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx1033 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx700 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx701 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx702 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx801 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx802 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx803 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx805 │ │ ├── 
binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx810 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx900 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx902 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx904 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx906 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx908 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx90a │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx940 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ ├── gfx941 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ │ └── gfx942 │ │ ├── binary_search_kernels.hsaco │ │ └── gpuReadWrite_kernels.hsaco │ ├── rdc_rocp │ ├── CMakeLists.txt │ ├── RdcRocpBase.cc │ ├── RdcRocpCounterSampler.cc │ └── RdcTelemetryLib.cc │ ├── rdc_rocr │ ├── CMakeLists.txt │ ├── ComputeQueueTest.cc │ ├── MemoryAccess.cc │ ├── MemoryTest.cc │ ├── RdcDiagnosticLib.cc │ ├── RdcRocrBase.cc │ ├── TestBase.cc │ ├── base_rocr_utils.cc │ └── common.cc │ └── rdc_rvs │ ├── CMakeLists.txt │ ├── RdcDiagnosticLib.cc │ ├── RvsBase.cc │ └── conf │ ├── MI210 │ ├── babel.conf │ ├── babel_long.conf │ ├── gpup_single.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── pbqt_single.conf │ ├── pebb_single.conf │ ├── pebb_single_long.conf │ └── tst_single.conf │ ├── MI300A │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── pebb_single.conf │ └── pebb_single_long.conf │ ├── MI300X │ ├── babel.conf │ ├── babel_long.conf │ ├── gst_ext.conf │ ├── gst_selfcheck.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── gst_stress.conf │ ├── iet_single.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── pbqt_single.conf │ ├── pebb_single.conf 
│ └── pebb_single_long.conf │ ├── MI308X │ ├── babel.conf │ ├── babel_long.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── gst_thermal.conf │ ├── iet_single.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ └── iet_thermal.conf │ ├── default │ ├── nv21 │ ├── gpup_single.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── gst_stress_3_hrs.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── mem.conf │ ├── pbqt_single.conf │ ├── pebb_single.conf │ ├── pebb_single_long.conf │ ├── peqt_single.conf │ ├── pesm_1.conf │ └── rcqt_single.conf │ ├── nv31 │ ├── gpup_single.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── gst_stress_3_hrs.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── mem.conf │ ├── pbqt_single.conf │ ├── pebb_single.conf │ ├── pebb_single_long.conf │ ├── peqt_single.conf │ ├── pesm_1.conf │ └── rcqt_single.conf │ └── nv32 │ ├── gpup_single.conf │ ├── gst_single.conf │ ├── gst_single_long.conf │ ├── gst_stress_3_hrs.conf │ ├── iet_stress.conf │ ├── iet_stress_long.conf │ ├── mem.conf │ ├── pbqt_single.conf │ ├── pebb_single.conf │ ├── pebb_single_long.conf │ ├── peqt_single.conf │ ├── pesm_1.conf │ └── rcqt_single.conf ├── rdci ├── CMakeLists.txt ├── include │ ├── RdciConfigSubSystem.h │ ├── RdciDiagSubSystem.h │ ├── RdciDiscoverySubSystem.h │ ├── RdciDmonSubSystem.h │ ├── RdciFieldGroupSubSystem.h │ ├── RdciGroupSubSystem.h │ ├── RdciHealthSubSystem.h │ ├── RdciPolicySubSystem.h │ ├── RdciStatsSubSystem.h │ ├── RdciSubSystem.h │ ├── RdciTopologyLinkSubSystem.h │ └── RdciXgmiLinkStatusSubSystem.h └── src │ ├── RdciConfigSubSystem.cc │ ├── RdciDiagSubSystem.cc │ ├── RdciDiscoverySubSystem.cc │ ├── RdciDmonSubSystem.cc │ ├── RdciFieldGroupSubSystem.cc │ ├── RdciGroupSubSystem.cc │ ├── RdciHealthSubSystem.cc │ ├── RdciPolicySubSystem.cc │ ├── RdciStatsSubSystem.cc │ ├── RdciSubSystem.cc │ ├── RdciTopologyLinkSubSystem.cc │ ├── RdciXgmiLinkStatusSubSystem.cc │ └── rdci.cc ├── server ├── CMakeLists.txt ├── 
include │ └── rdc │ │ ├── rdc_admin_service.h │ │ ├── rdc_api_service.h │ │ └── rdc_server_main.h ├── rdc.service.in ├── rdc_options.conf ├── run_build.sh └── src │ ├── rdc_admin_service.cc │ ├── rdc_api_service.cc │ └── rdc_server_main.cc ├── src ├── DEBIAN_postinst.in ├── DEBIAN_prerm.in ├── RPM_postun.in ├── RPM_preun.in ├── RPM_rpm_post.in ├── header_template.hpp.in └── rdc64Config.in ├── tests ├── example │ ├── CMakeLists.txt │ └── rdc_client_test.cc └── rdc_tests │ ├── CMakeLists.txt │ ├── functional │ ├── rdci_discovery.cc │ ├── rdci_discovery.h │ ├── rdci_dmon.cc │ ├── rdci_dmon.h │ ├── rdci_fieldgroup.cc │ ├── rdci_fieldgroup.h │ ├── rdci_group.cc │ ├── rdci_group.h │ ├── rdci_stats.cc │ └── rdci_stats.h │ ├── main.cc │ ├── rdctst.exclude │ ├── test_base.cc │ ├── test_base.h │ ├── test_common.cc │ ├── test_common.h │ ├── test_utils.cc │ └── test_utils.h └── tools ├── cmake_format.sh └── run_github_actions_locally.sh /.azuredevops/rocm-ci.yml: -------------------------------------------------------------------------------- 1 | resources: 2 | repositories: 3 | - repository: pipelines_repo 4 | type: github 5 | endpoint: ROCm 6 | name: ROCm/ROCm 7 | 8 | variables: 9 | - group: common 10 | - template: /.azuredevops/variables-global.yml@pipelines_repo 11 | 12 | trigger: 13 | batch: true 14 | branches: 15 | include: 16 | - amd-staging 17 | paths: 18 | exclude: 19 | - .github 20 | - docs 21 | - '.*.y*ml' 22 | - '*.md' 23 | - LICENSE 24 | 25 | pr: 26 | autoCancel: true 27 | branches: 28 | include: 29 | - amd-staging 30 | paths: 31 | exclude: 32 | - .github 33 | - docs 34 | - '.*.y*ml' 35 | - '*.md' 36 | - LICENSE 37 | drafts: false 38 | 39 | jobs: 40 | - template: ${{ variables.CI_COMPONENT_PATH }}/rdc.yml@pipelines_repo 41 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 | BasedOnStyle: Google 4 | 
ColumnLimit: 100 5 | 6 | # Force pointers to the type for C++. 7 | # For some reason Google style doesn't specify this.. 8 | DerivePointerAlignment: false 9 | PointerAlignment: Left 10 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig standardizes spacing in all editors: https://EditorConfig.org 2 | # Please get a plugin for your editor to match the formatting 3 | 4 | # top-most EditorConfig file 5 | root = true 6 | 7 | # Matches multiple files with brace expansion notation 8 | # Set default charset 9 | [*.{c,cc,cpp,h,hh,hpp}] 10 | charset = utf-8 11 | indent_style = space 12 | indent_size = 2 13 | 14 | [*.py] 15 | indent_style = space 16 | indent_size = 4 17 | 18 | [*.proto] 19 | charset = utf-8 20 | indent_style = space 21 | indent_size = 2 22 | 23 | -------------------------------------------------------------------------------- /.git-blame-ignore-revs: -------------------------------------------------------------------------------- 1 | # ignore formatting commit 2 | 434e40305d6771040caec164b6b1369cc0ef51ad 3 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @dmitrii-galantsev @bill-shuzhou-liu 2 | 3 | docs/* @ROCm/rocm-documentation 4 | *.md @ROCm/rocm-documentation 5 | *.rst @ROCm/rocm-documentation 6 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/docs/sphinx" # Location of package manifests 10 | open-pull-requests-limit: 10 11 | schedule: 12 | interval: "monthly" 13 | labels: 14 | - "documentation" 15 | - "dependencies" 16 | - "ci:docs-only" 17 | reviewers: 18 | - "samjwu" 19 | -------------------------------------------------------------------------------- /.github/palamida.yml: -------------------------------------------------------------------------------- 1 | disabled: false 2 | scmId: gh-emu-rocm 3 | branchesToScan: 4 | - amd-staging 5 | - amd-mainline -------------------------------------------------------------------------------- /.github/workflows/kws-caller.yml: -------------------------------------------------------------------------------- 1 | name: Rocm Validation Suite KWS 2 | on: 3 | push: 4 | branches: [amd-staging, amd-mainline] 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | workflow_dispatch: 8 | jobs: 9 | kws: 10 | if: ${{ github.event_name == 'pull_request' }} 11 | uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/kws.yml@mainline 12 | secrets: inherit 13 | with: 14 | pr_number: ${{github.event.pull_request.number}} 15 | base_branch: ${{github.base_ref}} 16 | -------------------------------------------------------------------------------- /.github/workflows/label_cherrypicks.yml: -------------------------------------------------------------------------------- 1 | # caution: this whole file was written using Claude 3.7 Sonnet 2 | name: Auto Label Cherry-Pick 3 | 4 | on: 5 | pull_request: 6 | types: [opened, synchronize, reopened] 7 | 8 | jobs: 9 | add-label: 10 | runs-on: ubuntu-latest 11 | container: 12 | image: node:16-alpine 13 | permissions: 14 | pull-requests: write 15 | 
steps: 16 | - name: Add label to cherry-pick PRs 17 | uses: actions/github-script@v6 18 | with: 19 | script: | 20 | const pr = context.payload.pull_request; 21 | const headBranch = pr.head.ref; 22 | const baseBranch = pr.base.ref; 23 | 24 | // Check if head branch contains cherry-pick pattern or base branch starts with release/ 25 | const isCherryPick = /cherry.*pick/i.test(headBranch); 26 | const isReleaseTarget = baseBranch.startsWith('release/'); 27 | 28 | if (isCherryPick || isReleaseTarget) { 29 | // Label to apply 30 | const labelToAdd = 'cherry-pick'; 31 | 32 | // Try to add the label 33 | try { 34 | await github.rest.issues.addLabels({ 35 | owner: context.repo.owner, 36 | repo: context.repo.repo, 37 | issue_number: pr.number, 38 | labels: [labelToAdd] 39 | }); 40 | console.log(`Added label "${labelToAdd}" to PR #${pr.number}`); 41 | } catch (error) { 42 | console.error(`Error adding label: ${error.message}`); 43 | } 44 | } else { 45 | console.log('PR does not match criteria for automatic labeling'); 46 | } 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/rocm_ci_caller.yml: -------------------------------------------------------------------------------- 1 | name: ROCm CI Caller 2 | on: 3 | pull_request: 4 | branches: [amd-mainline] 5 | types: [opened, reopened, synchronize] 6 | push: 7 | branches: [amd-mainline] 8 | workflow_dispatch: 9 | 10 | jobs: 11 | call-workflow: 12 | uses: AMD-ROCm-Internal/rocm_ci_infra/.github/workflows/rocm_ci.yml@mainline 13 | secrets: inherit 14 | with: 15 | input_sha: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} 16 | input_pr_num: ${{ github.event_name == 'pull_request' && github.event.pull_request.number || 0 }} 17 | input_pr_url: ${{ github.event_name == 'pull_request' && github.event.pull_request.html_url || '' }} 18 | input_pr_title: ${{ github.event_name == 'pull_request' && github.event.pull_request.title || '' 
}} 19 | repository_name: ${{ github.repository }} 20 | base_ref: ${{ github.event_name == 'pull_request' && github.base_ref || github.ref }} 21 | trigger_event_type: ${{ github.event_name }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # build artifacts 2 | .cache/ 3 | build/ 4 | DEBIAN/postinst 5 | DEBIAN/prerm 6 | RPM/ 7 | include/rdc/rdc64Config.h 8 | 9 | # documentation artifacts 10 | _build/ 11 | _images/ 12 | _static/ 13 | _templates/ 14 | _toc.yml 15 | docBin/ 16 | docs/_doxygen/ 17 | 18 | # VisualStudioCode 19 | .vscode 20 | 21 | # do NOT ignore these files 22 | !.clang-format 23 | !.editorconfig 24 | !.cmake-format 25 | !.pre-commit-config.yaml 26 | 27 | # misc 28 | __pycache__/ 29 | authentication/CA/ 30 | 31 | # act 32 | act.variables 33 | act.secrets 34 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # - How to use: 2 | # python3 -m pip install pre-commit 3 | # pre-commit install --install-hooks 4 | # Upon a new commit - the hooks should automagically run 5 | # 6 | # - How to skip: 7 | # git commit --no-verify 8 | # or 9 | # SKIP=clang-format-docker git commit 10 | # SKIP=cpplint-docker git commit 11 | 12 | fail_fast: false 13 | repos: 14 | # For portability I decided to use Docker containers 15 | - repo: https://github.com/dmitrii-galantsev/pre-commit-docker-cpplint 16 | rev: 0.0.3 17 | hooks: 18 | - id: clang-format-docker 19 | - id: cpplint-docker 20 | - repo: https://github.com/cheshirekow/cmake-format-precommit 21 | rev: v0.6.13 22 | hooks: 23 | - id: cmake-format 24 | # Below is a local way of running formatters and linters 25 | # NOTE: clang-tidy is not used in the above tests 26 | # - repo: https://github.com/pocc/pre-commit-hooks 27 | # rev: v1.3.5 28 | # hooks: 29 
| # - id: clang-format 30 | # args: [--no-diff, -i] 31 | # - id: clang-tidy 32 | # args: [-p=build, --quiet] 33 | # - id: cpplint 34 | # args: [--verbose=5] 35 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | version: 2 5 | 6 | sphinx: 7 | configuration: docs/conf.py 8 | 9 | formats: [htmlzip, pdf, epub] 10 | 11 | python: 12 | install: 13 | - requirements: docs/sphinx/requirements.txt 14 | 15 | build: 16 | os: ubuntu-22.04 17 | tools: 18 | python: "3.10" 19 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log for RDC 2 | 3 | Full documentation for RDC is available at [ROCm DataCenter Tool User Guide](https://rocm.docs.amd.com/projects/rdc/en/latest/). 
4 | 5 | ## RDC for ROCm 6.4.0 6 | 7 | ### Added 8 | 9 | - RDC policy feature 10 | - Power and thermal throttling metrics 11 | - RVS [IET](https://github.com/ROCm/ROCmValidationSuite/tree/a6177fc5e3f2679f98bbbc80dc536d535a43fb69/iet.so), [PEBB](https://github.com/ROCm/ROCmValidationSuite/tree/a6177fc5e3f2679f98bbbc80dc536d535a43fb69/pebb.so), and [memory bandwidth tests](https://github.com/ROCm/ROCmValidationSuite/tree/a6177fc5e3f2679f98bbbc80dc536d535a43fb69/babel.so) 12 | - Link status 13 | - RDC_FI_PROF_SM_ACTIVE metric 14 | 15 | ### Changed 16 | 17 | - Migrated from [rocprofiler v1](https://github.com/ROCm/rocprofiler) to [rocprofiler-sdk](https://github.com/ROCm/rocprofiler-sdk) 18 | - Improved README.md for better usability 19 | - Moved `rdc_options` into `share/rdc/conf/` 20 | - Fixed ABSL in clang18+ 21 | 22 | ## RDC for ROCm 6.3.0 23 | 24 | ### Added 25 | 26 | - [RVS](https://github.com/ROCm/ROCmValidationSuite) integration 27 | - Real time logging for diagnostic command 28 | - `--version` command 29 | - `XGMI_TOTAL_READ_KB` and `XGMI_TOTAL_WRITE_KB` monitoring metrics 30 | 31 | ## RDC for ROCm 6.2.0 32 | 33 | - Added [rocprofiler](https://github.com/ROCm/rocprofiler) dmon metrics 34 | - Added new ECC metrics 35 | - Added [ROCmValidationSuite](https://github.com/ROCm/ROCmValidationSuite) diagnostic command 36 | - Fully migrated to [AMDSMI](https://github.com/ROCm/amdsmi) 37 | - Removed RASLIB dependency and blobs 38 | - Removed [rocm_smi_lib](https://github.com/ROCm/rocm_smi_lib) dependency 39 | 40 | ## RDC for ROCm 6.1.0 41 | 42 | - Added `--address` flag to rdcd 43 | - Upgraded from C++11 to C++17 44 | - Upgraded gRPC 45 | 46 | ## RDC for ROCm 5.5.0 47 | 48 | - Added new profiling metrics for RDC dmon module. 
49 | -------------------------------------------------------------------------------- /CPPLINT.cfg: -------------------------------------------------------------------------------- 1 | set noparent 2 | linelength=100 3 | filter=-build/include_subdir,-legal/copyright,-runtime/printf,-build/c++11,-runtime/int,-build/header_guard 4 | -------------------------------------------------------------------------------- /DEBIAN/control: -------------------------------------------------------------------------------- 1 | Package: rdc 2 | Architecture: amd64 3 | Maintainer: Advanced Micro Devices (AMD) 4 | Depends: 5 | Priority: optional 6 | Version: MODULE_VERSION 7 | Description: AMD Radeon Data Center 8 | This package contains the Radeon Data Center tools. 9 | -------------------------------------------------------------------------------- /Docker/README.md: -------------------------------------------------------------------------------- 1 | # Setup Instructions for AMDSMI and RDC Images 2 | 3 | Follow these steps to set up both the AMDSMI and RDC images. 4 | 5 | ## Prerequisites 6 | 7 | Ensure you have the necessary permissions and tools installed to clone repositories and build Docker images. 8 | 9 | ## Step 1: Clone Repositories 10 | 11 | Download the latest AMDSMI and RDC repositories using the following commands: 12 | 13 | ### AMDSMI 14 | 15 | ```bash 16 | git clone https://github.com/ROCm/amdsmi.git 17 | ``` 18 | 19 | ### RDC 20 | 21 | ```bash 22 | git clone https://github.com/ROCm/rdc.git 23 | ``` 24 | 25 | ## Step 2: Build AMDSMI Base Image 26 | 27 | 1. Navigate to the `amdsmi` directory on your system. 28 | 2. Build the Docker image using the following command: 29 | 30 | ```bash 31 | docker build -t amdsmi-image . 32 | ``` 33 | 34 | ## Step 3: Build RDC Image 35 | 36 | 1. Navigate to the `rdc` directory on your system. 37 | 2. Navigate into the `/Docker` directory. 38 | 3. 
Build the Docker image using the following command: 39 | 40 | ```bash 41 | docker build -t rdc-image . 42 | ``` 43 | 44 | ## Step 4: Run RDC Image 45 | 46 | To run the RDC image, use the following command: 47 | 48 | ```bash 49 | docker run rdc-image 50 | ``` 51 | 52 | If the above command does not work, try the following: 53 | 54 | 1. Run the image with a bash entry point: 55 | 56 | ```bash 57 | docker run -it --entrypoint /bin/bash rdc-image 58 | ``` 59 | 60 | 2. Once inside the container, run the following command: 61 | 62 | ```bash 63 | sudo /opt/rocm/bin/rdcd -u 64 | ``` 65 | 66 | ## Step 5: Run AMDSMI Image (optional) 67 | 68 | To be able to run AMDSMI commands inside of the image run the following: 69 | 70 | ```bash 71 | sudo docker run --rm -ti \ 72 | --privileged \ 73 | --volume $(realpath ./):/src:rw \ 74 | amdsmi-image 75 | ``` 76 | > [!IMPORTANT] 77 | > Make sure that you are in the `amdsmi` directory before running. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019-2025 Advanced Micro Devices, Inc. All rights reserved. 4 | 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 
15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /authentication/01gen_root_cert.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script should be called only once to generate a root 4 | # certificate 5 | 6 | mkdir -p CA 7 | pushd CA 8 | mkdir private newcerts 9 | chmod 700 private newcerts 10 | 11 | # Our next step is to create a database for the certificates we will sign: 12 | echo '01' >serial 13 | touch index.txt 14 | 15 | # openssl_part1.cnf 16 | 17 | # Create the Root Certificate 18 | # This call of openssl encrypts the keys 19 | # openssl req -new -x509 -extensions v3_ca -keyout private/rdc_cakey.pem \ 20 | # -out rdc_cacert.pem -days 3650 -config ../openssl.cnf 21 | 22 | # This call of openssl does not encrypt the keys 23 | openssl req -new -x509 -nodes -extensions v3_ca -keyout private/rdc_cakey.pem \ 24 | -out rdc_cacert.pem -days 3650 -config ../openssl.cnf 25 | # This generates: 26 | # A private key in private/rdc_cakey.pem 27 | # A root CA certificate in rdc_cacert.pem (distribute to clients) 28 | 29 | popd 30 | 31 | -------------------------------------------------------------------------------- /authentication/02gen_ssl_artifacts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script generates ssl keys and self-signed certificates 4 | 5 | INSTALL_RT="artifacts" 6 | 7 | generate_artifacts() { 
8 | HOST=$1 9 | echo "**********************************" 10 | echo "*** Generating $HOST artifacts ***" 11 | echo "**********************************" 12 | 13 | mkdir -p ${INSTALL_RT}/${HOST}/private 14 | mkdir -p ${INSTALL_RT}/${HOST}/certs 15 | 16 | echo "Generate CSR..." 17 | openssl req -new -nodes -out rdc_csr.pem -config ../openssl.cnf 18 | echo "Sign Certificate..." 19 | openssl ca -out rdc_${HOST}_cert.pem -config ../openssl.cnf -infiles rdc_csr.pem 20 | mv rdc_${HOST}_cert.pem ${INSTALL_RT}/${HOST}/certs/ 21 | mv key.pem ${INSTALL_RT}/${HOST}/private/rdc_${HOST}_cert.key 22 | cp rdc_cacert.pem ${INSTALL_RT}/${HOST}/certs/ 23 | } 24 | 25 | pushd CA 26 | echo 27 | echo "**********************" 28 | echo "IMPORTANT:" 29 | echo " * Make sure to use the same hostname (wildcards accepted) each" 30 | echo " time when prompted for \"Common Name\"" 31 | echo " * Make sure to select \"y\" when you are asked whether you want" 32 | echo " to sign the certificates" 33 | echo "**********************" 34 | echo 35 | generate_artifacts "server" 36 | generate_artifacts "client" 37 | rm rdc_cacert.pem 38 | cp ../install_client.sh ../install_server.sh $INSTALL_RT 39 | 40 | popd 41 | 42 | 43 | -------------------------------------------------------------------------------- /authentication/install_client.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note: 4 | # * This script should reside in the artifacts directory 5 | # when executed. 
6 | # * This script may require root privilege 7 | 8 | if [ $# -lt 1 ]; then 9 | echo "Need to specify a installation root directory (e.g., /etc/rdc)" 10 | exit 1 11 | fi 12 | 13 | INSTALL_DIR=$1 14 | mkdir -p $INSTALL_DIR 15 | cp -R client $INSTALL_DIR 16 | chown -R rdc:rdc $INSTALL_DIR/client 17 | 18 | -------------------------------------------------------------------------------- /authentication/install_server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Note: 4 | # * This script should reside in the artifacts directory 5 | # when executed. 6 | # * This script may require root privilege 7 | 8 | if [ $# -lt 1 ]; then 9 | echo "Need to specify a installation root directory (e.g., /etc/rdc)" 10 | exit 1 11 | fi 12 | 13 | INSTALL_DIR=$1 14 | mkdir -p $INSTALL_DIR 15 | cp -R server $INSTALL_DIR 16 | mkdir -p $INSTALL_DIR/client/certs 17 | cp client/certs/rdc_cacert.pem $INSTALL_DIR/client/certs 18 | chmod 700 $INSTALL_DIR/server/private 19 | chown -R rdc:rdc $INSTALL_DIR/server 20 | chown -R rdc:rdc $INSTALL_DIR/client 21 | 22 | -------------------------------------------------------------------------------- /authentication/readme.txt: -------------------------------------------------------------------------------- 1 | # How-to generate authentication files 2 | 3 | 1. Modify openssl.cnf to match your company info 4 | 2. ./01gen_root_cert.sh 5 | 3. ./02gen_ssl_artifacts.sh 6 | 4. cd CA/artifacts 7 | 5. sudo ./install_client.sh /etc/rdc 8 | 6. 
sudo ./install_server.sh /etc/rdc 9 | 10 | For a full guide refer to: 11 | https://rocm.docs.amd.com/projects/rdc/en/latest/install/handbook.html#generate-files-for-authentication 12 | -------------------------------------------------------------------------------- /cmake_modules/Findrocprofiler.cmake: -------------------------------------------------------------------------------- 1 | # This module provides a rocprofiler::rocprofiler package 2 | # You can specify the ROCM directory by setting ROCM_DIR 3 | 4 | set(NAME rocprofiler) 5 | 6 | if(NOT DEFINED ROCM_DIR) 7 | set(ROCM_DIR "/opt/rocm") 8 | endif() 9 | list(APPEND CMAKE_PREFIX_PATH ${ROCM_DIR}) 10 | 11 | find_library( 12 | ${NAME}_LIBRARY 13 | NAMES ${NAME} ${NAME}64 REQUIRED REGISTRY_VIEW BOTH 14 | PATH_SUFFIXES lib) 15 | 16 | if(NOT DEFINED (${NAME}_INCLUDE_DIR)) 17 | find_path( 18 | ${NAME}_INCLUDE_DIR 19 | NAMES ${NAME}.h 20 | HINTS "${ROCM_DIR}/include" 21 | PATH_SUFFIXES ${NAME} ${NAME}/inc) 22 | endif() 23 | 24 | include(FindPackageHandleStandardArgs) 25 | find_package_handle_standard_args( 26 | ${NAME} 27 | FOUND_VAR ${NAME}_FOUND 28 | REQUIRED_VARS ${NAME}_LIBRARY ${NAME}_INCLUDE_DIR) 29 | 30 | if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME}) 31 | add_library(${NAME}::${NAME} UNKNOWN IMPORTED) 32 | set_target_properties( 33 | ${NAME}::${NAME} 34 | PROPERTIES IMPORTED_LOCATION "${${NAME}_LIBRARY}" 35 | INTERFACE_COMPILE_OPTIONS "${PC_${NAME}_CFLAGS_OTHER}" 36 | INTERFACE_INCLUDE_DIRECTORIES "${${NAME}_INCLUDE_DIR}") 37 | endif() 38 | -------------------------------------------------------------------------------- /cmake_modules/Findrvs.cmake: -------------------------------------------------------------------------------- 1 | # This module provides a rvs::rvs package 2 | # You can specify the ROCM directory by setting ROCM_DIR 3 | 4 | set(NAME rvs) 5 | 6 | if(NOT DEFINED ROCM_DIR) 7 | set(ROCM_DIR "/opt/rocm") 8 | endif() 9 | list(APPEND CMAKE_PREFIX_PATH ${ROCM_DIR}) 10 | 11 | find_library( 12 | 
${NAME}_LIBRARY 13 | NAMES ${NAME} ${NAME}64 ${NAME}lib # RVS is special and is named librvslib.so 14 | REQUIRED REGISTRY_VIEW BOTH 15 | PATH_SUFFIXES lib) 16 | 17 | if(NOT DEFINED (${NAME}_INCLUDE_DIR)) 18 | find_path( 19 | ${NAME}_INCLUDE_DIR 20 | NAMES ${NAME}.h 21 | HINTS "${ROCM_DIR}/include" 22 | PATH_SUFFIXES ${NAME} ${NAME}/inc) 23 | endif() 24 | 25 | include(FindPackageHandleStandardArgs) 26 | find_package_handle_standard_args( 27 | ${NAME} 28 | FOUND_VAR ${NAME}_FOUND 29 | REQUIRED_VARS ${NAME}_LIBRARY ${NAME}_INCLUDE_DIR) 30 | 31 | if(${NAME}_FOUND AND NOT TARGET ${NAME}::${NAME}) 32 | add_library(${NAME}::${NAME} UNKNOWN IMPORTED) 33 | set_target_properties( 34 | ${NAME}::${NAME} 35 | PROPERTIES IMPORTED_LOCATION "${${NAME}_LIBRARY}" 36 | INTERFACE_COMPILE_OPTIONS "${PC_${NAME}_CFLAGS_OTHER}" 37 | INTERFACE_INCLUDE_DIRECTORIES "${${NAME}_INCLUDE_DIR}") 38 | find_library(rocm-core NAMES rocm-core REQUIRED) 39 | find_package(yaml-cpp REQUIRED) 40 | find_package(rocblas REQUIRED) 41 | find_package(hipblaslt REQUIRED) 42 | find_package(hsakmt REQUIRED) 43 | find_package(hip REQUIRED) 44 | find_package(hsa-runtime64 REQUIRED) 45 | find_package(amd_smi REQUIRED) 46 | target_link_libraries( 47 | ${NAME}::${NAME} 48 | INTERFACE ${rocm-core} 49 | yaml-cpp 50 | roc::rocblas 51 | roc::hipblaslt 52 | hsakmt::hsakmt 53 | hip::amdhip64 54 | hsa-runtime64::hsa-runtime64 55 | amd_smi) 56 | endif() 57 | -------------------------------------------------------------------------------- /cmake_modules/rdc-config-version.cmake.in: -------------------------------------------------------------------------------- 1 | set(PACKAGE_VERSION "@RDC_VERSION@") 2 | 3 | # Check whether the requested PACKAGE_FIND_VERSION is compatible 4 | if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") 5 | set(PACKAGE_VERSION_COMPATIBLE FALSE) 6 | else() 7 | set(PACKAGE_VERSION_COMPATIBLE TRUE) 8 | if("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") 9 | 
set(PACKAGE_VERSION_EXACT TRUE) 10 | endif() 11 | endif() 12 | -------------------------------------------------------------------------------- /cmake_modules/rdc-config.cmake.in: -------------------------------------------------------------------------------- 1 | # - Config file for the rdc package 2 | # It defines the following variables 3 | # RDC_INCLUDE_DIRS - include directories for rdc 4 | # RDC_LIBRARIES - libraries to link against 5 | 6 | @PACKAGE_INIT@ 7 | 8 | # Compute paths 9 | get_filename_component(RDC_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) 10 | set(ROCM_RDC_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/${CMAKE_INSTALL_INCLUDEDIR}") 11 | set(ROCM_RDC_LIB_DIR "${PACKAGE_PREFIX_DIR}/${CMAKE_INSTALL_LIBDIR}") 12 | 13 | # Our library dependencies (contains definitions for IMPORTED targets) 14 | if(NOT TARGET rdc_libs AND NOT rdc_BINARY_DIR) 15 | include("${RDC_CMAKE_DIR}/rdcTargets.cmake") 16 | endif() 17 | 18 | # These are IMPORTED targets created by rdcTargets.cmake 19 | set(ROCM_RDC_LIBRARIES "@CONF_LIBS@") 20 | -------------------------------------------------------------------------------- /cmake_modules/version_util.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Handle commandline args 4 | while [ "$1" != "" ]; do 5 | case $1 in 6 | -c ) # Commits since prevous tag 7 | TARGET="count" ;; 8 | * ) 9 | TARGET="count" 10 | break ;; 11 | esac 12 | shift 1 13 | done 14 | TAG_PREFIX=$1 15 | reg_ex="${TAG_PREFIX}*" 16 | 17 | commits_since_last_tag() { 18 | TAG_ARR=(`git tag --sort=committerdate -l ${reg_ex} | tail -2`) 19 | # if we don't have 2 tags, just say there were 0 commits since 20 | # last tag 21 | if [ ${#TAG_ARR[@]} != 2 ]; then 22 | echo 0 23 | exit 0 24 | fi 25 | 26 | PREVIOUS_TAG=${TAG_ARR[0]} 27 | CURRENT_TAG=${TAG_ARR[1]} 28 | 29 | PREV_CMT_NUM=`git rev-list --count $PREVIOUS_TAG` 30 | CURR_CMT_NUM=`git rev-list --count $CURRENT_TAG` 31 | 32 | # Commits since prevous tag: 33 | let 
NUM_COMMITS="${CURR_CMT_NUM}-${PREV_CMT_NUM}" 34 | echo $NUM_COMMITS 35 | } 36 | 37 | case $TARGET in 38 | count) commits_since_last_tag ;; 39 | *) die "Invalid target $target" ;; 40 | esac 41 | 42 | exit 0 43 | 44 | -------------------------------------------------------------------------------- /common/rdc_capabilities.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef COMMON_RDC_CAPABILITIES_H_ 24 | #define COMMON_RDC_CAPABILITIES_H_ 25 | 26 | #include 27 | 28 | namespace amd { 29 | namespace rdc { 30 | 31 | int GetCapability(cap_value_t cap, cap_flag_t cap_type, bool* enabled); 32 | int ModifyCapability(cap_value_t cap, cap_flag_t cap_type, bool enable); 33 | 34 | struct ScopedCapability { 35 | ScopedCapability(cap_value_t cp, cap_flag_t cpt) : cap_(cp), cap_type_(cpt), error_(0) { 36 | error_ = ModifyCapability(cap_, cap_type_, true); 37 | } 38 | ~ScopedCapability() { error_ = ModifyCapability(cap_, cap_type_, false); } 39 | void Relinquish(void) { error_ = ModifyCapability(cap_, cap_type_, false); } 40 | int error(void) { return error_; } 41 | 42 | private: 43 | cap_value_t cap_; 44 | cap_flag_t cap_type_; 45 | int error_; 46 | }; 47 | 48 | } // namespace rdc 49 | } // namespace amd 50 | 51 | #endif // COMMON_RDC_CAPABILITIES_H_ 52 | -------------------------------------------------------------------------------- /common/rdc_fields_supported.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #include "common/rdc_fields_supported.h" 23 | 24 | #include 25 | 26 | #include 27 | 28 | #include "rdc/rdc.h" 29 | namespace amd { 30 | namespace rdc { 31 | 32 | #define FLD_DESC_ENT(ID, DESC, LABEL, DISPLAY) \ 33 | {static_cast(ID), {#ID, (DESC), (LABEL), (DISPLAY)}}, 34 | static const fld_id2name_map_t field_id_to_descript = { 35 | #include "common/rdc_field.data" 36 | }; 37 | #undef FLD_DESC_ENT 38 | 39 | #define FLD_DESC_ENT(ID, DESC, LABEL, DISPLAY) {#ID, (ID)}, 40 | static fld_name2id_map_t field_name_to_id = { 41 | #include "common/rdc_field.data" // NOLINT 42 | }; 43 | #undef FLD_DESC_ENT 44 | 45 | amd::rdc::fld_id2name_map_t& get_field_id_description_from_id(void) { return field_id_to_descript; } 46 | 47 | bool get_field_id_from_name(const std::string name, rdc_field_t* value) { 48 | assert(value != nullptr); 49 | auto id = field_name_to_id.find(name); 50 | if (id == field_name_to_id.end()) { 51 | return false; 52 | } 53 | 54 | *value = static_cast(id->second); 55 | return true; 56 | } 57 | 58 | bool is_field_valid(rdc_field_t field_id) { 59 | if (field_id == RDC_FI_INVALID) { 60 | return false; 61 | } 62 | return field_id_to_descript.find(static_cast(field_id)) != field_id_to_descript.end(); 63 | } 64 | 65 | } // namespace rdc 66 | } // namespace amd 67 | -------------------------------------------------------------------------------- /common/rdc_fields_supported.h: -------------------------------------------------------------------------------- 1 | 
/* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef COMMON_RDC_FIELDS_SUPPORTED_H_ 23 | #define COMMON_RDC_FIELDS_SUPPORTED_H_ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "rdc/rdc.h" 30 | 31 | namespace amd { 32 | namespace rdc { 33 | 34 | typedef struct { 35 | std::string enum_name; 36 | std::string description; 37 | std::string label; 38 | bool do_display; 39 | } field_id_descript; 40 | 41 | typedef const std::map fld_id2name_map_t; 42 | typedef std::unordered_map fld_name2id_map_t; 43 | 44 | bool get_field_id_from_name(const std::string name, rdc_field_t* value); 45 | fld_id2name_map_t& get_field_id_description_from_id(void); // NOLINT 46 | bool is_field_valid(rdc_field_t field_id); 47 | 48 | } // namespace rdc 49 | } // namespace amd 50 | 51 | #endif // COMMON_RDC_FIELDS_SUPPORTED_H_ 52 | -------------------------------------------------------------------------------- /common/rdc_utils.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | */ 23 | 24 | #ifndef COMMON_RDC_UTILS_H_ 25 | #define COMMON_RDC_UTILS_H_ 26 | 27 | #include 28 | 29 | namespace amd { 30 | namespace rdc { 31 | 32 | #ifdef NDEBUG 33 | #define debug_print(fmt, ...) \ 34 | do { \ 35 | } while (false) 36 | #else 37 | #define debug_print(fmt, ...) \ 38 | do { \ 39 | fprintf(stderr, fmt, ##__VA_ARGS__); \ 40 | } while (false) 41 | #endif 42 | 43 | bool FileExists(char const* filename); 44 | 45 | int ReadFile(std::string path, std::string* retStr, bool chop_newline = false); 46 | int ReadFile(const char* path, std::string* retStr, bool chop_newline = false); 47 | 48 | bool IsNumber(const std::string& s); 49 | bool IsIP(const std::string& s); 50 | 51 | } // namespace rdc 52 | } // namespace amd 53 | 54 | #endif // COMMON_RDC_UTILS_H_ 55 | -------------------------------------------------------------------------------- /docs/Conceptual/components.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: documentation of the installation, configuration, and use of the ROCm Data Center tool 3 | :keywords: ROCm Data Center tool, RDC, ROCm, API, reference, data type, support 4 | 5 | .. _components: 6 | 7 | *************** 8 | RDC components 9 | *************** 10 | 11 | The components of the RDC tool are illustrated in the following figure. 12 | 13 | .. figure:: ../data/install_components.png 14 | 15 | High-level diagram of RDC components 16 | 17 | RDC (API) library 18 | ----------------- 19 | 20 | This library is the central piece, which interacts with different modules and provides all the features described. 
This shared library provides a C API and Python bindings so that third-party tools can use it directly if required. 21 | 22 | RDC daemon (``rdcd``) 23 | --------------------- 24 | 25 | The ``rdcd`` daemon records telemetry information from GPUs. It also provides an interface to the RDC command-line tool (``rdci``) running locally or remotely. It relies on the RDC library described above for all the core features. 26 | 27 | RDC command-line tool (``rdci``) 28 | -------------------------------- 29 | 30 | A command-line tool to invoke all the features of the RDC tool. This CLI can be run locally or remotely. 31 | 32 | AMDSMI library 33 | -------------- 34 | 35 | A stateless system management library that provides low-level interfaces to access GPU information. -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # for PDF output on Read the Docs 8 | project = "ROCm Data Center tool" 9 | author = "Advanced Micro Devices, Inc." 10 | copyright = "Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved." 
11 | 12 | html_theme = "rocm_docs_theme" 13 | html_theme_options = {"flavor": "rocm"} 14 | html_title = f"RDC documentation" 15 | external_toc_path = "./sphinx/_toc.yml" 16 | 17 | external_projects_current_project = "rdc" 18 | extensions = ["rocm_docs", "rocm_docs.doxygen"] 19 | 20 | doxygen_root = "doxygen" 21 | doxysphinx_enabled = True 22 | doxygen_project = { 23 | "name": "ROCm Data Center Tool API reference", 24 | "path": "doxygen/xml", 25 | } 26 | -------------------------------------------------------------------------------- /docs/data/api_libs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/api_libs.png -------------------------------------------------------------------------------- /docs/data/features.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/features.png -------------------------------------------------------------------------------- /docs/data/features_jobs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/features_jobs.png -------------------------------------------------------------------------------- /docs/data/handbook_openssl.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/handbook_openssl.png -------------------------------------------------------------------------------- /docs/data/install_components.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/install_components.png 
-------------------------------------------------------------------------------- /docs/data/integration_config1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config1.png -------------------------------------------------------------------------------- /docs/data/integration_config2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config2.png -------------------------------------------------------------------------------- /docs/data/integration_config3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config3.png -------------------------------------------------------------------------------- /docs/data/integration_config4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config4.png -------------------------------------------------------------------------------- /docs/data/integration_config5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config5.png -------------------------------------------------------------------------------- /docs/data/integration_config6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_config6.png -------------------------------------------------------------------------------- 
/docs/data/integration_gpu_clock.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_gpu_clock.png -------------------------------------------------------------------------------- /docs/data/integration_login.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/docs/data/integration_login.png -------------------------------------------------------------------------------- /docs/doxygen/.gitignore: -------------------------------------------------------------------------------- 1 | html/ 2 | xml/ 3 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The ROCm Data Center tool (RDC) addresses key infrastructure challenges regarding AMD GPUs in cluster and data center environments and simplifies their administration 3 | :keywords: ROCm Data Center tool, RDC, Data Center 4 | 5 | .. _index: 6 | 7 | ************************************* 8 | ROCm Data Center tool documentation 9 | ************************************* 10 | 11 | The ROCm Data Center tool (RDC) addresses key infrastructure challenges regarding AMD GPUs in cluster and data center environments and simplifies their administration. 12 | Here are the main RDC features: 13 | 14 | * GPU telemetry 15 | * GPU statistics for jobs 16 | * Integration with third-party tools 17 | * Open source 18 | 19 | The code is open and hosted at `<https://github.com/ROCm/rdc>`_. 20 | 21 | .. grid:: 2 22 | :gutter: 3 23 | 24 | .. grid-item-card:: Install 25 | 26 | * :ref:`rdc-install` 27 | 28 | .. grid-item-card:: How to 29 | 30 | * :ref:`using-RDC` 31 | * :ref:`rdc-features` 32 | * :ref:`rdc-3rd-party` 33 | 34 | .. 
grid-item-card:: API reference 35 | 36 | * :ref:`api-intro` 37 | * :ref:`rdc-ref` 38 | 39 | .. grid-item-card:: Tutorial 40 | 41 | * :ref:`job-stats-sample` 42 | 43 | To contribute to the documentation, refer to 44 | `Contributing to ROCm <https://rocm.docs.amd.com/en/latest/contribute/contributing.html>`_. 45 | 46 | You can find licensing information on the 47 | `Licensing <https://rocm.docs.amd.com/en/latest/about/license.html>`_ page. 48 | -------------------------------------------------------------------------------- /docs/install/handbook.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: documentation of the installation, configuration, and use of the ROCm Data Center tool 3 | :keywords: ROCm Data Center tool, RDC, ROCm, API, reference, data type, support 4 | 5 | .. _rdc-handbook: 6 | 7 | *************************************************** 8 | Building and testing RDC 9 | *************************************************** 10 | 11 | RDC is open source and available under the MIT License. This section is helpful for open source developers. Third-party integrators may also find this information useful. 12 | 13 | 14 | Build and Install RDC 15 | ===================== 16 | 17 | To build and install, clone the RDC source code from GitHub and use CMake. 18 | 19 | .. code-block:: shell 20 | 21 | $ git clone https://github.com/ROCm/rdc 22 | $ cd rdc 23 | $ mkdir -p build; cd build 24 | $ cmake -DROCM_DIR=/opt/rocm -DGRPC_ROOT="$GRPC_PROTOC_ROOT" .. 25 | $ make 26 | # Install the library files and headers; the default location is /opt/rocm 27 | $ make install 28 | 29 | 30 | Build Documentation 31 | ------------------- 32 | 33 | You can generate PDF documentation after a successful build. The reference manual, refman.pdf, appears in the latex directory. 34 | 35 | .. code-block:: shell 36 | 37 | $ make doc 38 | $ cd latex 39 | $ make 40 | 41 | 42 | Build Unit Tests for RDC Tool 43 | ----------------------------- 44 | 45 | .. 
code-block:: shell 46 | 47 | $ cd rdc/tests/rdc_tests 48 | $ mkdir -p build; cd build 49 | $ cmake -DROCM_DIR=/opt/rocm -DGRPC_ROOT="$GRPC_PROTOC_ROOT" .. 50 | $ make 51 | 52 | # To run the tests 53 | 54 | $ cd build/rdctst_tests 55 | $ ./rdctst 56 | 57 | 58 | Test 59 | ---- 60 | 61 | .. code-block:: shell 62 | 63 | # Run the rdcd daemon 64 | $ LD_LIBRARY_PATH=$PWD/rdc_libs/ ./server/rdcd -u 65 | 66 | # In another console, run the RDC command-line tool 67 | $ LD_LIBRARY_PATH=$PWD/rdc_libs/ ./rdci/rdci discovery -l -u 68 | 69 | 70 | -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # License 2 | 3 | ```{include} ../LICENSE 4 | ``` 5 | -------------------------------------------------------------------------------- /docs/reference/api_intro.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The ROCm Data Center tool (RDC) addresses key infrastructure challenges regarding AMD GPUs in cluster and data center environments and simplifies their administration 3 | :keywords: ROCm Data Center tool API, RDC API 4 | 5 | .. _api-intro: 6 | 7 | ************************* 8 | Introduction to RDC API 9 | ************************* 10 | 11 | .. note:: 12 | This is the alpha version of the RDC API and is subject to change without notice. The primary purpose of this API is to solicit feedback. AMD accepts no responsibility for any software breakage caused by API changes. 13 | 14 | RDC API 15 | ======== 16 | 17 | RDC API is the core library that provides all the RDC features. 18 | 19 | RDC API includes the following libraries: 20 | 21 | * ``librdc_bootstrap.so``: Loads one of the following two libraries during runtime, depending on the mode. 
22 | 23 | - ``rdci`` mode: Loads ``librdc_client.so`` 24 | - ``rdcd`` mode: Loads ``librdc.so`` 25 | 26 | * ``librdc_client.so``: Exposes RDC functionality using ``gRPC`` client. 27 | 28 | * ``librdc.so``: RDC API. This depends on ``libamd_smi.so``. 29 | 30 | * ``libamd_smi.so``: Stateless low overhead access to GPU data. 31 | 32 | .. figure:: ../data/api_libs.png 33 | 34 | Different libraries and how they are linked. 35 | -------------------------------------------------------------------------------- /docs/reference/api_ref.rst: -------------------------------------------------------------------------------- 1 | .. meta:: 2 | :description: The ROCm Data Center tool (RDC) addresses key infrastructure challenges regarding AMD GPUs in cluster and data center environments and simplifies their administration 3 | :keywords: ROCm Data Center library, RDC library, RDC API, ROCm Data Center API 4 | 5 | .. _rdc-ref: 6 | 7 | **************** 8 | RDC API library 9 | **************** 10 | 11 | .. doxygenindex:: 12 | -------------------------------------------------------------------------------- /docs/sphinx/_toc.yml.in: -------------------------------------------------------------------------------- 1 | # Anywhere {branch} is used, the branch name will be substituted. 2 | # These comments will also be removed. 
3 | defaults: 4 | numbered: False 5 | root: index 6 | subtrees: 7 | - caption: Install 8 | entries: 9 | - file: install/install 10 | title: Installing RDC 11 | 12 | - caption: How to 13 | entries: 14 | - file: how-to/using_RDC 15 | - file: how-to/using_RDC_features 16 | - file: how-to/integration 17 | 18 | - caption: API reference 19 | entries: 20 | - file: reference/api_intro 21 | - file: reference/api_ref 22 | 23 | - caption: Tutorial 24 | entries: 25 | - file: tutorial/job_stats_sample 26 | 27 | - caption: About 28 | entries: 29 | - file: license 30 | -------------------------------------------------------------------------------- /docs/sphinx/requirements.in: -------------------------------------------------------------------------------- 1 | rocm-docs-core[api-reference]==1.20.0 2 | -------------------------------------------------------------------------------- /example/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | 4 | ### How to compile examples? 5 | 6 | ***NOTE: You have to have RDC installed somewhere.*** 7 | 8 | If you have rocm (and RDC) installed under `/opt/rocm` - then you can simply do: 9 | 10 | ```bash 11 | # same as 'mkdir -p build; cd build; cmake ../; cd ../' 12 | cmake -B build 13 | # same as 'cd build; make; cd ../' 14 | make -C build 15 | ``` 16 | 17 | If you have rocm installed under a different directory, then you will have to 18 | add that path with one of the following ways: 19 | 20 | - `cmake -DROCM_DIR=/custom/rocm/path -B build` 21 | - `ROCM_PATH=/custom/rocm/path cmake -B build` 22 | 23 | followed by `make -C build` 24 | 25 | You can also set ROCM\_PATH environment variable. 26 | 27 | 28 | ### I can't find rdc! 29 | 30 | - Is RDC installed? 31 | - Is RDC installed under `/opt/rocm`? 32 | - Can you find `/opt/rocm/lib/cmake/rdc/rdcTargets.cmake`? 33 | 34 | 35 | ### Where is rdc? 
36 | 37 | ```bash 38 | ldd build/diagnostic 39 | ``` 40 | 41 | Look for `librdc_bootstrap.so` 42 | 43 | 44 | ### `diagnostic` is halted, but other examples work 45 | 46 | Did you wait long enough? 47 | 48 | It takes a while to run. 46 seconds on my machine with 2 GPUs. 49 | 50 | 51 | ### `Couldn't find the platform configure..` 52 | 53 | ### `Couldn't find the config for the Device...` 54 | 55 | That's probably ok. The examples will still run. 56 | 57 | Try to `cd` into the config directory and call these examples from there. 58 | -------------------------------------------------------------------------------- /include/rdc/rdc_private.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef INCLUDE_RDC_RDC_PRIVATE_H_ 3 | #define INCLUDE_RDC_RDC_PRIVATE_H_ 4 | 5 | #include "rdc/rdc.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif // __cplusplus 10 | 11 | #ifdef __cplusplus 12 | 13 | // cstddef include causes issues on older GCC 14 | // use stddef.h instead 15 | #if __GNUC__ < 9 16 | #include 17 | #else 18 | #include 19 | #endif // __GNUC__ 20 | 21 | #include 22 | #else 23 | #include 24 | #include 25 | #endif // __cplusplus 26 | 27 | /** 28 | * @brief The maximum string length occupied by version information. 29 | */ 30 | #define USR_MAX_VERSION_STR_LENGTH 60 31 | 32 | /** 33 | * @brief Version information of mixed components 34 | */ 35 | typedef struct { 36 | char version[USR_MAX_VERSION_STR_LENGTH]; 37 | } mixed_component_version_t; 38 | 39 | /** 40 | * @brief Type of Components 41 | */ 42 | typedef enum { 43 | RDCD_COMPONENT 44 | //If needed later, add them one by one 45 | } mixed_component_t; 46 | 47 | /** 48 | * @brief Get version information of mixed components. 49 | * 50 | * @details Given a component type, return its version information. 51 | * 52 | * @param[in] p_rdc_handle The RDC handler. 53 | * 54 | * @param[in] component Component type. 
55 | * 56 | * @param[out] p_mixed_compv Version information of the corresponding component. 57 | * 58 | * @retval ::RDC_ST_OK is returned upon successful call. 59 | */ 60 | rdc_status_t get_mixed_component_version(rdc_handle_t p_rdc_handle, mixed_component_t component, mixed_component_version_t* p_mixed_compv); 61 | 62 | #ifdef __cplusplus 63 | } 64 | #endif // __cplusplus 65 | 66 | #endif // INCLUDE_RDC_RDC_PRIVATE_H_ -------------------------------------------------------------------------------- /include/rdc_lib/RdcConfigSettings.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCCONFIGSETTINGS_H_
23 | #define INCLUDE_RDC_LIB_RDCCONFIGSETTINGS_H_
24 |
25 | #include  // NOTE(review): include target missing in this dump - presumably <memory> for std::shared_ptr; confirm
26 |
27 | #include "rdc/rdc.h"
28 |
29 | namespace amd {
30 | namespace rdc {
31 | // Abstract interface (every operation is pure virtual) for setting, reading and clearing the configuration of a GPU group.
32 | class RdcConfigSettings {
33 |  public:
34 |   // Set one configuration entry (setting) for the group identified by group_id
35 |   virtual rdc_status_t rdc_config_set(rdc_gpu_group_t group_id, rdc_config_setting_t setting) = 0;
36 |
37 |   // Get the settings of group_id; the result is returned through the settings pointer
38 |   virtual rdc_status_t rdc_config_get(rdc_gpu_group_t group_id,
39 |                                       rdc_config_setting_list_t* settings) = 0;
40 |
41 |   // Clear the settings of group_id
42 |   virtual rdc_status_t rdc_config_clear(rdc_gpu_group_t group_id) = 0;
43 |
44 |   virtual ~RdcConfigSettings() {}  // virtual so implementations can be destroyed through this interface
45 | };
46 | typedef std::shared_ptr RdcConfigSettingsPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcConfigSettings>; confirm
47 | } // namespace rdc
48 | } // namespace amd
49 |
50 | #endif // INCLUDE_RDC_LIB_RDCCONFIGSETTINGS_H_
51 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcDiagnosticLibInterface.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
23 | #define INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
24 |
25 | // The diagnostic interface for libraries, for example, AMD-SMI.
26 | #include  // NOTE(review): include target missing in this dump; restore from the original header
27 |
28 | extern "C" {  // C linkage for the declarations below
29 |
30 | // The library implements the functions below
31 |
32 | // Which test cases are supported in the library; results are returned through test_cases and test_case_count
33 | rdc_status_t rdc_diag_test_cases_query(rdc_diag_test_cases_t test_cases[MAX_TEST_CASES],
34 |                                        uint32_t* test_case_count);
35 |
36 | // Run a specific test case (presumably on the gpu_count GPUs listed in gpu_index - confirm)
37 |
38 | rdc_status_t rdc_diag_test_case_run(rdc_diag_test_cases_t test_case,
39 |                                     uint32_t gpu_index[RDC_MAX_NUM_DEVICES], uint32_t gpu_count,
40 |                                     const char* config, size_t config_size,
41 |                                     rdc_diag_test_result_t* result, rdc_diag_callback_t* callback);
42 | // Library set-up and tear-down entry points; the meaning of flags is not visible in this header
43 | rdc_status_t rdc_diag_init(uint64_t flags);
44 |
45 | rdc_status_t rdc_diag_destroy();
46 | }
47 |
48 | #endif // INCLUDE_RDC_LIB_RDCDIAGNOSTICLIBINTERFACE_H_
49 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcException.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #ifndef INCLUDE_RDC_LIB_RDCEXCEPTION_H_ 24 | #define INCLUDE_RDC_LIB_RDCEXCEPTION_H_ 25 | 26 | #include 27 | #include 28 | 29 | #include "rdc/rdc.h" 30 | 31 | namespace amd { 32 | namespace rdc { 33 | 34 | class RdcException : public std::exception { 35 | public: 36 | RdcException(rdc_status_t error, const std::string description) 37 | : err_(error), desc_(description) {} 38 | rdc_status_t error_code() const noexcept { return err_; } 39 | const char* what() const noexcept override { return desc_.c_str(); } 40 | 41 | private: 42 | rdc_status_t err_; 43 | std::string desc_; 44 | }; 45 | 46 | } // namespace rdc 47 | } // namespace amd 48 | 49 | #endif // INCLUDE_RDC_LIB_RDCEXCEPTION_H_ 50 | -------------------------------------------------------------------------------- /include/rdc_lib/RdcMetricFetcher.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
23 | #define INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
24 |
25 | #include  // NOTE(review): include targets missing in this dump - presumably <memory> and <vector>; confirm
26 | #include
27 |
28 | #include "rdc/rdc.h"
29 | #include "rdc_lib/RdcTelemetryLibInterface.h"
30 | #include "rdc_lib/rdc_common.h"
31 |
32 | namespace amd {
33 | namespace rdc {
34 | // Abstract interface for fetching field values; every operation is pure virtual.
35 | class RdcMetricFetcher {
36 |  public:
37 |   virtual rdc_status_t acquire_smi_handle(RdcFieldKey fk) = 0;  // paired with delete_smi_handle for the same field key
38 |   virtual rdc_status_t delete_smi_handle(RdcFieldKey fk) = 0;
39 |   // Fetch a single field (field_id) for the GPU at gpu_index; the value is returned through value
40 |   virtual rdc_status_t fetch_smi_field(uint32_t gpu_index, rdc_field_t field_id,
41 |                                        rdc_field_value* value) = 0;
42 |   // Fetch fields_count entries of fields in one call; values are returned through results (element type missing in this dump)
43 |   virtual rdc_status_t bulk_fetch_smi_fields(
44 |       rdc_gpu_field_t* fields, uint32_t fields_count,
45 |       std::vector& results) = 0; // NOLINT
46 |   virtual ~RdcMetricFetcher() {}
47 | };
48 |
49 | typedef std::shared_ptr RdcMetricFetcherPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcMetricFetcher>; confirm
50 |
51 | } // namespace rdc
52 | } // namespace amd
53 |
54 | #endif // INCLUDE_RDC_LIB_RDCMETRICFETCHER_H_
55 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcMetricsUpdater.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ 23 | #define INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ 24 | 25 | #include 26 | 27 | namespace amd { 28 | namespace rdc { 29 | 30 | class RdcMetricsUpdater { 31 | public: 32 | virtual void start() = 0; 33 | virtual void stop() = 0; 34 | }; 35 | 36 | typedef std::shared_ptr RdcMetricsUpdaterPtr; 37 | 38 | } // namespace rdc 39 | } // namespace amd 40 | 41 | #endif // INCLUDE_RDC_LIB_RDCMETRICSUPDATER_H_ 42 | -------------------------------------------------------------------------------- /include/rdc_lib/RdcModuleMgr.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCMODULEMGR_H_
23 | #define INCLUDE_RDC_LIB_RDCMODULEMGR_H_
24 |
25 | #include  // NOTE(review): include target missing in this dump - presumably <memory> for std::shared_ptr; confirm
26 |
27 | #include "rdc_lib/RdcDiagnostic.h"
28 | #include "rdc_lib/RdcTelemetry.h"
29 |
30 | namespace amd {
31 | namespace rdc {
32 | // Abstract interface that hands out the telemetry and diagnostic modules.
33 | class RdcModuleMgr {
34 |  public:
35 |   virtual ~RdcModuleMgr() = default;
36 |   virtual RdcTelemetryPtr get_telemetry_module() = 0;    // returns the telemetry module as a shared pointer type
37 |   virtual RdcDiagnosticPtr get_diagnostic_module() = 0;  // returns the diagnostic module as a shared pointer type
38 | };
39 |
40 | typedef std::shared_ptr RdcModuleMgrPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcModuleMgr>; confirm
41 |
42 | } // namespace rdc
43 | } // namespace amd
44 |
45 | #endif // INCLUDE_RDC_LIB_RDCMODULEMGR_H_
46 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcNotification.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc.
All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCNOTIFICATION_H_
23 | #define INCLUDE_RDC_LIB_RDCNOTIFICATION_H_
24 |
25 | #include  // NOTE(review): include targets missing in this dump - presumably <memory> and <vector>; confirm
26 | #include
27 |
28 | #include "rdc/rdc.h"
29 | #include "rdc_lib/rdc_common.h"
30 |
31 | namespace amd {
32 | namespace rdc {
33 | // Declaration only; the constant is defined in another translation unit.
34 | extern const uint32_t kMaxRSMIEvents;
35 | // One notification record: a GPU id plus the field value reported with it.
36 | typedef struct {
37 |   uint32_t gpu_id;
38 |   rdc_field_value field;
39 | } rdc_evnt_notification_t;
40 | // Abstract interface for event notifications; every operation is pure virtual.
41 | class RdcNotification {
42 |  public:
43 |   virtual bool is_notification_event(rdc_field_t field) const = 0;
44 |   // NOTE(review): fk_arr is taken by const value, so each call copies the vector; its element type is missing in this dump.
45 |   virtual rdc_status_t set_listen_events(const std::vector fk_arr) = 0;
46 |
47 |   // Blocking
48 |   virtual rdc_status_t listen(rdc_evnt_notification_t* events, uint32_t* num_events,
49 |                               uint32_t timeout_ms) = 0;
50 |
51 |   virtual rdc_status_t stop_listening(uint32_t gpu_id) = 0;
52 |   virtual ~RdcNotification() {}
53 | };
54 |
55 | typedef std::shared_ptr RdcNotificationPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcNotification>; confirm
56 |
57 | } // namespace rdc
58 | } // namespace amd
59 |
60 | #endif // INCLUDE_RDC_LIB_RDCNOTIFICATION_H_
61 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcPartition.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2025 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software.
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCPARTITION_H_
23 | #define INCLUDE_RDC_LIB_RDCPARTITION_H_
24 |
25 | #include  // NOTE(review): include target missing in this dump - presumably <memory> for std::shared_ptr; confirm
26 |
27 | #include "rdc/rdc.h"
28 |
29 | namespace amd {
30 | namespace rdc {
31 | // Abstract interface for partition queries; every operation is pure virtual.
32 | class RdcPartition {
33 |  public:
34 |   virtual rdc_status_t rdc_instance_profile_get_impl(uint32_t entity_index,
35 |                                                      rdc_instance_resource_type_t resource_type,
36 |                                                      rdc_resource_profile_t* profile) = 0;
37 |   // The partition count for index is returned through num_partition
38 |   virtual rdc_status_t rdc_get_num_partition_impl(uint32_t index, uint16_t* num_partition) = 0;
39 |
40 |   virtual ~RdcPartition() {}
41 | };
42 | typedef std::shared_ptr RdcPartitionPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcPartition>; confirm
43 |
44 | } // namespace rdc
45 | } // namespace amd
46 |
47 | #endif // INCLUDE_RDC_LIB_RDCPARTITION_H_
48 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcPolicy.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCPOLICY_H_
23 | #define INCLUDE_RDC_LIB_RDCPOLICY_H_
24 |
25 | #include  // NOTE(review): include targets missing in this dump; <memory> is needed for std::shared_ptr, the second is unknown - confirm
26 | #include
27 |
28 | #include "rdc/rdc.h"
29 | #include "rdc_lib/rdc_common.h"
30 |
31 | namespace amd {
32 | namespace rdc {
33 | // Abstract interface for per-group policies; every operation is pure virtual.
34 | class RdcPolicy {
35 |  public:
36 |   virtual rdc_status_t rdc_policy_set(rdc_gpu_group_t group_id, rdc_policy_t policy) = 0;
37 |   // policies has room for RDC_MAX_POLICY_SETTINGS entries; count is an in/out or out pointer (direction not visible here)
38 |   virtual rdc_status_t rdc_policy_get(rdc_gpu_group_t group_id, uint32_t* count,
39 |                                       rdc_policy_t policies[RDC_MAX_POLICY_SETTINGS]) = 0;
40 |
41 |   virtual rdc_status_t rdc_policy_delete(rdc_gpu_group_t group_id,
42 |                                          rdc_policy_condition_type_t condition_type) = 0;
43 |   // Register / unregister a callback for the group identified by group_id
44 |   virtual rdc_status_t rdc_policy_register(rdc_gpu_group_t group_id,
45 |                                            rdc_policy_register_callback callback) = 0;
46 |
47 |   virtual rdc_status_t rdc_policy_unregister(rdc_gpu_group_t group_id) = 0;
48 |
49 |   virtual ~RdcPolicy() {}
50 | };
51 |
52 | typedef std::shared_ptr RdcPolicyPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcPolicy>; confirm
53 |
54 | } // namespace rdc
55 | } // namespace amd
56 |
57 | #endif // INCLUDE_RDC_LIB_RDCPOLICY_H_
58 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcTelemetry.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software.
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCTELEMETRY_H_
23 | #define INCLUDE_RDC_LIB_RDCTELEMETRY_H_
24 |
25 | #include  // NOTE(review): include target missing in this dump - presumably <memory> for std::shared_ptr; confirm
26 |
27 | #include "rdc/rdc.h"
28 | #include "rdc_lib/RdcTelemetryLibInterface.h"
29 |
30 | namespace amd {
31 | namespace rdc {
32 | // Abstract interface for telemetry: query, fetch, watch and unwatch fields; every operation is pure virtual.
33 | class RdcTelemetry {
34 |  public:
35 |   // Get the supported field ids (field_ids has room for MAX_NUM_FIELDS entries)
36 |   virtual rdc_status_t rdc_telemetry_fields_query(uint32_t field_ids[MAX_NUM_FIELDS],
37 |                                                   uint32_t* field_count) = 0;
38 |
39 |   // Fetch field values; presumably each value is delivered through callback together with user_data - confirm
40 |   virtual rdc_status_t rdc_telemetry_fields_value_get(rdc_gpu_field_t* fields,
41 |                                                       uint32_t fields_count,
42 |                                                       rdc_field_value_f callback,
43 |                                                       void* user_data) = 0;
44 |   // Watch / unwatch take the same (fields, fields_count) pair
45 |   virtual rdc_status_t rdc_telemetry_fields_watch(rdc_gpu_field_t* fields,
46 |                                                   uint32_t fields_count) = 0;
47 |   virtual rdc_status_t rdc_telemetry_fields_unwatch(rdc_gpu_field_t* fields,
48 |                                                     uint32_t fields_count) = 0;
49 |
50 |   virtual ~RdcTelemetry() {}
51 | };
52 | typedef std::shared_ptr RdcTelemetryPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcTelemetry>; confirm
53 |
54 | } // namespace rdc
55 | } // namespace amd
56 |
57 | #endif // INCLUDE_RDC_LIB_RDCTELEMETRY_H_
58 |
-------------------------------------------------------------------------------- /include/rdc_lib/RdcTopologyLink.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_RDCTOPOLOGYLINK_H_
23 | #define INCLUDE_RDC_LIB_RDCTOPOLOGYLINK_H_
24 |
25 | #include  // NOTE(review): include targets missing in this dump; <memory> is needed for std::shared_ptr, the second is unknown - confirm
26 | #include
27 |
28 | #include "rdc/rdc.h"
29 | #include "rdc_lib/rdc_common.h"
30 |
31 | namespace amd {
32 | namespace rdc {
33 | // Abstract interface for device topology and link status queries; every operation is pure virtual.
34 | class RdcTopologyLink {
35 |  public:
36 |   virtual rdc_status_t rdc_device_topology_get(uint32_t gpu_index,
37 |                                                rdc_device_topology_t* results) = 0;  // result returned through results
38 |   virtual rdc_status_t rdc_link_status_get(rdc_link_status_t* results) = 0;  // takes no GPU index, unlike the call above
39 |
40 |   virtual ~RdcTopologyLink() {}
41 | };
42 |
43 | typedef std::shared_ptr RdcTopologyLinkPtr;  // NOTE(review): template argument missing in this dump - presumably <RdcTopologyLink>; confirm
44 |
45 | } // namespace rdc
46 | } // namespace amd
47 |
48 | #endif // INCLUDE_RDC_LIB_RDCTOPOLOGYLINK_H_
-------------------------------------------------------------------------------- /include/rdc_lib/impl/RdcMetricsUpdaterImpl.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */
22 | #ifndef INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
23 | #define INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
24 |
25 | #include // NOLINT(build/c++11)
26 | #include  // NOTE(review): both include targets are missing in this dump; std::future and std::atomic are used below - confirm
27 |
28 | #include "rdc_lib/RdcMetricsUpdater.h"
29 | #include "rdc_lib/RdcWatchTable.h"
30 |
31 | namespace amd {
32 | namespace rdc {
33 | // Concrete, final implementation of the RdcMetricsUpdater start/stop interface.
34 | class RdcMetricsUpdaterImpl final : public RdcMetricsUpdater {
35 |  public:
36 |   void start() override;
37 |   void stop() override;
38 |   explicit RdcMetricsUpdaterImpl(const RdcWatchTablePtr& watch_table,
39 |                                  const uint32_t check_frequency);  // check_frequency: see _check_frequency below
40 |   ~RdcMetricsUpdaterImpl() = default;
41 |
42 |  private:
43 |   RdcWatchTablePtr watch_table_;
44 |   std::atomic started_;  // NOTE(review): template argument missing in this dump
45 |   std::future updater_; // keep the future of updater
46 |   std::future notif_updater_; // keep the future of notif updater
47 |   const uint32_t _check_frequency; // Check frequency in milliseconds (leading underscore, unlike the other members)
48 | };
49 |
50 | } // namespace rdc
51 | } // namespace amd
52 |
53 | #endif // INCLUDE_RDC_LIB_IMPL_RDCMETRICSUPDATERIMPL_H_
54 |
-------------------------------------------------------------------------------- /include/rdc_lib/impl/RdcModuleMgrImpl.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_IMPL_RDCMODULEMGRIMPL_H_
23 | #define INCLUDE_RDC_LIB_IMPL_RDCMODULEMGRIMPL_H_
24 |
25 | #include  // NOTE(review): include target missing in this dump - presumably <list> for std::list; confirm
26 |
27 | #include "rdc_lib/RdcMetricFetcher.h"
28 | #include "rdc_lib/RdcModuleMgr.h"
29 | #include "rdc_lib/RdcTelemetry.h"
30 |
31 | namespace amd {
32 | namespace rdc {
33 | // Implementation of RdcModuleMgr, constructed from a metric fetcher.
34 | class RdcModuleMgrImpl : public RdcModuleMgr {
35 |  public:
36 |   RdcTelemetryPtr get_telemetry_module() override;
37 |   RdcDiagnosticPtr get_diagnostic_module() override;
38 |   explicit RdcModuleMgrImpl(const RdcMetricFetcherPtr& fetcher);
39 |
40 |  private:
41 |   // Modules (NOTE(review): the std::list element types are missing in this dump)
42 |   std::list diagnostic_modules_;
43 |   std::list telemetry_modules_;
44 |   // NOTE(review): the template parameter lists of the three insert_modules overloads below are missing in this dump.
45 |   // base case
46 |   template
47 |   rdc_status_t insert_modules();
48 |
49 |   // recursive case
50 |   template
51 |   rdc_status_t insert_modules();
52 |
53 |   // pass shared_ptr instead of creating it
54 |   template
55 |   rdc_status_t insert_modules(std::shared_ptr ptr);
56 |
57 |   // Function module
58 |   RdcTelemetryPtr rdc_telemetry_module_;
59 |   RdcDiagnosticPtr rdc_diagnostic_module_;
60 |
61 |   // Domain module
62 |   RdcMetricFetcherPtr fetcher_;
63 | };
64 |
65 | } // namespace rdc
66 | } // namespace amd
67 |
68 | #endif // INCLUDE_RDC_LIB_IMPL_RDCMODULEMGRIMPL_H_
69 |
-------------------------------------------------------------------------------- /include/rdc_lib/impl/RdcNotificationImpl.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved.
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */
22 | #ifndef INCLUDE_RDC_LIB_IMPL_RDCNOTIFICATIONIMPL_H_
23 | #define INCLUDE_RDC_LIB_IMPL_RDCNOTIFICATIONIMPL_H_
24 |
25 | #include  // NOTE(review): the four include targets are missing in this dump; std::map, std::mutex and std::vector are used below - confirm
26 | #include
27 | #include
28 | #include
29 |
30 | #include "rdc/rdc.h"
31 | #include "rdc_lib/RdcNotification.h"
32 | #include "rdc_lib/rdc_common.h"
33 |
34 | namespace amd {
35 | namespace rdc {
36 | // Implementation of the RdcNotification interface.
37 | class RdcNotificationImpl : public RdcNotification {
38 |  public:
39 |   RdcNotificationImpl();
40 |   ~RdcNotificationImpl();
41 |
42 |   bool is_notification_event(rdc_field_t field) const override;
43 |   rdc_status_t set_listen_events(const std::vector fk_arr) override;  // element type missing in this dump
44 |   // Blocking
45 |   rdc_status_t listen(rdc_evnt_notification_t* events, uint32_t* num_events,
46 |                       uint32_t timeout_ms) override;
47 |   rdc_status_t stop_listening(uint32_t gpu_id) override;
48 |
49 |  private:
50 |   std::map gpu_evnt_notif_masks_;  // NOTE(review): key/value types missing in this dump
51 |   std::mutex notif_mutex_;  // presumably guards gpu_evnt_notif_masks_ - confirm in the .cc file
52 | };
53 |
54 | } // namespace rdc
55 | } // namespace amd
56 |
57 | #endif // INCLUDE_RDC_LIB_IMPL_RDCNOTIFICATIONIMPL_H_
58 |
-------------------------------------------------------------------------------- /include/rdc_lib/impl/RdcPartitionImpl.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2025 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software.
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef INCLUDE_RDC_LIB_IMPL_RDCPARTITIONIMPL_H_ 23 | #define INCLUDE_RDC_LIB_IMPL_RDCPARTITIONIMPL_H_ 24 | 25 | #include 26 | 27 | #include "rdc/rdc.h" 28 | #include "rdc_lib/RdcPartition.h" 29 | 30 | namespace amd { 31 | namespace rdc { 32 | 33 | class RdcPartitionImpl : public RdcPartition { 34 | public: 35 | rdc_status_t rdc_instance_profile_get_impl(uint32_t entity_index, 36 | rdc_instance_resource_type_t resource_type, 37 | rdc_resource_profile_t* profile); 38 | rdc_status_t rdc_get_num_partition_impl(uint32_t index, uint16_t* num_partition); 39 | }; 40 | 41 | } // namespace rdc 42 | } // namespace amd 43 | 44 | #endif // INCLUDE_RDC_LIB_IMPL_RDCPARTITIONIMPL_H_ 45 | -------------------------------------------------------------------------------- /include/rdc_lib/impl/RdcTopologyLinkImpl.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef INCLUDE_RDC_LIB_IMPL_RDCTOPOLINKYIMPL_H_ 23 | #define INCLUDE_RDC_LIB_IMPL_RDCTOPOLINKYIMPL_H_ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include // NOLINT 30 | #include 31 | #include 32 | #include 33 | 34 | #include "amd_smi/amdsmi.h" 35 | #include "rdc_lib/RdcGroupSettings.h" 36 | #include "rdc_lib/RdcMetricFetcher.h" 37 | #include "rdc_lib/RdcTopologyLink.h" 38 | 39 | namespace amd { 40 | namespace rdc { 41 | 42 | class RdcTopologyLinkImpl : public RdcTopologyLink { 43 | public: 44 | RdcTopologyLinkImpl(const RdcGroupSettingsPtr& group_settings, 45 | RdcMetricFetcherPtr metric_fetcher); 46 | ~RdcTopologyLinkImpl(); 47 | 48 | rdc_status_t rdc_device_topology_get(uint32_t gpu_index, rdc_device_topology_t* results) override; 49 | rdc_status_t rdc_link_status_get(rdc_link_status_t* results) override; 50 | 51 | private: 52 | RdcGroupSettingsPtr group_settings_; 53 | RdcMetricFetcherPtr metric_fetcher_; 54 | 55 | }; 56 | 57 | } // namespace rdc 58 | } // namespace amd 59 | 60 | #endif // INCLUDE_RDC_LIB_IMPL_RDCTOPOLINKYIMPL_H_ -------------------------------------------------------------------------------- /include/rdc_lib/impl/SmiUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef INCLUDE_RDC_LIB_IMPL_RSMIUTILS_H_ 24 | #define INCLUDE_RDC_LIB_IMPL_RSMIUTILS_H_ 25 | 26 | #include 27 | 28 | #include "amd_smi/amdsmi.h" 29 | #include "rdc/rdc.h" 30 | 31 | namespace amd { 32 | namespace rdc { 33 | 34 | rdc_status_t Smi2RdcError(amdsmi_status_t rsmi); 35 | amdsmi_status_t get_processor_handle_from_id(uint32_t gpu_id, 36 | amdsmi_processor_handle* processor_handle); 37 | amdsmi_status_t get_gpu_id_from_processor_handle(amdsmi_processor_handle processor_handle, 38 | uint32_t* gpu_index); 39 | amdsmi_status_t get_processor_count(uint32_t& all_processor_count); 40 | amdsmi_status_t get_socket_handles(std::vector& sockets); 41 | amdsmi_status_t get_processor_handles(amdsmi_socket_handle socket, 42 | std::vector& processors); 43 | amdsmi_status_t get_kfd_partition_id(amdsmi_processor_handle proc, uint32_t* partition_id); 44 | amdsmi_status_t get_metrics_info(amdsmi_processor_handle proc, amdsmi_gpu_metrics_t* metrics); 45 | amdsmi_status_t get_num_partition(uint32_t index, uint16_t* num_partition); 46 | 47 | } // namespace rdc 48 | } // namespace amd 49 | 50 | #endif // INCLUDE_RDC_LIB_IMPL_RSMIUTILS_H_ 51 | -------------------------------------------------------------------------------- /include/rdc_lib/rdc_common.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #ifndef INCLUDE_RDC_LIB_RDC_COMMON_H_ 24 | #define INCLUDE_RDC_LIB_RDC_COMMON_H_ 25 | #include 26 | #include 27 | #include 28 | 29 | #include "rdc/rdc.h" 30 | 31 | // 32 | typedef std::pair RdcFieldKey; 33 | 34 | // 35 | typedef std::pair RdcFieldGroupKey; 36 | 37 | //!< The gauge metrics do not require aggregations 38 | typedef std::map rdc_gpu_gauges_t; 39 | 40 | /** 41 | * @brief The strncpy but with null terminated 42 | * 43 | * @details It will copy at most n-1 bytes from src to dst, and 44 | * always adds a null terminator following the bytes copied to dst. 45 | * 46 | * @param[out] dest The destination string to copy 47 | * 48 | * @param[in] src The source string to be copied 49 | * 50 | * @param[in] n At most n-1 bytes will be copied 51 | * 52 | * @retval Return a pointer to the destination string. 
53 | */ 54 | char* strncpy_with_null(char* dest, const char* src, size_t n); 55 | 56 | #endif // INCLUDE_RDC_LIB_RDC_COMMON_H_ 57 | -------------------------------------------------------------------------------- /include/rdc_modules/rdc_rocr/MemoryTest.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ 23 | #define RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ 24 | 25 | #include "hsa/hsa.h" 26 | #include "rdc_modules/rdc_rocr/TestBase.h" 27 | 28 | namespace amd { 29 | namespace rdc { 30 | class MemoryTest : public TestBase { 31 | public: 32 | explicit MemoryTest(uint32_t gpu_index); 33 | 34 | // @Brief: Destructor for test case of MemoryTest 35 | virtual ~MemoryTest(); 36 | 37 | // @Brief: Setup the environment for measurement 38 | virtual hsa_status_t SetUp(); 39 | 40 | // @Brief: Core measurement execution 41 | virtual void Run(); 42 | 43 | // @Brief: Clean up and reclaim the resources 44 | virtual void Close(); 45 | 46 | // @Brief: Display results 47 | virtual void DisplayResults() const; 48 | 49 | // @Brief: Display information about what this test does 50 | virtual void DisplayTestInfo(void); 51 | 52 | hsa_status_t MaxSingleAllocationTest(void); 53 | 54 | hsa_status_t TestAllocate(hsa_amd_memory_pool_t pool, size_t sz); 55 | 56 | private: 57 | hsa_status_t MaxSingleAllocationTest(hsa_agent_t ag, hsa_amd_memory_pool_t pool); 58 | }; 59 | 60 | } // namespace rdc 61 | } // namespace amd 62 | 63 | #endif // RDC_MODULES_RDC_ROCR_MEMORYTEST_H_ 64 | -------------------------------------------------------------------------------- /lychee.toml: -------------------------------------------------------------------------------- 1 | exclude = ['^file://.*', '.*localhost.*'] 2 | exclude_path = ["./build"] 3 | -------------------------------------------------------------------------------- /python_binding/README.md: -------------------------------------------------------------------------------- 1 | # Quick start 2 | If you do not have RDC installed, specify the directory that contains the RDC libraries using: 3 | 4 | $ export LD_LIBRARY_PATH=/path/to/rdc/lib 5 | 6 | Then you can run RdcReader in the python_binding folder: 7 | 8 | $ python RdcReader.py 9 | 10 | # Prometheus plugin 11 | Install the prometheus_client: 12 | 13 | $ pip install prometheus_client 14 | 
15 | Start rdcd with authentication, then run the plugin to connect to it: 16 | 17 | $ python rdc_prometheus.py 18 | 19 | Check the options of the plugin: 20 | 21 | $ python rdc_prometheus.py --help 22 | 23 | Verify the plugin is running: 24 | 25 | $ curl localhost:5000 26 | 27 | On the management computer, install Prometheus from 28 | https://github.com/prometheus/prometheus 29 | 30 | Modify the file prometheus_targets.json to add the compute nodes running the plugin. 31 | Start Prometheus: 32 | 33 | $ prometheus --config.file=/path/to/rdc_prometheus_example.yml 34 | 35 | Browse to localhost:9090 on the management computer for metrics from RDC. 36 | 37 | -------------------------------------------------------------------------------- /python_binding/README_rdc_rest_api.txt: -------------------------------------------------------------------------------- 1 | # RDC REST API 2 | 3 | ## Overview 4 | This REST API provides functionalities to: 5 | - Discover available GPUs on a node. 6 | - Configure and manage GPU monitoring queries. 7 | - Retrieve GPU metrics based on configured queries. 8 | 9 | The API is built using Flask and interacts with the RDC library to monitor GPU usage and performance metrics. 10 | 11 | ## Installation 12 | ### Prerequisites 13 | - Python 3.x 14 | - Flask 15 | - RDC Library (`librdc_bootstrap.so` must be available and accessible) 16 | 17 | ### Install Dependencies 18 | ```sh 19 | pip install flask 20 | ``` 21 | 22 | ## Running the API 23 | 1. Ensure the directory containing `librdc_bootstrap.so` is in the library path: 24 | ```sh 25 | export LD_LIBRARY_PATH=/path/to/rdc/lib:$LD_LIBRARY_PATH 26 | ``` 27 | 2. Run the API: 28 | ```sh 29 | python rdc_rest_api.py 30 | ``` 31 | 32 | The API will start and listen on `http://0.0.0.0:50052`. 33 | 34 | ## API Endpoints 35 | 36 | ### 1. Discover GPUs 37 | **GET** `/rdc/discovery` 38 | #### Response: 39 | ```json 40 | { 41 | "0": "GPU Name", 42 | "1": "GPU Name" 43 | } 44 | ``` 45 | 46 | ### 2. 
Create Query Criteria 47 | **POST** `/rdc/query_criteria` 48 | #### Request Body: 49 | ```json 50 | { 51 | "gpu_index": [0,1], 52 | "metrics": ["RDC_FI_GPU_CLOCK", "RDC_FI_GPU_TEMP"] 53 | } 54 | ``` 55 | #### Response: 56 | ```json 57 | { 58 | "query_id": "G-1-F-2" 59 | } 60 | ``` 61 | 62 | ### 3. Get Query Criteria 63 | **GET** `/rdc/query_criteria/{query_id}` 64 | #### Response: 65 | ```json 66 | { 67 | "gpu_index": [0,1], 68 | "metrics": ["RDC_FI_GPU_CLOCK", "RDC_FI_GPU_TEMP"], 69 | "query_id": "G-1-F-2" 70 | } 71 | ``` 72 | 73 | ### 4. Delete Query Criteria 74 | **DELETE** `/rdc/query_criteria/{query_id}` 75 | #### Response: 76 | ```json 77 | { 78 | "message": "Deleted successfully" 79 | } 80 | ``` 81 | 82 | ### 5. Retrieve GPU Metrics 83 | **GET** `/rdc/gpu_metrics/{query_id}` 84 | #### Response: 85 | ```json 86 | [ 87 | { 88 | "gpu_index": 0, 89 | "RDC_FI_GPU_CLOCK": 1450, 90 | "RDC_FI_GPU_TEMP": 32 91 | }, 92 | { 93 | "gpu_index": 1, 94 | "RDC_FI_GPU_CLOCK": 736, 95 | "RDC_FI_GPU_TEMP": 35 96 | } 97 | ] 98 | ``` 99 | 100 | ## Notes 101 | - Ensure `librdc_bootstrap.so` is properly linked. 102 | - The API should be run on a system with RDC installed and GPUs accessible. 
103 | -------------------------------------------------------------------------------- /python_binding/prometheus_targets.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "targets": [ 4 | "localhost:5000" 5 | ] 6 | } 7 | ] 8 | -------------------------------------------------------------------------------- /python_binding/rdc_collectd.conf: -------------------------------------------------------------------------------- 1 | 2 | ModulePath "/opt/rocm/rdc/python_binding" 3 | LogTraces true 4 | Interactive false 5 | Import "rdc_collectd" 6 | 7 | # Run RDC in embedded mode (default: standalone mode) 8 | embedded false 9 | # The rdcd IP and port in standalone mode (default: localhost:50051) 10 | rdc_ip_port "localhost:50051" 11 | # Set this option if the rdcd is running with unauth in standalone mode (default: false) 12 | unauth false 13 | # The list of fields name needs to be watched (default: fields in the plugin), for example 14 | # field_ids "RDC_FI_GPU_TEMP" "RDC_FI_GPU_CLOCK" 15 | # The fields update frequency in seconds (default: 10) 16 | update_freq 10 17 | # The max keep age of the fields in seconds (default: 3600) 18 | max_keep_age 3600 19 | # The max samples to keep for each field in the cache (default: 1000) 20 | max_keep_samples 1000 21 | # The list of GPUs to be watched (default: All GPUs), for example 22 | # gpu_indexes 0 1 23 | 24 | 25 | -------------------------------------------------------------------------------- /python_binding/rdc_prometheus_example.yml: -------------------------------------------------------------------------------- 1 | # global config 2 | global: 3 | scrape_interval: 10s # Set the scrape interval to every 10 seconds. Default is every 1 minute. 4 | evaluation_interval: 10s # Evaluate rules every 10 seconds. The default is every 1 minute. 5 | # scrape_timeout is set to the global default (10s). 
6 | 7 | # A scrape configuration where the endpoints to scrape will be defined at prometheus_targets.json: 8 | scrape_configs: 9 | # The job name is added as a label `job=` to any timeseries scraped from this config. 10 | - job_name: 'rdc' 11 | 12 | # metrics_path defaults to '/metrics' 13 | # scheme defaults to 'http'. 14 | 15 | # Remove the port for display 16 | relabel_configs: 17 | - source_labels: [__address__] 18 | regex: '([^:]+):\d+' 19 | target_label: short_instance 20 | 21 | file_sd_configs: 22 | - files: 23 | - 'prometheus_targets.json' 24 | -------------------------------------------------------------------------------- /rdc_libs/bootstrap/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 2 | message(" Cmake RDC Lib-Bootstrap ") 3 | message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 4 | 5 | set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") 6 | set(INC_DIR "${RDC_LIB_INC_DIR}") 7 | # need source dir for rocr 8 | set(BOOTSTRAP_LIB_SRC_DIR 9 | "${SRC_DIR}" 10 | PARENT_SCOPE) 11 | 12 | set(BOOTSTRAP_LIB_COMPONENT "lib${BOOTSTRAP_LIB}") 13 | set(BOOTSTRAP_LIB_SRC_LIST 14 | "${COMMON_DIR}/rdc_fields_supported.cc" "${SRC_DIR}/RdcBootStrap.cc" 15 | "${SRC_DIR}/RdcLibraryLoader.cc" "${SRC_DIR}/RdcLogger.cc" "${SRC_DIR}/RdcEntityCodec.cc") 16 | set(BOOTSTRAP_LIB_INC_LIST 17 | "${COMMON_DIR}/rdc_fields_supported.h" 18 | "${INC_DIR}/RdcHandler.h" 19 | "${INC_DIR}/RdcLibraryLoader.h" 20 | "${INC_DIR}/RdcLogger.h" 21 | "${INC_DIR}/RdcEntityCodec.h" 22 | "${INC_DIR}/rdc_common.h" 23 | "${PROJECT_SOURCE_DIR}/include/rdc/rdc.h") 24 | message("BOOTSTRAP_LIB_INC_LIST=${BOOTSTRAP_LIB_INC_LIST}") 25 | 26 | add_library(${BOOTSTRAP_LIB} SHARED ${BOOTSTRAP_LIB_SRC_LIST} ${BOOTSTRAP_LIB_INC_LIST}) 27 | target_link_libraries(${BOOTSTRAP_LIB} pthread dl) 28 | target_include_directories( 29 | ${BOOTSTRAP_LIB} PRIVATE 
"${PROJECT_SOURCE_DIR}" "${PROJECT_SOURCE_DIR}/include" 30 | "${COMMON_DIR}" "${AMD_SMI_INCLUDE_DIR}" "${ROCM_DIR}/include") 31 | 32 | target_include_directories(${BOOTSTRAP_LIB} PUBLIC $ 33 | $) 34 | 35 | # Set the VERSION and SOVERSION values 36 | set_property(TARGET ${BOOTSTRAP_LIB} PROPERTY SOVERSION "${VERSION_MAJOR}") 37 | set_property(TARGET ${BOOTSTRAP_LIB} PROPERTY VERSION "${SO_VERSION_STRING}") 38 | set_target_properties(${BOOTSTRAP_LIB} PROPERTIES INSTALL_RPATH "\$ORIGIN:\$ORIGIN/rdc") 39 | 40 | # If the library is a release, strip the target library 41 | if("${CMAKE_BUILD_TYPE}" STREQUAL Release) 42 | add_custom_command( 43 | TARGET ${BOOTSTRAP_LIB} 44 | POST_BUILD 45 | COMMAND ${CMAKE_STRIP} ${BOOTSTRAP_LIB_COMPONENT}.so) 46 | endif() 47 | -------------------------------------------------------------------------------- /rdc_libs/bootstrap/src/RdcLibraryLoader.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | #include "rdc_lib/RdcLibraryLoader.h" 24 | 25 | #include "rdc_lib/RdcException.h" 26 | 27 | namespace amd { 28 | namespace rdc { 29 | 30 | RdcLibraryLoader::RdcLibraryLoader() : libHandler_(nullptr) {} 31 | 32 | rdc_status_t RdcLibraryLoader::load(const char* filename) { 33 | if (filename == nullptr) { 34 | return RDC_ST_FAIL_LOAD_MODULE; 35 | } 36 | if (libHandler_) { 37 | unload(); 38 | } 39 | 40 | std::lock_guard guard(library_mutex_); 41 | libHandler_ = dlopen(filename, RTLD_LAZY); 42 | if (!libHandler_) { 43 | char* error = dlerror(); 44 | throw RdcException( 45 | RDC_ST_FAIL_LOAD_MODULE, 46 | std::string("Fail to open ") + std::string(filename) + ": " + std::string(error)); 47 | return RDC_ST_FAIL_LOAD_MODULE; 48 | } 49 | 50 | return RDC_ST_OK; 51 | } 52 | 53 | rdc_status_t RdcLibraryLoader::unload() { 54 | std::lock_guard guard(library_mutex_); 55 | if (libHandler_) { 56 | dlclose(libHandler_); 57 | libHandler_ = nullptr; 58 | } 59 | return RDC_ST_OK; 60 | } 61 | 62 | RdcLibraryLoader::~RdcLibraryLoader() { unload(); } 63 | 64 | } // namespace rdc 65 | } // namespace amd 66 | -------------------------------------------------------------------------------- /rdc_libs/rdc_client/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 2 | message(" Cmake RDC Lib-Client ") 3 | message("&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 4 | 5 | set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") 6 | set(INC_DIR "${RDC_LIB_INC_DIR}") 7 | 8 | file(GLOB PROTOBUF_GENERATED_INCLUDES "${PROTOB_OUT_DIR}/*.h") 9 | file(GLOB 
PROTOBUF_GENERATED_SRCS "${PROTOB_OUT_DIR}/*.cc") 10 | 11 | set(RDCCLIENT_LIB_COMPONENT "lib${RDCCLIENT_LIB}") 12 | set(RDCCLIENT_LIB_SRC_LIST "${SRC_DIR}/RdcStandaloneHandler.cc" "${PROTOBUF_GENERATED_SRCS}") 13 | 14 | set(RDCCLIENT_LIB_INC_LIST "${PROJECT_SOURCE_DIR}/include/rdc/rdc.h" "${INC_DIR}/rdc_common.h" 15 | "${INC_DIR}/RdcHandler.h" "${INC_DIR}/impl/RdcStandaloneHandler.h") 16 | 17 | message("RDCCLIENT_LIB_INC_LIST=${RDCCLIENT_LIB_INC_LIST}") 18 | 19 | add_library(${RDCCLIENT_LIB} SHARED ${RDCCLIENT_LIB_SRC_LIST} ${RDCCLIENT_LIB_INC_LIST}) 20 | target_link_libraries(${RDCCLIENT_LIB} ${BOOTSTRAP_LIB} pthread rt gRPC::grpc++ dl) 21 | target_include_directories( 22 | ${RDCCLIENT_LIB} PRIVATE "${GRPC_ROOT}/include" "${PROJECT_SOURCE_DIR}" 23 | "${PROJECT_SOURCE_DIR}/include" "${PROTOB_OUT_DIR}") 24 | 25 | # Set the VERSION and SOVERSION values 26 | set_property(TARGET ${RDCCLIENT_LIB} PROPERTY SOVERSION "${VERSION_MAJOR}") 27 | set_property(TARGET ${RDCCLIENT_LIB} PROPERTY VERSION "${SO_VERSION_STRING}") 28 | 29 | if("${CMAKE_BUILD_TYPE}" STREQUAL Release) 30 | add_custom_command( 31 | TARGET ${RDCCLIENT_LIB} 32 | POST_BUILD 33 | COMMAND ${CMAKE_STRIP} ${RDCCLIENT_LIB_COMPONENT}.so) 34 | endif() 35 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/gpuReadWrite_kernels.cl: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================= 3 | * ROC Runtime Conformance Release License 4 | * ============================================================================= 5 | * The University of Illinois/NCSA 6 | * Open Source License (NCSA) 7 | * 8 | * Copyright (c) 2017, Advanced Micro Devices, Inc. 9 | * All rights reserved. 10 | * 11 | * Developed by: 12 | * 13 | * AMD Research and AMD ROC Software Development 14 | * 15 | * Advanced Micro Devices, Inc. 
16 | * 17 | * www.amd.com 18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy 20 | * of this software and associated documentation files (the "Software"), to 21 | * deal with the Software without restriction, including without limitation 22 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 23 | * and/or sell copies of the Software, and to permit persons to whom the 24 | * Software is furnished to do so, subject to the following conditions: 25 | * 26 | * - Redistributions of source code must retain the above copyright notice, 27 | * this list of conditions and the following disclaimers. 28 | * - Redistributions in binary form must reproduce the above copyright 29 | * notice, this list of conditions and the following disclaimers in 30 | * the documentation and/or other materials provided with the distribution. 31 | * - Neither the names of , 32 | * nor the names of its contributors may be used to endorse or promote 33 | * products derived from this Software without specific prior written 34 | * permission. 35 | * 36 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 39 | * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 40 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 41 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 42 | * DEALINGS WITH THE SOFTWARE. 43 | * 44 | */ 45 | 46 | __kernel void gpuReadWrite(__global const int * a, 47 | __global int * b, __global int * c) { 48 | int i = get_global_id(0); 49 | // Reading the system memory and writing to gpu memory 50 | c[i] = a[i]; // a[i] point to system memory while c[i] to gpu memory. 
51 | //writing to system memory 52 | b[i] = i; 53 | } 54 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1010/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1010/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1010/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1011/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1011/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1011/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1012/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1012/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1012/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1030/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1030/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1030/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1031/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1031/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1031/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1032/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1032/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1032/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1033/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx1033/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx1033/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx700/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx700/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx700/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx701/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx701/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx701/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx702/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx702/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx702/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx801/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx801/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx801/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx802/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx802/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx802/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx803/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx803/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx803/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx805/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx805/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx805/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx810/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx810/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx810/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx900/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx900/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx900/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx902/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx902/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx902/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx904/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx904/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx904/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx906/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx906/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx906/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx908/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx908/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx908/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx90a/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx90a/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx90a/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx90a/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx940/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx940/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx940/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx940/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx941/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx941/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx941/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx941/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx942/binary_search_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx942/binary_search_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/kernels/hsaco/gfx942/gpuReadWrite_kernels.hsaco: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ROCm/rdc/85b619b2f02fd92155730677cf4899808bb800bc/rdc_libs/rdc_modules/kernels/hsaco/gfx942/gpuReadWrite_kernels.hsaco -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rocr/RdcRocrBase.cc: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | 23 | #include "rdc_modules/rdc_rocr/RdcRocrBase.h" 24 | 25 | #include 26 | 27 | namespace amd { 28 | namespace rdc { 29 | 30 | RdcRocrBase::RdcRocrBase(void) { 31 | num_iteration_ = 1; 32 | cpu_device_.handle = -1; 33 | gpu_device1_.handle = -1; 34 | device_pool_.handle = 0; 35 | kern_arg_pool_.handle = 0; 36 | main_queue_ = nullptr; 37 | kernarg_buffer_ = nullptr; 38 | kernel_object_ = 0; 39 | memset(&aql_, 0, sizeof(aql_)); 40 | set_requires_profile(-1); 41 | set_enable_interrupt(false); 42 | set_kernel_file_name(""); 43 | set_verbosity(1); 44 | set_monitor_verbosity(0); 45 | set_title("unset_title"); 46 | orig_hsa_enable_interrupt_ = nullptr; 47 | } 48 | 49 | RdcRocrBase::~RdcRocrBase() {} 50 | 51 | } // namespace rdc 52 | } // namespace amd 53 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/iet_stress.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 
15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # IET stress test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by comma. 32 | # Set parallel execution to true (gemm workload execution on all GPUs in parallel) 33 | # Set gemm operation type as dgemm. 34 | # Set matrix_size to 28000. 35 | # Test duration set to 1 min. 36 | # Target power set to 550W for each GPU. 37 | # 38 | # Run test with: 39 | # cd bin 40 | # ./rvs -c conf/MI300A/iet_stress.conf 41 | # 42 | # Expected result: 43 | # The test on each GPU passes (TRUE) if the GPU achieves power target of 550W.
44 | # 45 | 46 | actions: 47 | - name: iet-stress-550W-dgemm-true 48 | device: all 49 | module: iet 50 | parallel: true 51 | duration: 60000 52 | ramp_interval: 10000 53 | sample_interval: 3000 54 | log_interval: 3000 55 | target_power: 550 56 | matrix_size: 28000 57 | ops_type: dgemm 58 | lda: 28000 59 | ldb: 28000 60 | ldc: 28000 61 | alpha: 1 62 | beta: 1 63 | 64 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI300A/iet_stress_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | # IET stress test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by comma. 32 | # Set parallel execution to true (gemm workload execution on all GPUs in parallel) 33 | # Set gemm operation type as dgemm. 34 | # Set matrix_size to 28000. 35 | # Test duration set to 5 mins. 36 | # Target power set to 550W for each GPU. 37 | # 38 | # Run test with: 39 | # cd bin 40 | # ./rvs -c conf/MI300A/iet_stress_long.conf 41 | # 42 | # Expected result: 43 | # The test on each GPU passes (TRUE) if the GPU achieves power target of 550W. 44 | # 45 | 46 | actions: 47 | - name: iet-stress-550W-dgemm-true 48 | device: all 49 | module: iet 50 | parallel: true 51 | duration: 300000 # 5 min 52 | ramp_interval: 10000 53 | sample_interval: 3000 54 | log_interval: 3000 55 | target_power: 550 56 | matrix_size: 28000 57 | ops_type: dgemm 58 | lda: 28000 59 | ldb: 28000 60 | ldc: 28000 61 | alpha: 1 62 | beta: 1 63 | 64 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/gst_stress.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved.
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # GST test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by white space 32 | # Set parallel execution to true (workload execution on all GPUs in parallel) 33 | # Set matrix_size to 28000. 
34 | # Set run count to 1 (each test will run once) 35 | # Set copy_matrix to false (the matrices will be copied to GPUs only once) 36 | # 37 | # Run test with: 38 | # cd bin 39 | # ./rvs -c conf/MI300X/gst_stress.conf 40 | # 41 | # Expected result: 42 | # The test on each GPU passes (TRUE) if the GPU achieves 50000 gflops 43 | 44 | actions: 45 | - name: gpustress-50000-dgemm-true 46 | device: all 47 | module: gst 48 | parallel: true 49 | count: 1 50 | duration: 60000 51 | copy_matrix: false 52 | target_stress: 50000 53 | matrix_size_a: 28000 54 | matrix_size_b: 28000 55 | matrix_size_c: 28000 56 | ops_type: dgemm 57 | lda: 28000 58 | ldb: 28000 59 | ldc: 28000 60 | alpha: 1 61 | beta: 1 62 | matrix_init: hiprand 63 | 64 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI300X/iet_stress.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software.
15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # IET stress test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by comma. 32 | # Set parallel execution to true (gemm workload execution on all GPUs in parallel) 33 | # Set gemm operation type as dgemm. 34 | # Set matrix_size to 28000. 35 | # Test duration set to 1 min. 36 | # Target power set to 750W for each GPU. 37 | # 38 | # Run test with: 39 | # cd bin 40 | # ./rvs -c conf/MI300X/iet_stress.conf 41 | # 42 | # Expected result: 43 | # The test on each GPU passes (TRUE) if the GPU achieves power target of 750W.
44 | # 45 | 46 | actions: 47 | - name: iet-stress-750W-dgemm-true 48 | device: all 49 | module: iet 50 | parallel: true 51 | duration: 60000 # 1 min 52 | ramp_interval: 10000 53 | sample_interval: 5000 54 | log_interval: 5000 55 | target_power: 750 56 | matrix_size: 28000 57 | ops_type: dgemm 58 | lda: 28000 59 | ldb: 28000 60 | ldc: 28000 61 | alpha: 1 62 | beta: 1 63 | matrix_init: hiprand 64 | 65 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_stress.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | # IET stress test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by comma. 32 | # Set parallel execution to true (gemm workload execution on all GPUs in parallel) 33 | # Test duration set to 1 min. 34 | # Target power set to 650W for each GPU. 35 | # Tolerance set to 5% of target power. 36 | # 37 | # Run test with: 38 | # cd bin 39 | # ./rvs -c conf/MI308X/iet_stress.conf 40 | # 41 | # Expected result: 42 | # The test on each GPU passes (TRUE) if the GPU achieves power target of 650W. 43 | # 44 | 45 | actions: 46 | - name: iet-stress-650W-true 47 | device: all 48 | module: iet 49 | parallel: true 50 | duration: 60000 # 1 min 51 | ramp_interval: 1000 52 | sample_interval: 5000 53 | log_interval: 5000 54 | target_power: 650 55 | tolerance: 0.05 56 | bw_workload: true 57 | cp_workload: false 58 | 59 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/MI308X/iet_stress_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # IET stress test 27 | # 28 | # Preconditions: 29 | # Set device to all. If you need to run the rvs only on a subset of GPUs, please run rvs with -g 30 | # option, collect the GPUs IDs (e.g.: GPU[ 5 - 50599] -> 50599 is the GPU ID) and then specify 31 | # all the GPUs IDs separated by comma. 32 | # Set parallel execution to true (gemm workload execution on all GPUs in parallel) 33 | # Test duration set to 5 mins. 34 | # Target power set to 650W for each GPU. 35 | # Tolerance set to 5% of target power.
36 | # 37 | # Run test with: 38 | # cd bin 39 | # ./rvs -c conf/MI308X/iet_stress_long.conf 40 | # 41 | # Expected result: 42 | # The test on each GPU passes (TRUE) if the GPU achieves power target of 650W. 43 | # 44 | 45 | actions: 46 | - name: iet-stress-650W-true 47 | device: all 48 | module: iet 49 | parallel: true 50 | duration: 300000 # 5 min 51 | ramp_interval: 1000 52 | sample_interval: 5000 53 | log_interval: 5000 54 | target_power: 650 55 | tolerance: 0.05 56 | bw_workload: true 57 | cp_workload: false 58 | 59 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/default: -------------------------------------------------------------------------------- 1 | nv21 -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gpup_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/gpup_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_single.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-9000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 10000 # 10 sec 33 | copy_matrix: false 34 | target_stress: 6000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_single_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2024 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-9000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 300000 # 5 min 33 | copy_matrix: false 34 | target_stress: 6000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/gst_stress_3_hrs.conf: -------------------------------------------------------------------------------- 1 | ../nv31/gst_stress_3_hrs.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/iet_stress.conf: -------------------------------------------------------------------------------- 1 | ../nv31/iet_stress.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/iet_stress_long.conf: -------------------------------------------------------------------------------- 1 | ../nv31/iet_stress_long.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/mem.conf: -------------------------------------------------------------------------------- 1 | ../nv31/mem.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pbqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pbqt_single.conf 
-------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pebb_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pebb_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pebb_single_long.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pebb_single_long.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/peqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/peqt_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/pesm_1.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pesm_1.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv21/rcqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/rcqt_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_single.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. 
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-10000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 10000 # 10 sec 33 | copy_matrix: false 34 | target_stress: 10000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_single_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-10000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 300000 # 5 min 33 | copy_matrix: false 34 | target_stress: 10000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/gst_stress_3_hrs.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 
15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-3hrs 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 10800000 33 | ramp_interval: 300000 34 | log_interval: 6000 35 | target_stress: 5000 36 | max_violations: 1 37 | copy_matrix: false 38 | tolerance: 0.01 39 | matrix_size_a: 8640 40 | matrix_size_b: 8640 41 | matrix_size_c: 8640 42 | ops_type: sgemm 43 | 44 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/iet_stress.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: action_1 28 | device: all 29 | module: iet 30 | parallel: true 31 | count: 1 32 | wait: 100 33 | duration: 50000 34 | ramp_interval: 5000 35 | sample_interval: 700 36 | log_interval: 700 37 | max_violations: 1 38 | target_power: 127 39 | tolerance: 0.06 40 | matrix_size: 8640 41 | ops_type: dgemm 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/iet_stress_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: action_1 28 | device: all 29 | module: iet 30 | parallel: true 31 | count: 1 32 | wait: 100 33 | duration: 300000 # 5 min 34 | ramp_interval: 5000 35 | sample_interval: 700 36 | log_interval: 700 37 | max_violations: 1 38 | target_power: 127 39 | tolerance: 0.06 40 | matrix_size: 8640 41 | ops_type: dgemm 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pebb_single.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 
15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # PEBB test #3 27 | # 28 | # testing conditions: 29 | # 1. all AMD compatible GPUs 30 | # 2. all types of devices 31 | # 3. bidirectional 32 | 33 | actions: 34 | - name: h2d-d2h-sequential-51MB 35 | device: all 36 | module: pebb 37 | log_interval: 800 38 | duration: 5000 39 | device_to_host: true 40 | host_to_device: true 41 | parallel: true 42 | block_size: 51200000 43 | link_type: 2 # PCIe 44 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pebb_single_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 
4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | # PEBB test #3 27 | # 28 | # testing conditions: 29 | # 1. all AMD compatible GPUs 30 | # 2. all types of devices 31 | # 3. 
bidirectional 32 | 33 | actions: 34 | - name: h2d-d2h-sequential-51MB 35 | device: all 36 | module: pebb 37 | log_interval: 800 38 | duration: 300000 # 5 min 39 | device_to_host: true 40 | host_to_device: true 41 | parallel: true 42 | block_size: 51200000 43 | link_type: 2 # PCIe 44 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/pesm_1.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | # PESM test #1 27 | # 28 | # Preconditions: 29 | # Set deviceid to an existing AMD device ID value 30 | # 31 | # Run test with: 32 | # cd bin 33 | # sudo ./rvs -c conf/nv31/pesm_1.conf 34 | # 35 | # Expected result: 36 | # Test passes without displaying data for any GPUs 37 | actions: 38 | - name: act1 39 | device: all 40 | deviceid: 26720 41 | module: pesm 42 | monitor: true 43 | - name: act2 44 | device: all 45 | debugwait: 3000 46 | module: pesm 47 | monitor: false 48 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv31/rcqt_single.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2022 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | 28 | - name: action_1 29 | device: all 30 | module: rcqt 31 | package: rocm-hip-sdk 32 | 33 | - name: action_2 34 | device: all 35 | module: rcqt 36 | packagelist: rocm-hip-libraries rocm-core rocm-dev rocm-hip-runtime-devel rocm-language-runtime rocm-hip-runtime rocm-hip-sdk rocm-utils rocm-smi-lib rocalution rocm-debug-agent rocm-clang-ocl rocm-device-libs hsa-rocr-devel hipcub-devel rocm-ocl-icd rocsolver rocsparse rocsolver-devel rocminfo hipfft-devel rocm-gdb rocm-dbgapi rocfft hipblas-devel rocthrust-devel openmp-extras comgr rccl rocblas hipblas roctracer-dev hip-doc amdgpu-install rocrand hsa-rocr hipfft hipsparse-devel rocsparse-devel rocrand-devel rocm-opencl hip-devel rocprim-devel hipsolver-devel rocfft-devel hsa-amd-aqlprofile hipify-clang miopen-hip-devel rocm-llvm hip-runtime-amd hip-samples rocalution-devel rccl-devel hipsolver rocprofiler-dev miopen-hip rocm-cmake hipsparse rocblas-devel rocm-opencl-devel 37 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gpup_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/gpup_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_single.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. 
All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 
23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-9000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 10000 33 | copy_matrix: false 34 | target_stress: 6000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_single_long.conf: -------------------------------------------------------------------------------- 1 | # ################################################################################ 2 | # # 3 | # # Copyright (c) 2018-2023 Advanced Micro Devices, Inc. All rights reserved. 4 | # # 5 | # # MIT LICENSE: 6 | # # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # # this software and associated documentation files (the "Software"), to deal in 8 | # # the Software without restriction, including without limitation the rights to 9 | # # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 10 | # # of the Software, and to permit persons to whom the Software is furnished to do 11 | # # so, subject to the following conditions: 12 | # # 13 | # # The above copyright notice and this permission notice shall be included in all 14 | # # copies or substantial portions of the Software. 15 | # # 16 | # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # # SOFTWARE. 23 | # # 24 | # ############################################################################### 25 | 26 | actions: 27 | - name: gpustress-9000-sgemm-false 28 | device: all 29 | module: gst 30 | parallel: true 31 | count: 1 32 | duration: 300000 # 5 min 33 | copy_matrix: false 34 | target_stress: 6000 35 | matrix_size_a: 8640 36 | matrix_size_b: 8640 37 | matrix_size_c: 8640 38 | ops_type: sgemm 39 | lda: 8640 40 | ldb: 8640 41 | ldc: 8640 42 | -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/gst_stress_3_hrs.conf: -------------------------------------------------------------------------------- 1 | ../nv31/gst_stress_3_hrs.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/iet_stress.conf: -------------------------------------------------------------------------------- 1 | ../nv31/iet_stress.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/iet_stress_long.conf: -------------------------------------------------------------------------------- 1 | ../nv31/iet_stress_long.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/mem.conf: -------------------------------------------------------------------------------- 1 | ../nv31/mem.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pbqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pbqt_single.conf 
-------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pebb_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pebb_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pebb_single_long.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pebb_single_long.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/peqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/peqt_single.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/pesm_1.conf: -------------------------------------------------------------------------------- 1 | ../nv31/pesm_1.conf -------------------------------------------------------------------------------- /rdc_libs/rdc_modules/rdc_rvs/conf/nv32/rcqt_single.conf: -------------------------------------------------------------------------------- 1 | ../nv31/rcqt_single.conf -------------------------------------------------------------------------------- /rdci/include/RdciConfigSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef RDCI_INCLUDE_RDCICONFIGSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCICONFIGSUBSYSTEM_H_ 24 | 25 | #include "RdciSubSystem.h" 26 | 27 | namespace amd { 28 | namespace rdc { 29 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). config_command_type_t lists the commands it distinguishes (NONE, SET, GET, CLEAR, HELP) and config_cmd_ holds one of them. */ 30 | class RdciConfigSubSystem : public RdciSubSystem { 31 | public: 32 | RdciConfigSubSystem(); 33 | ~RdciConfigSubSystem() override; 34 | void parse_cmd_opts(int argc, char** argv) override; 35 | void process() override; 36 | typedef enum { 37 | CONFIG_COMMAND_NONE = 0, 38 | CONFIG_COMMAND_SET, 39 | CONFIG_COMMAND_GET, 40 | CONFIG_COMMAND_CLEAR, 41 | CONFIG_COMMAND_HELP, 42 | } config_command_type_t; 43 | 44 | private: 45 | void show_help() const; 46 | void display_config_settings(rdc_config_setting_list_t& rdc_configs_list); 47 | config_command_type_t config_cmd_; 48 | static constexpr rdc_field_grp_t JOB_FIELD_ID = 1; 49 | uint32_t group_id_; 50 | uint32_t power_limit_; 51 | uint64_t gfx_max_clock_; 52 | uint64_t memory_max_clock_; 53 | rdc_field_grp_t fgid_; 54 | }; 55 | 56 | } // namespace rdc 57 | } // namespace amd 58 | 59 | #endif // RDCI_INCLUDE_RDCICONFIGSUBSYSTEM_H_ 60 | -------------------------------------------------------------------------------- /rdci/include/RdciDiagSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2021 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef RDCI_INCLUDE_RDCIDIAGSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCIDIAGSUBSYSTEM_H_ 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #include "RdciSubSystem.h" 31 | 32 | namespace amd { 33 | namespace rdc { 34 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). diag_ops_ holds one of DIAG_UNKNOWN, DIAG_HELP or DIAG_RUN; group_id_ (rdc_gpu_group_t) and run_level_ (rdc_diag_level_t) are kept as members. */ 35 | class RdciDiagSubSystem : public RdciSubSystem { 36 | public: 37 | RdciDiagSubSystem(); 38 | ~RdciDiagSubSystem(); 39 | void parse_cmd_opts(int argc, char** argv) override; 40 | void process() override; 41 | 42 | private: 43 | void show_help() const; 44 | 45 | std::string get_test_name(rdc_diag_test_cases_t test_case) const; 46 | 47 | enum OPERATIONS { 48 | DIAG_UNKNOWN = 0, 49 | DIAG_HELP, 50 | DIAG_RUN, 51 | } diag_ops_; 52 | 53 | rdc_gpu_group_t group_id_; 54 | rdc_diag_level_t run_level_; 55 | }; 56 | 57 | } // namespace rdc 58 | } // namespace amd 59 | 60 | #endif // RDCI_INCLUDE_RDCIDIAGSUBSYSTEM_H_ 61 | -------------------------------------------------------------------------------- /rdci/include/RdciDiscoverySubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef RDCI_INCLUDE_RDCIDISCOVERYSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCIDISCOVERYSUBSYSTEM_H_ 24 | 25 | #include "RdciSubSystem.h" 26 | 27 | namespace amd { 28 | namespace rdc { 29 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). Keeps four boolean members (show_help_, is_list_, is_partition_, show_version_) next to the private show_help(), show_attributes(), show_attributes_with_partitions() and show_version() helpers. */ 30 | class RdciDiscoverySubSystem : public RdciSubSystem { 31 | public: 32 | RdciDiscoverySubSystem(); 33 | void parse_cmd_opts(int argc, char** argv) override; 34 | void process() override; 35 | 36 | private: 37 | bool show_help_; 38 | void show_help() const; 39 | bool is_list_; 40 | bool is_partition_; 41 | void show_attributes(); 42 | void show_attributes_with_partitions(); 43 | bool show_version_; 44 | void show_version(); 45 | }; 46 | 47 | } // namespace rdc 48 | } // namespace amd 49 | 50 | #endif // RDCI_INCLUDE_RDCIDISCOVERYSUBSYSTEM_H_ 51 | -------------------------------------------------------------------------------- /rdci/include/RdciFieldGroupSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_ 24 | 25 | #include 26 | 27 | #include "RdciSubSystem.h" 28 | 29 | namespace amd { 30 | namespace rdc { 31 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). field_group_ops_ holds one OPERATIONS value (unknown, help, create, delete, list, info); is_group_set_, group_id_, group_name_ and field_ids_ are kept as members. */ 32 | class RdciFieldGroupSubSystem : public RdciSubSystem { 33 | public: 34 | RdciFieldGroupSubSystem(); 35 | void parse_cmd_opts(int argc, char** argv) override; 36 | void process() override; 37 | 38 | private: 39 | void show_help() const; 40 | 41 | enum OPERATIONS { 42 | FIELD_GROUP_UNKNOWN = 0, 43 | FIELD_GROUP_HELP, 44 | FIELD_GROUP_CREATE, 45 | FIELD_GROUP_DELETE, 46 | FIELD_GROUP_LIST, 47 | FIELD_GROUP_INFO 48 | } field_group_ops_; 49 | 50 | bool is_group_set_; 51 | uint32_t group_id_; 52 | std::string group_name_; 53 | std::string field_ids_; 54 | }; 55 | 56 | } // namespace rdc 57 | } // namespace amd 58 | 59 | #endif // RDCI_INCLUDE_RDCIFIELDGROUPSUBSYSTEM_H_ 60 | -------------------------------------------------------------------------------- /rdci/include/RdciGroupSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_ 24 | 25 | #include 26 | #include 27 | 28 | #include "RdciSubSystem.h" 29 | 30 | namespace amd { 31 | namespace rdc { 32 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). group_ops_ holds one OPERATIONS value (unknown, help, create, delete, list, add GPUs, info); is_group_set_, group_id_, group_name_ and gpu_ids_ are kept as members. */ 33 | class RdciGroupSubSystem : public RdciSubSystem { 34 | public: 35 | RdciGroupSubSystem(); 36 | void parse_cmd_opts(int argc, char** argv) override; 37 | void process() override; 38 | 39 | private: 40 | void show_help() const; 41 | 42 | enum OPERATIONS { 43 | GROUP_UNKNOWN = 0, 44 | GROUP_HELP, 45 | GROUP_CREATE, 46 | GROUP_DELETE, 47 | GROUP_LIST, 48 | GROUP_ADD_GPUS, 49 | GROUP_INFO 50 | } group_ops_; 51 | 52 | bool is_group_set_; 53 | uint32_t group_id_; 54 | std::string group_name_; 55 | std::string gpu_ids_; 56 | }; 57 | 58 | } // namespace rdc 59 | } // namespace amd 60 | 61 | #endif // RDCI_INCLUDE_RDCIGROUPSUBSYSTEM_H_ 62 | -------------------------------------------------------------------------------- /rdci/include/RdciPolicySubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef RDCI_INCLUDE_RDCIPOLICYSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCIPOLICYSUBSYSTEM_H_ 24 | 25 | #include 26 | 27 | #include "RdciSubSystem.h" 28 | 29 | namespace amd { 30 | namespace rdc { 31 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). policy_ops_ holds one OPERATIONS value (unknown, help, set, get, register, clear); OPTIONS names the option kinds (max page, temp, power, action). keep_running_ is a static volatile sig_atomic_t declared together with the static set_terminating(int sig). */ 32 | class RdciPolicySubSystem : public RdciSubSystem { 33 | public: 34 | RdciPolicySubSystem(); 35 | void parse_cmd_opts(int argc, char** argv) override; 36 | void process() override; 37 | 38 | private: 39 | void show_help() const; 40 | 41 | enum OPERATIONS { 42 | POLICY_UNKNOWN = 0, 43 | POLICY_HELP, 44 | POLICY_SET, 45 | POLICY_GET, 46 | POLICY_REGISTER, 47 | POLICY_CLEAR 48 | } policy_ops_; 49 | 50 | enum OPTIONS { 51 | POLICY_OPT_MAX_PAGE = 0, 52 | POLICY_OPT_TEMP, 53 | POLICY_OPT_POWER, 54 | POLICY_OPT_ACTION, 55 | }; 56 | 57 | std::map options_; 58 | uint32_t group_id_; 59 | 60 | bool is_group_id_set; 61 | bool show_help_; 62 | static volatile sig_atomic_t keep_running_; 63 | static void set_terminating(int sig); 64 | }; 65 | 66 | } // namespace rdc 67 | } // namespace amd 68 | 69 | #endif // RDCI_INCLUDE_RDCIPOLICYSUBSYSTEM_H_ 70 | -------------------------------------------------------------------------------- /rdci/include/RdciStatsSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef RDCI_INCLUDE_RDCISTATSSUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCISTATSSUBSYSTEM_H_ 24 | #include 25 | 26 | #include 27 | 28 | #include "RdciSubSystem.h" 29 | 30 | namespace amd { 31 | namespace rdc { 32 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). stats_ops_ holds one OPERATIONS value (unknown, help, start recording, stop recording, display, remove, remove all); job_id_, group_id_ and is_verbose_ (default false) are kept as members. show_job_stats() and show_job_stats_json() both take a const rdc_gpu_usage_info_t reference. */ 33 | class RdciStatsSubSystem : public RdciSubSystem { 34 | public: 35 | RdciStatsSubSystem(); 36 | ~RdciStatsSubSystem(); 37 | void parse_cmd_opts(int argc, char** argv) override; 38 | void process() override; 39 | 40 | private: 41 | void show_help() const; 42 | void show_job_stats(const rdc_gpu_usage_info_t& gpu_info) const; 43 | void show_job_stats_json(const rdc_gpu_usage_info_t& gpu_info) const; 44 | 45 | enum OPERATIONS { 46 | STATS_UNKNOWN = 0, 47 | STATS_HELP, 48 | STATS_START_RECORDING, 49 | STATS_STOP_RECORDING, 50 | STATS_DISPLAY, 51 | STATS_REMOVE, 52 | STATS_REMOVE_ALL 53 | } stats_ops_; 54 | 55 | std::string job_id_; 56 | uint32_t group_id_; 57 | bool is_verbose_ = false; 58 | }; 59 | 60 | } // namespace rdc 61 | } // namespace amd 62 | 63 | #endif // RDCI_INCLUDE_RDCISTATSSUBSYSTEM_H_ 64 | -------------------------------------------------------------------------------- /rdci/include/RdciSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef RDCI_INCLUDE_RDCISUBSYSTEM_H_ 23 | #define RDCI_INCLUDE_RDCISUBSYSTEM_H_ 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include "rdc/rdc.h" 30 | #include "rdc_lib/rdc_common.h" 31 | 32 | namespace amd { 33 | namespace rdc { 34 | /* Abstract base class of the rdci sub-systems: parse_cmd_opts() and process() are pure virtual, connect() is virtual, and the destructor is virtual. Protected members available to derived classes: rdc_handle_, ip_port_, use_auth_, config_test_, root_ca_, client_cert_, client_key_, plus the split_string(), show_common_usage() and set_json_output() helpers. The JSON flag itself is private and read through is_json_output(). */ 35 | class RdciSubSystem { 36 | public: 37 | RdciSubSystem(); 38 | virtual void parse_cmd_opts(int argc, char** argv) = 0; 39 | virtual void connect(); 40 | 41 | virtual void process() = 0; 42 | virtual ~RdciSubSystem(); 43 | 44 | bool is_json_output() const; 45 | 46 | protected: 47 | void set_json_output(bool is_json); 48 | std::vector split_string(const std::string& s, char delimiter) const; 49 | void show_common_usage() const; 50 | rdc_handle_t rdc_handle_; 51 | std::string ip_port_; 52 | 53 | bool use_auth_; 54 | std::string config_test_; 55 | std::string root_ca_; 56 | std::string client_cert_; 57 | std::string client_key_; 58 | 59 | private: 60 | bool is_json_output_; 61 | }; 62 | 63 | typedef std::shared_ptr RdciSubSystemPtr; 64 | 65 | } // namespace rdc 66 | } // namespace amd 67 | 68 | #endif // RDCI_INCLUDE_RDCISUBSYSTEM_H_ 69 | -------------------------------------------------------------------------------- /rdci/include/RdciTopologyLinkSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 
3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 
18 | */ 19 | #ifndef RDCI_INCLUDE_RDCITOPOLOGYLINKSUBSYSTEM_H_ 20 | #define RDCI_INCLUDE_RDCITOPOLOGYLINKSUBSYSTEM_H_ 21 | #include 22 | 23 | #include 24 | 25 | #include "RdciSubSystem.h" 26 | namespace amd { 27 | namespace rdc { 28 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). topology_ops_ holds TOPOLOGY_UNKNOWN or TOPOLOGY_INDEX; group_index_ and is_group_index_set are kept as members. */ class RdciTopologyLinkSubSystem : public RdciSubSystem { 29 | public: 30 | RdciTopologyLinkSubSystem(); 31 | void parse_cmd_opts(int argc, char** argv) override; 32 | void process() override; 33 | 34 | private: 35 | void show_help() const; 36 | enum OPERATIONS { 37 | TOPOLOGY_UNKNOWN = 0, 38 | TOPOLOGY_INDEX, 39 | } topology_ops_; 40 | uint32_t group_index_; 41 | bool is_group_index_set; 42 | }; 43 | } // namespace rdc 44 | } // namespace amd 45 | #endif // RDCI_INCLUDE_RDCITOPOLOGYLINKSUBSYSTEM_H_ -------------------------------------------------------------------------------- /rdci/include/RdciXgmiLinkStatusSubSystem.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2024 - present Advanced Micro Devices, Inc. All rights reserved. 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in 10 | all copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 13 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 14 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 15 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 16 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 17 | THE SOFTWARE. 18 | */ 19 | #ifndef RDCI_INCLUDE_RDCIXGMILINKSTATUSSUBSYSTEM_H_ 20 | #define RDCI_INCLUDE_RDCIXGMILINKSTATUSSUBSYSTEM_H_ 21 | #include 22 | 23 | #include 24 | 25 | #include "RdciSubSystem.h" 26 | namespace amd { 27 | namespace rdc { 28 | /* rdci sub-system derived from RdciSubSystem; overrides parse_cmd_opts() and process(). link_status_ops_ holds XMGI_LINK_UNKNOWN or XMGI_LINK_STATUS. NOTE(review): the enumerators spell XGMI as "XMGI"; left unchanged here because their users are not visible from this header. */ class RdciXgmiLinkStatusSubSystem : public RdciSubSystem { 29 | public: 30 | RdciXgmiLinkStatusSubSystem(); 31 | void parse_cmd_opts(int argc, char** argv) override; 32 | void process() override; 33 | 34 | private: 35 | void show_help() const; 36 | enum OPERATIONS { 37 | XMGI_LINK_UNKNOWN = 0, 38 | XMGI_LINK_STATUS, 39 | } link_status_ops_; 40 | }; 41 | } // namespace rdc 42 | } // namespace amd 43 | #endif // RDCI_INCLUDE_RDCIXGMILINKSTATUSSUBSYSTEM_H_ -------------------------------------------------------------------------------- /server/include/rdc/rdc_admin_service.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 
13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ 23 | #define SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ 24 | 25 | #include "amd_smi/amdsmi.h" 26 | #include "rdc.grpc.pb.h" // NOLINT 27 | /* NOTE(review): this looks like the header including itself (a no-op under the include guard) - confirm the path resolves to this file before removing it. */ #include "rdc/rdc_admin_service.h" 28 | 29 | namespace amd { 30 | namespace rdc { 31 | /* Implementation class for the generated ::rdc::RdcAdmin::Service interface; overrides VerifyConnection(). The private section is currently empty. */ 32 | class RDCAdminServiceImpl final : public ::rdc::RdcAdmin::Service { 33 | public: 34 | RDCAdminServiceImpl(); 35 | ~RDCAdminServiceImpl(); 36 | ::grpc::Status VerifyConnection(::grpc::ServerContext* context, 37 | const ::rdc::VerifyConnectionRequest* request, 38 | ::rdc::VerifyConnectionResponse* reply) override; 39 | 40 | private: 41 | }; 42 | 43 | } // namespace rdc 44 | } // namespace amd 45 | 46 | #endif // SERVER_INCLUDE_RDC_RDC_ADMIN_SERVICE_H_ 47 | -------------------------------------------------------------------------------- /server/rdc.service.in: -------------------------------------------------------------------------------- 1 | 2 | # References: 3 | # https://linuxconfig.org/how-to-create-systemd-service-unit-in-linux 4 | # https://www.linux.com/tutorials/systemd-services-beyond-starting-and-stopping/ 5 | [Unit] 6 | Description=Radeon Data Center Daemon (rdcd) 7 | After=network.target 8 | 9 | # Add any services that must be started before rdcd here 10 | #After= 11 | 12 | # Add any non-service units required by rdcd here 13 | #Requires= 14 | 15 | [Service] 16 | EnvironmentFile=-/@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_DATAROOTDIR@/@RDC@/conf/rdc_options.conf 17 | 
User=rdc 18 | Group=rdc 19 | 20 | Type=simple 21 | 22 | CapabilityBoundingSet=CAP_DAC_OVERRIDE 23 | AmbientCapabilities=CAP_DAC_OVERRIDE 24 | 25 | # If we need to start anything before rdcd, use this 26 | # ExecStartPre= 27 | # update-alternatives has been run for rdcd and 28 | # soft link will be available in /usr/bin 29 | ExecStart=rdcd $RDC_OPTS 30 | 31 | # If we need to start anything after rdcd use this 32 | # ExecStartPost= 33 | 34 | # If we want to change the default time out for the ExecStop (90 sec), 35 | # we can modify that time limit with TimeoutStopSec 36 | # TimeoutStopSec= 37 | 38 | # Note, we can have multiple ExecStop commands if necessary 39 | ExecStop=/bin/kill -15 $MAINPID 40 | #ExecReload= 41 | #ExecStartPost= 42 | #ExecStopPost= 43 | 44 | # StandardOutput=journal 45 | # StandardError=inherit 46 | 47 | [Install] 48 | WantedBy= multi-user.target 49 | -------------------------------------------------------------------------------- /server/rdc_options.conf: -------------------------------------------------------------------------------- 1 | # Append 'rdc' daemon parameters here 2 | RDC_OPTS="" 3 | #RDC_OPTS="-p 50051 -u -d" 4 | -------------------------------------------------------------------------------- /server/run_build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e  # stop at the first failing step so cmake/make never run after a failed cd 3 | mkdir -p build 4 | cd build 5 | cmake -DROCM_DIR=/opt/rocm .. 6 | make 7 | cd .. 8 | 9 | -------------------------------------------------------------------------------- /server/src/rdc_admin_service.cc: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | */ 23 | #include "rdc/rdc_admin_service.h" 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "rdc.grpc.pb.h" // NOLINT 36 | 37 | namespace amd { 38 | namespace rdc { 39 | 40 | RDCAdminServiceImpl::RDCAdminServiceImpl() {} 41 | 42 | RDCAdminServiceImpl::~RDCAdminServiceImpl() {} 43 | /* Copies the request's magic_num into the reply's echo_magic_num and returns Status::OK; context is unused. */ ::grpc::Status RDCAdminServiceImpl::VerifyConnection(::grpc::ServerContext* context, 44 | const ::rdc::VerifyConnectionRequest* request, 45 | ::rdc::VerifyConnectionResponse* reply) { 46 | (void)context; // Quiet warning for now 47 | 48 | reply->set_echo_magic_num(request->magic_num()); 49 | return ::grpc::Status::OK; 50 | } 51 | 52 | } // namespace rdc 53 | } // namespace amd 54 | -------------------------------------------------------------------------------- /src/DEBIAN_prerm.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Removes the rdcd/rdci alternatives; returns 0 early when update-alternatives is not installed. 3 | do_update_alternatives(){ 4 | # skip update if program doesn't exist 5 | command -v update-alternatives >/dev/null || return 0 6 | binaries=( 7 | rdcd 8 | rdci 9 | ) 10 | 11 | for i in "${binaries[@]}" 12 | do 13 | update-alternatives --remove "$i" "@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_BINDIR@/$i" 14 | done 15 | } 16 | # Stops the rdc unit when systemd is present and the unit is active; always returns 0. 17 | stop_rdc() { 18 | #stop RDC service if systemd exists and service is running 19 | if [ -d /run/systemd/system ] && systemctl is-active --quiet rdc; then 20 | systemctl stop rdc 21 | fi 22 | return 0 23 | } 24 | # Unlinks the rdc.service symlink if it exists; always returns 0. 25 | rm_rdc_service() { 26 | local LINK=/lib/systemd/system/rdc.service 27 | if [ -L "$LINK" ]; then 28 | unlink "$LINK" 29 | fi 30 | return 0 31 | } 32 | 33 | reload_systemd() { 34 | if [ -d /run/systemd/system ]; then 35 | systemctl daemon-reload 36 | fi 37 | return 0 38 | } 39 | 40 | rm_pyc() { 41 | # remove pyc file generated by python 42 | rm -rf /@CPACK_PACKAGING_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBEXECDIR@/@RDC@/python_binding/__pycache__ 43 | } 44 | 45 | case "$1" in 46 
| remove | upgrade ) 47 | stop_rdc 48 | rm_rdc_service 49 | reload_systemd 50 | rm_pyc 51 | do_update_alternatives 52 | ;; 53 | purge) 54 | ;; 55 | *) 56 | exit 0 57 | ;; 58 | esac 59 | -------------------------------------------------------------------------------- /src/RPM_postun.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | do_update_alternatives(){ 3 | # skip update if program doesn't exist 4 | command -v update-alternatives >/dev/null || return 0 5 | binaries=( 6 | rdcd 7 | rdci 8 | ) 9 | 10 | for i in "${binaries[@]}" 11 | do 12 | update-alternatives --remove "$i" "$RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_BINDIR@/$i" 13 | done 14 | } 15 | # Stops the rdc unit when systemd is present and the unit is active; always returns 0. 16 | stop_rdc() { 17 | #stop RDC service if systemd exists and service is running 18 | if [ -d /run/systemd/system ] && systemctl is-active --quiet rdc; then 19 | systemctl stop rdc 20 | fi 21 | return 0 22 | } 23 | # Unlinks the rdc.service symlink if it exists; always returns 0. 24 | rm_rdc_service() { 25 | local LINK=@DISTRO_ROOT@/rdc.service 26 | if [ -L "$LINK" ]; then 27 | unlink "$LINK" 28 | fi 29 | return 0 30 | } 31 | 32 | reload_systemd() { 33 | if [ -d /run/systemd/system ]; then 34 | systemctl daemon-reload 35 | fi 36 | return 0 37 | } 38 | 39 | if [ "$1" -le 1 ]; then 40 | # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations 41 | stop_rdc 42 | rm_rdc_service 43 | reload_systemd 44 | do_update_alternatives 45 | fi 46 | -------------------------------------------------------------------------------- /src/RPM_preun.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$1" -le 1 ]; then 4 | # perform the below actions for rpm remove($1=0) or upgrade($1=1) operations 5 | # remove pyc file generated by python 6 | rm -rf "/$RPM_INSTALL_PREFIX0/@CMAKE_INSTALL_LIBEXECDIR@/@RDC@/python_binding/__pycache__" 7 | fi 8 | -------------------------------------------------------------------------------- /src/header_template.hpp.in: 
-------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef @include_guard@ 23 | #define @include_guard@ 24 | 25 | #ifndef ROCM_HEADER_WRAPPER_WERROR 26 | #define ROCM_HEADER_WRAPPER_WERROR @deprecated_error@ 27 | #endif 28 | #if ROCM_HEADER_WRAPPER_WERROR /* ROCM_HEADER_WRAPPER_WERROR 1 */ 29 | #error "This file is deprecated. Use file from include path /opt/rocm-ver/include/ and include as rdc/@header_name@" 30 | #else /* ROCM_HEADER_WRAPPER_WERROR */ 31 | #if defined(__GNUC__) 32 | #warning "This file is deprecated. Use file from include path /opt/rocm-ver/include/ and include as rdc/@header_name@" 33 | #else 34 | #pragma message("This file is deprecated. 
Use file from include path /opt/rocm-ver/include/ and include as rdc/@header_name@") 35 | #endif 36 | #endif /* ROCM_HEADER_WRAPPER_WERROR */ 37 | 38 | @include_statements@ 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/rdc64Config.in: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2019 - present Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | 24 | #ifndef INCLUDE_RDC_RDC64CONFIG_H_ 25 | #define INCLUDE_RDC_RDC64CONFIG_H_ 26 | 27 | // This file is generated on build. 
28 | 29 | #define amd_smi_VERSION_MAJOR @amd_smi_VERSION_MAJOR@ 30 | #define amd_smi_VERSION_MINOR @amd_smi_VERSION_MINOR@ 31 | #define amd_smi_VERSION_PATCH @amd_smi_VERSION_PATCH@ 32 | #define amd_smi_VERSION_BUILD "@amd_smi_VERSION_BUILD@" 33 | 34 | #endif // INCLUDE_RDC_RDC64CONFIG_H_ 35 | -------------------------------------------------------------------------------- /tests/rdc_tests/functional/rdci_discovery.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DISCOVERY_H_ 23 | #define TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DISCOVERY_H_ 24 | 25 | #include "rdc_tests/test_base.h" 26 | 27 | class TestRdciDiscovery : public TestBase { 28 | public: 29 | TestRdciDiscovery(); 30 | 31 | // @Brief: Destructor for test case of TestRdciDiscovery 32 | virtual ~TestRdciDiscovery(); 33 | 34 | // @Brief: Setup the environment for measurement 35 | virtual void SetUp(); 36 | 37 | // @Brief: Core measurement execution 38 | virtual void Run(); 39 | 40 | // @Brief: Clean up and retrieve the resource 41 | virtual void Close(); 42 | 43 | // @Brief: Display results 44 | virtual void DisplayResults() const; 45 | 46 | // @Brief: Display information about what this test does 47 | virtual void DisplayTestInfo(void); 48 | }; 49 | 50 | #endif // TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DISCOVERY_H_ 51 | -------------------------------------------------------------------------------- /tests/rdc_tests/functional/rdci_dmon.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DMON_H_ 23 | #define TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DMON_H_ 24 | 25 | #include "rdc_tests/test_base.h" 26 | 27 | class TestRdciDmon : public TestBase { 28 | public: 29 | TestRdciDmon(); 30 | 31 | // @Brief: Destructor for test case of TestRdciDmon 32 | virtual ~TestRdciDmon(); 33 | 34 | // @Brief: Setup the environment for measurement 35 | virtual void SetUp(); 36 | 37 | // @Brief: Core measurement execution 38 | virtual void Run(); 39 | 40 | // @Brief: Clean up and retrieve the resource 41 | virtual void Close(); 42 | 43 | // @Brief: Display results 44 | virtual void DisplayResults() const; 45 | 46 | // @Brief: Display information about what this test does 47 | virtual void DisplayTestInfo(void); 48 | }; 49 | 50 | #endif // TESTS_RDC_TESTS_FUNCTIONAL_RDCI_DMON_H_ 51 | -------------------------------------------------------------------------------- /tests/rdc_tests/functional/rdci_fieldgroup.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef TESTS_RDC_TESTS_FUNCTIONAL_RDCI_FIELDGROUP_H_ 23 | #define TESTS_RDC_TESTS_FUNCTIONAL_RDCI_FIELDGROUP_H_ 24 | 25 | #include "rdc_tests/test_base.h" 26 | 27 | class TestRdciFieldgroup : public TestBase { 28 | public: 29 | TestRdciFieldgroup(); 30 | 31 | // @Brief: Destructor for test case of TestRdciFieldgroup 32 | virtual ~TestRdciFieldgroup(); 33 | 34 | // @Brief: Setup the environment for measurement 35 | virtual void SetUp(); 36 | 37 | // @Brief: Core measurement execution 38 | virtual void Run(); 39 | 40 | // @Brief: Clean up and retrieve the resource 41 | virtual void Close(); 42 | 43 | // @Brief: Display results 44 | virtual void DisplayResults() const; 45 | 46 | // @Brief: Display information about what this test does 47 | virtual void DisplayTestInfo(void); 48 | }; 49 | 50 | #endif // TESTS_RDC_TESTS_FUNCTIONAL_RDCI_FIELDGROUP_H_ 51 | -------------------------------------------------------------------------------- /tests/rdc_tests/functional/rdci_group.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - Advanced Micro Devices, Inc. All rights reserved. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | #ifndef TESTS_RDC_TESTS_FUNCTIONAL_RDCI_GROUP_H_ 23 | #define TESTS_RDC_TESTS_FUNCTIONAL_RDCI_GROUP_H_ 24 | 25 | #include "rdc_tests/test_base.h" 26 | 27 | class TestRdciGroup : public TestBase { 28 | public: 29 | TestRdciGroup(); 30 | 31 | // @Brief: Destructor for test case of TestRdciGroup 32 | virtual ~TestRdciGroup(); 33 | 34 | // @Brief: Setup the environment for measurement 35 | virtual void SetUp(); 36 | 37 | // @Brief: Core measurement execution 38 | virtual void Run(); 39 | 40 | // @Brief: Clean up and retrieve the resource 41 | virtual void Close(); 42 | 43 | // @Brief: Display results 44 | virtual void DisplayResults() const; 45 | 46 | // @Brief: Display information about what this test does 47 | virtual void DisplayTestInfo(void); 48 | }; 49 | 50 | #endif // TESTS_RDC_TESTS_FUNCTIONAL_RDCI_GROUP_H_ 51 | -------------------------------------------------------------------------------- /tests/rdc_tests/functional/rdci_stats.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2020 - Advanced Micro Devices, Inc. All rights reserved. 
3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | */ 22 | #ifndef TESTS_RDC_TESTS_FUNCTIONAL_RDCI_STATS_H_ 23 | #define TESTS_RDC_TESTS_FUNCTIONAL_RDCI_STATS_H_ 24 | 25 | #include "rdc_tests/test_base.h" 26 | 27 | class TestRdciStats : public TestBase { 28 | public: 29 | TestRdciStats(); 30 | 31 | // @Brief: Destructor for test case of TestRdciStats 32 | virtual ~TestRdciStats(); 33 | 34 | // @Brief: Setup the environment for measurement 35 | virtual void SetUp(); 36 | 37 | // @Brief: Core measurement execution 38 | virtual void Run(); 39 | 40 | // @Brief: Clean up and retrieve the resource 41 | virtual void Close(); 42 | 43 | // @Brief: Display results 44 | virtual void DisplayResults() const; 45 | 46 | // @Brief: Display information about what this test does 47 | virtual void DisplayTestInfo(void); 48 | }; 49 | 50 | #endif // TESTS_RDC_TESTS_FUNCTIONAL_RDCI_STATS_H_ 51 | -------------------------------------------------------------------------------- /tests/rdc_tests/rdctst.exclude: -------------------------------------------------------------------------------- 1 | declare -A FILTER 2 | 3 | # FILTER is meant to be used with a negative gtest filter 4 | 5 | # Permanent exclusions 6 | # These tests are included for debugging, but are not executed in normal 7 | # execution on any ASIC: 8 | PERMANENT_BLACKLIST_ALL_ASICS= 9 | 10 | # This is the temporary blacklist for all ASICs. This is to be used when a test 11 | # is failing consistently 12 | TEMPORARY_BLACKLIST_ALL_ASICS= 13 | 14 | if [ -z "$PERMANENT_BLACKLIST_ALL_ASICS" ] && [ -z "$TEMPORARY_BLACKLIST_ALL_ASICS" ]; then 15 | BLACKLIST_ALL_ASICS= 16 | else 17 | BLACKLIST_ALL_ASICS=\ 18 | "$PERMANENT_BLACKLIST_ALL_ASICS:"\ 19 | "$TEMPORARY_BLACKLIST_ALL_ASICS" 20 | fi 21 | 22 | # ASIC specific blacklists 23 | FILTER[vega10]=\ 24 | $BLACKLIST_ALL_ASICS 25 | 26 | # Leave the remaining commands in this RDC version of a blacklist 27 | # as an example, until there are actual tests to blacklist. 
28 | return 0 29 | 30 | # SWDEV-207510 31 | FILTER[vega20]=\ 32 | $BLACKLIST_ALL_ASICS\ 33 | "rsmitstReadOnly.TestFrequenciesRead:"\ 34 | "rsmitstReadOnly.TestProcInfoRead" 35 | 36 | # SWDEV-207510 37 | FILTER[arcturus]=\ 38 | $BLACKLIST_ALL_ASICS\ 39 | "rsmitstReadOnly.TestFrequenciesRead:"\ 40 | "rsmitstReadWrite.TestFrequenciesReadWrite:"\ 41 | "rsmitstReadOnly.TestProcInfoRead" 42 | 43 | -------------------------------------------------------------------------------- /tests/rdc_tests/test_utils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================= 3 | * ROC Runtime Conformance Release License 4 | * ============================================================================= 5 | * The University of Illinois/NCSA 6 | * Open Source License (NCSA) 7 | * 8 | * Copyright (c) 2019, Advanced Micro Devices, Inc. 9 | * All rights reserved. 10 | * 11 | * Developed by: 12 | * 13 | * AMD Research and AMD ROC Software Development 14 | * 15 | * Advanced Micro Devices, Inc. 16 | * 17 | * www.amd.com 18 | * 19 | * Permission is hereby granted, free of charge, to any person obtaining a copy 20 | * of this software and associated documentation files (the "Software"), to 21 | * deal with the Software without restriction, including without limitation 22 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 23 | * and/or sell copies of the Software, and to permit persons to whom the 24 | * Software is furnished to do so, subject to the following conditions: 25 | * 26 | * - Redistributions of source code must retain the above copyright notice, 27 | * this list of conditions and the following disclaimers. 28 | * - Redistributions in binary form must reproduce the above copyright 29 | * notice, this list of conditions and the following disclaimers in 30 | * the documentation and/or other materials provided with the distribution. 
31 | * - Neither the names of , 32 | * nor the names of its contributors may be used to endorse or promote 33 | * products derived from this Software without specific prior written 34 | * permission. 35 | * 36 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 37 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 38 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 39 | * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 40 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 41 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 42 | * DEALINGS WITH THE SOFTWARE. 43 | * 44 | */ 45 | 46 | #ifndef TESTS_RDC_TESTS_TEST_UTILS_H_ 47 | #define TESTS_RDC_TESTS_TEST_UTILS_H_ 48 | 49 | #include "amd_smi/amdsmi.h" 50 | 51 | const char* NameFromFWEnum(amdsmi_fw_block_t blk); 52 | 53 | #endif // TESTS_RDC_TESTS_TEST_UTILS_H_ 54 | -------------------------------------------------------------------------------- /tools/cmake_format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -u 5 | set -o pipefail 6 | 7 | FILES=$(find . -type f \( -name "CMakeLists.txt" -o -name "*.cmake" -o -name "*.cmake.in" \) \ 8 | -not -path "*/\.*" \ 9 | -not -path "*/build/*") 10 | 11 | failed_files=() 12 | 13 | # Check if files are formatted correctly; read one path per line so that 14 | # paths containing spaces or glob characters are not split or expanded 15 | while IFS= read -r file; do 16 | [ -n "$file" ] || continue 17 | echo "Checking $file..." 18 | if ! cmake-format --check "$file"; then 19 | failed_files+=("$file") 20 | echo "::error file=$file::File needs formatting" 21 | fi 22 | done <<< "$FILES" 23 | 24 | if [ ${#failed_files[@]} -ne 0 ]; then 25 | cmake-format -i "${failed_files[@]}" 26 | fi 27 | --------------------------------------------------------------------------------