├── .clang-format ├── .clang-tidy ├── .figures └── clock_frequency.png ├── .github └── workflows │ └── documentation.yml ├── .gitignore ├── CITATION.cff ├── CMakeLists.txt ├── LICENSE ├── README.md ├── bindings ├── CMakeLists.txt ├── __init__.py ├── cpu_hardware_sampler.cpp ├── event.cpp ├── gpu_amd_hardware_sampler.cpp ├── gpu_intel_hardware_sampler.cpp ├── gpu_nvidia_hardware_sampler.cpp ├── hardware_sampler.cpp ├── main.cpp ├── relative_event.cpp ├── relative_event.hpp ├── sample_category.cpp ├── system_hardware_sampler.cpp └── version.cpp ├── cmake ├── Findlevel_zero.cmake └── hwsConfig.cmake.in ├── docs └── CMakeLists.txt ├── examples ├── cpp │ ├── CMakeLists.txt │ └── main.cpp └── python │ └── main.py ├── include └── hws │ ├── core.hpp │ ├── cpu │ ├── cpu_samples.hpp │ ├── hardware_sampler.hpp │ └── utility.hpp │ ├── event.hpp │ ├── gpu_amd │ ├── hardware_sampler.hpp │ ├── rocm_smi_samples.hpp │ └── utility.hpp │ ├── gpu_intel │ ├── hardware_sampler.hpp │ ├── level_zero_device_handle.hpp │ ├── level_zero_device_handle_impl.hpp │ ├── level_zero_samples.hpp │ └── utility.hpp │ ├── gpu_nvidia │ ├── hardware_sampler.hpp │ ├── nvml_device_handle.hpp │ ├── nvml_device_handle_impl.hpp │ ├── nvml_samples.hpp │ └── utility.hpp │ ├── hardware_sampler.hpp │ ├── sample_category.hpp │ ├── system_hardware_sampler.hpp │ ├── utility.hpp │ └── version.hpp.in ├── pyproject.toml └── src └── hws ├── cpu ├── CMakeLists.txt ├── cpu_samples.cpp ├── hardware_sampler.cpp └── utility.cpp ├── event.cpp ├── gpu_amd ├── CMakeLists.txt ├── hardware_sampler.cpp ├── rocm_smi_samples.cpp └── utility.cpp ├── gpu_intel ├── CMakeLists.txt ├── hardware_sampler.cpp ├── level_zero_samples.cpp └── utility.cpp ├── gpu_nvidia ├── CMakeLists.txt ├── hardware_sampler.cpp ├── nvml_samples.cpp └── utility.cpp ├── hardware_sampler.cpp ├── system_hardware_sampler.cpp └── utility.cpp /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | Language: Cpp 3 
| AccessModifierOffset: -2 4 | AlignAfterOpenBracket: Align 5 | AlignArrayOfStructures: None 6 | AlignConsecutiveAssignments: None 7 | AlignConsecutiveBitFields: None 8 | AlignConsecutiveDeclarations: None 9 | AlignConsecutiveMacros: None 10 | AlignConsecutiveShortCaseStatements: 11 | Enabled: true 12 | AcrossEmptyLines: false 13 | AcrossComments: false 14 | AlignCaseColons: false 15 | AlignEscapedNewlines: Right 16 | AlignOperands: Align 17 | AlignTrailingComments: 18 | Kind: Always 19 | AllowAllArgumentsOnNextLine: true 20 | AllowAllParametersOfDeclarationOnNextLine: true 21 | AllowBreakBeforeNoexceptSpecifier: OnlyWithParen 22 | AllowShortBlocksOnASingleLine: Empty 23 | AllowShortCaseLabelsOnASingleLine: true 24 | AllowShortEnumsOnASingleLine: true 25 | AllowShortFunctionsOnASingleLine: All 26 | AllowShortIfStatementsOnASingleLine: Never 27 | AllowShortLambdasOnASingleLine: All 28 | AllowShortLoopsOnASingleLine: true 29 | AlwaysBreakAfterDefinitionReturnType: None 30 | AlwaysBreakBeforeMultilineStrings: false 31 | AlwaysBreakAfterReturnType: None 32 | AlwaysBreakTemplateDeclarations: Yes 33 | BinPackArguments: false 34 | BinPackParameters: false 35 | BitFieldColonSpacing: Both 36 | BraceWrapping: 37 | AfterCaseLabel: false 38 | AfterClass: false 39 | AfterControlStatement: Never 40 | AfterEnum: false 41 | AfterFunction: false 42 | AfterNamespace: false 43 | AfterObjCDeclaration: false 44 | AfterStruct: false 45 | AfterUnion: false 46 | AfterExternBlock: false 47 | BeforeCatch: false 48 | BeforeElse: false 49 | BeforeLambdaBody: false 50 | BeforeWhile: false 51 | IndentBraces: false 52 | SplitEmptyFunction: false 53 | SplitEmptyRecord: false 54 | SplitEmptyNamespace: false 55 | BreakAfterAttributes: Never 56 | BreakAfterJavaFieldAnnotations: false 57 | BreakBeforeBinaryOperators: NonAssignment 58 | BreakBeforeBraces: Custom 59 | BreakBeforeConceptDeclarations: Always 60 | BreakBeforeInlineASMColon: OnlyMultiline 61 | BreakBeforeTernaryOperators: true 62 | 
BreakConstructorInitializers: AfterColon 63 | BreakInheritanceList: AfterComma 64 | BreakStringLiterals: true 65 | ColumnLimit: 0 66 | CommentPragmas: '^ IWYU pragma:' 67 | CompactNamespaces: false 68 | ConstructorInitializerIndentWidth: 4 69 | ContinuationIndentWidth: 4 70 | Cpp11BracedListStyle: false 71 | DerivePointerAlignment: false 72 | DisableFormat: false 73 | EmptyLineAfterAccessModifier: Never 74 | EmptyLineBeforeAccessModifier: LogicalBlock 75 | FixNamespaceComments: true 76 | ForEachMacros: [ 'foreach', 'Q_FOREACH', 'BOOST_FOREACH' ] 77 | IfMacros: [ ] 78 | IncludeBlocks: Regroup 79 | IncludeCategories: 80 | - Regex: '^"hws/' 81 | Priority: 1 82 | - Regex: '^"(pybind|nvml|cuda|rocm_smi|hip|level_zero|subprocess|fmt)' 83 | Priority: 2 84 | - Regex: '^.*' 85 | Priority: 3 86 | IncludeIsMainRegex: '(Test)?$' 87 | IncludeIsMainSourceRegex: '(\.cu|\.hip)' 88 | IndentAccessModifiers: false 89 | IndentCaseBlocks: true 90 | IndentCaseLabels: true 91 | IndentExternBlock: NoIndent 92 | IndentGotoLabels: false 93 | IndentPPDirectives: BeforeHash 94 | IndentRequiresClause: false 95 | IndentWidth: 4 96 | IndentWrappedFunctionNames: false 97 | InsertBraces: true 98 | InsertNewlineAtEOF: true 99 | InsertTrailingCommas: None 100 | IntegerLiteralSeparator: 101 | Binary: 8 102 | Decimal: 3 103 | DecimalMinDigits: 5 104 | Hex: -1 105 | KeepEmptyLinesAtEOF: false 106 | KeepEmptyLinesAtTheStartOfBlocks: false 107 | LambdaBodyIndentation: Signature 108 | LineEnding: DeriveLF 109 | MacroBlockBegin: '' 110 | MacroBlockEnd: '' 111 | Macros: [ ] 112 | MaxEmptyLinesToKeep: 1 113 | NamespaceIndentation: None 114 | NamespaceMacros: [ ] 115 | PPIndentWidth: -1 116 | PackConstructorInitializers: Never 117 | PenaltyBreakAssignment: 2 118 | PenaltyBreakBeforeFirstCallParameter: 19 119 | PenaltyBreakComment: 300 120 | PenaltyBreakFirstLessLess: 120 121 | PenaltyBreakOpenParenthesis: 0 122 | PenaltyBreakString: 1000 123 | PenaltyBreakTemplateDeclaration: 10 124 | PenaltyExcessCharacter: 
1000000 125 | PenaltyIndentedWhitespace: 1 126 | PenaltyReturnTypeOnItsOwnLine: 60 127 | PointerAlignment: Right 128 | QualifierAlignment: Custom 129 | QualifierOrder: [ 'inline', 'constexpr', 'static', 'friend', 'restrict', 'const', 'volatile', 'type' ] 130 | ReferenceAlignment: Pointer 131 | ReflowComments: true 132 | RemoveBracesLLVM: false 133 | RemoveParentheses: Leave 134 | RemoveSemicolon: true 135 | RequiresClausePosition: OwnLine 136 | RequiresExpressionIndentation: OuterScope 137 | SeparateDefinitionBlocks: Always 138 | ShortNamespaceLines: 1 139 | SortIncludes: CaseInsensitive 140 | SortUsingDeclarations: LexicographicNumeric 141 | SpaceAfterCStyleCast: true 142 | SpaceAfterLogicalNot: false 143 | SpaceAfterTemplateKeyword: true 144 | SpaceAroundPointerQualifiers: Default 145 | SpaceBeforeAssignmentOperators: true 146 | SpaceBeforeCaseColon: false 147 | SpaceBeforeCpp11BracedList: false 148 | SpaceBeforeCtorInitializerColon: true 149 | SpaceBeforeInheritanceColon: true 150 | SpaceBeforeJsonColon: false 151 | SpaceBeforeParens: ControlStatements 152 | SpaceBeforeRangeBasedForLoopColon: true 153 | SpaceBeforeSquareBrackets: false 154 | SpaceInEmptyBlock: true 155 | SpacesBeforeTrailingComments: 2 156 | SpacesInAngles: false 157 | SpacesInContainerLiterals: true 158 | SpacesInLineCommentPrefix: 159 | Minimum: 1 160 | Maximum: 1 161 | SpacesInParens: Never 162 | SpacesInSquareBrackets: false 163 | Standard: c++20 164 | StatementAttributeLikeMacros: [ ] 165 | StatementMacros: [ 'Q_UNUSED', 'QT_REQUIRE_VERSION' ] 166 | TabWidth: 4 167 | TypeNames: [ ] 168 | TypenameMacros: [ ] 169 | UseTab: Never 170 | WhitespaceSensitiveMacros: [ 'STRINGIZE', 'PP_STRINGIZE', 'BOOST_PP_STRINGIZE' ] -------------------------------------------------------------------------------- /.clang-tidy: -------------------------------------------------------------------------------- 1 | # Generated from CLion Inspection settings 2 | --- 3 | Checks: '-*, 4 | cert-*, 5 | misc-*, 6 | mpi-*, 
7 | cppcoreguidelines-*, 8 | -cppcoreguidelines-avoid-do-while, 9 | -cppcoreguidelines-avoid-magic-numbers, 10 | -cppcoreguidelines-pro-bounds-pointer-arithmetic, 11 | -cppcoreguidelines-pro-bounds-constant-array-index, 12 | google-default-arguments, 13 | google-runtime-operator, 14 | google-explicit-constructor, 15 | hicpp-multiway-paths-covered, 16 | hicpp-exception-baseclass, 17 | modernize-*, 18 | -modernize-use-trailing-return-type, 19 | -modernize-avoid-c-arrays, 20 | -modernize-use-using, 21 | -modernize-use-default-member-init, 22 | -modernize-macro-to-enum, 23 | portability-simd-intrinsics, 24 | readability-*, 25 | -readability-redundant-preprocessor, 26 | -readability-named-parameter, 27 | -readability-function-size, 28 | -readability-simplify-boolean-expr, 29 | -readability-identifier-length, 30 | -readability-duplicate-include, 31 | -readability-magic-numbers, 32 | -readability-braces-around-statements, 33 | -readability-redundant-member-init, 34 | -readability-suspicious-call-argument, 35 | -readability-qualified-auto, 36 | -readability-isolate-declaration, 37 | -readability-uppercase-literal-suffix, 38 | -readability-container-data-pointer, 39 | -readability-else-after-return, 40 | -readability-redundant-access-specifiers, 41 | -readability-function-cognitive-complexity, 42 | -readability-implicit-bool-conversion, 43 | -readability-container-contains, 44 | -readability-identifier-naming, 45 | bugprone-*' 46 | 47 | CheckOptions: { misc-non-private-member-variables-in-classes.IgnoreClassesWithAllMemberVariablesBeingPublic: true } -------------------------------------------------------------------------------- /.figures/clock_frequency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SC-SGS/hardware_sampling/fbd284d7d18081c4b44fbff3b402afbe8a81610a/.figures/clock_frequency.png -------------------------------------------------------------------------------- 
/.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Generate documentation 2 | 3 | # only trigger this action on specific events 4 | on: 5 | push: 6 | branches: 7 | - main 8 | pull_request: 9 | branches: 10 | - main 11 | 12 | jobs: 13 | build-documentation: 14 | runs-on: ubuntu-latest 15 | steps: 16 | # checkout repository 17 | - name: Checkout hws 18 | uses: actions/checkout@v4.2.0 19 | with: 20 | path: hardware_sampling 21 | # install dependencies 22 | - name: Dependencies 23 | run: | 24 | sudo apt update 25 | sudo apt-get install -y doxygen graphviz 26 | # configure project via CMake 27 | - name: Configure 28 | run: | 29 | cd hardware_sampling 30 | mkdir build 31 | cd build 32 | cmake -DHWS_ENABLE_DOCUMENTATION=ON .. 33 | # build project 34 | - name: Generate 35 | run: | 36 | cd hardware_sampling/build 37 | make doc 38 | # deploy generated documentation using github.io 39 | - name: Deploy 40 | uses: peaceiris/actions-gh-pages@v4 41 | with: 42 | github_token: ${{ secrets.GITHUB_TOKEN }} 43 | publish_dir: ./hardware_sampling/docs/html -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # C++ ================================ 2 | # Prerequisites 3 | *.d 4 | 5 | # Compiled Object files 6 | *.slo 7 | *.lo 8 | *.o 9 | *.obj 10 | 11 | # Precompiled Headers 12 | *.gch 13 | *.pch 14 | 15 | # Compiled Dynamic libraries 16 | *.so 17 | *.dylib 18 | *.dll 19 | 20 | # Fortran module files 21 | *.mod 22 | *.smod 23 | 24 | # Compiled Static libraries 25 | *.lai 26 | *.la 27 | *.a 28 | *.lib 29 | 30 | # Executables 31 | *.exe 32 | *.out 33 | *.app 34 | 35 | 36 | # CMake ================================ 37 | bin/ 38 | build*/ 39 | docs/html 40 | install*/ 41 | cmake-build*/ 42 | CMakeLists.txt.user 43 | CMakeCache.txt 44 | CMakeSettings.json 45 | CMakeFiles 46 | CMakeScripts 47 | 
Testing 48 | Makefile 49 | cmake_install.cmake 50 | install_manifest.txt 51 | compile_commands.json 52 | CTestTestfile.cmake 53 | 54 | 55 | # IDEs ================================ 56 | .idea/ 57 | .vscode/ 58 | .vs/ 59 | 60 | # auto-generated version header 61 | include/hws/version.hpp -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: hws - Hardware Sampling for GPUs and CPUs 6 | message: >- 7 | If you use this software, please cite it using the 8 | metadata from this file. 9 | type: software 10 | authors: 11 | - given-names: Marcel 12 | family-names: Breyer 13 | email: Marcel.Breyer@ipvs.uni-stuttgart.de 14 | affiliation: University of Stuttgart 15 | orcid: 'https://orcid.org/0000-0003-3574-0650' 16 | - given-names: Alexander 17 | family-names: Van Craen 18 | email: Alexander.Van-Craen@ipvs.uni-stuttgart.de 19 | affiliation: University of Stuttgart 20 | orcid: 'https://orcid.org/0000-0002-3336-7226' 21 | - given-names: Dirk 22 | family-names: Pflüger 23 | email: Dirk.Pflueger@ipvs.uni-stuttgart.de 24 | orcid: 'https://orcid.org/0000-0002-4360-0212' 25 | affiliation: University of Stuttgart 26 | repository-code: 'https://github.com/SC-SGS/hardware_sampling' 27 | license: MIT 28 | version: v1.1.1 29 | date-released: '2025-04-29' 30 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE file in the project root for full license information. 
5 | ######################################################################################################################## 6 | 7 | cmake_minimum_required(VERSION 3.22) 8 | 9 | project("hws - Hardware Sampling for GPUs and CPUs" 10 | VERSION 1.1.1 11 | LANGUAGES CXX 12 | DESCRIPTION "Hardware sampling (e.g., clock frequencies, memory consumption, temperatures, or energy draw) for CPUs and GPUS.") 13 | 14 | # explicitly set library source files 15 | set(HWS_SOURCES 16 | ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/event.cpp 17 | ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/hardware_sampler.cpp 18 | ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/system_hardware_sampler.cpp 19 | ${CMAKE_CURRENT_SOURCE_DIR}/src/hws/utility.cpp 20 | ) 21 | 22 | # create hardware sampling library 23 | set(HWS_LIBRARY_NAME hws) 24 | add_library(${HWS_LIBRARY_NAME} SHARED ${HWS_SOURCES}) 25 | add_library(hws::hws ALIAS ${HWS_LIBRARY_NAME}) 26 | 27 | # set install target 28 | set(HWS_TARGETS_TO_INSTALL ) 29 | 30 | # use C++17 31 | target_compile_features(${HWS_LIBRARY_NAME} PUBLIC cxx_std_17) 32 | 33 | # add target include directory 34 | target_include_directories(${HWS_LIBRARY_NAME} PUBLIC 35 | $ 36 | $ 37 | ) 38 | 39 | # additional library compile options 40 | target_compile_options(${HWS_LIBRARY_NAME} PUBLIC 41 | $<$:$<$:-Wall -Wextra -Wdouble-promotion -fno-common -Wshadow -Wcast-qual 42 | -Wnull-dereference -Wnon-virtual-dtor -Wextra-semi -Wunreachable-code -Wuninitialized -Wno-ctor-dtor-privacy> 43 | $<$:-Wsuggest-override -Wstrict-null-sentinel -Wlogical-op -Wduplicated-branches -Wimplicit-fallthrough=5> 44 | $<$:-Wmost> 45 | $<$:/W4 /bigobj /wd4459 /Zc:lambda>> 46 | ) 47 | 48 | ## add option to enable/disable error checking in the hardware sampling functions 49 | option(HWS_ENABLE_ERROR_CHECKS "Enable error checking for the hardware sampling functions. May be problematic with smaller sample intervals." 
OFF) 50 | if (HWS_ENABLE_ERROR_CHECKS) 51 | message(STATUS "Enable error checks for the hardware sampling functions.") 52 | target_compile_definitions(${HWS_LIBRARY_NAME} PRIVATE HWS_ERROR_CHECKS_ENABLED) 53 | endif () 54 | 55 | # specify the sampling interval in milliseconds 56 | set(HWS_SAMPLING_INTERVAL "100" CACHE STRING "The interval in milliseconds in which the hardware information (like clock frequency or power draw) are queried.") 57 | if (NOT ${HWS_SAMPLING_INTERVAL} MATCHES "^[0-9]+$" OR ${HWS_SAMPLING_INTERVAL} LESS_EQUAL 0) 58 | message(FATAL_ERROR "The HWS_SAMPLING_INTERVAL must be a natural number greater 0, but is \"${HWS_SAMPLING_INTERVAL}\"!") 59 | endif () 60 | message(STATUS "Setting the hardware sampler interval to ${HWS_SAMPLING_INTERVAL}ms.") 61 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_SAMPLING_INTERVAL=${HWS_SAMPLING_INTERVAL}ms) 62 | 63 | # install fmt as dependency 64 | include(FetchContent) 65 | set(HWS_fmt_VERSION 11.0.2) 66 | find_package(fmt 11.0.2 QUIET) 67 | if (fmt_FOUND) 68 | message(STATUS "Found package fmt.") 69 | else () 70 | message(STATUS "Couldn't find package fmt. 
Building version ${HWS_fmt_VERSION} from source.") 71 | set(FMT_PEDANTIC OFF CACHE INTERNAL "" FORCE) 72 | set(FMT_WERROR OFF CACHE INTERNAL "" FORCE) 73 | set(FMT_DOC OFF CACHE INTERNAL "" FORCE) 74 | set(FMT_INSTALL ON CACHE INTERNAL "" FORCE) # let {fmt} handle the install target 75 | set(FMT_TEST OFF CACHE INTERNAL "" FORCE) 76 | set(FMT_FUZZ OFF CACHE INTERNAL "" FORCE) 77 | set(FMT_CUDA_TEST OFF CACHE INTERNAL "" FORCE) 78 | set(FMT_MODULE OFF CACHE INTERNAL "" FORCE) 79 | set(FMT_SYSTEM_HEADERS ON CACHE INTERNAL "" FORCE) 80 | # fetch string formatting library fmt 81 | FetchContent_Declare(fmt 82 | GIT_REPOSITORY https://github.com/fmtlib/fmt.git 83 | GIT_TAG ${HWS_fmt_VERSION} 84 | QUIET 85 | ) 86 | FetchContent_MakeAvailable(fmt) 87 | set_property(TARGET fmt PROPERTY POSITION_INDEPENDENT_CODE ON) 88 | add_dependencies(${HWS_LIBRARY_NAME} fmt) 89 | endif () 90 | target_link_libraries(${HWS_LIBRARY_NAME} PUBLIC fmt::fmt) 91 | 92 | ######################################################################################################################## 93 | ## configure version header ## 94 | ######################################################################################################################## 95 | message(STATUS "Configuring version information.") 96 | configure_file( 97 | ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp.in 98 | ${CMAKE_CURRENT_SOURCE_DIR}/include/hws/version.hpp 99 | @ONLY 100 | ) 101 | 102 | #################################################################################################################### 103 | ## CPU measurements ## 104 | #################################################################################################################### 105 | set(HWS_ENABLE_CPU_SAMPLING AUTO CACHE STRING "Enable sampling of CPUs.") 106 | set_property(CACHE HWS_ENABLE_CPU_SAMPLING PROPERTY STRINGS AUTO ON OFF) 107 | if (HWS_ENABLE_CPU_SAMPLING MATCHES "AUTO" OR HWS_ENABLE_CPU_SAMPLING) 108 | 
add_subdirectory(src/hws/cpu) 109 | else () 110 | message(STATUS "Hardware sampling for CPUs disabled!") 111 | endif () 112 | 113 | 114 | #################################################################################################################### 115 | ## NVIDIA GPU sampling via NVML ## 116 | #################################################################################################################### 117 | set(HWS_ENABLE_GPU_NVIDIA_SAMPLING AUTO CACHE STRING "Enable sampling of NVIDIA GPUs.") 118 | set_property(CACHE HWS_ENABLE_GPU_NVIDIA_SAMPLING PROPERTY STRINGS AUTO ON OFF) 119 | if (HWS_ENABLE_GPU_NVIDIA_SAMPLING MATCHES "AUTO" OR HWS_ENABLE_GPU_NVIDIA_SAMPLING) 120 | add_subdirectory(src/hws/gpu_nvidia) 121 | else () 122 | message(STATUS "Hardware sampling for NVIDIA GPUs disabled!") 123 | endif () 124 | 125 | 126 | #################################################################################################################### 127 | ## AMD GPU sampling via ROCm SMI lib ## 128 | #################################################################################################################### 129 | set(HWS_ENABLE_GPU_AMD_SAMPLING AUTO CACHE STRING "Enable sampling of AMD GPUs.") 130 | set_property(CACHE HWS_ENABLE_GPU_AMD_SAMPLING PROPERTY STRINGS AUTO ON OFF) 131 | if (HWS_ENABLE_GPU_AMD_SAMPLING MATCHES "AUTO" OR HWS_ENABLE_GPU_AMD_SAMPLING) 132 | add_subdirectory(src/hws/gpu_amd) 133 | else () 134 | message(STATUS "Hardware sampling for AMD GPUs disabled!") 135 | endif () 136 | 137 | 138 | #################################################################################################################### 139 | ## Intel GPU sampling via Level Zero ## 140 | #################################################################################################################### 141 | set(HWS_ENABLE_GPU_INTEL_SAMPLING AUTO CACHE STRING "Enable sampling of Intel GPUs.") 142 | set_property(CACHE HWS_ENABLE_GPU_INTEL_SAMPLING PROPERTY STRINGS AUTO ON 
OFF) 143 | if (HWS_ENABLE_GPU_INTEL_SAMPLING MATCHES "AUTO" OR HWS_ENABLE_GPU_INTEL_SAMPLING) 144 | add_subdirectory(src/hws/gpu_intel) 145 | else () 146 | message(STATUS "Hardware sampling for Intel GPUs disabled!") 147 | endif () 148 | 149 | 150 | #################################################################################################################### 151 | ## enable Python bindings ## 152 | #################################################################################################################### 153 | option(HWS_ENABLE_PYTHON_BINDINGS "Build language bindings for Python." ON) 154 | if (HWS_ENABLE_PYTHON_BINDINGS) 155 | add_subdirectory(bindings) 156 | endif () 157 | 158 | 159 | ######################################################################################################################## 160 | ## add documentation ## 161 | ######################################################################################################################## 162 | option(HWS_ENABLE_DOCUMENTATION "Add documentation using Doxygen." 
OFF) 163 | if (HWS_ENABLE_DOCUMENTATION) 164 | add_subdirectory(docs) 165 | endif () 166 | 167 | 168 | ######################################################################################################################## 169 | ## add support for `make install` ## 170 | ######################################################################################################################## 171 | include(GNUInstallDirs) 172 | ## install all necessary library targets 173 | install(TARGETS ${HWS_LIBRARY_NAME} 174 | EXPORT hws_Targets 175 | ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all files that are neither executables, shared lib or headers 176 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files 177 | RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" # all executables 178 | ) 179 | 180 | ## mark header to install via 'make install' 181 | install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include/" 182 | DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" 183 | ) 184 | 185 | ## manage version comparison 186 | include(CMakePackageConfigHelpers) 187 | write_basic_package_version_file( 188 | "hwsConfigVersion.cmake" 189 | VERSION ${PROJECT_VERSION} 190 | COMPATIBILITY SameMajorVersion 191 | ) 192 | 193 | ## generate configuration file 194 | configure_package_config_file( 195 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake/hwsConfig.cmake.in" 196 | "${PROJECT_BINARY_DIR}/hwsConfig.cmake" 197 | INSTALL_DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake 198 | ) 199 | 200 | ## create and copy install-targets file 201 | install(EXPORT hws_Targets 202 | FILE hwsTargets.cmake 203 | NAMESPACE hws:: 204 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake 205 | ) 206 | 207 | ## create file containing the build configuration and version information 208 | install(FILES 209 | "${PROJECT_BINARY_DIR}/hwsConfig.cmake" 210 | "${PROJECT_BINARY_DIR}/hwsConfigVersion.cmake" 211 | "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Findlevel_zero.cmake" 212 | DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/hws/cmake 
213 | ) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Scientific Computing (SC) and Simulation of Large Systems (SGS) @ University of Stuttgart 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /bindings/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE file in the project root for full license information. 
5 | ######################################################################################################################## 6 | 7 | message(STATUS "Building Python language bindings.") 8 | 9 | find_package(Python COMPONENTS Interpreter Development) 10 | 11 | ## try finding pybind11 12 | set(HWS_pybind11_VERSION master) 13 | find_package(pybind11 2.13.1 QUIET) 14 | if (pybind11_FOUND) 15 | message(STATUS "Found package pybind11.") 16 | else () 17 | message(STATUS "Couldn't find package pybind11. Building version ${HWS_pybind11_VERSION} from source.") 18 | # fetch pybind11 library for creating Python bindings 19 | FetchContent_Declare(pybind11 20 | GIT_REPOSITORY https://github.com/pybind/pybind11.git 21 | GIT_TAG ${HWS_pybind11_VERSION} 22 | GIT_SHALLOW TRUE 23 | QUIET 24 | ) 25 | FetchContent_MakeAvailable(pybind11) 26 | target_include_directories(${HWS_LIBRARY_NAME} PUBLIC 27 | $ 28 | $ 29 | ) 30 | endif () 31 | 32 | # set source files that are always used 33 | set(HWS_PYTHON_BINDINGS_SOURCES 34 | ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp 35 | ${CMAKE_CURRENT_SOURCE_DIR}/relative_event.cpp 36 | ${CMAKE_CURRENT_SOURCE_DIR}/hardware_sampler.cpp 37 | ${CMAKE_CURRENT_SOURCE_DIR}/sample_category.cpp 38 | ${CMAKE_CURRENT_SOURCE_DIR}/system_hardware_sampler.cpp 39 | ${CMAKE_CURRENT_SOURCE_DIR}/version.cpp 40 | ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp 41 | ) 42 | 43 | # add hardware sampling specific source files if the respective sampling is used 44 | get_target_property(HWS_COMPILE_DEFINITIONS ${HWS_LIBRARY_NAME} COMPILE_DEFINITIONS) 45 | if ("HWS_FOR_CPUS_ENABLED" IN_LIST HWS_COMPILE_DEFINITIONS) 46 | list(APPEND HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cpu_hardware_sampler.cpp) 47 | endif () 48 | if ("HWS_FOR_NVIDIA_GPUS_ENABLED" IN_LIST HWS_COMPILE_DEFINITIONS) 49 | list(APPEND HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gpu_nvidia_hardware_sampler.cpp) 50 | endif () 51 | if ("HWS_FOR_AMD_GPUS_ENABLED" IN_LIST HWS_COMPILE_DEFINITIONS) 52 | 
list(APPEND HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gpu_amd_hardware_sampler.cpp) 53 | endif () 54 | if ("HWS_FOR_INTEL_GPUS_ENABLED" IN_LIST HWS_COMPILE_DEFINITIONS) 55 | list(APPEND HWS_PYTHON_BINDINGS_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/gpu_intel_hardware_sampler.cpp) 56 | endif () 57 | 58 | # create pybind11 module 59 | set(HWS_PYTHON_BINDINGS_LIBRARY_NAME HardwareSampling) 60 | pybind11_add_module(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} ${HWS_PYTHON_BINDINGS_SOURCES}) 61 | 62 | # add necessary compile options 63 | target_include_directories(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}/..) 64 | target_link_libraries(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE ${HWS_LIBRARY_NAME}) 65 | target_compile_definitions(${HWS_PYTHON_BINDINGS_LIBRARY_NAME} PRIVATE PYBIND11_DETAILED_ERROR_MESSAGES) 66 | 67 | include(GNUInstallDirs) 68 | # install Python bindings 69 | install(TARGETS ${HWS_PYTHON_BINDINGS_LIBRARY_NAME} 70 | LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" # all shared lib files 71 | ) 72 | 73 | # install the __init__.py file so Python recognizes the package when installed via pip 74 | install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/__init__.py" 75 | DESTINATION "${CMAKE_INSTALL_LIBDIR}" 76 | ) -------------------------------------------------------------------------------- /bindings/__init__.py: -------------------------------------------------------------------------------- 1 | # import all bindings from the compiled hws module 2 | from .HardwareSampling import * 3 | # explicitly set the module level attributes 4 | __doc__ = HardwareSampling.__doc__ 5 | __version__ = HardwareSampling.__version__ -------------------------------------------------------------------------------- /bindings/cpu_hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT 
license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} 9 | #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler 10 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 11 | #include "hws/sample_category.hpp" // hws::sample_category 12 | 13 | #include "fmt/format.h" // fmt::format 14 | #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds 15 | #include "pybind11/pybind11.h" // py::module_ 16 | #include "pybind11/stl.h" // bind STL types 17 | 18 | #include // std::chrono::milliseconds 19 | 20 | namespace py = pybind11; 21 | 22 | void init_cpu_hardware_sampler(py::module_ &m) { 23 | // bind the general samples 24 | py::class_(m, "CpuGeneralSamples") 25 | .def("has_samples", &hws::cpu_general_samples::has_samples, "true if any sample is available, false otherwise") 26 | .def("get_architecture", &hws::cpu_general_samples::get_architecture, "the CPU architecture (e.g., x86_64)") 27 | .def("get_byte_order", &hws::cpu_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") 28 | .def("get_num_cores", &hws::cpu_general_samples::get_num_cores, "the total number of cores of the CPU(s)") 29 | .def("get_num_threads", &hws::cpu_general_samples::get_num_threads, "the number of threads of the CPU(s) including potential hyper-threads") 30 | .def("get_threads_per_core", &hws::cpu_general_samples::get_threads_per_core, "the number of hyper-threads per core") 31 | .def("get_cores_per_socket", &hws::cpu_general_samples::get_cores_per_socket, "the number of physical cores per socket") 32 | .def("get_num_sockets", &hws::cpu_general_samples::get_num_sockets, "the number of sockets") 33 | .def("get_numa_nodes", &hws::cpu_general_samples::get_numa_nodes, "the number of NUMA nodes") 34 | .def("get_vendor_id", 
&hws::cpu_general_samples::get_vendor_id, "the vendor ID (e.g. GenuineIntel)") 35 | .def("get_name", &hws::cpu_general_samples::get_name, "the name of the CPU") 36 | .def("get_flags", &hws::cpu_general_samples::get_flags, "potential CPU flags (e.g., sse4_1, avx, avx, etc)") 37 | .def("get_compute_utilization", &hws::cpu_general_samples::get_compute_utilization, "the percent the CPU was busy doing work") 38 | .def("get_ipc", &hws::cpu_general_samples::get_ipc, "the instructions-per-cycle count") 39 | .def("get_irq", &hws::cpu_general_samples::get_irq, "the number of interrupts") 40 | .def("get_smi", &hws::cpu_general_samples::get_smi, "the number of system management interrupts") 41 | .def("get_poll", &hws::cpu_general_samples::get_poll, "the number of times the CPU was in the polling state") 42 | .def("get_poll_percent", &hws::cpu_general_samples::get_poll_percent, "the percent of the CPU was in the polling state") 43 | .def("__repr__", [](const hws::cpu_general_samples &self) { 44 | return fmt::format("", self); 45 | }); 46 | 47 | // bind the clock samples 48 | py::class_(m, "CpuClockSamples") 49 | .def("has_samples", &hws::cpu_clock_samples::has_samples, "true if any sample is available, false otherwise") 50 | .def("get_auto_boosted_clock_enabled", &hws::cpu_clock_samples::get_auto_boosted_clock_enabled, "true if frequency boosting is enabled") 51 | .def("get_clock_frequency_min", &hws::cpu_clock_samples::get_clock_frequency_min, "the minimum possible CPU frequency in MHz") 52 | .def("get_clock_frequency_max", &hws::cpu_clock_samples::get_clock_frequency_max, "the maximum possible CPU frequency in MHz") 53 | .def("get_clock_frequency", &hws::cpu_clock_samples::get_clock_frequency, "the average CPU frequency in MHz including idle cores") 54 | .def("get_average_non_idle_clock_frequency", &hws::cpu_clock_samples::get_average_non_idle_clock_frequency, "the average CPU frequency in MHz excluding idle cores") 55 | .def("get_time_stamp_counter", 
&hws::cpu_clock_samples::get_time_stamp_counter, "the time stamp counter") 56 | .def("__repr__", [](const hws::cpu_clock_samples &self) { 57 | return fmt::format("", self); 58 | }); 59 | 60 | // bind the power samples 61 | py::class_(m, "CpuPowerSamples") 62 | .def("has_samples", &hws::cpu_power_samples::has_samples, "true if any sample is available, false otherwise") 63 | .def("get_power_measurement_type", &hws::cpu_power_samples::get_power_measurement_type, "the type of the power readings: always \"instant/current\"") 64 | .def("get_power_usage", &hws::cpu_power_samples::get_power_usage, "the currently consumed power of the package of the CPU in W") 65 | .def("get_power_total_energy_consumed", &hws::cpu_power_samples::get_power_total_energy_consumption, "the total power consumption in J") 66 | .def("get_core_watt", &hws::cpu_power_samples::get_core_watt, "the currently consumed power of the core part of the CPU in W") 67 | .def("get_ram_watt", &hws::cpu_power_samples::get_ram_watt, "the currently consumed power of the RAM part of the CPU in W") 68 | .def("get_package_rapl_throttle_percent", &hws::cpu_power_samples::get_package_rapl_throttle_percent, "the percent of time the package throttled due to RAPL limiters") 69 | .def("get_dram_rapl_throttle_percent", &hws::cpu_power_samples::get_dram_rapl_throttle_percent, "the percent of time the DRAM throttled due to RAPL limiters") 70 | .def("__repr__", [](const hws::cpu_power_samples &self) { 71 | return fmt::format("", self); 72 | }); 73 | 74 | // bind the memory samples 75 | py::class_(m, "CpuMemorySamples") 76 | .def("has_samples", &hws::cpu_memory_samples::has_samples, "true if any sample is available, false otherwise") 77 | .def("get_cache_size_L1d", &hws::cpu_memory_samples::get_cache_size_L1d, "the size of the L1 data cache") 78 | .def("get_cache_size_L1i", &hws::cpu_memory_samples::get_cache_size_L1i, "the size of the L1 instruction cache") 79 | .def("get_cache_size_L2", 
&hws::cpu_memory_samples::get_cache_size_L2, "the size of the L2 cache") 80 | .def("get_cache_size_L3", &hws::cpu_memory_samples::get_cache_size_L3, "the size of the L2 cache") 81 | .def("get_memory_total", &hws::cpu_memory_samples::get_memory_total, "the total available memory in Byte") 82 | .def("get_swap_memory_total", &hws::cpu_memory_samples::get_swap_memory_total, "the total available swap memory in Byte") 83 | .def("get_memory_used", &hws::cpu_memory_samples::get_memory_used, "the currently used memory in Byte") 84 | .def("get_memory_free", &hws::cpu_memory_samples::get_memory_free, "the currently free memory in Byte") 85 | .def("get_swap_memory_used", &hws::cpu_memory_samples::get_swap_memory_used, "the currently used swap memory in Byte") 86 | .def("get_swap_memory_free", &hws::cpu_memory_samples::get_swap_memory_free, "the currently free swap memory in Byte") 87 | .def("__repr__", [](const hws::cpu_memory_samples &self) { 88 | return fmt::format("", self); 89 | }); 90 | 91 | // bind the temperature samples 92 | py::class_(m, "CpuTemperatureSamples") 93 | .def("has_samples", &hws::cpu_temperature_samples::has_samples, "true if any sample is available, false otherwise") 94 | .def("get_temperature", &hws::cpu_temperature_samples::get_temperature, "the current temperature of the whole package in °C") 95 | .def("get_core_temperature", &hws::cpu_temperature_samples::get_core_temperature, "the current temperature of the core part of the CPU in °C") 96 | .def("get_core_throttle_percent", &hws::cpu_temperature_samples::get_core_throttle_percent, "the percent of time the CPU has throttled") 97 | .def("__repr__", [](const hws::cpu_temperature_samples &self) { 98 | return fmt::format("", self); 99 | }); 100 | 101 | // bind the gfx samples 102 | py::class_(m, "CpuGfxSamples") 103 | .def("has_samples", &hws::cpu_gfx_samples::has_samples, "true if any sample is available, false otherwise") 104 | .def("get_gfx_render_state_percent", 
&hws::cpu_gfx_samples::get_gfx_render_state_percent, "the percent of time the iGPU was in the render state") 105 | .def("get_gfx_frequency", &hws::cpu_gfx_samples::get_gfx_frequency, "the current iGPU power consumption in W") 106 | .def("get_average_gfx_frequency", &hws::cpu_gfx_samples::get_average_gfx_frequency, "the average iGPU frequency in MHz") 107 | .def("get_gfx_state_c0_percent", &hws::cpu_gfx_samples::get_gfx_state_c0_percent, "the percent of the time the iGPU was in the c0 state") 108 | .def("get_cpu_works_for_gpu_percent", &hws::cpu_gfx_samples::get_cpu_works_for_gpu_percent, "the percent of time the CPU was doing work for the iGPU") 109 | .def("get_gfx_watt", &hws::cpu_gfx_samples::get_gfx_watt, "the currently consumed power of the iGPU of the CPU in W") 110 | .def("__repr__", [](const hws::cpu_gfx_samples &self) { 111 | return fmt::format("", self); 112 | }); 113 | 114 | // bind the idle state samples 115 | py::class_(m, "CpuIdleStateSamples") 116 | .def("has_samples", &hws::cpu_idle_states_samples::has_samples, "true if any sample is available, false otherwise") 117 | .def("get_idle_states", &hws::cpu_idle_states_samples::get_idle_states, "the map of additional CPU idle states") 118 | .def("get_all_cpus_state_c0_percent", &hws::cpu_idle_states_samples::get_all_cpus_state_c0_percent, "the percent of time all CPUs were in idle state c0") 119 | .def("get_any_cpu_state_c0_percent", &hws::cpu_idle_states_samples::get_any_cpu_state_c0_percent, "the percent of time any CPU was in the idle state c0") 120 | .def("get_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_low_power_idle_state_percent, "the percent of time the CPUs was in the low power idle state") 121 | .def("get_system_low_power_idle_state_percent", &hws::cpu_idle_states_samples::get_system_low_power_idle_state_percent, "the percent of time the CPU was in the system low power idle state") 122 | .def("get_package_low_power_idle_state_percent", 
&hws::cpu_idle_states_samples::get_package_low_power_idle_state_percent, "the percent of time the CPU was in the package low power idle state") 123 | .def("__repr__", [](const hws::cpu_gfx_samples &self) { 124 | return fmt::format("", self); 125 | }); 126 | 127 | // bind the CPU hardware sampler class 128 | py::class_(m, "CpuHardwareSampler") 129 | .def(py::init<>(), "construct a new CPU hardware sampler") 130 | .def(py::init(), "construct a new CPU hardware sampler sampling only the provided sample_category samples") 131 | .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval") 132 | .def(py::init(), "construct a new CPU hardware sampler specifying the used sampling interval sampling only the provided sample_category samples") 133 | .def("general_samples", &hws::cpu_hardware_sampler::general_samples, "get all general samples") 134 | .def("clock_samples", &hws::cpu_hardware_sampler::clock_samples, "get all clock related samples") 135 | .def("power_samples", &hws::cpu_hardware_sampler::power_samples, "get all power related samples") 136 | .def("memory_samples", &hws::cpu_hardware_sampler::memory_samples, "get all memory related samples") 137 | .def("temperature_samples", &hws::cpu_hardware_sampler::temperature_samples, "get all temperature related samples") 138 | .def("gfx_samples", &hws::cpu_hardware_sampler::gfx_samples, "get all gfx (iGPU) related samples") 139 | .def("idle_state_samples", &hws::cpu_hardware_sampler::idle_state_samples, "get all idle state related samples") 140 | .def("samples_only_as_yaml_string", &hws::cpu_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") 141 | .def("__repr__", [](const hws::cpu_hardware_sampler &self) { 142 | return fmt::format("", self); 143 | }); 144 | } 145 | -------------------------------------------------------------------------------- /bindings/event.cpp: -------------------------------------------------------------------------------- 1 | 
/** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/event.hpp" // hws::event 9 | 10 | #include "fmt/format.h" // fmt::format 11 | #include "pybind11/chrono.h" // bind std::chrono types 12 | #include "pybind11/pybind11.h" // py::module_ 13 | #include "pybind11/stl.h" // bind STL types 14 | 15 | namespace py = pybind11; 16 | 17 | void init_event(py::module_ &m) { 18 | // bind a single event 19 | py::class_(m, "Event") 20 | .def(py::init(), "construct a new event using a time point and a name") 21 | .def_readonly("time_point", &hws::event::time_point, "read the time point associated to this event") 22 | .def_readonly("name", &hws::event::name, "read the name associated to this event") 23 | .def("__repr__", [](const hws::event &self) { 24 | return fmt::format("", self.time_point.time_since_epoch(), self.name); 25 | }); 26 | } 27 | -------------------------------------------------------------------------------- /bindings/gpu_intel_hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 
6 | */ 7 | 8 | #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler 9 | #include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} 10 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 11 | #include "hws/sample_category.hpp" // hws::sample_category 12 | 13 | #include "fmt/format.h" // fmt::format 14 | #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds 15 | #include "pybind11/pybind11.h" // py::module_ 16 | #include "pybind11/stl.h" // bind STL types 17 | 18 | #include // std::chrono::milliseconds 19 | #include // std::size_t 20 | 21 | namespace py = pybind11; 22 | 23 | void init_gpu_intel_hardware_sampler(py::module_ &m) { 24 | // bind the general samples 25 | py::class_(m, "LevelZeroGeneralSamples") 26 | .def("has_samples", &hws::level_zero_general_samples::has_samples, "true if any sample is available, false otherwise") 27 | .def("get_byte_order", &hws::level_zero_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") 28 | .def("get_vendor_id", &hws::level_zero_general_samples::get_vendor_id, "the vendor ID") 29 | .def("get_name", &hws::level_zero_general_samples::get_name, "the model name of the device") 30 | .def("get_flags", &hws::level_zero_general_samples::get_flags, "potential GPU flags (e.g. 
integrated device)") 31 | .def("get_standby_mode", &hws::level_zero_general_samples::get_standby_mode, "the enabled standby mode (power saving or never)") 32 | .def("get_num_threads_per_eu", &hws::level_zero_general_samples::get_num_threads_per_eu, "the number of threads per EU unit") 33 | .def("get_eu_simd_width", &hws::level_zero_general_samples::get_eu_simd_width, "the physical EU unit SIMD width") 34 | .def("__repr__", [](const hws::level_zero_general_samples &self) { 35 | return fmt::format("", self); 36 | }); 37 | 38 | // bind the clock samples 39 | py::class_(m, "LevelZeroClockSamples") 40 | .def("has_samples", &hws::level_zero_clock_samples::has_samples, "true if any sample is available, false otherwise") 41 | .def("get_clock_frequency_min", &hws::level_zero_clock_samples::get_clock_frequency_min, "the minimum possible GPU clock frequency in MHz") 42 | .def("get_clock_frequency_max", &hws::level_zero_clock_samples::get_clock_frequency_max, "the maximum possible GPU clock frequency in MHz") 43 | .def("get_memory_clock_frequency_min", &hws::level_zero_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") 44 | .def("get_memory_clock_frequency_max", &hws::level_zero_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") 45 | .def("get_available_clock_frequencies", &hws::level_zero_clock_samples::get_available_clock_frequencies, "the available GPU clock frequencies in MHz (slowest to fastest)") 46 | .def("get_available_memory_clock_frequencies", &hws::level_zero_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") 47 | .def("get_clock_frequency", &hws::level_zero_clock_samples::get_clock_frequency, "the current GPU frequency in MHz") 48 | .def("get_memory_clock_frequency", &hws::level_zero_clock_samples::get_memory_clock_frequency, "the current memory frequency in MHz") 49 | 
.def("get_throttle_reason", &hws::level_zero_clock_samples::get_throttle_reason, "the current GPU frequency throttle reason (as bitmask)") 50 | .def("get_throttle_reason_string", &hws::level_zero_clock_samples::get_throttle_reason_string, "the current GPU frequency throttle reason (as string)") 51 | .def("get_memory_throttle_reason", &hws::level_zero_clock_samples::get_memory_throttle_reason, "the current memory frequency throttle reason (as bitmask)") 52 | .def("get_memory_throttle_reason_string", &hws::level_zero_clock_samples::get_memory_throttle_reason_string, "the current memory frequency throttle reason (as string)") 53 | .def("get_frequency_limit_tdp", &hws::level_zero_clock_samples::get_frequency_limit_tdp, "the current maximum allowed GPU frequency based on the TDP limit in MHz") 54 | .def("get_memory_frequency_limit_tdp", &hws::level_zero_clock_samples::get_memory_frequency_limit_tdp, "the current maximum allowed memory frequency based on the TDP limit in MHz") 55 | .def("__repr__", [](const hws::level_zero_clock_samples &self) { 56 | return fmt::format("", self); 57 | }); 58 | 59 | // bind the power samples 60 | py::class_(m, "LevelZeroPowerSamples") 61 | .def("has_samples", &hws::level_zero_power_samples::has_samples, "true if any sample is available, false otherwise") 62 | .def("get_power_enforced_limit", &hws::level_zero_power_samples::get_power_enforced_limit, "the actually enforced power limit (W), may be different from power management limit if external limiters are set") 63 | .def("get_power_measurement_type", &hws::level_zero_power_samples::get_power_measurement_type, "the type of the power readings") 64 | .def("get_power_management_mode", &hws::level_zero_power_samples::get_power_management_mode, "true if power management limits are enabled") 65 | .def("get_power_usage", &hws::level_zero_power_samples::get_power_usage, "the current power draw of the GPU in W (calculated from power_total_energy_consumption)") 66 | 
.def("get_power_total_energy_consumption", &hws::level_zero_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in J") 67 | .def("__repr__", [](const hws::level_zero_power_samples &self) { 68 | return fmt::format("", self); 69 | }); 70 | 71 | // bind the memory samples 72 | py::class_(m, "LevelZeroMemorySamples") 73 | .def("has_samples", &hws::level_zero_memory_samples::has_samples, "true if any sample is available, false otherwise") 74 | .def("get_memory_total", &hws::level_zero_memory_samples::get_memory_total, "the total memory size of the different memory modules in Bytes") 75 | .def("get_visible_memory_total", &hws::level_zero_memory_samples::get_visible_memory_total, "the total allocatable memory size of the different memory modules in Bytes") 76 | .def("get_memory_location", &hws::level_zero_memory_samples::get_memory_location, "the location of the different memory modules (system or device)") 77 | .def("get_num_pcie_lanes_max", &hws::level_zero_memory_samples::get_num_pcie_lanes_max, "the PCIe lane width") 78 | .def("get_pcie_link_generation_max", &hws::level_zero_memory_samples::get_pcie_link_generation_max, "the PCIe generation") 79 | .def("get_pcie_link_speed_max", &hws::level_zero_memory_samples::get_pcie_link_speed_max, "the maximum PCIe bandwidth in bytes/sec") 80 | .def("get_memory_bus_width", &hws::level_zero_memory_samples::get_memory_bus_width, "the bus width of the different memory modules") 81 | .def("get_memory_num_channels", &hws::level_zero_memory_samples::get_memory_num_channels, "the number of memory channels of the different memory modules") 82 | .def("get_memory_free", &hws::level_zero_memory_samples::get_memory_free, "the currently free memory of the different memory modules in Bytes") 83 | .def("get_memory_used", &hws::level_zero_memory_samples::get_memory_used, "the currently used memory of the different memory modules in Bytes") 84 | .def("get_num_pcie_lanes", 
&hws::level_zero_memory_samples::get_num_pcie_lanes, "the current PCIe lane width") 85 | .def("get_pcie_link_generation", &hws::level_zero_memory_samples::get_pcie_link_generation, "the current PCIe generation") 86 | .def("get_pcie_link_speed", &hws::level_zero_memory_samples::get_pcie_link_speed, "the current PCIe bandwidth in bytes/sec") 87 | .def("__repr__", [](const hws::level_zero_memory_samples &self) { 88 | return fmt::format("", self); 89 | }); 90 | 91 | // bind the temperature samples 92 | py::class_(m, "LevelZeroTemperatureSamples") 93 | .def("has_samples", &hws::level_zero_temperature_samples::has_samples, "true if any sample is available, false otherwise") 94 | .def("get_num_fans", &hws::level_zero_temperature_samples::get_num_fans, "the number of fans") 95 | .def("get_fan_speed_max", &hws::level_zero_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in RPM") 96 | .def("get_temperature_max", &hws::level_zero_temperature_samples::get_temperature_max, "the maximum GPU temperature in °C") 97 | .def("get_memory_temperature_max", &hws::level_zero_temperature_samples::get_memory_temperature_max, "the maximum memory temperature in °C") 98 | .def("get_global_temperature_max", &hws::level_zero_temperature_samples::get_global_temperature_max, "the maximum global temperature in °C") 99 | .def("get_fan_speed_percentage", &hws::level_zero_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") 100 | .def("get_temperature", &hws::level_zero_temperature_samples::get_temperature, "the current GPU temperature in °C") 101 | .def("get_memory_temperature", &hws::level_zero_temperature_samples::get_memory_temperature, "the current memory temperature in °C") 102 | .def("get_global_temperature", &hws::level_zero_temperature_samples::get_global_temperature, "the current global temperature in °C") 103 | .def("get_psu_temperature", &hws::level_zero_temperature_samples::get_psu_temperature, "the current PSU temperature 
in °C") 104 | .def("__repr__", [](const hws::level_zero_temperature_samples &self) { 105 | return fmt::format("", self); 106 | }); 107 | 108 | // bind the GPU Intel hardware sampler class 109 | py::class_(m, "GpuIntelHardwareSampler") 110 | .def(py::init<>(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval") 111 | .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") 112 | .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval") 113 | .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") 114 | .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval") 115 | .def(py::init(), "construct a new Intel GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") 116 | .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval") 117 | .def(py::init(), "construct a new Intel GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") 118 | .def("general_samples", &hws::gpu_intel_hardware_sampler::general_samples, "get all general samples") 119 | .def("clock_samples", &hws::gpu_intel_hardware_sampler::clock_samples, "get all clock related samples") 120 | .def("power_samples", &hws::gpu_intel_hardware_sampler::power_samples, "get all power related samples") 121 | .def("memory_samples", &hws::gpu_intel_hardware_sampler::memory_samples, "get all memory related samples") 122 | .def("temperature_samples", &hws::gpu_intel_hardware_sampler::temperature_samples, "get all temperature 
related samples") 123 | .def("samples_only_as_yaml_string", &hws::gpu_intel_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") 124 | .def("__repr__", [](const hws::gpu_intel_hardware_sampler &self) { 125 | return fmt::format("", self); 126 | }); 127 | } 128 | -------------------------------------------------------------------------------- /bindings/gpu_nvidia_hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler 9 | #include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} 10 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 11 | #include "hws/sample_category.hpp" // hws::sample_category 12 | 13 | #include "fmt/format.h" // fmt::format 14 | #include "pybind11/chrono.h" // automatic bindings for std::chrono::milliseconds 15 | #include "pybind11/pybind11.h" // py::module_ 16 | #include "pybind11/stl.h" // bind STL types 17 | 18 | #include // std::chrono::milliseconds 19 | #include // std::size_t 20 | 21 | namespace py = pybind11; 22 | 23 | void init_gpu_nvidia_hardware_sampler(py::module_ &m) { 24 | // bind the general samples 25 | py::class_(m, "NvmlGeneralSamples") 26 | .def("has_samples", &hws::nvml_general_samples::has_samples, "true if any sample is available, false otherwise") 27 | .def("get_architecture", &hws::nvml_general_samples::get_architecture, "the architecture name of the device") 28 | .def("get_byte_order", &hws::nvml_general_samples::get_byte_order, "the byte order (e.g., little/big endian)") 29 | .def("get_num_cores", 
&hws::nvml_general_samples::get_num_cores, "the number of CUDA cores") 30 | .def("get_vendor_id", &hws::nvml_general_samples::get_vendor_id, "the vendor ID") 31 | .def("get_name", &hws::nvml_general_samples::get_name, "the name of the device") 32 | .def("get_persistence_mode", &hws::nvml_general_samples::get_persistence_mode, "the persistence mode: if true, the driver is always loaded reducing the latency for the first API call") 33 | .def("get_compute_utilization", &hws::nvml_general_samples::get_compute_utilization, "the GPU compute utilization in percent") 34 | .def("get_memory_utilization", &hws::nvml_general_samples::get_memory_utilization, "the GPU memory utilization in percent") 35 | .def("get_performance_level", &hws::nvml_general_samples::get_performance_level, "the performance state: 0 - 15 where 0 is the maximum performance and 15 the minimum performance") 36 | .def("__repr__", [](const hws::nvml_general_samples &self) { 37 | return fmt::format("", self); 38 | }); 39 | 40 | // bind the clock samples 41 | py::class_(m, "NvmlClockSamples") 42 | .def("has_samples", &hws::nvml_clock_samples::has_samples, "true if any sample is available, false otherwise") 43 | .def("get_auto_boosted_clock_enabled", &hws::nvml_clock_samples::get_auto_boosted_clock_enabled, "true if clock boosting is currently enabled") 44 | .def("get_clock_frequency_min", &hws::nvml_clock_samples::get_clock_frequency_min, "the minimum possible graphics clock frequency in MHz") 45 | .def("get_clock_frequency_max", &hws::nvml_clock_samples::get_clock_frequency_max, "the maximum possible graphics clock frequency in MHz") 46 | .def("get_memory_clock_frequency_min", &hws::nvml_clock_samples::get_memory_clock_frequency_min, "the minimum possible memory clock frequency in MHz") 47 | .def("get_memory_clock_frequency_max", &hws::nvml_clock_samples::get_memory_clock_frequency_max, "the maximum possible memory clock frequency in MHz") 48 | .def("get_sm_clock_frequency_max", 
&hws::nvml_clock_samples::get_sm_clock_frequency_max, "the maximum possible SM clock frequency in MHz") 49 | .def("get_clock_frequency", &hws::nvml_clock_samples::get_clock_frequency, "the current graphics clock frequency in MHz") 50 | .def("get_available_clock_frequencies", &hws::nvml_clock_samples::get_available_clock_frequencies, "the available clock frequencies in MHz, based on a memory clock frequency (slowest to fastest)") 51 | .def("get_available_memory_clock_frequencies", &hws::nvml_clock_samples::get_available_memory_clock_frequencies, "the available memory clock frequencies in MHz (slowest to fastest)") 52 | .def("get_memory_clock_frequency", &hws::nvml_clock_samples::get_memory_clock_frequency, "the current memory clock frequency in MHz") 53 | .def("get_sm_clock_frequency", &hws::nvml_clock_samples::get_sm_clock_frequency, "the current SM clock frequency in Mhz") 54 | .def("get_throttle_reason", &hws::nvml_clock_samples::get_throttle_reason, "the reason the GPU clock throttled (as bitmask)") 55 | .def("get_throttle_reason_string", &hws::nvml_clock_samples::get_throttle_reason_string, "the reason the GPU clock throttled (as string)") 56 | .def("get_auto_boosted_clock", &hws::nvml_clock_samples::get_auto_boosted_clock, "true if the clocks are currently auto boosted") 57 | .def("__repr__", [](const hws::nvml_clock_samples &self) { 58 | return fmt::format("", self); 59 | }); 60 | 61 | // bind the power samples 62 | py::class_(m, "NvmlPowerSamples") 63 | .def("has_samples", &hws::nvml_power_samples::has_samples, "true if any sample is available, false otherwise") 64 | .def("get_power_management_limit", &hws::nvml_power_samples::get_power_management_limit, "if the GPU draws more power (mW) than the power management limit, the GPU may throttle") 65 | .def("get_power_enforced_limit", &hws::nvml_power_samples::get_power_enforced_limit, "the actually enforced power limit, may be different from power management limit if external limiters are set") 66 | 
.def("get_power_measurement_type", &hws::nvml_power_samples::get_power_measurement_type, "the type of the power readings: either current power draw or average power draw") 67 | .def("get_power_management_mode", &hws::nvml_power_samples::get_power_management_mode, "true if power management algorithms are supported and active") 68 | .def("get_available_power_profiles", &hws::nvml_power_samples::get_available_power_profiles, "a list of the available power profiles") 69 | .def("get_power_usage", &hws::nvml_power_samples::get_power_usage, "the current power draw of the GPU and its related circuity (e.g., memory) in mW") 70 | .def("get_power_total_energy_consumption", &hws::nvml_power_samples::get_power_total_energy_consumption, "the total power consumption since the last driver reload in mJ") 71 | .def("get_power_profile", &hws::nvml_power_samples::get_power_profile, "the current GPU power state: 0 - 15 where 0 is the maximum power and 15 the minimum power") 72 | .def("__repr__", [](const hws::nvml_power_samples &self) { 73 | return fmt::format("", self); 74 | }); 75 | 76 | // bind the memory samples 77 | py::class_(m, "NvmlMemorySamples") 78 | .def("has_samples", &hws::nvml_memory_samples::has_samples, "true if any sample is available, false otherwise") 79 | .def("get_memory_total", &hws::nvml_memory_samples::get_memory_total, "the total available memory in Byte") 80 | .def("get_num_pcie_lanes_max", &hws::nvml_memory_samples::get_num_pcie_lanes_max, "the maximum number of PCIe lanes") 81 | .def("get_pcie_link_generation_max", &hws::nvml_memory_samples::get_pcie_link_generation_max, "the current PCIe link generation (e.g., PCIe 4.0, PCIe 5.0, etc)") 82 | .def("get_pcie_link_speed_max", &hws::nvml_memory_samples::get_pcie_link_speed_max, "the maximum PCIe link speed in MBPS") 83 | .def("get_memory_bus_width", &hws::nvml_memory_samples::get_memory_bus_width, "the memory bus with in Bit") 84 | .def("get_memory_used", &hws::nvml_memory_samples::get_memory_used, "the 
currently used memory in Byte") 85 | .def("get_memory_free", &hws::nvml_memory_samples::get_memory_free, "the currently free memory in Byte") 86 | .def("get_num_pcie_lanes", &hws::nvml_memory_samples::get_num_pcie_lanes, "the current PCIe link width (e.g., x16, x8, x4, etc)") 87 | .def("get_pcie_link_generation", &hws::nvml_memory_samples::get_pcie_link_generation, "the current PCIe link generation (may change during runtime to save energy)") 88 | .def("get_pcie_link_speed", &hws::nvml_memory_samples::get_pcie_link_speed, "the current PCIe link speed in MBPS") 89 | .def("__repr__", [](const hws::nvml_memory_samples &self) { 90 | return fmt::format("", self); 91 | }); 92 | 93 | // bind the temperature samples 94 | py::class_(m, "NvmlTemperatureSamples") 95 | .def("has_samples", &hws::nvml_temperature_samples::has_samples, "true if any sample is available, false otherwise") 96 | .def("get_num_fans", &hws::nvml_temperature_samples::get_num_fans, "the number of fans (if any)") 97 | .def("get_fan_speed_min", &hws::nvml_temperature_samples::get_fan_speed_min, "the minimum fan speed the user can set in %") 98 | .def("get_fan_speed_max", &hws::nvml_temperature_samples::get_fan_speed_max, "the maximum fan speed the user can set in %") 99 | .def("get_temperature_max", &hws::nvml_temperature_samples::get_temperature_max, "the maximum graphics temperature threshold in °C") 100 | .def("get_memory_temperature_max", &hws::nvml_temperature_samples::get_memory_temperature_max, "the maximum memory temperature threshold in °C") 101 | .def("get_fan_speed_percentage", &hws::nvml_temperature_samples::get_fan_speed_percentage, "the current intended fan speed in %") 102 | .def("get_temperature", &hws::nvml_temperature_samples::get_temperature, "the current GPU temperature in °C") 103 | .def("__repr__", [](const hws::nvml_temperature_samples &self) { 104 | return fmt::format("", self); 105 | }); 106 | 107 | // bind the GPU NVIDIA hardware sampler class 108 | py::class_(m, 
"GpuNvidiaHardwareSampler") 109 | .def(py::init<>(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval") 110 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval sampling only the provided sample_category samples") 111 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval") 112 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device with the default sampling interval sampling only the provided sample_category samples") 113 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval") 114 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the default device with the specified sampling interval sampling only the provided sample_category samples") 115 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval") 116 | .def(py::init(), "construct a new NVIDIA GPU hardware sampler for the specified device and sampling interval sampling only the provided sample_category samples") 117 | .def("general_samples", &hws::gpu_nvidia_hardware_sampler::general_samples, "get all general samples") 118 | .def("clock_samples", &hws::gpu_nvidia_hardware_sampler::clock_samples, "get all clock related samples") 119 | .def("power_samples", &hws::gpu_nvidia_hardware_sampler::power_samples, "get all power related samples") 120 | .def("memory_samples", &hws::gpu_nvidia_hardware_sampler::memory_samples, "get all memory related samples") 121 | .def("temperature_samples", &hws::gpu_nvidia_hardware_sampler::temperature_samples, "get all temperature related samples") 122 | .def("samples_only_as_yaml_string", &hws::gpu_nvidia_hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") 123 | .def("__repr__", 
[](const hws::gpu_nvidia_hardware_sampler &self) { 124 | return fmt::format("", self); 125 | }); 126 | } 127 | -------------------------------------------------------------------------------- /bindings/hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 9 | 10 | #include "hws/event.hpp" // hws::event 11 | #include "hws/utility.hpp" // hws::detail::durations_from_reference_time 12 | 13 | #if defined(HWS_FOR_CPUS_ENABLED) 14 | #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler 15 | #endif 16 | #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) 17 | #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler 18 | #endif 19 | #if defined(HWS_FOR_AMD_GPUS_ENABLED) 20 | #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler 21 | #endif 22 | #if defined(HWS_FOR_INTEL_GPUS_ENABLED) 23 | #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler 24 | #endif 25 | 26 | #include "fmt/format.h" // fmt::format 27 | #include "pybind11/chrono.h" // bind std::chrono types 28 | #include "pybind11/pybind11.h" // py::module_, py::class_ 29 | #include "pybind11/stl.h" // bind STL types 30 | 31 | #include "relative_event.hpp" // hws::detail::relative_event 32 | #include // std::string 33 | 34 | namespace py = pybind11; 35 | 36 | void init_hardware_sampler(py::module_ &m) { 37 | const py::module_ pure_virtual_module = m.def_submodule("__pure_virtual"); 38 | 39 | // bind the pure virtual hardware sampler base class 40 | py::class_ pyhardware_sampler(pure_virtual_module, "__pure_virtual_base_HardwareSampler"); 41 | pyhardware_sampler.def("start", 
&hws::hardware_sampler::start_sampling, "start the current hardware sampling") 42 | .def("stop", &hws::hardware_sampler::stop_sampling, "stop the current hardware sampling") 43 | .def("pause", &hws::hardware_sampler::pause_sampling, "pause the current hardware sampling") 44 | .def("resume", &hws::hardware_sampler::resume_sampling, "resume the current hardware sampling") 45 | .def("has_started", &hws::hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started") 46 | .def("is_sampling", &hws::hardware_sampler::is_sampling, "check whether the hardware sampling is currently active") 47 | .def("has_stopped", &hws::hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped") 48 | .def("add_event", py::overload_cast(&hws::hardware_sampler::add_event), "add a new event") 49 | .def("add_event", py::overload_cast(&hws::hardware_sampler::add_event), "add a new event using a time point and a name") 50 | .def("add_event", py::overload_cast(&hws::hardware_sampler::add_event), "add a new event using a name, the current time is used as time point") 51 | .def("num_events", &hws::hardware_sampler::num_events, "get the number of events") 52 | .def("get_events", &hws::hardware_sampler::get_events, "get all events") 53 | .def("get_relative_events", [](const hws::hardware_sampler &self) { 54 | std::vector relative_events{}; 55 | for (const hws::event &e : self.get_events()) { 56 | relative_events.emplace_back(hws::detail::duration_from_reference_time(e.time_point, self.get_event(0).time_point), e.name); 57 | } 58 | return relative_events; }, "get all relative events") 59 | .def("get_event", &hws::hardware_sampler::get_event, "get a specific event") 60 | .def("get_relative_event", [](const hws::hardware_sampler &self, const std::size_t idx) { return hws::detail::relative_event{ hws::detail::duration_from_reference_time(self.get_event(idx).time_point, self.get_event(0).time_point), self.get_event(idx).name }; }, 
"get a specific relative event") 61 | .def("time_points", &hws::hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples") 62 | .def("relative_time_points", [](const hws::hardware_sampler &self) { return hws::detail::durations_from_reference_time(self.sampling_time_points(), self.get_event(0).time_point); }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") 63 | .def("sampling_interval", &hws::hardware_sampler::sampling_interval, "get the sampling interval of this hardware sampler (in ms)") 64 | .def("dump_yaml", py::overload_cast(&hws::hardware_sampler::dump_yaml, py::const_), "dump all hardware samples to the given YAML file") 65 | .def("as_yaml_string", &hws::hardware_sampler::as_yaml_string, "return all hardware samples including additional information like events as YAML string") 66 | .def("samples_only_as_yaml_string", &hws::hardware_sampler::samples_only_as_yaml_string, "return all hardware samples as YAML string") 67 | .def("__repr__", [](const hws::hardware_sampler &self) { 68 | #if defined(HWS_FOR_CPUS_ENABLED) 69 | if (dynamic_cast(&self)) { 70 | return fmt::format("", dynamic_cast(self)); 71 | } 72 | #endif 73 | #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) 74 | if (dynamic_cast(&self)) { 75 | return fmt::format("", dynamic_cast(self)); 76 | } 77 | #endif 78 | #if defined(HWS_FOR_AMD_GPUS_ENABLED) 79 | if (dynamic_cast(&self)) { 80 | return fmt::format("", dynamic_cast(self)); 81 | } 82 | #endif 83 | #if defined(HWS_FOR_INTEL_GPUS_ENABLED) 84 | if (dynamic_cast(&self)) { 85 | return fmt::format("", dynamic_cast(self)); 86 | } 87 | #endif 88 | return std::string{ "unknown" }; }); 89 | } 90 | -------------------------------------------------------------------------------- /bindings/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This 
// Stringification helper: by the time `#x` is applied here, the argument has already been macro-expanded
// by the calling macro.
#define HWS_IS_DEFINED_HELPER(x) #x
// Evaluates to true if the macro `x` is defined.
// `#x` stringizes the macro's *name*, while HWS_IS_DEFINED_HELPER(x) stringizes its *expansion*:
//  - `x` not defined -> nothing is expanded, both string literals are identical -> false
//  - `x` defined     -> the expansion differs from the name                     -> true
// NOTE(review): a macro that is defined to expand to its own name would be reported as "not defined".
#define HWS_IS_DEFINED(x) (std::string_view{ #x } != std::string_view{ HWS_IS_DEFINED_HELPER(x) })

namespace py = pybind11;

// forward declare binding functions
void init_event(py::module_ &);
void init_sample_category(py::module_ &);
void init_relative_event(py::module_ &);
void init_hardware_sampler(py::module_ &);
void init_system_hardware_sampler(py::module_ &);
void init_cpu_hardware_sampler(py::module_ &);
void init_gpu_nvidia_hardware_sampler(py::module_ &);
void init_gpu_amd_hardware_sampler(py::module_ &);
void init_gpu_intel_hardware_sampler(py::module_ &);
void init_version(py::module_ &);

// Entry point of the "HardwareSampling" Python module.
// The backend independent bindings are always registered; the backend specific bindings are only registered if the
// respective HWS_FOR_*_ENABLED macro is defined. The has_*_hardware_sampler() functions are registered
// unconditionally, so Python code can always query which backends are available.
PYBIND11_MODULE(HardwareSampling, m) {
    m.doc() = "Hardware Sampling for CPUs and GPUs";
    m.attr("__version__") = hws::version::version;

    // backend independent bindings
    init_event(m);
    init_sample_category(m);
    init_relative_event(m);
    init_hardware_sampler(m);
    init_system_hardware_sampler(m);

    // CPU sampling
#if defined(HWS_FOR_CPUS_ENABLED)
    init_cpu_hardware_sampler(m);
#endif
    m.def("has_cpu_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_CPUS_ENABLED); });

    // NVIDIA GPU sampling
#if defined(HWS_FOR_NVIDIA_GPUS_ENABLED)
    init_gpu_nvidia_hardware_sampler(m);
#endif
    m.def("has_gpu_nvidia_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_NVIDIA_GPUS_ENABLED); });

    // AMD GPU sampling
#if defined(HWS_FOR_AMD_GPUS_ENABLED)
    init_gpu_amd_hardware_sampler(m);
#endif
    m.def("has_gpu_amd_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_AMD_GPUS_ENABLED); });

    // Intel GPU sampling
#if defined(HWS_FOR_INTEL_GPUS_ENABLED)
    init_gpu_intel_hardware_sampler(m);
#endif
    m.def("has_gpu_intel_hardware_sampler", []() { return HWS_IS_DEFINED(HWS_FOR_INTEL_GPUS_ENABLED); });

    // version information (name, version string, major/minor/patch)
    init_version(m);
}
namespace hws::detail {

/**
 * @brief A single named event whose time stamp is stored relative to the first event.
 * @details Only used by the Python bindings.
 */
struct relative_event {
    /**
     * @brief Create a relative event from a relative time point and a name.
     * @param[in] relative_time_point_p the time at which the event occurred, relative to the first event
     * @param[in] name_p the name of the event (taken by value and moved into the member)
     */
    relative_event(const double relative_time_point_p, std::string name_p) :
        relative_time_point(relative_time_point_p),
        name(std::move(name_p)) { }

    /// The time at which this event occurred, relative to the first event.
    double relative_time_point;
    /// The name of this event.
    std::string name;
};

}  // namespace hws::detail
6 | */ 7 | 8 | #include "hws/sample_category.hpp" // hws::sample_category 9 | 10 | #include "pybind11/operators.h" // operator overloading 11 | #include "pybind11/pybind11.h" // py::module_, py::overload_cast 12 | 13 | namespace py = pybind11; 14 | 15 | void init_sample_category(py::module_ &m) { 16 | // sample_category enum and bitwise operations on the sample_category enum 17 | py::enum_(m, "SampleCategory") 18 | .value("GENERAL", hws::sample_category::general, "General hardware samples like architecture, names, or utilization.") 19 | .value("CLOCK", hws::sample_category::clock, "Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons.") 20 | .value("POWER", hws::sample_category::power, "Power-related hardware samples like current power draw or total energy consumption.") 21 | .value("MEMORY", hws::sample_category::memory, "Memory-related hardware samples like memory usage or PCIe information.") 22 | .value("TEMPERATURE", hws::sample_category::temperature, "Temperature-related hardware samples like maximum and current temperatures.") 23 | .value("GFX", hws::sample_category::gfx, "Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler.") 24 | .value("IDLE_STATE", hws::sample_category::idle_state, "Idle-state-related hardware samples. 
Only used in the cpu_hardware_sampler.") 25 | .value("ALL", hws::sample_category::all, "Shortcut to enable all available hardware samples (default).") 26 | .def("__invert__", py::overload_cast(&hws::operator~)) 27 | .def("__and__", py::overload_cast(&hws::operator&)) 28 | .def("__or__", py::overload_cast(&hws::operator|)) 29 | .def("__xor__", py::overload_cast(&hws::operator^)); 30 | } 31 | -------------------------------------------------------------------------------- /bindings/system_hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler 9 | 10 | #include "hws/event.hpp" // hws::event 11 | #include "hws/sample_category.hpp" // hws::sample_category 12 | #include "hws/utility.hpp" // hws::detail::durations_from_reference_time 13 | 14 | #include "fmt/format.h" // fmt::format 15 | #include "pybind11/chrono.h" // bind std::chrono types 16 | #include "pybind11/pybind11.h" // py::module_, py::class_ 17 | #include "pybind11/stl.h" // bind STL types 18 | 19 | #include "relative_event.hpp" // hws::detail::relative_event 20 | #include // std::string 21 | 22 | namespace py = pybind11; 23 | 24 | void init_system_hardware_sampler(py::module_ &m) { 25 | // bind the pure virtual hardware sampler base class 26 | py::class_(m, "SystemHardwareSampler") 27 | .def(py::init<>(), "construct a new system hardware sampler with the default sampling interval") 28 | .def(py::init(), "construct a new system hardware sampler with the default sampling interval sampling only the provided sample_category samples") 29 | .def(py::init(), "construct a new system hardware sampler for with the specified sampling interval") 30 | .def(py::init(), 
"construct a new system hardware sampler for with the specified sampling interval sampling only the provided sample_category samples") 31 | .def("start", &hws::system_hardware_sampler::start_sampling, "start hardware sampling for all available hardware samplers") 32 | .def("stop", &hws::system_hardware_sampler::stop_sampling, "stop hardware sampling for all available hardware samplers") 33 | .def("pause", &hws::system_hardware_sampler::pause_sampling, "pause hardware sampling for all available hardware samplers") 34 | .def("resume", &hws::system_hardware_sampler::resume_sampling, "resume hardware sampling for all available hardware samplers") 35 | .def("has_started", &hws::system_hardware_sampler::has_sampling_started, "check whether hardware sampling has already been started for all hardware samplers") 36 | .def("is_sampling", &hws::system_hardware_sampler::is_sampling, "check whether the hardware sampling is currently active for all hardware samplers") 37 | .def("has_stopped", &hws::system_hardware_sampler::has_sampling_stopped, "check whether hardware sampling has already been stopped for all hardware samplers") 38 | .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event to all hardware samplers") 39 | .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a time point and a name to all hardware samplers") 40 | .def("add_event", py::overload_cast(&hws::system_hardware_sampler::add_event), "add a new event using a name, the current time is used as time point to all hardware samplers") 41 | .def("num_events", &hws::system_hardware_sampler::num_events, "get the number of events separately for each hardware sampler") 42 | .def("get_events", &hws::system_hardware_sampler::get_events, "get all events separately for each hardware sampler") 43 | .def("get_relative_events", [](const hws::system_hardware_sampler &self) { 44 | std::vector> relative_events{}; 45 | for (const std::vector 
&events : self.get_events()) { 46 | relative_events.emplace_back(); 47 | for (const hws::event &e : events) { 48 | relative_events.back().emplace_back(hws::detail::duration_from_reference_time(e.time_point, events[0].time_point), e.name); 49 | } 50 | } 51 | return relative_events; }, "get all relative events separately for each hardware sampler") 52 | .def("time_points", &hws::system_hardware_sampler::sampling_time_points, "get the time points of the respective hardware samples separately for each hardware sampler") 53 | .def("relative_time_points", [](const hws::system_hardware_sampler &self) { 54 | std::vector> relative_time_points{}; 55 | for (std::size_t s = 0; s < self.num_samplers(); ++s) { 56 | relative_time_points.emplace_back(hws::detail::durations_from_reference_time(self.sampling_time_points()[s], self.get_events()[s][0].time_point)); 57 | } 58 | return relative_time_points; }, "get the relative durations of the respective hardware samples in seconds (as \"normal\" number)") 59 | .def("sampling_interval", &hws::system_hardware_sampler::sampling_interval, "get the sampling interval separately for each hardware sampler (in ms)") 60 | .def("num_samplers", &hws::system_hardware_sampler::num_samplers, "get the number of hardware samplers available for the whole system") 61 | .def("samplers", [](hws::system_hardware_sampler &self) { 62 | std::vector out{}; 63 | for (auto &ptr : self.samplers()) { 64 | out.push_back(ptr.get()); 65 | } 66 | return out; }, "get the hardware samplers available for the whole system") 67 | .def("sampler", [](hws::system_hardware_sampler &self, const std::size_t idx) { return self.sampler(idx).get(); }, "get the i-th hardware sampler available for the whole system") 68 | .def("dump_yaml", py::overload_cast(&hws::system_hardware_sampler::dump_yaml, py::const_), "dump all hardware samples for all hardware samplers to the given YAML file") 69 | .def("as_yaml_string", &hws::system_hardware_sampler::as_yaml_string, "return all hardware 
samples for all hardware samplers as YAML string") 70 | .def("__repr__", [](const hws::system_hardware_sampler &self) { return fmt::format("", self.num_samplers()); }); 71 | } 72 | -------------------------------------------------------------------------------- /bindings/version.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/version.hpp" // hws::version 9 | 10 | #include "pybind11/pybind11.h" // py::module_ 11 | 12 | namespace py = pybind11; 13 | 14 | // dummy class 15 | class version { }; 16 | 17 | void init_version(py::module_ &m) { 18 | // bind global version information 19 | // complexity necessary to enforce read-only 20 | py::class_(m, "version") 21 | .def_property_readonly_static("name", [](const py::object & /* self */) { return hws::version::name; }, "the name of the hws library") 22 | .def_property_readonly_static("version", [](const py::object & /* self */) { return hws::version::version; }, "the used version of the hws library") 23 | .def_property_readonly_static("major", [](const py::object & /* self */) { return hws::version::major; }, "the used major version of the hws library") 24 | .def_property_readonly_static("minor", [](const py::object & /* self */) { return hws::version::minor; }, "the used minor version of the hws library") 25 | .def_property_readonly_static("patch", [](const py::object & /* self */) { return hws::version::patch; }, "the used patch version of the hws library"); 26 | } 27 | -------------------------------------------------------------------------------- /cmake/Findlevel_zero.cmake: -------------------------------------------------------------------------------- 1 | ## Copyright 2020-2023, Intel Corporation 2 | ## SPDX-License-Identifier: 
# Find module for the Intel Level Zero loader.
#
# Input:
#   LEVEL_ZERO_ROOT (CMake or environment variable): optional hint pointing to the level_zero installation.
# Output:
#   level_zero_FOUND        - set by find_package_handle_standard_args
#   LEVEL_ZERO_INCLUDE_DIR  - directory containing level_zero/ze_api.h
#   LEVEL_ZERO_LIB_LOADER   - full path to the ze_loader library
#   level_zero              - IMPORTED target (only created if the package was found)

# locate the installation root, identified by include/level_zero/ze_api.h
find_path(LEVEL_ZERO_ROOT include/level_zero/ze_api.h
    DOC "Root of level_zero installation"
    HINTS ${LEVEL_ZERO_ROOT} $ENV{LEVEL_ZERO_ROOT}
    PATHS
    ${PROJECT_SOURCE_DIR}/level_zero
    /opt/level_zero
)

# locate the include directory
find_path(LEVEL_ZERO_INCLUDE_DIR level_zero/ze_api.h
    PATHS
    ${LEVEL_ZERO_ROOT}/include
    PATH_SUFFIXES
    include
)

# locate the loader library
find_library(LEVEL_ZERO_LIB_LOADER ze_loader
    HINTS
    ${LEVEL_ZERO_ROOT}/x86_64-linux-gnu
    ${LEVEL_ZERO_ROOT}/lib64
    ${LEVEL_ZERO_ROOT}/lib
    PATH_SUFFIXES
    x86_64-linux-gnu
    lib64
    lib
)

set(LEVEL_ZERO_ERROR_MSG
"
Could not find level_zero!
Use LEVEL_ZERO_ROOT to point to your level_zero installation
"
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(level_zero
    "${LEVEL_ZERO_ERROR_MSG}" LEVEL_ZERO_INCLUDE_DIR LEVEL_ZERO_LIB_LOADER)

# Only create the imported target if level_zero was actually found, and only once:
# add_library() raises an error if a target with the same name already exists, e.g., if this module is
# processed a second time in the same directory scope.
if (level_zero_FOUND AND NOT TARGET level_zero)
    add_library(level_zero SHARED IMPORTED)
    set_target_properties(level_zero PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${LEVEL_ZERO_INCLUDE_DIR}"
    )
    set_target_properties(level_zero PROPERTIES
        IMPORTED_LOCATION "${LEVEL_ZERO_LIB_LOADER}"
        IMPORTED_NO_SONAME TRUE
    )
endif ()
@PACKAGE_INIT@

# helper function to check whether a CMake string contains a substring
# sets RESULT_VAR in the calling scope to ON if SUBSTRING occurs in STR, and to OFF otherwise
function(string_contains STR SUBSTRING RESULT_VAR)
    string(FIND "${STR}" "${SUBSTRING}" _index)
    if(_index EQUAL -1)
        set(${RESULT_VAR} OFF PARENT_SCOPE)
    else()
        set(${RESULT_VAR} ON PARENT_SCOPE)
    endif()
endfunction()

include(CMakeFindDependencyMacro)

# always try finding {fmt}
# -> CMAKE_PREFIX_PATH necessary if built via FetchContent
# -> doesn't hurt to be set every time
list(APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_LIST_DIR}/../../../lib/cmake/fmt")
# note: find_dependency already forwards REQUIRED and QUIET of the calling find_package(hws ...);
#       passing REQUIRED explicitly would turn an optional find_package(hws) into a hard error
find_dependency(fmt)


# load all targets
include("${CMAKE_CURRENT_LIST_DIR}/hwsTargets.cmake")

# assume hws can be found
set(hws_FOUND ON)

# get all compile definitions
get_target_property(HWS_COMPILE_DEFINITIONS hws::hws INTERFACE_COMPILE_DEFINITIONS)

# check whether CPUs are supported
string_contains("${HWS_COMPILE_DEFINITIONS}" "HWS_FOR_CPUS_ENABLED" HWS_HAS_CPU_SUPPORT)
if (HWS_HAS_CPU_SUPPORT)
    # check if hws was built with lscpu -> check if lscpu can be found
    if (@HWS_LSCPU_FOUND@)
        find_program(HWS_HAS_LSCPU_SUPPORT lscpu)
        if (NOT HWS_HAS_LSCPU_SUPPORT)
            set(hws_FOUND OFF)
            set(hws_NOT_FOUND_MESSAGE "Couldn't find necessary program \"lscpu\".")
            return()
        elseif (NOT hws_FIND_QUIETLY)
            message(STATUS "Found \"lscpu\" program.")
        endif ()
    endif ()

    # check if hws was built with free -> check if free can be found
    if (@HWS_FREE_FOUND@)
        find_program(HWS_HAS_FREE_SUPPORT free)
        if (NOT HWS_HAS_FREE_SUPPORT)
            set(hws_FOUND OFF)
            set(hws_NOT_FOUND_MESSAGE "Couldn't find necessary program \"free\".")
            return()
        elseif (NOT hws_FIND_QUIETLY)
            message(STATUS "Found \"free\" program.")
        endif ()
    endif ()

    # check if hws was built with turbostat -> check if turbostat can be found and executed
    if (@HWS_TURBOSTAT_FOUND@)
        find_program(HWS_HAS_TURBOSTAT_SUPPORT turbostat)
        if (NOT HWS_HAS_TURBOSTAT_SUPPORT)
            set(hws_FOUND OFF)
            set(hws_NOT_FOUND_MESSAGE "Couldn't find necessary program \"turbostat\".")
            return()
        else ()
            # check required privileges when using turbostat
            # note: "without_root" must be tested first since it also matches the regex "root"
            if (@HWS_TURBOSTAT_EXECUTION_TYPE@ MATCHES "without_root")
                # check if turbostat can be executed without root privileges
                execute_process(COMMAND turbostat -n 1 -S -q
                        RESULT_VARIABLE HWS_HAS_TURBOSTAT_SUPPORT_WITHOUT_ROOT
                        OUTPUT_QUIET
                        ERROR_QUIET)
                if (NOT HWS_HAS_TURBOSTAT_SUPPORT_WITHOUT_ROOT EQUAL 0)
                    set(hws_FOUND OFF)
                    set(hws_NOT_FOUND_MESSAGE "turbostat must be configured to run without root privileges.")
                    return()
                endif ()
            elseif (@HWS_TURBOSTAT_EXECUTION_TYPE@ MATCHES "root")
                # check if turbostat can be executed with root privileges and without a password
                # (sudo -n never prompts: it fails instead of asking for a password)
                execute_process(COMMAND sudo -n turbostat -n 1 -i 0.001 -S -q
                        RESULT_VARIABLE HWS_HAS_TURBOSTAT_SUPPORT_ROOT_REQUIRED
                        OUTPUT_QUIET
                        ERROR_QUIET)
                if (HWS_HAS_TURBOSTAT_SUPPORT_ROOT_REQUIRED EQUAL 0)
                    execute_process(COMMAND sudo turbostat -n 1 -i 0.001 -S -q
                            RESULT_VARIABLE HWS_HAS_TURBOSTAT_SUPPORT_WITH_ROOT
                            OUTPUT_QUIET
                            ERROR_QUIET)
                    if (NOT HWS_HAS_TURBOSTAT_SUPPORT_WITH_ROOT EQUAL 0)
                        set(hws_FOUND OFF)
                        set(hws_NOT_FOUND_MESSAGE "turbostat must be configured to run with root privileges and without the need for a password.")
                        return()
                    endif ()
                else ()
                    # NOTE(review): this branch is reached if the non-interactive sudo call failed;
                    #               confirm that the reported message is the intended one for this case
                    set(hws_FOUND OFF)
                    set(hws_NOT_FOUND_MESSAGE "Invalid turbostat configuration.")
                    return()
                endif ()
            endif ()
        endif ()
    endif ()

    if (NOT hws_FIND_QUIETLY)
        message(STATUS "Enabled support for CPUs via hws.")
    endif ()
endif ()

# check whether NVIDIA GPUs are supported
string_contains("${HWS_COMPILE_DEFINITIONS}" "HWS_FOR_NVIDIA_GPUS_ENABLED" HWS_HAS_GPU_NVIDIA_SUPPORT)
if (HWS_HAS_GPU_NVIDIA_SUPPORT)
    find_dependency(CUDAToolkit)
    if (NOT hws_FIND_QUIETLY)
        message(STATUS "Enabled support for NVIDIA GPUs via hws.")
    endif ()
endif ()

# check whether AMD GPUs are supported
string_contains("${HWS_COMPILE_DEFINITIONS}" "HWS_FOR_AMD_GPUS_ENABLED" HWS_HAS_GPU_AMD_SUPPORT)
if (HWS_HAS_GPU_AMD_SUPPORT)
    find_dependency(rocm_smi)
    find_dependency(HIP)
    if (NOT hws_FIND_QUIETLY)
        message(STATUS "Enabled support for AMD GPUs via hws.")
    endif ()
endif ()

# check whether Intel GPUs are supported
string_contains("${HWS_COMPILE_DEFINITIONS}" "HWS_FOR_INTEL_GPUS_ENABLED" HWS_HAS_GPU_INTEL_SUPPORT)
if (HWS_HAS_GPU_INTEL_SUPPORT)
    # add Findlevel_zero.cmake to module path
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}")
    find_dependency(level_zero)
    if (NOT hws_FIND_QUIETLY)
        message(STATUS "Enabled support for Intel GPUs via hws.")
    endif ()
endif ()


# check possible provided version
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(hws
        REQUIRED_VARS hws_FOUND
        VERSION_VAR hws_VERSION
)

# add additional output if hws could be found
if (NOT hws_FIND_QUIETLY AND hws_FOUND)
    # output the sampling interval hws was configured with
    message(STATUS "The sampling interval is @HWS_SAMPLING_INTERVAL@ms.")
    # output the hws build type
    message(STATUS "The hws library was built in @CMAKE_BUILD_TYPE@ mode.")
    # output if error checks are enabled
    if (@HWS_ENABLE_ERROR_CHECKS@)
        message(STATUS "The hws library was built with error checks enabled.")
    endif ()
endif()

# sanity checks
check_required_components("hws")
39 | set(DOXYGEN_VERBATIM_VARS DOXYGEN_ALIASES) 40 | set(DOXYGEN_ALIASES 41 | [[license="\par License^^\parblock^^" ]] 42 | ) 43 | 44 | ## add doxygen as target 45 | doxygen_add_docs( 46 | doc 47 | "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/README.md;" 48 | WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" 49 | COMMENT "Generating API documentation with Doxygen." 50 | ) 51 | 52 | ## install targets for the documentation 53 | include(GNUInstallDirs) 54 | install(DIRECTORY "${PROJECT_SOURCE_DIR}/docs/html" 55 | DESTINATION "${CMAKE_INSTALL_DOCDIR}" 56 | ) 57 | -------------------------------------------------------------------------------- /examples/cpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE.md file in the project root for full license information. 5 | ######################################################################################################################## 6 | 7 | cmake_minimum_required(VERSION 3.22) 8 | 9 | project(LibraryUsageExample LANGUAGES CXX) 10 | 11 | find_package(hws REQUIRED) 12 | 13 | add_executable(prog main.cpp) 14 | 15 | target_compile_features(prog PUBLIC cxx_std_17) 16 | target_link_libraries(prog PUBLIC hws::hws) -------------------------------------------------------------------------------- /examples/cpp/main.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 
6 | */ 7 | 8 | #include "hws/core.hpp" 9 | 10 | #include // std::size_t 11 | #include // std::iota 12 | #include // std::vector 13 | 14 | int main() { 15 | hws::system_hardware_sampler sampler{}; 16 | // could also be, e.g., 17 | // hws::gpu_nvidia_hardware_sampler sampler{}; 18 | sampler.start_sampling(); 19 | 20 | sampler.add_event("init"); 21 | const std::size_t m_size = 2048; 22 | const std::size_t n_size = 2048; 23 | const std::size_t k_size = 2048; 24 | 25 | std::vector A(m_size * k_size); 26 | std::vector B(k_size * n_size); 27 | std::vector C(m_size * n_size); 28 | 29 | std::iota(A.begin(), A.end(), 0); 30 | std::iota(B.begin(), B.end(), 1); 31 | 32 | sampler.add_event("matmul"); 33 | for (std::size_t m = 0; m < m_size; ++m) { 34 | for (std::size_t n = 0; n < n_size; ++n) { 35 | for (std::size_t k = 0; k < k_size; ++k) { 36 | C[m * n_size + n] += A[m * k_size + k] * B[k * n_size + n]; 37 | } 38 | } 39 | } 40 | 41 | sampler.stop_sampling(); 42 | sampler.dump_yaml("track.yaml"); 43 | 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /examples/python/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | ######################################################################################################################## 5 | # Authors: Marcel Breyer # 6 | # Copyright (C): 2024-today All Rights Reserved # 7 | # License: This file is released under the MIT license. # 8 | # See the LICENSE.md file in the project root for full license information. 
import HardwareSampling as hws
import numpy as np

# Edge length of the two square operand matrices.
MATRIX_DIM = 1 << 14

# Sampler that is active while the workload below runs.
sampler = hws.SystemHardwareSampler()
# could also be, e.g.,
# sampler = hws.GpuNvidiaHardwareSampler()
sampler.start()

# Phase 1: create the random input matrices.
sampler.add_event("init")
lhs = np.random.rand(MATRIX_DIM, MATRIX_DIM)
rhs = np.random.rand(MATRIX_DIM, MATRIX_DIM)

# Phase 2: the matrix product is the workload of interest.
sampler.add_event("matmul")
product = np.matmul(lhs, rhs)

# Stop the sampler before dumping to "track.yaml".
sampler.stop()
sampler.dump_yaml("track.yaml")
9 | */ 10 | 11 | #ifndef HWS_CORE_HPP_ 12 | #define HWS_CORE_HPP_ 13 | #pragma once 14 | 15 | #include "hws/event.hpp" 16 | #include "hws/hardware_sampler.hpp" 17 | #include "hws/sample_category.hpp" 18 | #include "hws/system_hardware_sampler.hpp" 19 | #include "hws/version.hpp" 20 | 21 | #if defined(HWS_FOR_CPUS_ENABLED) 22 | #include "hws/cpu/cpu_samples.hpp" 23 | #include "hws/cpu/hardware_sampler.hpp" 24 | #endif 25 | 26 | #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) 27 | #include "hws/gpu_nvidia//nvml_samples.hpp" 28 | #include "hws/gpu_nvidia/hardware_sampler.hpp" 29 | #endif 30 | 31 | #if defined(HWS_FOR_AMD_GPUS_ENABLED) 32 | #include "hws/gpu_amd/hardware_sampler.hpp" 33 | #include "hws/gpu_amd/rocm_smi_samples.hpp" 34 | #endif 35 | 36 | #if defined(HWS_FOR_INTEL_GPUS_ENABLED) 37 | #include "hws/gpu_intel/hardware_sampler.hpp" 38 | #include "hws/gpu_intel/level_zero_samples.hpp" 39 | #endif 40 | 41 | #endif // HWS_CORE_HPP_ 42 | -------------------------------------------------------------------------------- /include/hws/cpu/hardware_sampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines a hardware sampler for CPUs using the turbostat, lscpu, and free utilities (requires root). 
9 | */ 10 | 11 | #ifndef HWS_CPU_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_CPU_HARDWARE_SAMPLER_HPP_ 13 | #pragma once 14 | 15 | #include "hws/cpu/cpu_samples.hpp" // hws::{cpu_general_samples, clock_samples, power_samples, memory_samples, temperature_samples, gfx_samples, idle_state_samples} 16 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 17 | #include "hws/sample_category.hpp" // hws::sample_category 18 | 19 | #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter 20 | 21 | #include // std::chrono::milliseconds, std::chrono_literals namespace 22 | #include // std::ostream forward declaration 23 | 24 | namespace hws { 25 | 26 | using namespace std::chrono_literals; 27 | 28 | /** 29 | * @brief A hardware sampler for the CPU. 30 | * @details If available uses the linux commands `turbostat`, `lscpu`, and `free` to gather its information. 31 | */ 32 | class cpu_hardware_sampler : public hardware_sampler { 33 | public: 34 | /** 35 | * @brief Construct a new CPU hardware sampler with the default sampling interval. 36 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 37 | */ 38 | explicit cpu_hardware_sampler(sample_category category = sample_category::all); 39 | /** 40 | * @brief Construct a new CPU hardware sampler with the @p sampling_interval. 41 | * @param[in] sampling_interval the used sampling interval 42 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 43 | */ 44 | explicit cpu_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 45 | 46 | /** 47 | * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 48 | */ 49 | cpu_hardware_sampler(const cpu_hardware_sampler &) = delete; 50 | /** 51 | * @brief Delete the move-constructor (already implicitly deleted due to the base class's std::atomic member). 
52 | */ 53 | cpu_hardware_sampler(cpu_hardware_sampler &&) noexcept = delete; 54 | /** 55 | * @brief Delete the copy-assignment operator (already implicitly deleted due to the base class's std::atomic member). 56 | */ 57 | cpu_hardware_sampler &operator=(const cpu_hardware_sampler &) = delete; 58 | /** 59 | * @brief Delete the move-assignment operator (already implicitly deleted due to the base class's std::atomic member). 60 | */ 61 | cpu_hardware_sampler &operator=(cpu_hardware_sampler &&) noexcept = delete; 62 | 63 | /** 64 | * @brief Destruct the CPU hardware sampler. If the sampler is still running, stops it. 65 | */ 66 | ~cpu_hardware_sampler() override; 67 | 68 | /** 69 | * @brief Return the general CPU samples of this hardware sampler. 70 | * @return the general CPU samples (`[[nodiscard]]`) 71 | */ 72 | [[nodiscard]] const cpu_general_samples &general_samples() const noexcept { return general_samples_; } 73 | 74 | /** 75 | * @brief Return the clock related CPU samples of this hardware sampler. 76 | * @return the clock related CPU samples (`[[nodiscard]]`) 77 | */ 78 | [[nodiscard]] const cpu_clock_samples &clock_samples() const noexcept { return clock_samples_; } 79 | 80 | /** 81 | * @brief Return the power related CPU samples of this hardware sampler. 82 | * @return the power related CPU samples (`[[nodiscard]]`) 83 | */ 84 | [[nodiscard]] const cpu_power_samples &power_samples() const noexcept { return power_samples_; } 85 | 86 | /** 87 | * @brief Return the memory related CPU samples of this hardware sampler. 88 | * @return the memory related CPU samples (`[[nodiscard]]`) 89 | */ 90 | [[nodiscard]] const cpu_memory_samples &memory_samples() const noexcept { return memory_samples_; } 91 | 92 | /** 93 | * @brief Return the temperature related CPU samples of this hardware sampler. 
94 | * @return the temperature related CPU samples (`[[nodiscard]]`) 95 | */ 96 | [[nodiscard]] const cpu_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } 97 | 98 | /** 99 | * @brief Return the gfx (iGPU) related CPU samples of this hardware sampler. 100 | * @return the gfx (iGPU) related CPU samples (`[[nodiscard]]`) 101 | */ 102 | [[nodiscard]] const cpu_gfx_samples &gfx_samples() const noexcept { return gfx_samples_; } 103 | 104 | /** 105 | * @brief Return the idle state related CPU samples of this hardware sampler. 106 | * @return the idle state related CPU samples (`[[nodiscard]]`) 107 | */ 108 | [[nodiscard]] const cpu_idle_states_samples &idle_state_samples() const noexcept { return idle_state_samples_; } 109 | 110 | /** 111 | * @copydoc hws::hardware_sampler::device_identification 112 | */ 113 | [[nodiscard]] std::string device_identification() const final; 114 | 115 | /** 116 | * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const 117 | */ 118 | [[nodiscard]] std::string samples_only_as_yaml_string() const final; 119 | 120 | private: 121 | /** 122 | * @copydoc hws::hardware_sampler::sampling_loop 123 | */ 124 | void sampling_loop() final; 125 | 126 | /// The general CPU samples. 127 | cpu_general_samples general_samples_{}; 128 | /// The clock related CPU samples. 129 | cpu_clock_samples clock_samples_{}; 130 | /// The power related CPU samples. 131 | cpu_power_samples power_samples_{}; 132 | /// The memory related CPU samples. 133 | cpu_memory_samples memory_samples_{}; 134 | /// The temperature related CPU samples. 135 | cpu_temperature_samples temperature_samples_{}; 136 | /// The gfx (iGPU) related CPU samples. 137 | cpu_gfx_samples gfx_samples_{}; 138 | /// The idle state related CPU samples. 139 | cpu_idle_states_samples idle_state_samples_{}; 140 | }; 141 | 142 | /** 143 | * @brief Output all CPU samples gathered by the @p sampler to the given output-stream @p out. 
144 | * @details Sets `std::ios_base::failbit` if the @p sampler is still sampling. 145 | * @param[in,out] out the output-stream to write the CPU samples to 146 | * @param[in] sampler the CPU hardware sampler 147 | * @return the output-stream 148 | */ 149 | std::ostream &operator<<(std::ostream &out, const cpu_hardware_sampler &sampler); 150 | 151 | } // namespace hws 152 | 153 | /// @cond Doxygen_suppress 154 | 155 | template <> 156 | struct fmt::formatter : fmt::ostream_formatter { }; 157 | 158 | /// @endcond 159 | 160 | #endif // HWS_CPU_HARDWARE_SAMPLER_HPP_ 161 | -------------------------------------------------------------------------------- /include/hws/cpu/utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Implements utility functionality for the CPU sampler. 9 | */ 10 | 11 | #ifndef HWS_CPU_UTILITY_HPP_ 12 | #define HWS_CPU_UTILITY_HPP_ 13 | #pragma once 14 | 15 | #include "fmt/format.h" // fmt::format 16 | 17 | #include // std::runtime_error 18 | #include // std::string 19 | #include // std::string_view 20 | 21 | namespace hws::detail { 22 | 23 | /** 24 | * @def HWS_SUBPROCESS_ERROR_CHECK 25 | * @brief Defines the `HWS_SUBPROCESS_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 26 | * @details Throws an exception if a subprocess call returns with an error. 
/**
 * @brief Plain aggregate-like type describing one named event together with the time it happened.
 */
struct event {
    /**
     * @brief Create an event from a time point and a name.
     * @param[in] time_point_p the point in time at which the event occurred
     * @param[in] name_p the event's name (taken by value and moved into the member)
     */
    event(std::chrono::steady_clock::time_point time_point_p, std::string name_p) :
        time_point(time_point_p),
        name(std::move(name_p)) { }

    /// When the event occurred.
    std::chrono::steady_clock::time_point time_point;
    /// How the event is called.
    std::string name;
};
9 | */ 10 | 11 | #ifndef HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ 13 | #pragma once 14 | 15 | #include "hws/gpu_amd/rocm_smi_samples.hpp" // hws::{rocm_smi_general_samples, rocm_smi_clock_samples, rocm_smi_power_samples, rocm_smi_memory_samples, rocm_smi_temperature_samples} 16 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 17 | #include "hws/sample_category.hpp" // hws::sample_category 18 | 19 | #include "fmt/ostream.h" // fmt::formatter, fmt::ostream_formatter 20 | 21 | #include // std::atomic 22 | #include // std::chrono::milliseconds, std::chrono_literals namespace 23 | #include // std::size_t 24 | #include // std::uint32_t 25 | #include // std::ostream forward declaration 26 | 27 | namespace hws { 28 | 29 | using namespace std::chrono_literals; 30 | 31 | /** 32 | * @brief A hardware sampler for AMD GPUs. 33 | * @details Uses AMD's ROCm SMI library. 34 | */ 35 | class gpu_amd_hardware_sampler : public hardware_sampler { 36 | public: 37 | /** 38 | * @brief Construct a new AMD GPU hardware sampler for the default device with the default sampling interval. 39 | * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 40 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 41 | */ 42 | explicit gpu_amd_hardware_sampler(sample_category category = sample_category::all); 43 | /** 44 | * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the default sampling interval. 45 | * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 
46 | * @param[in] device_id the ID of the device to sample 47 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 48 | */ 49 | explicit gpu_amd_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); 50 | /** 51 | * @brief Construct a new AMD GPU hardware sampler for the default device with the @p sampling_interval. 52 | * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 53 | * @param[in] sampling_interval the used sampling interval 54 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 55 | */ 56 | explicit gpu_amd_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 57 | /** 58 | * @brief Construct a new AMD GPU hardware sampler for device @p device_id with the @p sampling_interval. 59 | * @details If this is the first AMD GPU sampler, initializes the ROCm SMI environment. 60 | * @param[in] device_id the ID of the device to sample 61 | * @param[in] sampling_interval the used sampling interval 62 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 63 | */ 64 | gpu_amd_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 65 | 66 | /** 67 | * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 68 | */ 69 | gpu_amd_hardware_sampler(const gpu_amd_hardware_sampler &) = delete; 70 | /** 71 | * @brief Delete the move-constructor (already implicitly deleted due to the base class's std::atomic member). 72 | */ 73 | gpu_amd_hardware_sampler(gpu_amd_hardware_sampler &&) noexcept = delete; 74 | /** 75 | * @brief Delete the copy-assignment operator (already implicitly deleted due to the base class's std::atomic member). 
76 | */ 77 | gpu_amd_hardware_sampler &operator=(const gpu_amd_hardware_sampler &) = delete; 78 | /** 79 | * @brief Delete the move-assignment operator (already implicitly deleted due to the base class's std::atomic member). 80 | */ 81 | gpu_amd_hardware_sampler &operator=(gpu_amd_hardware_sampler &&) noexcept = delete; 82 | 83 | /** 84 | * @brief Destruct the AMD GPU hardware sampler. If the sampler is still running, stops it. 85 | * @details If this is the last AMD GPU sampler, cleans up the ROCm SMI environment. 86 | */ 87 | ~gpu_amd_hardware_sampler() override; 88 | 89 | /** 90 | * @brief Return the general AMD GPU samples of this hardware sampler. 91 | * @return the general AMD GPU samples (`[[nodiscard]]`) 92 | */ 93 | [[nodiscard]] const rocm_smi_general_samples &general_samples() const noexcept { return general_samples_; } 94 | 95 | /** 96 | * @brief Return the clock related AMD GPU samples of this hardware sampler. 97 | * @return the clock related AMD GPU samples (`[[nodiscard]]`) 98 | */ 99 | [[nodiscard]] const rocm_smi_clock_samples &clock_samples() const noexcept { return clock_samples_; } 100 | 101 | /** 102 | * @brief Return the power related AMD GPU samples of this hardware sampler. 103 | * @return the power related AMD GPU samples (`[[nodiscard]]`) 104 | */ 105 | [[nodiscard]] const rocm_smi_power_samples &power_samples() const noexcept { return power_samples_; } 106 | 107 | /** 108 | * @brief Return the memory related AMD GPU samples of this hardware sampler. 109 | * @return the memory related AMD GPU samples (`[[nodiscard]]`) 110 | */ 111 | [[nodiscard]] const rocm_smi_memory_samples &memory_samples() const noexcept { return memory_samples_; } 112 | 113 | /** 114 | * @brief Return the temperature related AMD GPU samples of this hardware sampler. 
115 | * @return the temperature related AMD GPU samples (`[[nodiscard]]`) 116 | */ 117 | [[nodiscard]] const rocm_smi_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } 118 | 119 | /** 120 | * @copydoc hws::hardware_sampler::device_identification 121 | */ 122 | [[nodiscard]] std::string device_identification() const final; 123 | 124 | /** 125 | * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const 126 | */ 127 | [[nodiscard]] std::string samples_only_as_yaml_string() const final; 128 | 129 | private: 130 | /** 131 | * @copydoc hws::hardware_sampler::sampling_loop 132 | */ 133 | void sampling_loop() final; 134 | 135 | /// The ID of the device to sample. 136 | std::uint32_t device_id_{}; 137 | 138 | /// The general AMD GPU samples. 139 | rocm_smi_general_samples general_samples_{}; 140 | /// The clock related AMD GPU samples. 141 | rocm_smi_clock_samples clock_samples_{}; 142 | /// The power related AMD GPU samples. 143 | rocm_smi_power_samples power_samples_{}; 144 | /// The memory related AMD GPU samples. 145 | rocm_smi_memory_samples memory_samples_{}; 146 | /// The temperature related AMD GPU samples. 147 | rocm_smi_temperature_samples temperature_samples_{}; 148 | 149 | /// The total number of currently active AMD GPU hardware samplers. 150 | inline static std::atomic instances_{ 0 }; 151 | /// True if the ROCm SMI environment has been successfully initialized (only done by a single hardware sampler). 152 | inline static std::atomic init_finished_{ false }; 153 | }; 154 | 155 | /** 156 | * @brief Output all AMD GPU samples gathered by the @p sampler to the given output-stream @p out. 157 | * @details Sets `std::ios_base::failbit` if the @p sampler is still sampling. 
158 | * @param[in,out] out the output-stream to write the AMD GPU samples to 159 | * @param[in] sampler the AMD GPU hardware sampler 160 | * @return the output-stream 161 | */ 162 | std::ostream &operator<<(std::ostream &out, const gpu_amd_hardware_sampler &sampler); 163 | 164 | } // namespace hws 165 | 166 | /// @cond Doxygen_suppress 167 | 168 | template <> 169 | struct fmt::formatter : fmt::ostream_formatter { }; 170 | 171 | /// @endcond 172 | 173 | #endif // HWS_GPU_AMD_HARDWARE_SAMPLER_HPP_ 174 | -------------------------------------------------------------------------------- /include/hws/gpu_amd/utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Implements utility functionality for the AMD GPU sampler. 9 | */ 10 | 11 | #ifndef HWS_GPU_AMD_UTILITY_HPP_ 12 | #define HWS_GPU_AMD_UTILITY_HPP_ 13 | #pragma once 14 | 15 | #include "fmt/format.h" // fmt::format 16 | #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions 17 | 18 | #include // std::runtime_error 19 | #include // std::string 20 | 21 | namespace hws::detail { 22 | 23 | /** 24 | * @def HWS_ROCM_SMI_ERROR_CHECK 25 | * @brief Defines the `HWS_ROCM_SMI_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 26 | * @details Throws an exception if a ROCm SMI call returns with an error. Additionally outputs a more concrete error string if possible. 27 | */ 28 | /** 29 | * @def HWS_HIP_ERROR_CHECK 30 | * @brief Defines the `HWS_HIP_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 31 | * @details Throws an exception if a HIP call returns with an error. Additionally outputs a more concrete error string. 
32 | */ 33 | #if defined(HWS_ERROR_CHECKS_ENABLED) 34 | #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) \ 35 | { \ 36 | const rsmi_status_t errc = rocm_smi_func; \ 37 | if (errc != RSMI_STATUS_SUCCESS) { \ 38 | const char *error_string; \ 39 | const rsmi_status_t ret = rsmi_status_string(errc, &error_string); \ 40 | if (ret == RSMI_STATUS_SUCCESS) { \ 41 | throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, error_string) }; \ 42 | } else { \ 43 | throw std::runtime_error{ fmt::format("Error in ROCm SMI function call \"{}\": {}", #rocm_smi_func, static_cast(errc)) }; \ 44 | } \ 45 | } \ 46 | } 47 | 48 | #define HWS_HIP_ERROR_CHECK(hip_func) \ 49 | { \ 50 | const hipError_t errc = hip_func; \ 51 | if (errc != hipSuccess) { \ 52 | throw std::runtime_error{ fmt::format("Error in HIP function call \"{}\": {}", #hip_func, hipGetErrorString(errc)) }; \ 53 | } \ 54 | } 55 | 56 | #else 57 | #define HWS_ROCM_SMI_ERROR_CHECK(rocm_smi_func) rocm_smi_func; 58 | #define HWS_HIP_ERROR_CHECK(hip_func) \ 59 | { \ 60 | [[maybe_unused]] hipError_t errc = hip_func; \ 61 | } 62 | #endif 63 | 64 | /** 65 | * @brief Convert the performance level value (`rsmi_dev_perf_level_t`) to a string. 66 | * @param[in] perf_level the bitmask to convert to a string 67 | * @return all event throttle reasons (`[[nodiscard]]`) 68 | */ 69 | [[nodiscard]] std::string performance_level_to_string(rsmi_dev_perf_level_t perf_level); 70 | 71 | } // namespace hws::detail 72 | 73 | #endif // HWS_GPU_AMD_UTILITY_HPP_ 74 | -------------------------------------------------------------------------------- /include/hws/gpu_intel/hardware_sampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 
7 | * 8 | * @brief Defines a hardware sampler for Intel GPUs using Intel's Level Zero. 9 | */ 10 | 11 | #ifndef HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ 13 | #pragma once 14 | 15 | #include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle 16 | #include "hws/gpu_intel/level_zero_samples.hpp" // hws::{level_zero_general_samples, level_zero_clock_samples, level_zero_power_samples, level_zero_memory_samples, level_zero_temperature_samples} 17 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 18 | #include "hws/sample_category.hpp" // hws::sample_category 19 | 20 | #include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter 21 | 22 | #include // std::atomic 23 | #include // std::chrono::milliseconds, std::chrono_literals namespace 24 | #include // std::size_t 25 | #include // std::ostream forward declaration 26 | #include // std::string 27 | 28 | namespace hws { 29 | 30 | using namespace std::chrono_literals; 31 | 32 | /** 33 | * @brief A hardware sampler for Intel GPUs. 34 | * @details Uses Intel's Level Zero library. 35 | */ 36 | class gpu_intel_hardware_sampler : public hardware_sampler { 37 | public: 38 | /** 39 | * @brief Construct a new Intel GPU hardware sampler for the default device with the default sampling interval. 40 | * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. 41 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 42 | */ 43 | explicit gpu_intel_hardware_sampler(sample_category category = sample_category::all); 44 | /** 45 | * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the default sampling interval. 46 | * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. 
47 | * @param[in] device_id the ID of the device to sample 48 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 49 | */ 50 | explicit gpu_intel_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); 51 | /** 52 | * @brief Construct a new Intel GPU hardware sampler for the default device with the @p sampling_interval. 53 | * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. 54 | * @param[in] sampling_interval the used sampling interval 55 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 56 | */ 57 | explicit gpu_intel_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 58 | /** 59 | * @brief Construct a new Intel GPU hardware sampler for device @p device_id with the @p sampling_interval. 60 | * @details If this is the first Intel GPU sampler, initializes the Level Zero environment. 61 | * @param[in] device_id the ID of the device to sample 62 | * @param[in] sampling_interval the used sampling interval 63 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 64 | */ 65 | gpu_intel_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 66 | 67 | /** 68 | * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 69 | */ 70 | gpu_intel_hardware_sampler(const gpu_intel_hardware_sampler &) = delete; 71 | /** 72 | * @brief Delete the move-constructor (already implicitly deleted due to the base class's std::atomic member). 73 | */ 74 | gpu_intel_hardware_sampler(gpu_intel_hardware_sampler &&) noexcept = delete; 75 | /** 76 | * @brief Delete the copy-assignment operator (already implicitly deleted due to the base class's std::atomic member). 
77 | */ 78 | gpu_intel_hardware_sampler &operator=(const gpu_intel_hardware_sampler &) = delete; 79 | /** 80 | * @brief Delete the move-assignment operator (already implicitly deleted due to the base class's std::atomic member). 81 | */ 82 | gpu_intel_hardware_sampler &operator=(gpu_intel_hardware_sampler &&) noexcept = delete; 83 | 84 | /** 85 | * @brief Destruct the Intel GPU hardware sampler. If the sampler is still running, stops it. 86 | */ 87 | ~gpu_intel_hardware_sampler() override; 88 | 89 | /** 90 | * @brief Return the general Intel GPU samples of this hardware sampler. 91 | * @return the general Intel GPU samples (`[[nodiscard]]`) 92 | */ 93 | [[nodiscard]] const level_zero_general_samples &general_samples() const noexcept { return general_samples_; } 94 | 95 | /** 96 | * @brief Return the clock related Intel GPU samples of this hardware sampler. 97 | * @return the clock related Intel GPU samples (`[[nodiscard]]`) 98 | */ 99 | [[nodiscard]] const level_zero_clock_samples &clock_samples() const noexcept { return clock_samples_; } 100 | 101 | /** 102 | * @brief Return the power related Intel GPU samples of this hardware sampler. 103 | * @return the power related Intel GPU samples (`[[nodiscard]]`) 104 | */ 105 | [[nodiscard]] const level_zero_power_samples &power_samples() const noexcept { return power_samples_; } 106 | 107 | /** 108 | * @brief Return the memory related Intel GPU samples of this hardware sampler. 109 | * @return the memory related Intel GPU samples (`[[nodiscard]]`) 110 | */ 111 | [[nodiscard]] const level_zero_memory_samples &memory_samples() const noexcept { return memory_samples_; } 112 | 113 | /** 114 | * @brief Return the temperature related Intel GPU samples of this hardware sampler. 
115 | * @return the temperature related Intel GPU samples (`[[nodiscard]]`) 116 | */ 117 | [[nodiscard]] const level_zero_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } 118 | 119 | /** 120 | * @copydoc hws::hardware_sampler::device_identification 121 | */ 122 | std::string device_identification() const final; 123 | 124 | /** 125 | * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const 126 | */ 127 | [[nodiscard]] std::string samples_only_as_yaml_string() const final; 128 | 129 | private: 130 | /** 131 | * @copydoc hws::hardware_sampler::sampling_loop 132 | */ 133 | void sampling_loop() final; 134 | 135 | /// The device handle for the device to sample. 136 | detail::level_zero_device_handle device_; 137 | 138 | /// The general Intel GPU samples. 139 | level_zero_general_samples general_samples_{}; 140 | /// The clock related Intel GPU samples. 141 | level_zero_clock_samples clock_samples_{}; 142 | /// The power related Intel GPU samples. 143 | level_zero_power_samples power_samples_{}; 144 | /// The memory related Intel GPU samples. 145 | level_zero_memory_samples memory_samples_{}; 146 | /// The temperature related Intel GPU samples. 147 | level_zero_temperature_samples temperature_samples_{}; 148 | 149 | /// The total number of currently active Intel GPU hardware samplers. 150 | inline static std::atomic instances_{ 0 }; 151 | /// True if the Level Zero environment has been successfully initialized (only done by a single hardware sampler). 152 | inline static std::atomic init_finished_{ false }; 153 | }; 154 | 155 | /** 156 | * @brief Output all Intel GPU samples gathered by the @p sampler to the given output-stream @p out. 157 | * @details Sets `std::ios_base::failbit` if the @p sampler is still sampling. 
158 | * @param[in,out] out the output-stream to write the Intel GPU samples to 159 | * @param[in] sampler the Intel GPU hardware sampler 160 | * @return the output-stream 161 | */ 162 | std::ostream &operator<<(std::ostream &out, const gpu_intel_hardware_sampler &sampler); 163 | 164 | } // namespace hws 165 | 166 | /// @cond Doxygen_suppress 167 | 168 | template <> 169 | struct fmt::formatter : fmt::ostream_formatter { }; 170 | 171 | /// @endcond 172 | 173 | #endif // HWS_GPU_INTEL_HARDWARE_SAMPLER_HPP_ 174 | -------------------------------------------------------------------------------- /include/hws/gpu_intel/level_zero_device_handle.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines a pImpl class for a Level Zero device handle. 9 | */ 10 | 11 | #ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ 12 | #define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ 13 | #pragma once 14 | 15 | #include // std::size_t 16 | #include // std::shared_ptr 17 | #include // std::runtime_error 18 | 19 | namespace hws::detail { 20 | 21 | /** 22 | * @brief PImpl class to hide the ze_driver_handle_t and ze_device_handle_t handles from the public interface (and, therefore, the "level_zero/ze_api.h" header). 23 | */ 24 | class level_zero_device_handle { 25 | public: 26 | /** 27 | * @brief Default construct a level_zero_device_handle. Such a handle may not be used in a Level Zero function! 28 | */ 29 | level_zero_device_handle() = default; 30 | /** 31 | * @brief Construct a level_zero_device_handle for the device with ID @p device_id. 
32 | * @param[in] device_id the device to get the handle for 33 | */ 34 | explicit level_zero_device_handle(std::size_t device_id); 35 | 36 | /** 37 | * @brief The pImpl helper struct. 38 | */ 39 | struct level_zero_device_handle_impl; 40 | 41 | /** 42 | * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. 43 | * @throws std::runtime_error if `*this` has been default constructed 44 | * @return the device handle (`[[nodiscard]]`) 45 | */ 46 | [[nodiscard]] level_zero_device_handle_impl &get_impl() { 47 | if (impl == nullptr) { 48 | throw std::runtime_error{ "Pointer to implementation is a nullptr! Maybe *this is default constructed?" }; 49 | } 50 | return *impl; 51 | } 52 | 53 | /** 54 | * @brief Get the level_zero_device_handle implementation used to access the actual ze_driver_handle_t and ze_device_handle_t. 55 | * @throws std::runtime_error if `*this` has been default constructed 56 | * @return the device handle (`[[nodiscard]]`) 57 | */ 58 | [[nodiscard]] const level_zero_device_handle_impl &get_impl() const { 59 | if (impl == nullptr) { 60 | throw std::runtime_error{ "Pointer to implementation is a nullptr! Maybe *this is default constructed?" }; 61 | } 62 | return *impl; 63 | } 64 | 65 | private: 66 | /// The pointer to the actual implementation struct. 67 | std::shared_ptr impl{}; 68 | }; 69 | 70 | } // namespace hws::detail 71 | 72 | #endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_HPP_ 73 | -------------------------------------------------------------------------------- /include/hws/gpu_intel/level_zero_device_handle_impl.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 
7 | * 8 | * @brief Implements a pImpl class for a Level Zero device handle. 9 | */ 10 | 11 | #ifndef HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ 12 | #define HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ 13 | #pragma once 14 | 15 | #include "hws/gpu_intel/level_zero_device_handle.hpp" // hws::detail::level_zero_device_handle 16 | #include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK 17 | 18 | #include "fmt/format.h" // fmt::format 19 | #include "level_zero/ze_api.h" // Level Zero runtime functions 20 | 21 | #include // std::size_t 22 | #include // std::uint32_t 23 | #include // std::make_shared 24 | #include // std::runtime_error 25 | #include // std::vector 26 | 27 | namespace hws::detail { 28 | 29 | /** 30 | * @brief The PImpl implementation struct encapsulating a ze_driver_handle_t and ze_device_handle_t. 31 | */ 32 | struct level_zero_device_handle::level_zero_device_handle_impl { 33 | public: 34 | /** 35 | * @brief Get the ze_driver_handle_t and ze_device_handle_t for the device with ID @p device_id. 
36 | * @param[in] device_id the device to get the handle for 37 | */ 38 | explicit level_zero_device_handle_impl(const std::size_t device_id) { 39 | // discover the number of drivers 40 | std::uint32_t driver_count{ 0 }; 41 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) 42 | 43 | // check if only the single GPU driver has been found 44 | if (driver_count > 1) { 45 | throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; 46 | } 47 | 48 | // get the GPU driver 49 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) 50 | 51 | // get all GPUs for the current driver 52 | std::uint32_t device_count{ 0 }; 53 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) 54 | 55 | // check if enough GPUs have been found: device_id is used as index into the device list below, so it must be smaller than the number of devices (not drivers) 56 | if (device_count <= device_id) { 57 | throw std::runtime_error{ fmt::format("Found only {} GPUs, but GPU with the ID {} was requested!", device_count, device_id) }; 58 | } 59 | 60 | // get the GPUs 61 | std::vector<ze_device_handle_t> all_devices(device_count); 62 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, all_devices.data())) 63 | 64 | // save the requested device 65 | device = all_devices[device_id]; 66 | } 67 | 68 | /// The wrapped Level Zero driver handle. 69 | ze_driver_handle_t driver{}; 70 | /// The wrapped Level Zero device handle. 
71 | ze_device_handle_t device{}; 72 | }; 73 | 74 | inline level_zero_device_handle::level_zero_device_handle(const std::size_t device_id) : 75 | impl{ std::make_shared(device_id) } { } 76 | 77 | } // namespace hws::detail 78 | 79 | #endif // HWS_GPU_INTEL_LEVEL_ZERO_DEVICE_HANDLE_IMPL_HPP_ 80 | -------------------------------------------------------------------------------- /include/hws/gpu_intel/utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Implements utility functionality for the Intel GPU sampler. 9 | */ 10 | 11 | #ifndef HWS_GPU_INTEL_UTILITY_HPP_ 12 | #define HWS_GPU_INTEL_UTILITY_HPP_ 13 | #pragma once 14 | 15 | #include "fmt/format.h" // fmt::format 16 | #include "level_zero/ze_api.h" // Level Zero runtime functions 17 | #include "level_zero/zes_api.h" // Level Zero runtime functions 18 | 19 | #include // std::runtime_error 20 | #include // std::string 21 | #include // std::string_view 22 | #include // std::vector 23 | 24 | namespace hws::detail { 25 | 26 | /** 27 | * @brief Given the Level Zero API error @p errc, returns a useful error string. 28 | * @param[in] errc the Level Zero API error code 29 | * @return the error string (`[[nodiscard]]`) 30 | */ 31 | [[nodiscard]] std::string_view to_result_string(ze_result_t errc); 32 | 33 | /** 34 | * @def HWS_LEVEL_ZERO_ERROR_CHECK 35 | * @brief Defines the `HWS_LEVEL_ZERO_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 36 | * @details Throws an exception if a Level Zero call returns with an error. Additionally outputs a more concrete custom error string. 
37 | */ 38 | #if defined(HWS_ERROR_CHECKS_ENABLED) 39 | #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) \ 40 | { \ 41 | const ze_result_t errc = level_zero_func; \ 42 | if (errc != ZE_RESULT_SUCCESS) { \ 43 | throw std::runtime_error{ fmt::format("Error in Level Zero function call \"{}\": {}", #level_zero_func, ::hws::detail::to_result_string(errc)) }; \ 44 | } \ 45 | } 46 | #else 47 | #define HWS_LEVEL_ZERO_ERROR_CHECK(level_zero_func) level_zero_func; 48 | #endif 49 | 50 | /** 51 | * @brief Convert the @p flags to a vector of strings. 52 | * @param[in] flags the flags to convert to strings 53 | * @return a vector containing all flags as strings (`[[nodiscard]]`) 54 | */ 55 | [[nodiscard]] std::vector property_flags_to_vector(ze_device_property_flags_t flags); 56 | 57 | /** 58 | * @brief Convert the throttle reason bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". 59 | * @param[in] reasons the bitmask to convert to a string 60 | * @return all throttle reasons (`[[nodiscard]]`) 61 | */ 62 | [[nodiscard]] std::string throttle_reason_to_string(zes_freq_throttle_reason_flags_t reasons); 63 | 64 | /** 65 | * @brief Convert a Level Zero memory type to a string representation. 66 | * @param[in] mem_type the Level Zero memory type 67 | * @return the string representation (`[[nodiscard]]`) 68 | */ 69 | [[nodiscard]] std::string memory_module_to_name(zes_mem_type_t mem_type); 70 | 71 | /** 72 | * @brief Convert a Level Zero memory location to a string representation. 
73 | * @param[in] mem_loc the Level Zero memory location 74 | * @return the string representation (`[[nodiscard]]`) 75 | */ 76 | [[nodiscard]] std::string memory_location_to_name(zes_mem_loc_t mem_loc); 77 | 78 | } // namespace hws::detail 79 | 80 | #endif // HWS_GPU_INTEL_UTILITY_HPP_ 81 | -------------------------------------------------------------------------------- /include/hws/gpu_nvidia/hardware_sampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines a hardware sampler for NVIDIA GPUs using NVIDIA's Management Library (NVML). 9 | */ 10 | 11 | #ifndef HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ 13 | #pragma once 14 | 15 | #include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::nvml_device_handle 16 | #include "hws/gpu_nvidia/nvml_samples.hpp" // hws::{nvml_general_samples, nvml_clock_samples, nvml_power_samples, nvml_memory_samples, nvml_temperature_samples} 17 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 18 | #include "hws/sample_category.hpp" // hws::sample_category 19 | 20 | #include "fmt/format.h" // fmt::formatter, fmt::ostream_formatter 21 | 22 | #include // std::atomic 23 | #include // std::chrono::milliseconds, std::chrono_literals namespace 24 | #include // std::size_t 25 | #include // std::ostream forward declaration 26 | #include // std::string 27 | 28 | namespace hws { 29 | 30 | using namespace std::chrono_literals; 31 | 32 | /** 33 | * @brief A hardware sampler for NVIDIA GPUs. 34 | * @details Uses NVIDIA's NVML library. 
35 | */ 36 | class gpu_nvidia_hardware_sampler : public hardware_sampler { 37 | public: 38 | /** 39 | * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the default sampling interval. 40 | * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 41 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 42 | */ 43 | explicit gpu_nvidia_hardware_sampler(sample_category category = sample_category::all); 44 | /** 45 | * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the default sampling interval. 46 | * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 47 | * @param[in] device_id the ID of the device to sample 48 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 49 | */ 50 | explicit gpu_nvidia_hardware_sampler(std::size_t device_id, sample_category category = sample_category::all); 51 | /** 52 | * @brief Construct a new NVIDIA GPU hardware sampler for the default device with the @p sampling_interval. 53 | * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 54 | * @param[in] sampling_interval the used sampling interval 55 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 56 | */ 57 | explicit gpu_nvidia_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 58 | /** 59 | * @brief Construct a new NVIDIA GPU hardware sampler for device @p device_id with the @p sampling_interval. 60 | * @details If this is the first NVIDIA GPU sampler, initializes the NVML environment. 
61 | * @param[in] device_id the ID of the device to sample 62 | * @param[in] sampling_interval the used sampling interval 63 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 64 | */ 65 | gpu_nvidia_hardware_sampler(std::size_t device_id, std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 66 | 67 | /** 68 | * @brief Delete the copy-constructor (already implicitly deleted due to the base class's std::atomic member). 69 | */ 70 | gpu_nvidia_hardware_sampler(const gpu_nvidia_hardware_sampler &) = delete; 71 | /** 72 | * @brief Delete the move-constructor (already implicitly deleted due to the base class's std::atomic member). 73 | */ 74 | gpu_nvidia_hardware_sampler(gpu_nvidia_hardware_sampler &&) noexcept = delete; 75 | /** 76 | * @brief Delete the copy-assignment operator (already implicitly deleted due to the base class's std::atomic member). 77 | */ 78 | gpu_nvidia_hardware_sampler &operator=(const gpu_nvidia_hardware_sampler &) = delete; 79 | /** 80 | * @brief Delete the move-assignment operator (already implicitly deleted due to the base class's std::atomic member). 81 | */ 82 | gpu_nvidia_hardware_sampler &operator=(gpu_nvidia_hardware_sampler &&) noexcept = delete; 83 | 84 | /** 85 | * @brief Destruct the NVIDIA GPU hardware sampler. If the sampler is still running, stops it. 86 | * @details If this is the last NVIDIA GPU sampler, cleans up the NVML environment. 87 | */ 88 | ~gpu_nvidia_hardware_sampler() override; 89 | 90 | /** 91 | * @brief Return the general NVIDIA GPU samples of this hardware sampler. 92 | * @return the general NVIDIA GPU samples (`[[nodiscard]]`) 93 | */ 94 | [[nodiscard]] const nvml_general_samples &general_samples() const noexcept { return general_samples_; } 95 | 96 | /** 97 | * @brief Return the clock related NVIDIA GPU samples of this hardware sampler. 
98 | * @return the clock related NVIDIA GPU samples (`[[nodiscard]]`) 99 | */ 100 | [[nodiscard]] const nvml_clock_samples &clock_samples() const noexcept { return clock_samples_; } 101 | 102 | /** 103 | * @brief Return the power related NVIDIA GPU samples of this hardware sampler. 104 | * @return the power related NVIDIA GPU samples (`[[nodiscard]]`) 105 | */ 106 | [[nodiscard]] const nvml_power_samples &power_samples() const noexcept { return power_samples_; } 107 | 108 | /** 109 | * @brief Return the memory related NVIDIA GPU samples of this hardware sampler. 110 | * @return the memory related NVIDIA GPU samples (`[[nodiscard]]`) 111 | */ 112 | [[nodiscard]] const nvml_memory_samples &memory_samples() const noexcept { return memory_samples_; } 113 | 114 | /** 115 | * @brief Return the temperature related NVIDIA GPU samples of this hardware sampler. 116 | * @return the temperature related NVIDIA GPU samples (`[[nodiscard]]`) 117 | */ 118 | [[nodiscard]] const nvml_temperature_samples &temperature_samples() const noexcept { return temperature_samples_; } 119 | 120 | /** 121 | * @copydoc hws::hardware_sampler::device_identification 122 | */ 123 | [[nodiscard]] std::string device_identification() const final; 124 | 125 | /** 126 | * @copydoc hws::hardware_sampler::samples_only_as_yaml_string() const 127 | */ 128 | [[nodiscard]] std::string samples_only_as_yaml_string() const final; 129 | 130 | private: 131 | /** 132 | * @copydoc hws::hardware_sampler::sampling_loop 133 | */ 134 | void sampling_loop() final; 135 | 136 | /// The device handle for the device to sample. 137 | detail::nvml_device_handle device_{}; 138 | 139 | /// The general NVIDIA GPU samples. 140 | nvml_general_samples general_samples_{}; 141 | /// The clock related NVIDIA GPU samples. 142 | nvml_clock_samples clock_samples_{}; 143 | /// The power related NVIDIA GPU samples. 144 | nvml_power_samples power_samples_{}; 145 | /// The memory related NVIDIA GPU samples. 
146 | nvml_memory_samples memory_samples_{}; 147 | /// The temperature related NVIDIA GPU samples. 148 | nvml_temperature_samples temperature_samples_{}; 149 | 150 | /// The total number of currently active NVIDIA GPU hardware samplers. 151 | inline static std::atomic instances_{ 0 }; 152 | /// True if the NVML environment has been successfully initialized (only done by a single hardware sampler). 153 | inline static std::atomic init_finished_{ false }; 154 | }; 155 | 156 | /** 157 | * @brief Output all NVIDIA GPU samples gathered by the @p sampler to the given output-stream @p out. 158 | * @details Sets `std::ios_base::failbit` if the @p sampler is still sampling. 159 | * @param[in,out] out the output-stream to write the NVIDIA GPU samples to 160 | * @param[in] sampler the NVIDIA GPU hardware sampler 161 | * @return the output-stream 162 | */ 163 | std::ostream &operator<<(std::ostream &out, const gpu_nvidia_hardware_sampler &sampler); 164 | 165 | } // namespace hws 166 | 167 | /// @cond Doxygen_suppress 168 | 169 | template <> 170 | struct fmt::formatter : fmt::ostream_formatter { }; 171 | 172 | /// @endcond 173 | 174 | #endif // HWS_GPU_NVIDIA_HARDWARE_SAMPLER_HPP_ 175 | -------------------------------------------------------------------------------- /include/hws/gpu_nvidia/nvml_device_handle.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines a pImpl class for an NVML device handle. 
9 | */ 10 | 11 | #ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ 12 | #define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ 13 | #pragma once 14 | 15 | #include // std::size_t 16 | #include // std::shared_ptr 17 | #include // std::runtime_error 18 | 19 | namespace hws::detail { 20 | 21 | /** 22 | * @brief PImpl class to hide the nvmlDevice_t handle from the public interface (and, therefore, the "nvml.h" header). 23 | */ 24 | class nvml_device_handle { 25 | public: 26 | /** 27 | * @brief Default construct an nvml_device_handle. Such a handle may not be used in an NVML function! 28 | */ 29 | nvml_device_handle() = default; 30 | /** 31 | * @brief Construct an nvml_device_handle for the device with ID @p device_id. 32 | * @param[in] device_id the device to get the handle for 33 | */ 34 | explicit nvml_device_handle(std::size_t device_id); 35 | 36 | /** 37 | * @brief The pImpl helper struct. 38 | */ 39 | struct nvml_device_handle_impl; 40 | 41 | /** 42 | * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. 43 | * @throws std::runtime_error if `*this` has been default constructed 44 | * @return the device handle (`[[nodiscard]]`) 45 | */ 46 | [[nodiscard]] nvml_device_handle_impl &get_impl() { 47 | if (impl == nullptr) { 48 | throw std::runtime_error{ "Pointer to implementation is a nullptr! Maybe *this is default constructed?" }; 49 | } 50 | return *impl; 51 | } 52 | 53 | /** 54 | * @brief Get the nvml_device_handle implementation used to access the actual nvmlDevice_t. 55 | * @throws std::runtime_error if `*this` has been default constructed 56 | * @return the device handle (`[[nodiscard]]`) 57 | */ 58 | [[nodiscard]] const nvml_device_handle_impl &get_impl() const { 59 | if (impl == nullptr) { 60 | throw std::runtime_error{ "Pointer to implementation is a nullptr! Maybe *this is default constructed?" }; 61 | } 62 | return *impl; 63 | } 64 | 65 | private: 66 | /// The pointer to the actual implementation struct. 
67 | std::shared_ptr impl{}; 68 | }; 69 | 70 | } // namespace hws::detail 71 | 72 | #endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_HPP_ 73 | -------------------------------------------------------------------------------- /include/hws/gpu_nvidia/nvml_device_handle_impl.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Implements a pImpl class for an NVML device handle. 9 | */ 10 | 11 | #ifndef HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ 12 | #define HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ 13 | #pragma once 14 | 15 | #include "hws/gpu_nvidia/nvml_device_handle.hpp" // hws::detail::nvml_device_handle 16 | #include "hws/gpu_nvidia/utility.hpp" // HWS_NVML_ERROR_CHECK 17 | 18 | #include "nvml.h" // nvmlDevice_t 19 | 20 | #include // std::size_t 21 | #include // std::make_shared 22 | 23 | namespace hws::detail { 24 | 25 | /** 26 | * @brief The PImpl implementation struct encapsulating a nvmlDevice_t. 27 | */ 28 | struct nvml_device_handle::nvml_device_handle_impl { 29 | public: 30 | /** 31 | * @brief Get the nvmlDevice_t for the device with ID @p device_id. 32 | * @param[in] device_id the device to get the handle for 33 | */ 34 | explicit nvml_device_handle_impl(const std::size_t device_id) { 35 | HWS_NVML_ERROR_CHECK(nvmlDeviceGetHandleByIndex(static_cast(device_id), &device)) 36 | } 37 | 38 | /// The wrapped NVML device handle. 
39 | nvmlDevice_t device{}; 40 | }; 41 | 42 | inline nvml_device_handle::nvml_device_handle(const std::size_t device_id) : 43 | impl{ std::make_shared(device_id) } { } 44 | 45 | } // namespace hws::detail 46 | 47 | #endif // HWS_GPU_NVIDIA_NVML_DEVICE_HANDLE_IMPL_HPP_ 48 | -------------------------------------------------------------------------------- /include/hws/gpu_nvidia/utility.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Implements utility functionality for the NVIDIA GPU sampler. 9 | */ 10 | 11 | #ifndef HWS_GPU_NVIDIA_UTILITY_HPP_ 12 | #define HWS_GPU_NVIDIA_UTILITY_HPP_ 13 | #pragma once 14 | 15 | #include "cuda_runtime_api.h" // CUDA runtime functions 16 | #include "fmt/format.h" // fmt::format 17 | #include "nvml.h" // NVML runtime functions 18 | 19 | #include // std::runtime_error 20 | #include // std::string 21 | 22 | namespace hws::detail { 23 | 24 | /** 25 | * @def HWS_NVML_ERROR_CHECK 26 | * @brief Defines the `HWS_NVML_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 27 | * @details Throws an exception if an NVML call returns with an error. Additionally outputs a more concrete error string. 28 | */ 29 | /** 30 | * @def HWS_CUDA_ERROR_CHECK 31 | * @brief Defines the `HWS_CUDA_ERROR_CHECK` macro if `HWS_ERROR_CHECKS_ENABLED` is defined, does nothing otherwise. 32 | * @details Throws an exception if a CUDA call returns with an error. Additionally outputs a more concrete error string. 
33 | */ 34 | #if defined(HWS_ERROR_CHECKS_ENABLED) 35 | #define HWS_NVML_ERROR_CHECK(nvml_func) \ 36 | { \ 37 | const nvmlReturn_t errc = nvml_func; \ 38 | if (errc != NVML_SUCCESS) { \ 39 | throw std::runtime_error{ fmt::format("Error in NVML function call \"{}\": {} ({})", #nvml_func, nvmlErrorString(errc), static_cast(errc)) }; \ 40 | } \ 41 | } 42 | 43 | #define HWS_CUDA_ERROR_CHECK(cuda_func) \ 44 | { \ 45 | const cudaError_t errc = cuda_func; \ 46 | if (errc != cudaSuccess) { \ 47 | throw std::runtime_error{ fmt::format("Error in CUDA function call \"{}\": {} ({})", #cuda_func, cudaGetErrorName(errc), cudaGetErrorString(errc)) }; \ 48 | } \ 49 | } 50 | #else 51 | #define HWS_NVML_ERROR_CHECK(nvml_func) nvml_func; 52 | #define HWS_CUDA_ERROR_CHECK(cuda_func) cuda_func; 53 | #endif 54 | 55 | #if CUDA_VERSION >= 12000 56 | 57 | /** 58 | * @brief Convert the clock throttle reason event bitmask to a string representation. If the provided bitmask represents multiple reasons, they are split using "|". 59 | * @param[in] clocks_event_reasons the bitmask to convert to a string 60 | * @return all event throttle reasons (`[[nodiscard]]`) 61 | */ 62 | [[nodiscard]] std::string throttle_event_reason_to_string(unsigned long long clocks_event_reasons); 63 | 64 | #endif 65 | 66 | } // namespace hws::detail 67 | 68 | #endif // HWS_GPU_NVIDIA_UTILITY_HPP_ 69 | -------------------------------------------------------------------------------- /include/hws/hardware_sampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines the base class for all hardware samplers. 
9 | */ 10 | 11 | #ifndef HWS_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_HARDWARE_SAMPLER_HPP_ 13 | #pragma once 14 | 15 | #include "hws/event.hpp" // hws::event 16 | #include "hws/sample_category.hpp" // hws::sample_category 17 | 18 | #include // std::atomic 19 | #include // std::chrono::{system_clock::time_point, steady_clock::time_point, milliseconds} 20 | #include // std::size_t 21 | #include // std::filesystem::path 22 | #include // std::string 23 | #include // std::thread 24 | #include // std::vector 25 | 26 | namespace hws { 27 | 28 | /** 29 | * @brief The base class for all specialized hardware samplers. 30 | */ 31 | class hardware_sampler { 32 | public: 33 | /** 34 | * @brief Construct a new hardware sampler with the provided @p sampling_interval. 35 | * @param[in] sampling_interval the used sampling interval 36 | * @param[in] category the sample categories that are enabled for hardware sampling 37 | * @throws std::invalid_argument if the @p sampling_interval is zero 38 | */ 39 | hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category); 40 | 41 | /** 42 | * @brief Delete the copy-constructor (already implicitly deleted due to the std::atomic member). 43 | */ 44 | hardware_sampler(const hardware_sampler &) = delete; 45 | /** 46 | * @brief Delete the move-constructor (already implicitly deleted due to the std::atomic member). 47 | */ 48 | hardware_sampler(hardware_sampler &&) noexcept = delete; 49 | /** 50 | * @brief Delete the copy-assignment operator (already implicitly deleted due to the std::atomic member). 51 | */ 52 | hardware_sampler &operator=(const hardware_sampler &) = delete; 53 | /** 54 | * @brief Delete the move-assignment operator (already implicitly deleted due to the std::atomic member). 55 | */ 56 | hardware_sampler &operator=(hardware_sampler &&) noexcept = delete; 57 | 58 | /** 59 | * @brief Pure virtual default destructor. 
60 | */ 61 | virtual ~hardware_sampler() = 0; 62 | 63 | /** 64 | * @brief Start hardware sampling in a new std::thread. 65 | * @details Once a hardware sampler has been started, it can never be started again, even if `hardware_sampler::stop_sampling` has been called. 66 | * @throws std::runtime_error if the hardware sampler has already been started 67 | */ 68 | void start_sampling(); 69 | /** 70 | * @brief Stop hardware sampling. Signals the running std::thread to stop sampling and joins it. 71 | * @details Once a hardware sampler has been stopped, it can never be stopped again. 72 | * @throws std::runtime_error if the hardware sampler hasn't been started yet 73 | * @throws std::runtime_error if the hardware sampler has already been stopped 74 | */ 75 | void stop_sampling(); 76 | /** 77 | * @brief Pause hardware sampling. 78 | */ 79 | void pause_sampling(); 80 | /** 81 | * @brief Resume hardware sampling. 82 | * @throws std::runtime_error if the hardware sampler has already been stopped 83 | */ 84 | void resume_sampling(); 85 | 86 | /** 87 | * @brief Check whether this hardware sampler has already started sampling. 88 | * @return `true` if the hardware sampler has already started sampling, `false` otherwise (`[[nodiscard]]`) 89 | */ 90 | [[nodiscard]] bool has_sampling_started() const noexcept; 91 | /** 92 | * @brief Check whether this hardware sampler is currently sampling. 93 | * @return `true` if the hardware sampler is currently sampling, `false` otherwise (`[[nodiscard]]`) 94 | */ 95 | [[nodiscard]] bool is_sampling() const noexcept; 96 | /** 97 | * @brief Check whether this hardware sampler has already stopped sampling. 98 | * @return `true` if the hardware sampler has already stopped sampling, `false` otherwise (`[[nodiscard]]`) 99 | */ 100 | [[nodiscard]] bool has_sampling_stopped() const noexcept; 101 | 102 | /** 103 | * @brief Add a new event. 104 | * @param e the event 105 | */ 106 | void add_event(event e); 107 | /** 108 | * @brief Add a new event. 
109 | * @param[in] time_point the time point when the event occurred 110 | * @param[in] name the name of the event 111 | */ 112 | void add_event(decltype(event::time_point) time_point, decltype(event::name) name); 113 | /** 114 | * @brief Add a new event. The time_point will be the current time. 115 | * @param[in] name the name of the event 116 | */ 117 | void add_event(decltype(event::name) name); 118 | 119 | /** 120 | * @brief Return the number of recorded events. 121 | * @return the number of events (`[[nodiscard]]`) 122 | */ 123 | [[nodiscard]] std::size_t num_events() const noexcept { return events_.size(); } 124 | 125 | /** 126 | * @brief Return a vector of all recorded events. 127 | * @return the events (`[[nodiscard]]`) 128 | */ 129 | [[nodiscard]] const std::vector &get_events() const noexcept { return events_; } 130 | 131 | /** 132 | * @brief Return the event at index @p idx. 133 | * @param[in] idx the event to return 134 | * @throws std::out_of_range the the @p idx is out of bounce 135 | * @return the event at index @p idx (`[[nodiscard]]`) 136 | */ 137 | [[nodiscard]] event get_event(std::size_t idx) const; 138 | 139 | /** 140 | * @brief Return the time points the samples of this hardware sampler occurred. 141 | * @return the time points (`[[nodiscard]]`) 142 | */ 143 | [[nodiscard]] std::vector sampling_time_points() const noexcept { return time_points_; } 144 | 145 | /** 146 | * @brief Return the sampling interval of this hardware sampler. 147 | * @return the samping interval in milliseconds (`[[nodiscard]]`) 148 | */ 149 | [[nodiscard]] std::chrono::milliseconds sampling_interval() const noexcept { return sampling_interval_; } 150 | 151 | /** 152 | * @brief Dump the hardware samples to the YAML file with @p filename. 
153 | * @param[in] filename the YAML file to append the hardware samples to 154 | */ 155 | void dump_yaml(const char *filename) const; 156 | /** 157 | * @copydoc hws::hardware_sampler::dump_yaml(const char *) const 158 | */ 159 | void dump_yaml(const std::string &filename) const; 160 | /** 161 | * @copydoc hws::hardware_sampler::dump_yaml(const char *) const 162 | */ 163 | void dump_yaml(const std::filesystem::path &filename) const; 164 | 165 | /** 166 | * @brief Return the unique device identification. Can be used as unique key in the YAML string. 167 | * @return the unique device identification (`[[nodiscard]]`) 168 | */ 169 | [[nodiscard]] virtual std::string device_identification() const = 0; 170 | 171 | /** 172 | * @brief Return the hardware samples as well as events and time points as YAML string. 173 | * @return the YAML content as string (`[[nodiscard]]`) 174 | */ 175 | [[nodiscard]] std::string as_yaml_string() const; 176 | /** 177 | * @brief Return only the hardware samples as YAML string. 178 | * @throws std::runtime_error if sampling is still running 179 | * @return the YAML content as string (`[[nodiscard]]`) 180 | */ 181 | [[nodiscard]] virtual std::string samples_only_as_yaml_string() const = 0; 182 | 183 | protected: 184 | /** 185 | * @brief Gather the hardware samples. Called in another std::thread. 186 | */ 187 | virtual void sampling_loop() = 0; 188 | 189 | /** 190 | * @brief Add a new time point to this hardware sampler. Called during the sampling loop. 191 | * @param[in] time_point the new time point to add 192 | */ 193 | void add_time_point(std::chrono::steady_clock::time_point time_point); 194 | 195 | /** 196 | * @brief Check whether the @p category is currently enabled for hardware sampling or not. 
197 | * @param[in] category the sample_category to check 198 | * @return Returns `true` if @p category is enabled for sampling, otherwise `false` (`[[nodiscard]]`) 199 | */ 200 | [[nodiscard]] bool sample_category_enabled(sample_category category) const noexcept; 201 | 202 | private: 203 | /// A boolean flag indicating whether the sampling has already started. 204 | std::atomic sampling_started_{ false }; 205 | /// A boolean flag indicating whether the sampling has already stopped. 206 | std::atomic sampling_stopped_{ false }; 207 | /// A boolean flag indicating whether the sampling is currently running. 208 | std::atomic sampling_running_{ false }; 209 | 210 | /// The wall-clock time at which the hardware sampling started. 211 | std::chrono::system_clock::time_point start_date_time_{}; 212 | 213 | /// The different tracked events. 214 | std::vector events_{}; 215 | 216 | /// The std::thread used to gather the hardware samples. 217 | std::thread sampling_thread_{}; 218 | 219 | /// The time points at which this hardware sampler sampled its values. 220 | std::vector time_points_{}; 221 | 222 | /// The sampling interval of this hardware sampler. 223 | const std::chrono::milliseconds sampling_interval_{}; 224 | 225 | /// The bitmask of sample categories to use. 226 | const sample_category sample_category_{}; 227 | }; 228 | 229 | } // namespace hws 230 | 231 | #endif // HWS_HARDWARE_SAMPLER_HPP_ 232 | -------------------------------------------------------------------------------- /include/hws/sample_category.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines an enum class with all sample categories to be able to only selectively enable some samples. 
9 | */ 10 | 11 | #ifndef HWS_SAMPLE_CATEGORY_HPP_ 12 | #define HWS_SAMPLE_CATEGORY_HPP_ 13 | #pragma once 14 | 15 | namespace hws { 16 | 17 | /** 18 | * @brief Enum class as bitfield containing the possible sample categories. 19 | * @details The sample_category "gfx" and "idle_state" are only used in the cpu_hardware_sampler. 20 | * Additionally, the "all" sample_category is available to easily enable all hardware samples (default). 21 | */ 22 | enum class sample_category : int { 23 | // clang-format off 24 | /// General hardware samples like architecture, names, or utilization. 25 | general = 0b00000001, 26 | /// Clock-related hardware samples like minimum, maximum, and current frequencies or throttle reasons. 27 | clock = 0b00000010, 28 | /// Power-related hardware samples like current power draw or total energy consumption. 29 | power = 0b00000100, 30 | /// Memory-related hardware samples like memory usage or PCIe information. 31 | memory = 0b00001000, 32 | /// Temperature-related hardware samples like maximum and current temperatures. 33 | temperature = 0b00010000, 34 | /// Gfx-related (iGPU) hardware samples. Only used in the cpu_hardware_sampler. 35 | gfx = 0b00100000, 36 | /// Idle-state-related hardware samples. Only used in the cpu_hardware_sampler. 37 | idle_state = 0b01000000, 38 | /// Shortcut to enable all available hardware samples (default). 39 | all = 0b01111111 40 | // clang-format on 41 | }; 42 | 43 | /** 44 | * @brief Compute the bitwise not of @p sc. 45 | * @param[in] sc the sample_category to apply the bitwise not to 46 | * @return the bitwise not result (`[[nodiscard]]`) 47 | */ 48 | [[nodiscard]] constexpr sample_category operator~(const sample_category sc) noexcept { 49 | return static_cast(~static_cast(sc)); 50 | } 51 | 52 | /** 53 | * @brief Compute the bitwise and between @p lhs and @p rhs and return a new sample_category. 
54 | * @param[in] lhs the first sample_category 55 | * @param[in] rhs the second sample_category 56 | * @return the bitwise and result (`[[nodiscard]]`) 57 | */ 58 | [[nodiscard]] constexpr sample_category operator&(const sample_category lhs, const sample_category rhs) noexcept { 59 | return static_cast(static_cast(lhs) & static_cast(rhs)); 60 | } 61 | 62 | /** 63 | * @brief Compute the bitwise or between @p lhs and @p rhs and return a new sample_category. 64 | * @param[in] lhs the first sample_category 65 | * @param[in] rhs the second sample_category 66 | * @return the bitwise or result (`[[nodiscard]]`) 67 | */ 68 | [[nodiscard]] constexpr sample_category operator|(const sample_category lhs, const sample_category rhs) noexcept { 69 | return static_cast(static_cast(lhs) | static_cast(rhs)); 70 | } 71 | 72 | /** 73 | * @brief Compute the bitwise xor between @p lhs and @p rhs and return a new sample_category. 74 | * @param[in] lhs the first sample_category 75 | * @param[in] rhs the second sample_category 76 | * @return the bitwise xor result (`[[nodiscard]]`) 77 | */ 78 | [[nodiscard]] constexpr sample_category operator^(const sample_category lhs, const sample_category rhs) noexcept { 79 | return static_cast(static_cast(lhs) ^ static_cast(rhs)); 80 | } 81 | 82 | /** 83 | * @brief Compute the bitwise compound and between @p lhs and @p rhs and return the result in @p lhs. 84 | * @param[in,out] lhs the first sample_category 85 | * @param[in] rhs the second sample_category 86 | * @return a reference to @p lhs containing the bitwise and result 87 | */ 88 | constexpr sample_category &operator&=(sample_category &lhs, const sample_category rhs) noexcept { 89 | lhs = lhs & rhs; 90 | return lhs; 91 | } 92 | 93 | /** 94 | * @brief Compute the bitwise compound or between @p lhs and @p rhs and return the result in @p lhs. 
95 | * @param[in,out] lhs the first sample_category 96 | * @param[in] rhs the second sample_category 97 | * @return a reference to @p lhs containing the bitwise or result 98 | */ 99 | constexpr sample_category &operator|=(sample_category &lhs, const sample_category rhs) noexcept { 100 | lhs = lhs | rhs; 101 | return lhs; 102 | } 103 | 104 | /** 105 | * @brief Compute the bitwise compound xor between @p lhs and @p rhs and return the result in @p lhs. 106 | * @param[in,out] lhs the first sample_category 107 | * @param[in] rhs the second sample_category 108 | * @return a reference to @p lhs containing the bitwise xor result 109 | */ 110 | constexpr sample_category &operator^=(sample_category &lhs, const sample_category rhs) noexcept { 111 | lhs = lhs ^ rhs; 112 | return lhs; 113 | } 114 | 115 | } // namespace hws 116 | 117 | #endif // HWS_SAMPLE_CATEGORY_HPP_ 118 | -------------------------------------------------------------------------------- /include/hws/system_hardware_sampler.hpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Defines a hardware sampler for the whole system, i.e., automatically creates CPU and GPU hardware samples if the respective sampler and hardware are available. 
9 | */ 10 | 11 | #ifndef HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ 12 | #define HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ 13 | 14 | #include "hws/event.hpp" // hws::event 15 | #include "hws/hardware_sampler.hpp" // hws::hardware_sampler 16 | #include "hws/sample_category.hpp" // hws::sample_category 17 | 18 | #include // std::chrono::{milliseconds, steady_clock::time_point} 19 | #include // std::size_t 20 | #include // std::filesystem::path 21 | #include // std::unique_ptr 22 | #include // std::string 23 | #include // std::vector 24 | 25 | namespace hws { 26 | 27 | /** 28 | * @brief A hardware sampler for the whole system. 29 | * @details Enables hardware samplers for which hardware is available and the CMake configuration found the respective dependencies. 30 | */ 31 | class system_hardware_sampler { 32 | public: 33 | /** 34 | * @brief Construct hardware samplers with the default sampling interval. 35 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 36 | */ 37 | explicit system_hardware_sampler(sample_category category = sample_category::all); 38 | /** 39 | * @brief Construct hardware samplers with the provided @p sampling_interval. 40 | * @param[in] sampling_interval the used sampling interval 41 | * @param[in] category the sample categories that are enabled for hardware sampling (default: all) 42 | */ 43 | explicit system_hardware_sampler(std::chrono::milliseconds sampling_interval, sample_category category = sample_category::all); 44 | 45 | /** 46 | * @brief Delete the copy-constructor. 47 | */ 48 | system_hardware_sampler(const system_hardware_sampler &) = delete; 49 | /** 50 | * @brief Delete the move-constructor. 51 | */ 52 | system_hardware_sampler(system_hardware_sampler &&) noexcept = delete; 53 | /** 54 | * @brief Delete the copy-assignment operator. 55 | */ 56 | system_hardware_sampler &operator=(const system_hardware_sampler &) = delete; 57 | /** 58 | * @brief Delete the move-assignment operator. 
59 | */ 60 | system_hardware_sampler &operator=(system_hardware_sampler &&) noexcept = delete; 61 | 62 | /** 63 | * @brief Explicitly use the default destructor. 64 | */ 65 | ~system_hardware_sampler() = default; 66 | 67 | /** 68 | * @brief Start hardware sampling for all wrapped hardware samplers. 69 | */ 70 | void start_sampling(); 71 | /** 72 | * @brief Stop hardware sampling for all wrapped hardware samplers. 73 | */ 74 | void stop_sampling(); 75 | /** 76 | * @brief Pause hardware sampling for all wrapped hardware samplers. 77 | */ 78 | void pause_sampling(); 79 | /** 80 | * @brief Resume hardware sampling for all wrapped hardware samplers. 81 | */ 82 | void resume_sampling(); 83 | 84 | /** 85 | * @brief Check whether the hardware samplers have already started sampling. 86 | * @return `true` if **all** hardware samplers have already started sampling, `false` otherwise (`[[nodiscard]]`) 87 | */ 88 | [[nodiscard]] bool has_sampling_started() const noexcept; 89 | /** 90 | * @brief Check whether the hardware samplers are currently sampling. 91 | * @return `true` if **all** hardware samplers are currently sampling, `false` otherwise (`[[nodiscard]]`) 92 | */ 93 | [[nodiscard]] bool is_sampling() const noexcept; 94 | /** 95 | * @brief Check whether the hardware samplers have already stopped sampling. 96 | * @return `true` if **all** hardware samplers have already stopped sampling, `false` otherwise (`[[nodiscard]]`) 97 | */ 98 | [[nodiscard]] bool has_sampling_stopped() const noexcept; 99 | 100 | /** 101 | * @brief Add a new event to all hardware samplers. 102 | * @param e the event 103 | */ 104 | void add_event(event e); 105 | /** 106 | * @brief Add a new event to all hardware samplers. 107 | * @param[in] time_point the time point when the event occurred 108 | * @param[in] name the name of the event 109 | */ 110 | void add_event(decltype(event::time_point) time_point, decltype(event::name) name); 111 | /** 112 | * @brief Add a new event to all hardware samplers. 
The time_point will be the current time. 113 | * @param[in] name the name of the event 114 | */ 115 | void add_event(decltype(event::name) name); 116 | 117 | /** 118 | * @brief Return the number of recorded events separately for each hardware sampler. 119 | * @return the number of events per hardware sampler (`[[nodiscard]]`) 120 | */ 121 | [[nodiscard]] std::vector num_events() const; 122 | /** 123 | * @brief Return the recorded events separately for each hardware sampler. 124 | * @return the events per hardware sampler (`[[nodiscard]]`) 125 | */ 126 | [[nodiscard]] std::vector> get_events() const; 127 | /** 128 | * @brief Return the sampling time points separately for each hardware sampler. 129 | * @return the time points per hardware sampler (`[[nodiscard]]`) 130 | */ 131 | [[nodiscard]] std::vector> sampling_time_points() const; 132 | /** 133 | * @brief Return the sampling interval separately for each hardware sampler. 134 | * @return the sampling interval in milliseconds per hardware sampler (`[[nodiscard]]`) 135 | */ 136 | [[nodiscard]] std::vector sampling_interval() const; 137 | 138 | /** 139 | * @brief The number of hardware samplers available for the whole system. 140 | * @return the number of hardware samplers (`[[nodiscard]]`) 141 | */ 142 | [[nodiscard]] std::size_t num_samplers() const noexcept; 143 | /** 144 | * @brief The hardware samplers available for the whole system. 145 | * @return all available hardware samplers (`[[nodiscard]]`) 146 | */ 147 | [[nodiscard]] const std::vector> &samplers() const noexcept; 148 | /** 149 | * @copydoc hws::system_hardware_sampler::samplers() const 150 | */ 151 | [[nodiscard]] std::vector> &samplers() noexcept; 152 | /** 153 | * @brief Return the hardware sampler at index @p idx. 
154 | * @param[in] idx the index of the hardware sampler 155 | * @throws std::out_of_range if @p idx is out-of-range 156 | * @return the hardware sampler at index @p idx (`[[nodiscard]]`) 157 | */ 158 | [[nodiscard]] const std::unique_ptr &sampler(std::size_t idx) const; 159 | /** 160 | * @copydoc hws::system_hardware_sampler::sampler(std::size_t idx) const 161 | */ 162 | [[nodiscard]] std::unique_ptr &sampler(std::size_t idx); 163 | 164 | /** 165 | * @brief Dump the hardware samples of all hardware samplers to the YAML file with @p filename. 166 | * @param[in] filename the YAML file to append the hardware samples to 167 | */ 168 | void dump_yaml(const char *filename) const; 169 | /** 170 | * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const 171 | */ 172 | void dump_yaml(const std::string &filename) const; 173 | /** 174 | * @copydoc hws::system_hardware_sampler::dump_yaml(const char *) const 175 | */ 176 | void dump_yaml(const std::filesystem::path &filename) const; 177 | 178 | /** 179 | * @brief Return the hardware samples as YAML string. 180 | * @return the YAML content as string (`[[nodiscard]]`) 181 | */ 182 | [[nodiscard]] std::string as_yaml_string() const; 183 | /** 184 | * @brief Return only the hardware samples as YAML string. 185 | * @throws std::runtime_error if sampling is still running 186 | * @return the YAML content as string (`[[nodiscard]]`) 187 | */ 188 | [[nodiscard]] std::string samples_only_as_yaml_string() const; 189 | 190 | private: 191 | /// The different hardware sampler for the current system. 
192 | std::vector> samplers_; 193 | }; 194 | 195 | } // namespace hws 196 | 197 | #endif // HWS_SYSTEM_HARDWARE_SAMPLER_HPP_ 198 | -------------------------------------------------------------------------------- /include/hws/version.hpp.in: -------------------------------------------------------------------------------- 1 | /** 2 | * @file 3 | * @author Marcel Breyer 4 | * @copyright 2024-today All Rights Reserved 5 | * @license This file is released under the MIT license. 6 | * See the LICENSE.md file in the project root for full license information. 7 | * 8 | * @brief Version information for the hardware sampling. 9 | */ 10 | 11 | #ifndef HWS_VERSION_HPP_ 12 | #define HWS_VERSION_HPP_ 13 | #pragma once 14 | 15 | #include // std::string_view 16 | 17 | namespace hws::version { 18 | 19 | /** 20 | * @brief The name of the library. 21 | * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 22 | */ 23 | constexpr std::string_view name = "@PROJECT_NAME@"; 24 | 25 | /** 26 | * @brief The current version of the library in the form: "major.minor.patch". 27 | * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 28 | */ 29 | constexpr std::string_view version = "@PROJECT_VERSION@"; 30 | 31 | /** 32 | * @brief The current major version of the library. 33 | * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 34 | */ 35 | constexpr int major = @PROJECT_VERSION_MAJOR@; 36 | 37 | /** 38 | * @brief The current minor version of the library. 39 | * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 40 | */ 41 | constexpr int minor = @PROJECT_VERSION_MINOR@; 42 | 43 | /** 44 | * @brief The current patch version of the library. 45 | * @details The value gets automatically set during the [`CMake`](https://cmake.org/) configuration step. 
46 | */ 47 | constexpr int patch = @PROJECT_VERSION_PATCH@; 48 | 49 | } // namespace hws::version 50 | 51 | #endif // HWS_VERSION_HPP_ 52 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["scikit-build-core"] 3 | build-backend = "scikit_build_core.build" 4 | # set the necessary CMake build options 5 | [tool.scikit-build] 6 | cmake.args = [ 7 | "-DCMAKE_INSTALL_LIBDIR=HardwareSampling", 8 | "-DCMAKE_INSTALL_BINDIR=HardwareSampling", 9 | "-DCMAKE_INSTALL_INCLUDEDIR=HardwareSampling", 10 | "-DCMAKE_INSTALL_MANDIR=HardwareSampling", 11 | "-DCMAKE_INSTALL_DATAROOTDIR=HardwareSampling/cmake", 12 | "-DCMAKE_INSTALL_RPATH=$ORIGIN" 13 | ] 14 | sdist.exclude = ["build*/", "dist/", "docs/html/", ".github", "examples", "install", ".clang*", ".clion*", ".gitignore"] 15 | # project specific metadata 16 | [project] 17 | name = "hardware_sampling" 18 | version = "1.1.1" 19 | description = "hws - Hardware Sampling for GPUs and CPUs (e.g., clock frequencies, memory consumption, temperatures, or energy draw)" 20 | readme = "README.md" 21 | license = { file = "LICENSE" } 22 | authors = [ 23 | { name = "Marcel Breyer" } 24 | ] 25 | maintainers = [ 26 | { name = "University of Stuttgart IPVS - SC", email = "sc@ipvs.uni-stuttgart.de" } 27 | ] 28 | requires-python = ">=3.8" 29 | classifiers = [ 30 | "Development Status :: 5 - Production/Stable", 31 | "Environment :: GPU", 32 | "Intended Audience :: Science/Research", 33 | "License :: OSI Approved :: MIT License", 34 | "Natural Language :: English", 35 | "Operating System :: POSIX :: Linux", 36 | "Programming Language :: C++", 37 | "Programming Language :: Python :: 3" 38 | ] 39 | # project specific URLs 40 | [project.urls] 41 | documentation = "https://sc-sgs.github.io/hardware_sampling/" 42 | repository = "https://github.com/SC-SGS/hardware_sampling.git" 43 | issues = 
"https://github.com/SC-SGS/hardware_sampling/issues" -------------------------------------------------------------------------------- /src/hws/cpu/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE.md file in the project root for full license information. 5 | ######################################################################################################################## 6 | 7 | ## check whether lscpu could be found -> used for the CPU targets as well as for ALL host measurements 8 | ## -> checked even if no CPU targets where provided 9 | ## LINUX only 10 | find_program(HWS_LSCPU_FOUND lscpu) 11 | if (HWS_LSCPU_FOUND) 12 | message(STATUS "Enable sampling of CPU information using lscpu.") 13 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_VIA_LSCPU_ENABLED) 14 | endif () 15 | 16 | ## check whether free could be found -> used for the CPU targets as well as for ALL host measurements 17 | ## -> checked even if no CPU targets where provided 18 | ## LINUX only 19 | find_program(HWS_FREE_FOUND free) 20 | if (HWS_FREE_FOUND) 21 | message(STATUS "Enable sampling of CPU information using free.") 22 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_VIA_FREE_ENABLED) 23 | endif () 24 | 25 | ## check whether turbostat could be found -> used for the CPU targets as well as for ALL host measurements 26 | ## -> checked even if no CPU targets where provided 27 | find_program(HWS_TURBOSTAT_FOUND turbostat) 28 | if (HWS_TURBOSTAT_FOUND) 29 | ## check if the turbostat command works as intended 30 | execute_process(COMMAND sudo -n turbostat -n 1 -i 0.001 -S -q 31 | RESULT_VARIABLE HWS_TURBOSTAT_WITH_ROOT_N 32 | OUTPUT_QUIET 33 | ERROR_QUIET) 34 | if (HWS_TURBOSTAT_WITH_ROOT_N EQUAL 0) 35 | ## can execute with root 36 | execute_process(COMMAND sudo 
turbostat -n 1 -i 0.001 -S -q 37 | RESULT_VARIABLE HWS_TURBOSTAT_WITH_ROOT 38 | OUTPUT_QUIET 39 | ERROR_QUIET) 40 | if (HWS_TURBOSTAT_WITH_ROOT EQUAL 0) 41 | message(STATUS "Enable sampling of CPU information using turbostat with root privileges.") 42 | 43 | set(HWS_TURBOSTAT_EXECUTION_TYPE "root") 44 | # add compile definitions 45 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_VIA_TURBOSTAT_ENABLED) 46 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_VIA_TURBOSTAT_ROOT) 47 | else () 48 | message(STATUS "Can't use turbostat even with root privileges!") 49 | message(STATUS "Disabling turbostat support!") 50 | endif () 51 | else () 52 | ## check if turbostat can be executed without root -> potential less data 53 | execute_process(COMMAND turbostat -n 1 -i 0.001 -S -q 54 | RESULT_VARIABLE HWS_TURBOSTAT_WITHOUT_ROOT 55 | OUTPUT_QUIET 56 | ERROR_QUIET) 57 | if (HWS_TURBOSTAT_WITHOUT_ROOT EQUAL 0) 58 | message(STATUS "Enable sampling of CPU information using turbostat without root privileges.") 59 | 60 | set(HWS_TURBOSTAT_EXECUTION_TYPE "without_root") 61 | # add compile definitions 62 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_VIA_TURBOSTAT_ENABLED) 63 | else () 64 | message(STATUS "Can't use turbostat (with or without root privileges)!") 65 | message(STATUS "Disabling turbostat support!") 66 | endif () 67 | endif () 68 | endif () 69 | 70 | # check of any CPU related utility could be found 71 | if (NOT (HWS_LSCPU_FOUND OR HWS_FREE_FOUND OR HWS_TURBOSTAT_EXECUTION_TYPE)) 72 | if (HWS_ENABLE_CPU_SAMPLING MATCHES "ON") 73 | message(SEND_ERROR "Cannot find any CPU utility program but CPU sampling was explicitly requested!") 74 | else () 75 | message(STATUS "Cannot find any CPU utility program. 
Hardware sampling for CPUs disabled.") 76 | endif () 77 | return() 78 | endif () 79 | message(STATUS "Enable sampling of CPU information.") 80 | 81 | ## try finding subprocess.h 82 | set(HWS_subprocess_VERSION b6e1611d430e3019c423d2af26bb162e7ed5c3ae) 83 | find_package(subprocess QUIET) 84 | if (subprocess_FOUND) 85 | message(STATUS "Found package subprocess.h.") 86 | target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${subprocess_INCLUDE_DIR}) 87 | else () 88 | include(FetchContent) 89 | message(STATUS "Couldn't find package subprocess.h. Building version ${HWS_subprocess_VERSION} from source.") 90 | # fetch subprocess library subprocess.h 91 | FetchContent_Declare(subprocess 92 | GIT_REPOSITORY https://github.com/sheredom/subprocess.h.git 93 | GIT_TAG ${HWS_subprocess_VERSION} 94 | QUIET 95 | ) 96 | FetchContent_MakeAvailable(subprocess) 97 | target_include_directories(${HWS_LIBRARY_NAME} PRIVATE $) 98 | endif () 99 | 100 | # add source file to source file list 101 | target_sources(${HWS_LIBRARY_NAME} PRIVATE 102 | $) 107 | 108 | # add compile definitions 109 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_CPUS_ENABLED) 110 | -------------------------------------------------------------------------------- /src/hws/cpu/utility.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 
6 | */ 7 | 8 | #include "hws/cpu/utility.hpp" 9 | 10 | #include "hws/utility.hpp" // hws::detail::split_as 11 | 12 | #include "fmt/format.h" // fmt::format 13 | #include "subprocess.h" // subprocess_s, subprocess_create, subprocess_join, subprocess_stdout, subprocess_option_e 14 | 15 | #include // std::transform 16 | #include // std::size_t 17 | #include // std::FILE, std::fread 18 | #include // std::runtime_error 19 | #include // std::string 20 | #include // std::string_view 21 | #include // std::vector 22 | 23 | namespace hws::detail { 24 | 25 | std::string run_subprocess(const std::string_view cmd_line) { 26 | // search PATH for executable 27 | constexpr int options = subprocess_option_e::subprocess_option_search_user_path; 28 | constexpr static std::string::size_type buffer_size = 4096; 29 | 30 | // extract the separate command line arguments 31 | const std::vector cmd_split = detail::split_as(cmd_line, ' '); 32 | // convert to pointers 33 | std::vector cmd_ptr_split(cmd_split.size()); 34 | std::transform(cmd_split.cbegin(), cmd_split.cend(), cmd_ptr_split.begin(), [](const std::string &s) { return s.data(); }); 35 | cmd_ptr_split.push_back(nullptr); // subprocess wants the array to be terminated by a nullptr 36 | 37 | // create subprocess 38 | subprocess_s proc{}; 39 | HWS_SUBPROCESS_ERROR_CHECK(subprocess_create(cmd_ptr_split.data(), options, &proc)) 40 | // wait until process has finished 41 | int return_code{}; 42 | HWS_SUBPROCESS_ERROR_CHECK(subprocess_join(&proc, &return_code)) 43 | if (return_code != 0) { 44 | throw std::runtime_error{ fmt::format("Error: \"{}\" returned with {}!", cmd_line, return_code) }; 45 | } 46 | 47 | // get output handle and read data -> stdout and stderr are the same handle 48 | std::FILE *out_handle = subprocess_stdout(&proc); 49 | std::string buffer(buffer_size, '\0'); // 4096 characters should be enough 50 | const std::size_t bytes_read = std::fread(buffer.data(), sizeof(typename decltype(buffer)::value_type), buffer.size(), 
out_handle); 51 | 52 | // destroy subprocess 53 | HWS_SUBPROCESS_ERROR_CHECK(subprocess_destroy(&proc)) 54 | 55 | // create output 56 | return buffer.substr(0, bytes_read); 57 | } 58 | 59 | } // namespace hws::detail 60 | -------------------------------------------------------------------------------- /src/hws/event.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/event.hpp" 9 | 10 | #include "fmt/chrono.h" // direct formatting of std::chrono types 11 | #include "fmt/format.h" // fmt::format 12 | 13 | #include // std::ostream 14 | 15 | namespace hws { 16 | 17 | std::ostream &operator<<(std::ostream &out, const event &e) { 18 | return out << fmt::format("time_point: {}\n" 19 | "name: {}", 20 | e.time_point.time_since_epoch(), 21 | e.name); 22 | } 23 | 24 | } // namespace hws 25 | -------------------------------------------------------------------------------- /src/hws/gpu_amd/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE.md file in the project root for full license information. 5 | ######################################################################################################################## 6 | 7 | # try finding ROCm SMI 8 | find_package(rocm_smi QUIET) 9 | 10 | # check if ROCm SMI could be found 11 | if (NOT rocm_smi_FOUND) 12 | if (HWS_ENABLE_GPU_AMD_SAMPLING MATCHES "ON") 13 | message(SEND_ERROR "Cannot find ROCm SMI but AMD GPU sampling was explicitly requested!") 14 | else () 15 | message(STATUS "Cannot find ROCm SMI. 
Hardware sampling for AMD GPUs disabled.") 16 | endif () 17 | return() 18 | endif () 19 | message(STATUS "Enable sampling of AMD GPU information using ROCm SMI (${rocm_smi_VERSION}).") 20 | 21 | # must also find HIP 22 | find_package(HIP REQUIRED) 23 | 24 | # link against necessary libraries 25 | target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE rocm_smi64 hip::host) 26 | target_include_directories(${HWS_LIBRARY_NAME} PRIVATE ${ROCM_SMI_INCLUDE_DIR}) 27 | 28 | # add source file to source file list 29 | target_sources(${HWS_LIBRARY_NAME} PRIVATE 30 | $) 35 | 36 | # add compile definition 37 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_AMD_GPUS_ENABLED) 38 | -------------------------------------------------------------------------------- /src/hws/gpu_amd/utility.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 
6 | */ 7 | 8 | #include "hws/gpu_amd/utility.hpp" 9 | 10 | #include "rocm_smi/rocm_smi.h" // ROCm SMI runtime functions 11 | 12 | #include // std::string 13 | 14 | namespace hws::detail { 15 | 16 | std::string performance_level_to_string(const rsmi_dev_perf_level_t perf_level) { 17 | switch (perf_level) { 18 | case RSMI_DEV_PERF_LEVEL_AUTO: 19 | return "auto"; 20 | case RSMI_DEV_PERF_LEVEL_LOW: 21 | return "low"; 22 | case RSMI_DEV_PERF_LEVEL_HIGH: 23 | return "high"; 24 | case RSMI_DEV_PERF_LEVEL_MANUAL: 25 | return "manual"; 26 | case RSMI_DEV_PERF_LEVEL_STABLE_STD: 27 | return "stable_std"; 28 | case RSMI_DEV_PERF_LEVEL_STABLE_PEAK: 29 | return "stable_peak"; 30 | case RSMI_DEV_PERF_LEVEL_STABLE_MIN_MCLK: 31 | return "stable_min_mclk"; 32 | case RSMI_DEV_PERF_LEVEL_STABLE_MIN_SCLK: 33 | return "stable_min_sclk"; 34 | case RSMI_DEV_PERF_LEVEL_DETERMINISM: 35 | return "determinism"; 36 | case RSMI_DEV_PERF_LEVEL_UNKNOWN: 37 | default: 38 | return "unknown"; 39 | } 40 | } 41 | 42 | } // namespace hws::detail 43 | -------------------------------------------------------------------------------- /src/hws/gpu_intel/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE.md file in the project root for full license information. 
5 | ######################################################################################################################## 6 | 7 | # set the CMAKE_MODULE_PATH to the cmake directory 8 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") 9 | 10 | # try finding Level Zero 11 | find_package(level_zero QUIET) 12 | 13 | # check if Level Zero could be found 14 | if (NOT level_zero_FOUND) 15 | if (HWS_ENABLE_GPU_INTEL_SAMPLING MATCHES "ON") 16 | message(SEND_ERROR "Cannot find Level Zero but Intel GPU sampling was explicitly requested!") 17 | else () 18 | message(STATUS "Cannot find Level Zero. Hardware sampling for Intel GPUs disabled.") 19 | endif () 20 | return() 21 | endif () 22 | message(STATUS "Enable sampling of Intel GPU information using Level Zero.") 23 | 24 | # link against necessary libraries 25 | target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE level_zero) 26 | 27 | # add source file to source file list 28 | target_sources(${HWS_LIBRARY_NAME} PRIVATE 29 | $) 34 | 35 | # add compile definition 36 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_INTEL_GPUS_ENABLED) 37 | -------------------------------------------------------------------------------- /src/hws/gpu_nvidia/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## Authors: Marcel Breyer 2 | ## Copyright (C): 2024-today All Rights Reserved 3 | ## License: This file is released under the MIT license. 4 | ## See the LICENSE.md file in the project root for full license information. 5 | ######################################################################################################################## 6 | 7 | # try finding NVML 8 | find_package(CUDAToolkit QUIET) 9 | 10 | # check if NVML could be found 11 | if (NOT CUDAToolkit_FOUND) 12 | if (HWS_ENABLE_GPU_NVIDIA_SAMPLING MATCHES "ON") 13 | message(SEND_ERROR "Cannot find NVML but NVIDIA GPU sampling was explicitly requested!") 14 | else () 15 | message(STATUS "Cannot find NVML. 
Hardware sampling for NVIDIA GPUs disabled.") 16 | endif () 17 | return() 18 | endif () 19 | message(STATUS "Enable sampling of NVIDIA GPU information using NVML (${CUDAToolkit_VERSION}).") 20 | 21 | # link against necessary libraries 22 | target_link_libraries(${HWS_LIBRARY_NAME} PRIVATE CUDA::nvml CUDA::cudart) 23 | 24 | # add source file to source file list 25 | target_sources(${HWS_LIBRARY_NAME} PRIVATE 26 | $) 31 | 32 | # add compile definition 33 | target_compile_definitions(${HWS_LIBRARY_NAME} PUBLIC HWS_FOR_NVIDIA_GPUS_ENABLED) 34 | -------------------------------------------------------------------------------- /src/hws/gpu_nvidia/utility.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/gpu_nvidia/utility.hpp" 9 | 10 | #include "fmt/format.h" // fmt::format 11 | #include "fmt/ranges.h" // fmt::join 12 | #include "nvml.h" // NVML runtime functions 13 | 14 | #include // std::string 15 | #include // std::vector 16 | 17 | namespace hws::detail { 18 | 19 | #if CUDA_VERSION >= 12000 20 | 21 | std::string throttle_event_reason_to_string(const unsigned long long clocks_event_reasons) { 22 | if (clocks_event_reasons == 0ull) { 23 | return "None"; 24 | } else { 25 | std::vector reasons{}; 26 | if ((clocks_event_reasons & nvmlClocksEventReasonApplicationsClocksSetting) != 0ull) { 27 | reasons.emplace_back("ApplicationsClocksSetting"); 28 | } 29 | if ((clocks_event_reasons & nvmlClocksEventReasonDisplayClockSetting) != 0ull) { 30 | reasons.emplace_back("DisplayClockSetting"); 31 | } 32 | if ((clocks_event_reasons & nvmlClocksEventReasonGpuIdle) != 0ull) { 33 | reasons.emplace_back("GpuIdle"); 34 | } 35 | if ((clocks_event_reasons & nvmlClocksEventReasonSwPowerCap) != 0ull) { 36 | 
reasons.emplace_back("SwPowerCap"); 37 | } 38 | if ((clocks_event_reasons & nvmlClocksEventReasonSwThermalSlowdown) != 0ull) { 39 | reasons.emplace_back("SwThermalSlowdown"); 40 | } 41 | if ((clocks_event_reasons & nvmlClocksEventReasonSyncBoost) != 0ull) { 42 | reasons.emplace_back("SyncBoost"); 43 | } 44 | if ((clocks_event_reasons & nvmlClocksThrottleReasonHwPowerBrakeSlowdown) != 0ull) { 45 | reasons.emplace_back("HwPowerBrakeSlowdown"); 46 | } 47 | if ((clocks_event_reasons & nvmlClocksThrottleReasonHwSlowdown) != 0ull) { 48 | reasons.emplace_back("HwSlowdown"); 49 | } 50 | if ((clocks_event_reasons & nvmlClocksThrottleReasonHwThermalSlowdown) != 0ull) { 51 | reasons.emplace_back("HwThermalSlowdown"); 52 | } 53 | return fmt::format("{}", fmt::join(reasons, "|")); 54 | } 55 | } 56 | 57 | #endif 58 | 59 | } // namespace hws::detail 60 | -------------------------------------------------------------------------------- /src/hws/hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 
6 | */ 7 | 8 | #include "hws/hardware_sampler.hpp" 9 | 10 | #include "hws/event.hpp" // hws::event 11 | #include "hws/utility.hpp" // hws::detail::durations_from_reference_time 12 | #include "hws/version.hpp" // hws::version::version 13 | 14 | #include "fmt/chrono.h" // direct formatting of std::chrono types 15 | #include "fmt/format.h" // fmt::format 16 | #include "fmt/ranges.h" // fmt::join 17 | 18 | #include // std::chrono::{system_clock, steady_clock, duration_cast, milliseconds} 19 | #include // std::size_t 20 | #include // std::exception 21 | #include // std::ofstream 22 | #include // std::cerr, std::endl 23 | #include // std::runtime_error, std::out_of_range 24 | #include // std::thread 25 | #include // std::move 26 | 27 | namespace hws { 28 | 29 | hardware_sampler::hardware_sampler(const std::chrono::milliseconds sampling_interval, const sample_category category) : 30 | sampling_interval_{ sampling_interval }, 31 | sample_category_{ category } { 32 | if (sampling_interval == std::chrono::milliseconds{ 0 }) { 33 | throw std::invalid_argument{ "The sampling interval must be larger than 0ms!" }; 34 | } 35 | } 36 | 37 | hardware_sampler::~hardware_sampler() = default; 38 | 39 | void hardware_sampler::start_sampling() { 40 | // can't start an already running sampler 41 | if (this->has_sampling_started()) { 42 | throw std::runtime_error{ "Can start every hardware sampler only once!" 
}; 43 | } 44 | 45 | // record start time 46 | start_date_time_ = std::chrono::system_clock::now(); 47 | 48 | // start sampling loop 49 | sampling_started_ = true; 50 | sampling_running_ = true; 51 | this->add_event("sampling_started"); 52 | sampling_thread_ = std::thread{ 53 | [this]() { 54 | try { 55 | this->sampling_loop(); 56 | } catch (const std::exception &e) { 57 | // print useful error message 58 | std::cerr << e.what() << std::endl; 59 | throw; 60 | } 61 | } 62 | }; 63 | } 64 | 65 | void hardware_sampler::stop_sampling() { 66 | // can't stop a hardware sampler that has never been started 67 | if (!this->has_sampling_started()) { 68 | throw std::runtime_error{ "Can't stop a hardware sampler that has never been started!" }; 69 | } 70 | // can't stop an already stopped sampler 71 | if (this->has_sampling_stopped()) { 72 | throw std::runtime_error{ "Can stop every hardware sampler only once!" }; 73 | } 74 | 75 | // stop sampling 76 | sampling_running_ = false; 77 | sampling_stopped_ = true; // -> notifies the sampling std::thread 78 | sampling_thread_.join(); 79 | this->add_event("sampling_stopped"); 80 | } 81 | 82 | void hardware_sampler::pause_sampling() { 83 | sampling_running_ = false; // notifies the sampling std::thread 84 | this->add_event("sampling_paused"); 85 | } 86 | 87 | void hardware_sampler::resume_sampling() { 88 | if (this->has_sampling_stopped()) { 89 | throw std::runtime_error{ "Can't resume a hardware sampler that has already been stopped!" 
}; 90 | } 91 | sampling_running_ = true; // notifies the sampling std::thread 92 | this->add_event("sampling_resumed"); 93 | } 94 | 95 | bool hardware_sampler::has_sampling_started() const noexcept { 96 | return sampling_started_; 97 | } 98 | 99 | bool hardware_sampler::is_sampling() const noexcept { 100 | return sampling_running_; 101 | } 102 | 103 | bool hardware_sampler::has_sampling_stopped() const noexcept { 104 | return sampling_stopped_; 105 | } 106 | 107 | void hardware_sampler::add_event(event e) { 108 | events_.push_back(std::move(e)); 109 | } 110 | 111 | void hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) { 112 | events_.emplace_back(time_point, name); 113 | } 114 | 115 | void hardware_sampler::add_event(decltype(event::name) name) { 116 | events_.emplace_back(std::chrono::steady_clock::now(), name); 117 | } 118 | 119 | event hardware_sampler::get_event(const std::size_t idx) const { 120 | if (idx >= this->num_events()) { 121 | throw std::out_of_range{ fmt::format("The index {} is out-of-range for the number of events {}!", idx, this->num_events()) }; 122 | } 123 | 124 | return events_[idx]; 125 | } 126 | 127 | void hardware_sampler::dump_yaml(const char *filename) const { 128 | if (!this->has_sampling_stopped()) { 129 | throw std::runtime_error{ "Can dump samples to the YAML file only after the sampling has been stopped!" 
}; 130 | } 131 | 132 | std::ofstream file{ filename, std::ios_base::app }; 133 | 134 | // begin a new YAML document (only with "---" multiple YAML documents in a single file are allowed) 135 | file << "---\n\n" 136 | << this->as_yaml_string(); 137 | } 138 | 139 | void hardware_sampler::dump_yaml(const std::string &filename) const { 140 | this->dump_yaml(filename.c_str()); 141 | } 142 | 143 | void hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { 144 | this->dump_yaml(filename.string().c_str()); 145 | } 146 | 147 | std::string hardware_sampler::as_yaml_string() const { 148 | if (!this->has_sampling_stopped()) { 149 | throw std::runtime_error{ "Can return samples as string only after the sampling has been stopped!" }; 150 | } 151 | 152 | // generate the event information 153 | std::vector event_time_points{}; 154 | std::vector event_names{}; 155 | for (const auto &[time_point, name] : events_) { 156 | event_time_points.push_back(time_point); 157 | event_names.push_back(fmt::format("\"{}\"", name)); 158 | } 159 | 160 | return fmt::format("device_identification: \"{}\"\n" 161 | "\n" 162 | "version: \"{}\"\n" 163 | "\n" 164 | "start_time: \"{:%Y-%m-%d %X}\"\n" 165 | "\n" 166 | "events:\n" 167 | " time_points:\n" 168 | " unit: \"s\"\n" 169 | " values: [{}]\n" 170 | " names: [{}]\n" 171 | "\n" 172 | "sampling_interval:\n" 173 | " unit: \"ms\"\n" 174 | " values: {}\n" 175 | "\n" 176 | "time_points:\n" 177 | " unit: \"s\"\n" 178 | " values: [{}]\n" 179 | "\n" 180 | "{}\n", 181 | this->device_identification(), 182 | version::version, 183 | start_date_time_, 184 | fmt::join(detail::durations_from_reference_time(event_time_points, this->get_event(0).time_point), ", "), 185 | fmt::join(event_names, ", "), 186 | this->sampling_interval().count(), 187 | fmt::join(detail::durations_from_reference_time(this->sampling_time_points(), this->get_event(0).time_point), ", "), 188 | this->samples_only_as_yaml_string()); 189 | } 190 | 191 | void 
hardware_sampler::add_time_point(const std::chrono::steady_clock::time_point time_point) { 192 | time_points_.push_back(time_point); 193 | } 194 | 195 | bool hardware_sampler::sample_category_enabled(const sample_category category) const noexcept { 196 | return static_cast(this->sample_category_ & category) != 0; 197 | } 198 | 199 | } // namespace hws 200 | -------------------------------------------------------------------------------- /src/hws/system_hardware_sampler.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * @copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/system_hardware_sampler.hpp" 9 | 10 | #include "hws/event.hpp" // hws::event 11 | #include "hws/sample_category.hpp" // hws::sample_category 12 | 13 | #if defined(HWS_FOR_CPUS_ENABLED) 14 | #include "hws/cpu/hardware_sampler.hpp" // hws::cpu_hardware_sampler 15 | #endif 16 | #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) 17 | #include "hws/gpu_nvidia/hardware_sampler.hpp" // hws::gpu_nvidia_hardware_sampler 18 | #include "hws/gpu_nvidia/utility.hpp" // HWS_CUDA_ERROR_CHECK 19 | 20 | #include "cuda_runtime.h" // cudaGetDeviceCount 21 | #endif 22 | #if defined(HWS_FOR_AMD_GPUS_ENABLED) 23 | #include "hws/gpu_amd/hardware_sampler.hpp" // hws::gpu_amd_hardware_sampler 24 | #include "hws/gpu_amd/utility.hpp" // HWS_HIP_ERROR_CHECK 25 | 26 | #include "hip/hip_runtime.h" // hipGetDeviceCount 27 | #endif 28 | #if defined(HWS_FOR_INTEL_GPUS_ENABLED) 29 | #include "hws/gpu_intel/hardware_sampler.hpp" // hws::gpu_intel_hardware_sampler 30 | #include "hws/gpu_intel/utility.hpp" // HWS_LEVEL_ZERO_ERROR_CHECK 31 | #endif 32 | 33 | #include "fmt/format.h" // fmt::format 34 | 35 | #include // std::for_each, std::all_of 36 | #include // std::chrono::milliseconds 37 | #include // std::size_t 
38 | #include // std::uint32_t 39 | #include // std::unique_ptr, std::make_unique 40 | #include // std::accumulate 41 | #include // std::out_of_range 42 | #include // std::vector 43 | 44 | namespace hws { 45 | 46 | system_hardware_sampler::system_hardware_sampler(const sample_category category) : 47 | system_hardware_sampler{ HWS_SAMPLING_INTERVAL, category } { } 48 | 49 | system_hardware_sampler::system_hardware_sampler(const std::chrono::milliseconds sampling_interval, sample_category category) { 50 | // create the hardware samplers based on the available hardware 51 | #if defined(HWS_FOR_CPUS_ENABLED) 52 | { 53 | samplers_.push_back(std::make_unique(sampling_interval, category)); 54 | } 55 | #endif 56 | #if defined(HWS_FOR_NVIDIA_GPUS_ENABLED) 57 | { 58 | int device_count{}; 59 | HWS_CUDA_ERROR_CHECK(cudaGetDeviceCount(&device_count)); 60 | for (int device = 0; device < device_count; ++device) { 61 | samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); 62 | } 63 | } 64 | #endif 65 | #if defined(HWS_FOR_AMD_GPUS_ENABLED) 66 | { 67 | int device_count{}; 68 | HWS_HIP_ERROR_CHECK(hipGetDeviceCount(&device_count)); 69 | for (int device = 0; device < device_count; ++device) { 70 | samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); 71 | } 72 | } 73 | #endif 74 | #if defined(HWS_FOR_INTEL_GPUS_ENABLED) 75 | { 76 | // init level zero driver 77 | HWS_LEVEL_ZERO_ERROR_CHECK(zeInit(ZE_INIT_FLAG_GPU_ONLY)) 78 | 79 | // discover the number of drivers 80 | std::uint32_t driver_count{ 0 }; 81 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, nullptr)) 82 | 83 | // check if only the single GPU driver has been found 84 | if (driver_count > 1) { 85 | throw std::runtime_error{ fmt::format("Found too many GPU drivers ({})!", driver_count) }; 86 | } 87 | 88 | // get the GPU driver 89 | ze_driver_handle_t driver{}; 90 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDriverGet(&driver_count, &driver)) 91 | 92 | // get all 
GPUs for the current driver 93 | std::uint32_t device_count{ 0 }; 94 | HWS_LEVEL_ZERO_ERROR_CHECK(zeDeviceGet(driver, &device_count, nullptr)) 95 | for (std::uint32_t device = 0; device < device_count; ++device) { 96 | samplers_.push_back(std::make_unique(static_cast(device), sampling_interval, category)); 97 | } 98 | } 99 | #endif 100 | } 101 | 102 | void system_hardware_sampler::start_sampling() { 103 | std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->start_sampling(); }); 104 | } 105 | 106 | void system_hardware_sampler::stop_sampling() { 107 | std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->stop_sampling(); }); 108 | } 109 | 110 | void system_hardware_sampler::pause_sampling() { 111 | std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->pause_sampling(); }); 112 | } 113 | 114 | void system_hardware_sampler::resume_sampling() { 115 | std::for_each(samplers_.begin(), samplers_.end(), [](auto &ptr) { ptr->resume_sampling(); }); 116 | } 117 | 118 | bool system_hardware_sampler::has_sampling_started() const noexcept { 119 | return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_started(); }); 120 | } 121 | 122 | bool system_hardware_sampler::is_sampling() const noexcept { 123 | return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->is_sampling(); }); 124 | } 125 | 126 | bool system_hardware_sampler::has_sampling_stopped() const noexcept { 127 | return std::all_of(samplers_.cbegin(), samplers_.cend(), [](const auto &ptr) { return ptr->has_sampling_stopped(); }); 128 | } 129 | 130 | void system_hardware_sampler::add_event(event e) { 131 | std::for_each(samplers_.begin(), samplers_.end(), [&e](auto &ptr) { ptr->add_event(e); }); 132 | } 133 | 134 | void system_hardware_sampler::add_event(decltype(event::time_point) time_point, decltype(event::name) name) { 135 | std::for_each(samplers_.begin(), samplers_.end(), 
[&time_point, &name](auto &ptr) { ptr->add_event(time_point, name); }); 136 | } 137 | 138 | void system_hardware_sampler::add_event(decltype(event::name) name) { 139 | std::for_each(samplers_.begin(), samplers_.end(), [&name](auto &ptr) { ptr->add_event(name); }); 140 | } 141 | 142 | std::vector system_hardware_sampler::num_events() const { 143 | std::vector num_events_per_sampler(this->num_samplers()); 144 | std::transform(samplers_.cbegin(), samplers_.cend(), num_events_per_sampler.begin(), [](const auto &ptr) { return ptr->num_events(); }); 145 | return num_events_per_sampler; 146 | } 147 | 148 | std::vector> system_hardware_sampler::get_events() const { 149 | std::vector> events_per_sampler(this->num_samplers()); 150 | std::transform(samplers_.cbegin(), samplers_.cend(), events_per_sampler.begin(), [](const auto &ptr) { return ptr->get_events(); }); 151 | return events_per_sampler; 152 | } 153 | 154 | std::vector> system_hardware_sampler::sampling_time_points() const { 155 | std::vector> sampling_time_points_per_sampler(this->num_samplers()); 156 | std::transform(samplers_.cbegin(), samplers_.cend(), sampling_time_points_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_time_points(); }); 157 | return sampling_time_points_per_sampler; 158 | } 159 | 160 | std::vector system_hardware_sampler::sampling_interval() const { 161 | std::vector sampling_interval_per_sampler(this->num_samplers()); 162 | std::transform(samplers_.cbegin(), samplers_.cend(), sampling_interval_per_sampler.begin(), [](const auto &ptr) { return ptr->sampling_interval(); }); 163 | return sampling_interval_per_sampler; 164 | } 165 | 166 | std::size_t system_hardware_sampler::num_samplers() const noexcept { 167 | return samplers_.size(); 168 | } 169 | 170 | std::vector> &system_hardware_sampler::samplers() noexcept { 171 | return samplers_; 172 | } 173 | 174 | const std::vector> &system_hardware_sampler::samplers() const noexcept { 175 | return samplers_; 176 | } 177 | 178 | 
std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) { 179 | if (idx >= samplers_.size()) { 180 | throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; 181 | } 182 | return samplers_[idx]; 183 | } 184 | 185 | const std::unique_ptr &system_hardware_sampler::sampler(const std::size_t idx) const { 186 | if (idx >= samplers_.size()) { 187 | throw std::out_of_range{ fmt::format("Index {} is out-of-range for size {}!", idx, samplers_.size()) }; 188 | } 189 | return samplers_[idx]; 190 | } 191 | 192 | void system_hardware_sampler::dump_yaml(const char *filename) const { 193 | std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); 194 | } 195 | 196 | void system_hardware_sampler::dump_yaml(const std::string &filename) const { 197 | std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); 198 | } 199 | 200 | void system_hardware_sampler::dump_yaml(const std::filesystem::path &filename) const { 201 | std::for_each(samplers_.cbegin(), samplers_.cend(), [&filename](const auto &ptr) { ptr->dump_yaml(filename); }); 202 | } 203 | 204 | std::string system_hardware_sampler::as_yaml_string() const { 205 | return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->as_yaml_string(); }); 206 | } 207 | 208 | std::string system_hardware_sampler::samples_only_as_yaml_string() const { 209 | return std::accumulate(samplers_.cbegin(), samplers_.cend(), std::string{}, [](const std::string str, const auto &ptr) { return str + ptr->samples_only_as_yaml_string(); }); 210 | } 211 | 212 | } // namespace hws 213 | -------------------------------------------------------------------------------- /src/hws/utility.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * @author Marcel Breyer 3 | * 
@copyright 2024-today All Rights Reserved 4 | * @license This file is released under the MIT license. 5 | * See the LICENSE.md file in the project root for full license information. 6 | */ 7 | 8 | #include "hws/utility.hpp" 9 | 10 | #include // std::min, std::transform, std::all_of 11 | #include // std::tolower, std::isdigit 12 | #include // std::string 13 | #include // std::string_view 14 | #include // std::vector 15 | 16 | namespace hws::detail { 17 | 18 | bool starts_with(const std::string_view sv, const std::string_view start) noexcept { 19 | return sv.substr(0, start.size()) == start; 20 | } 21 | 22 | std::string_view trim(std::string_view str) noexcept { 23 | // trim right 24 | { 25 | const std::string_view::size_type pos = std::min(str.find_last_not_of(" \t\v\r\n\f") + 1, str.size()); 26 | str = str.substr(0, pos); 27 | } 28 | // trim left 29 | { 30 | const std::string_view::size_type pos = std::min(str.find_first_not_of(" \t\v\r\n\f"), str.size()); 31 | str = str.substr(pos); 32 | } 33 | return str; 34 | } 35 | 36 | std::string to_lower_case(const std::string_view str) { 37 | std::string lowercase_str{ str }; 38 | std::transform(str.begin(), str.end(), lowercase_str.begin(), [](const unsigned char c) { return static_cast(std::tolower(static_cast(c))); }); 39 | return lowercase_str; 40 | } 41 | 42 | std::vector split(const std::string_view str, const char delim) { 43 | std::vector split_str; 44 | 45 | // if the input str is empty, return an empty vector 46 | if (str.empty()) { 47 | return split_str; 48 | } 49 | 50 | std::string_view::size_type pos = 0; 51 | std::string_view::size_type next = 0; 52 | while (next != std::string_view::npos) { 53 | next = str.find_first_of(delim, pos); 54 | split_str.emplace_back(next == std::string_view::npos ? 
str.substr(pos) : str.substr(pos, next - pos)); 55 | pos = next + 1; 56 | } 57 | return split_str; 58 | } 59 | 60 | bool is_integer(std::string_view str) { 61 | return std::all_of(str.cbegin(), str.cend(), [](const char c) { return std::isdigit(static_cast(c)); }); 62 | } 63 | 64 | } // namespace hws::detail 65 | --------------------------------------------------------------------------------