├── .gitattributes ├── .github └── workflows │ ├── android-clang.yaml │ ├── linux-clang.yaml │ └── linux-gcc.yaml ├── .gitignore ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── android_build.sh ├── docs └── changelog.md └── source ├── CMakeLists.txt ├── arm_gpuinfo.cpp ├── libgpuinfo.cpp └── libgpuinfo.hpp /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text eol=lf 3 | 4 | # Force these text files to normalized endings 5 | *.c text 6 | *.cpp text 7 | *.h text 8 | *.hpp text 9 | *.md text 10 | *.py text 11 | *.sh text 12 | *.txt text 13 | -------------------------------------------------------------------------------- /.github/workflows/android-clang.yaml: -------------------------------------------------------------------------------- 1 | name: Build for Android with Clang using the NDK 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Set up JDK 21 environment 18 | run: | 19 | echo "export JAVA_HOME=$JAVA_HOME_21_X64" >> $GITHUB_ENV 20 | echo "export PATH=$JAVA_HOME/bin:$PATH" >> $GITHUB_ENV 21 | 22 | - name: Use the built-in Android NDK 23 | run: | 24 | echo "export ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk/26.3.11579264" >> $GITHUB_ENV 25 | echo "export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux/bin:$PATH" >> $GITHUB_ENV 26 | 27 | - name: Build with Clang and Android NDK 28 | run: | 29 | mkdir -p build 30 | cd build 31 | cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 .. 
32 | make 33 | 34 | - name: Upload binaries 35 | uses: actions/upload-artifact@v4 36 | with: 37 | name: libgpu-linux-x86 38 | path: | 39 | build/source/arm_gpuinfo -------------------------------------------------------------------------------- /.github/workflows/linux-clang.yaml: -------------------------------------------------------------------------------- 1 | name: Build for Linux with Clang 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - name: Checkout code 15 | uses: actions/checkout@v4 16 | 17 | - name: Build with Clang 18 | run: | 19 | mkdir -p build 20 | cd build 21 | cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ .. 22 | make 23 | 24 | - name: Upload binaries 25 | uses: actions/upload-artifact@v4 26 | with: 27 | name: libgpu-linux-x86 28 | path: | 29 | build/source/arm_gpuinfo -------------------------------------------------------------------------------- /.github/workflows/linux-gcc.yaml: -------------------------------------------------------------------------------- 1 | name: Build for Linux with GCC 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-22.04 12 | 13 | steps: 14 | - name: Git checkout 15 | uses: actions/checkout@v4 16 | 17 | - name: Build with GCC 18 | run: | 19 | mkdir -p build 20 | cd build 21 | cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ .. 
22 | make 23 | 24 | - name: Upload binaries 25 | uses: actions/upload-artifact@v4 26 | with: 27 | name: libgpu-linux-x86 28 | path: | 29 | build/source/arm_gpuinfo 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Editor config files 2 | .vs 3 | .vscode 4 | 5 | # Build and debug output files 6 | /.cache 7 | /bin* 8 | /build* 9 | /log* 10 | /scratch* 11 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This confidential and proprietary software may be used only as 3 | # authorised by a licensing agreement from Arm Limited. 4 | # Copyright 2023-2024 Arm Ltd. All Rights Reserved. 5 | # The entire notice above must be reproduced on all authorised 6 | # copies and copies may only be made to the extent permitted 7 | # by a licensing agreement from Arm Limited. 
8 | # 9 | 10 | cmake_minimum_required(VERSION 3.15) 11 | 12 | set(CMAKE_CXX_STANDARD 14) 13 | 14 | project(libGPUInfo VERSION 1.2.0) 15 | 16 | add_subdirectory(source) 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021-2024 Arm Limited 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # About 2 | 3 | libGPUInfo is a small utility library that allows applications to query the 4 | configuration of the Arm® Immortalis™ or Arm Mali™ GPU present in the system. 
5 | This information allows developers to adjust application workload complexity to 6 | match the performance capability of the current device. 7 | 8 | This library is able to provide the Arm GPU hardware configuration, as well as 9 | performance metrics for the shader cores inside the GPU. The library is unable 10 | to provide system information, such as the available GPU clock frequencies, 11 | because this is provided by the device manufacturer and is not part of the Arm 12 | GPU itself. 13 | 14 | For offline documentation about the capabilities of the various Arm GPUs on the 15 | market today please refer to the [Arm GPU Datasheet][2]. 16 | 17 | ## Supported devices 18 | 19 | This library aims to support all Arm GPU products from the Mali-T700 series 20 | onwards, ensuring developers have coverage of the vast majority of smartphones 21 | with Arm GPUs that are in use today. If you find a device with an Arm GPU which 22 | does not work, or gives inaccurate results, please open an Issue on the GitHub 23 | issue tracker. 24 | 25 | This library only supports devices using the Arm commercial driver. 26 | 27 | ## Recent changes 28 | 29 | * Change log: [1.x series](./docs/changelog.md) 30 | 31 | ## Related API extensions 32 | 33 | This library is intended to support any Arm device, but some developers prefer 34 | to use functionality within the graphics API when it is available. New devices 35 | can report a similar set of information to this library using in-API queries. 36 | 37 | We recommend using the extensions on devices where they are available. Doing so 38 | means the application automatically gets up-to-date information for all 39 | devices, even those released after the application binary was built. 40 | 41 | For more information please refer to the extension specifications: 42 | 43 | * [VK_ARM_shader_core_properties][3] 44 | * [VK_ARM_shader_core_builtins][4] 45 | 46 | 47 | ## License 48 | 49 | This project is licensed under the MIT license. 
By downloading any component 50 | from this repository you acknowledge that you accept the terms specified in the 51 | [LICENSE.txt](LICENSE.txt) file. 52 | 53 | # Available information 54 | 55 | The query mechanism can report the following information about the GPU: 56 | 57 | * **Name:** The product name string, e.g. "Mali-G710". 58 | * **Architecture:** The product architecture name string, e.g. "Valhall". 59 | * **Model number:** The product ID number, e.g. 0xa002. 60 | * **Shader core count:** The number of shader cores in the design. 61 | * **Shader core mask:** The shader core topology mask. 62 | * **L2 cache count:** The number of L2 cache slices in the design. 63 | * **L2 cache size:** The total L2 cache size, summed over all slices, in bytes. 64 | * **Bus size:** The width of the external data bus, per cache slice, in bits. 65 | 66 | The query mechanism can report the following per-core shader core performance 67 | information: 68 | 69 | * **Execution engine count:** The number of arithmetic macroblocks. 70 | * **FP32 FMA count:** The peak fp32 FMAs per clock, summed over all engines. 71 | * **FP16 FMA count:** The peak fp16 FMAs per clock, summed over all engines. 72 | * **Texel count:** The peak bilinear filtered texture samples per clock. 73 | * **Pixel count:** The peak pixels per clock. 74 | 75 | # Using the library 76 | 77 | The library is very simple to use: 78 | 79 | ```C++ 80 | // Create a connection with the kernel driver ... 81 | std::unique_ptr<libarmgpuinfo::instance> conn = libarmgpuinfo::instance::create(); 82 | if (!conn) 83 | { 84 | std::cout << "ERROR: Failed to create Mali instance\n"; 85 | return; 86 | } 87 | 88 | // Fetch the information result and do something with it ... 
89 | const gpuinfo& info = conn->get_info(); 90 | std::cout << "GPU: " << info.gpu_name << " MP" << info.num_shader_cores << "\n"; 91 | ``` 92 | 93 | Note that the returned instance uses a unique pointer for lifetime management, 94 | and both the instance and the query result will be freed when the instance 95 | drops out of scope. 96 | 97 | ## Handling unknown devices 98 | 99 | The library will be regularly updated to support new Arm GPU products, but it 100 | is inevitable that applications will run on new devices with GPU models that 101 | did not exist at the time they were released. For this there are two failure 102 | modes that applications must consider. 103 | 104 | The most likely error is the case where a connection can be established with 105 | the Arm kernel driver, but the product code is unknown. In this case the call 106 | to `libarmgpuinfo::instance::create()` will succeed but return a partially 107 | populated result. It will include any information that can be determined 108 | programmatically, but will report the GPU name and architecture as "Unknown", 109 | and the per-core shader core performance metrics as zero. 110 | 111 | For example, we can currently show the following information when the product 112 | model is not explicitly supported: 113 | 114 | ```yaml 115 | GPU configuration: 116 | Model number: 0xa862 117 | Core count: 7 118 | L2 cache count: 4 119 | Total L2 cache size: 2097152 bytes 120 | Bus width: 256 bits 121 | ``` 122 | 123 | If the kernel driver interface has changed and the library cannot establish a 124 | connection then we can return no useful information. In this case the 125 | `libarmgpuinfo::instance::create()` function will fail and will return a 126 | `nullptr`. 127 | 128 | # Building 129 | 130 | The library is provided as a single C++ source file and a single C++ header 131 | file. 
It is expected that developers will copy the files directly into their 132 | existing application build system, so no off-the-shelf build system is provided 133 | for the library integration. 134 | 135 | # Sample application 136 | 137 | The repository also contains a simple command line tool that demonstrates use of 138 | the API, and which can be used for ad hoc testing of devices. To build the 139 | Android command line tool: 140 | 141 | * Set `ANDROID_NDK_HOME` to the path of your Android NDK install. 142 | * Run `./android_build.sh [Release|Debug]`. 143 | 144 | The output binary will be `./bin/arm_gpuinfo`. You can run this on the device 145 | and print the results for your device to the terminal using the following 146 | commands: 147 | 148 | ```sh 149 | adb push ./bin/arm_gpuinfo /data/local/tmp 150 | adb shell chmod u+x /data/local/tmp/arm_gpuinfo 151 | adb shell /data/local/tmp/arm_gpuinfo 152 | adb shell rm /data/local/tmp/arm_gpuinfo 153 | ``` 154 | 155 | The generated output is formatted using a YAML-like syntax, but is designed for 156 | human consumption with additional line breaks. To generate strictly compliant 157 | YAML output for use in scripts, pass the `--yaml` or `-y` argument on the 158 | `arm_gpuinfo` command line. 159 | 160 | # Support 161 | 162 | If you have issues with the library itself, please raise them in the project's 163 | GitHub issue tracker. 164 | 165 | If you have any questions about Arm GPUs, application development for Arm GPUs, 166 | or general mobile graphics development or technology please submit them on the 167 | [Arm Community graphics forums][1]. 
168 | 169 | - - - 170 | 171 | _Copyright © 2023-2024, Arm Limited and contributors._ 172 | 173 | [1]: https://community.arm.com/support-forums/f/graphics-gaming-and-vr-forum/ 174 | [2]: https://developer.arm.com/documentation/102849/latest/ 175 | [3]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_ARM_shader_core_properties.html 176 | [4]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_ARM_shader_core_builtins.html 177 | -------------------------------------------------------------------------------- /android_build.sh: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2023-2024 Arm Limited. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 
23 | # 24 | 25 | # ---------------------------------------------------------------------------- 26 | # Configuration 27 | 28 | # Exit immediately if any component command errors 29 | set -e 30 | 31 | BUILD_DIR_64=build_arm64 32 | 33 | # ---------------------------------------------------------------------------- 34 | # Process command line options 35 | if [ "$#" -lt 1 ]; then 36 | BUILD_TYPE=Release 37 | else 38 | BUILD_TYPE=$1 39 | fi 40 | 41 | # ---------------------------------------------------------------------------- 42 | # Build the 64-bit library 43 | mkdir -p ${BUILD_DIR_64} 44 | pushd ${BUILD_DIR_64} 45 | 46 | cmake \ 47 | -DCMAKE_SYSTEM_NAME=Android \ 48 | -DANDROID_PLATFORM=29 \ 49 | -DANDROID_ABI=arm64-v8a \ 50 | -DANDROID_TOOLCHAIN=clang \ 51 | -DANDROID_STL=c++_static \ 52 | -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ 53 | -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \ 54 | -DCMAKE_INSTALL_PREFIX=../ \ 55 | .. 56 | 57 | make install -j8 58 | 59 | popd 60 | -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | # Release change log 2 | 3 | This page summarizes the major functional changes in each release. 4 | 5 | 6 | ## 1.2.0 7 | 8 | **Released:** November 2024 9 | 10 | This is a small feature release, adding support for reporting GPU architecture 11 | version numbers as an alternative to parsing product names. 12 | 13 | * **General:** 14 | * **Feature:** C++ namespace changed to `libarmgpuinfo`. 15 | * **Feature:** Supports reporting architecture major/minor versions. 16 | 17 | 18 | ## 1.1.0 19 | 20 | **Released:** June 2024 21 | 22 | This is a small feature release, adding support for new Arm GPUs and some new 23 | GPU configuration values. 24 | 25 | * **General:** 26 | * **Feature:** Supports Immortalis-G925 series hardware. 
27 | * **Feature:** Supports new Mali-G310 and Mali-G510 IP configurations. 28 | * **Feature:** Supports reporting shader core topology mask. 29 | 30 | 31 | 32 | ## 1.0.0 33 | 34 | **Released:** June 2023 35 | 36 | The first release of libGPUInfo. 37 | 38 | * **General:** 39 | * **Feature:** Support IP from Mali-T720 (Midgard architecture) through to 40 | Immortalis-G720 (5th Generation architecture). 41 | * **Feature:** Supports querying GPU model number and name. 42 | * **Feature:** Supports querying GPU shader core and cache configuration. 43 | * **Feature:** Supports querying GPU speed-of-light performance metrics. 44 | * **Feature:** Command line utility provided for easy device testing. 45 | 46 | - - - 47 | 48 | _Copyright © 2023-2024, Arm Limited and contributors._ 49 | -------------------------------------------------------------------------------- /source/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2023-2024 Arm Limited. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 23 | # 24 | 25 | add_executable( 26 | arm_gpuinfo 27 | arm_gpuinfo.cpp 28 | libgpuinfo.cpp) 29 | 30 | target_include_directories( 31 | arm_gpuinfo PUBLIC 32 | ".") 33 | 34 | target_compile_options( 35 | arm_gpuinfo PRIVATE 36 | 37 | -Wall 38 | -Wextra 39 | -Wpedantic 40 | -Werror 41 | -Wshadow) 42 | 43 | install(TARGETS arm_gpuinfo DESTINATION ${PACKAGE_ROOT}) 44 | -------------------------------------------------------------------------------- /source/arm_gpuinfo.cpp: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2023-2024 Arm Limited. 4 | * 5 | * SPDX-License-Identifier: MIT 6 | * 7 | * Permission is hereby granted, free of charge, to any person obtaining a copy 8 | * of this software and associated documentation files (the "Software"), to 9 | * deal in the Software without restriction, including without limitation the 10 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 11 | * sell copies of the Software, and to permit persons to whom the Software is 12 | * furnished to do so, subject to the following conditions: 13 | * 14 | * The above copyright notice and this permission notice shall be included in all 15 | * copies or substantial portions of the Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 20 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 24 | */ 25 | 26 | /** 27 | * @brief An example command line application using libGPUInfo. 28 | * 29 | * This file contains a command line application that will query and print 30 | * key properties about your device, and the Arm GPU that it contains. 31 | * 32 | * It is primarily intended as an example of using the libGPUInfo library, but 33 | * the command line application itself is a useful diagnostic tool for support 34 | * investigations. 35 | * 36 | * On Android devices you can install and run the application from the shell: 37 | * 38 | * adb push arm_gpuinfo /data/local/tmp 39 | * adb shell chmod u+x /data/local/tmp/arm_gpuinfo 40 | * adb shell /data/local/tmp/arm_gpuinfo 41 | * 42 | * The generated output is formatted using a YAML-like syntax, but by default is 43 | * designed for human consumption with additional line breaks. To generate 44 | * strictly compliant YAML output for use in scripts pass the --yaml or -y 45 | * argument on the arm_gpuinfo command line. 
46 | */ 47 | 48 | #include 49 | #include 50 | #include 51 | 52 | #if defined(__ANDROID__) 53 | #include 54 | #endif 55 | 56 | #include "libgpuinfo.hpp" 57 | 58 | #if defined(__ANDROID__) 59 | std::string get_android_property( 60 | const char* propertyA, 61 | const char* propertyB=nullptr 62 | ) { 63 | char buf[PROP_VALUE_MAX]; 64 | int size = __system_property_get(propertyA, buf); 65 | 66 | if (!size && propertyB) { 67 | size = __system_property_get(propertyB, buf); 68 | } 69 | 70 | std::string result { buf }; 71 | result[0] = toupper(result[0]); 72 | return result; 73 | } 74 | #endif 75 | 76 | std::string get_kernel_version() { 77 | struct utsname unamedata; 78 | uname(&unamedata); 79 | return { unamedata.release }; 80 | } 81 | 82 | int main(int argc, char *argv[]) 83 | { 84 | bool emit_yaml = false; 85 | for (int i = 1; i < argc; i++) 86 | { 87 | if ((!strcmp(argv[i], "-y")) || (!strcmp(argv[i], "--yaml"))) 88 | { 89 | emit_yaml = true; 90 | } 91 | } 92 | 93 | auto instance = libarmgpuinfo::instance::create(); 94 | if (!instance) 95 | { 96 | std::cout << "ERROR: Failed to create instance\n"; 97 | return 1; 98 | } 99 | 100 | const auto info = instance->get_info(); 101 | 102 | if (emit_yaml) 103 | { 104 | std::cout << "---\n"; 105 | } 106 | 107 | std::cout << "Device configuration:\n"; 108 | #if defined(__ANDROID__) 109 | std::cout << " Manufacturer: " << get_android_property("ro.product.vendor.manufacturer", "ro.product.brand") << "\n"; 110 | std::cout << " Model: " << get_android_property("ro.product.vendor.model", "ro.product.model") << "\n"; 111 | std::cout << " Android version: " << get_android_property("ro.build.version.release") << "\n"; 112 | #endif 113 | std::cout << " Kernel version: " << get_kernel_version() << "\n"; 114 | if (!emit_yaml) 115 | { 116 | std::cout << "\n"; 117 | } 118 | 119 | std::cout << "GPU configuration:\n"; 120 | std::cout << " Name: " << info.gpu_name << "\n"; 121 | std::cout << " Architecture: " << info.architecture_name << "\n"; 
122 | std::cout << " Architecture version: " << info.architecture_major 123 | << "." << info.architecture_minor <<"\n"; 124 | std::cout << " Model number: 0x" << std::hex << info.gpu_id << std::dec << "\n"; 125 | std::cout << " Core count: " << info.num_shader_cores << "\n"; 126 | std::cout << " Core mask: 0x" << std::hex << info.shader_core_mask << std::dec << "\n"; 127 | std::cout << " L2 cache count: " << info.num_l2_slices << "\n"; 128 | std::cout << " Total L2 cache size: " << info.num_l2_bytes << " bytes\n"; 129 | std::cout << " Bus width: " << info.num_bus_bits << " bits\n"; 130 | if (!emit_yaml) 131 | { 132 | std::cout << "\n"; 133 | } 134 | 135 | if (!info.num_exec_engines) 136 | { 137 | std::cout << "ERROR: Detected an unknown model " 138 | << std::hex << info.gpu_id << std::dec << "\n"; 139 | return 1; 140 | } 141 | 142 | std::cout << "Per-core statistics:\n"; 143 | std::cout << " Engine count: " << info.num_exec_engines << "\n"; 144 | std::cout << " FP32 FMAs: " << info.num_fp32_fmas_per_cy << "/cy\n"; 145 | std::cout << " FP16 FMAs: " << info.num_fp16_fmas_per_cy << "/cy\n"; 146 | std::cout << " Texels: " << info.num_texels_per_cy << "/cy\n"; 147 | std::cout << " Pixels: " << info.num_pixels_per_cy << "/cy\n"; 148 | if (!emit_yaml) 149 | { 150 | std::cout << "\n"; 151 | } 152 | 153 | std::cout << "Per-GPU statistics:\n"; 154 | std::cout << " FP32 FMAs: " << info.num_fp32_fmas_per_cy * info.num_shader_cores << "/cy\n"; 155 | std::cout << " FP16 FMAs: " << info.num_fp16_fmas_per_cy * info.num_shader_cores << "/cy\n"; 156 | std::cout << " Texels: " << info.num_texels_per_cy * info.num_shader_cores << "/cy\n"; 157 | std::cout << " Pixels: " << info.num_pixels_per_cy * info.num_shader_cores << "/cy\n"; 158 | 159 | return 0; 160 | } 161 | -------------------------------------------------------------------------------- /source/libgpuinfo.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024 
Arm Limited. 3 | * 4 | * SPDX-License-Identifier: MIT 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | #include "libgpuinfo.hpp" 41 | 42 | #define UNUSED(x) (void)x 43 | 44 | namespace libarmgpuinfo { 45 | 46 | struct product_entry { 47 | uint32_t id; 48 | uint32_t mask; 49 | uint32_t min_cores; 50 | const char* name; 51 | const char* architecture; 52 | std::function get_num_fp32_fmas_per_engine; 53 | std::function get_num_texels; 54 | std::function get_num_pixels; 55 | std::function get_num_exec_engines; 56 | }; 57 | 58 | static const uint32_t MASK_OLD { 0xFFFF }; 59 | static const uint32_t MASK_NEW { 0xF00F }; 60 | 61 | template 62 | static uint32_t get_num( 63 | int core_count, 64 | uint32_t core_features, 65 | uint32_t thread_features 66 | ) { 67 | UNUSED(core_count); 68 | UNUSED(core_features); 69 | UNUSED(thread_features); 70 | 71 | return val; 72 | } 73 | 74 | static uint32_t get_num_eng_g31( 75 | int core_count, 76 | uint32_t core_features, 77 | uint32_t thread_features 78 | ) { 79 | UNUSED(core_features); 80 | 81 | if ((core_count == 1) && ((thread_features & 0xFFFF) == 0x2000)) 82 | { 83 | return 1; 84 | } 85 | 86 | return 2; 87 | } 88 | 89 | static uint32_t get_num_eng_g51( 90 | int core_count, 91 | uint32_t core_features, 92 | uint32_t thread_features 93 | ) { 94 | UNUSED(core_features); 95 | 96 | if ((core_count == 1) && ((thread_features & 0xFFFF) == 0x2000)) 97 | { 98 | return 1; 99 | } 100 | 101 | return 3; 102 | } 103 | 104 | static uint32_t get_num_eng_g52( 105 | int core_count, 106 | uint32_t core_features, 107 | uint32_t thread_features 108 | ) { 109 | UNUSED(core_count); 110 | UNUSED(thread_features); 111 | 112 | return core_features & 0xF; 113 | } 114 | 115 | static uint32_t get_num_fma_g510( 116 | int core_count, 117 | uint32_t core_features, 118 | uint32_t thread_features 119 | ) { 120 | UNUSED(core_count); 121 | 
UNUSED(thread_features); 122 | 123 | uint32_t variant = core_features & 0xF; 124 | switch(variant) 125 | { 126 | case 0: 127 | return 16; 128 | case 2: 129 | case 3: 130 | return 24; 131 | case 1: 132 | case 4: 133 | case 5: 134 | case 6: 135 | default: 136 | return 32; 137 | } 138 | } 139 | 140 | static uint32_t get_num_tex_g510( 141 | int core_count, 142 | uint32_t core_features, 143 | uint32_t thread_features 144 | ) { 145 | UNUSED(core_count); 146 | UNUSED(thread_features); 147 | 148 | uint32_t variant = core_features & 0xF; 149 | switch(variant) 150 | { 151 | case 0: 152 | case 5: 153 | return 2; 154 | case 1: 155 | case 2: 156 | case 6: 157 | return 4; 158 | case 3: 159 | case 4: 160 | default: 161 | return 8; 162 | } 163 | } 164 | 165 | static uint32_t get_num_pix_g510( 166 | int core_count, 167 | uint32_t core_features, 168 | uint32_t thread_features 169 | ) { 170 | UNUSED(core_count); 171 | UNUSED(thread_features); 172 | 173 | // This returns min(blend, pixel) 174 | // Also limits to 2 for single engine configs 175 | uint32_t variant = core_features & 0xF; 176 | switch(variant) 177 | { 178 | case 0: 179 | case 1: 180 | case 5: 181 | case 6: 182 | return 2; 183 | case 2: 184 | case 3: 185 | case 4: 186 | default: 187 | return 4; 188 | } 189 | } 190 | 191 | static uint32_t get_num_eng_g510( 192 | int core_count, 193 | uint32_t core_features, 194 | uint32_t thread_features 195 | ) { 196 | UNUSED(core_count); 197 | UNUSED(thread_features); 198 | 199 | uint32_t variant = core_features & 0xF; 200 | switch(variant) 201 | { 202 | case 0: 203 | case 1: 204 | case 5: 205 | case 6: 206 | return 1; 207 | case 2: 208 | case 3: 209 | case 4: 210 | default: 211 | return 2; 212 | } 213 | } 214 | 215 | static const std::array PRODUCT_VERSIONS {{ 216 | // ID, ID Mask, Min cores, Name, Arch, FMA/Eng, Texels, Pixels, Engines 217 | product_entry { 0x6956, MASK_OLD, 1, "Mali-T600", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<2> }, 218 | product_entry { 0x0620, 
MASK_OLD, 1, "Mali-T620", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<2> }, 219 | product_entry { 0x0720, MASK_OLD, 1, "Mali-T720", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<1> }, 220 | product_entry { 0x0750, MASK_OLD, 1, "Mali-T760", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<2> }, 221 | product_entry { 0x0820, MASK_OLD, 1, "Mali-T820", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<1> }, 222 | product_entry { 0x0830, MASK_OLD, 1, "Mali-T830", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<2> }, 223 | product_entry { 0x0860, MASK_OLD, 1, "Mali-T860", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<2> }, 224 | product_entry { 0x0880, MASK_OLD, 1, "Mali-T880", "Midgard", get_num<4>, get_num<1>, get_num<1>, get_num<3> }, 225 | product_entry { 0x6000, MASK_NEW, 1, "Mali-G71", "Bifrost", get_num<4>, get_num<1>, get_num<1>, get_num<3> }, 226 | product_entry { 0x6001, MASK_NEW, 1, "Mali-G72", "Bifrost", get_num<4>, get_num<1>, get_num<1>, get_num<3> }, 227 | product_entry { 0x7000, MASK_NEW, 1, "Mali-G51", "Bifrost", get_num<4>, get_num<2>, get_num<2>, get_num_eng_g51 }, 228 | product_entry { 0x7001, MASK_NEW, 1, "Mali-G76", "Bifrost", get_num<8>, get_num<2>, get_num<2>, get_num<3> }, 229 | product_entry { 0x7002, MASK_NEW, 1, "Mali-G52", "Bifrost", get_num<8>, get_num<2>, get_num<2>, get_num_eng_g52 }, 230 | product_entry { 0x7003, MASK_NEW, 1, "Mali-G31", "Bifrost", get_num<4>, get_num<2>, get_num<2>, get_num_eng_g31 }, 231 | product_entry { 0x9000, MASK_NEW, 1, "Mali-G77", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 232 | product_entry { 0x9001, MASK_NEW, 1, "Mali-G57", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 233 | product_entry { 0x9003, MASK_NEW, 1, "Mali-G57", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 234 | product_entry { 0x9004, MASK_NEW, 1, "Mali-G68", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 235 | product_entry { 0x9002, 
MASK_NEW, 1, "Mali-G78", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 236 | product_entry { 0x9005, MASK_NEW, 1, "Mali-G78AE", "Valhall", get_num<16>, get_num<4>, get_num<2>, get_num<2> }, 237 | product_entry { 0xa002, MASK_NEW, 1, "Mali-G710", "Valhall", get_num<32>, get_num<8>, get_num<4>, get_num<2> }, 238 | product_entry { 0xa007, MASK_NEW, 1, "Mali-G610", "Valhall", get_num<32>, get_num<8>, get_num<4>, get_num<2> }, 239 | product_entry { 0xa003, MASK_NEW, 1, "Mali-G510", "Valhall", get_num_fma_g510, get_num_tex_g510, get_num_pix_g510, get_num_eng_g510 }, 240 | product_entry { 0xa004, MASK_NEW, 1, "Mali-G310", "Valhall", get_num_fma_g510, get_num_tex_g510, get_num_pix_g510, get_num_eng_g510 }, 241 | product_entry { 0xb002, MASK_NEW, 10, "Immortalis-G715", "Valhall", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 242 | product_entry { 0xb002, MASK_NEW, 7, "Mali-G715", "Valhall", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 243 | product_entry { 0xb002, MASK_NEW, 1, "Mali-G615", "Valhall", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 244 | product_entry { 0xb003, MASK_NEW, 1, "Mali-G615", "Valhall", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 245 | product_entry { 0xc000, MASK_NEW, 10, "Immortalis-G720", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 246 | product_entry { 0xc000, MASK_NEW, 6, "Mali-G720", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 247 | product_entry { 0xc000, MASK_NEW, 1, "Mali-G620", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 248 | product_entry { 0xc001, MASK_NEW, 1, "Mali-G620", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 249 | product_entry { 0xd000, MASK_NEW, 10, "Immortalis-G925", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 250 | product_entry { 0xd000, MASK_NEW, 6, "Mali-G725", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 251 | product_entry { 0xd001, MASK_NEW, 1, 
"Mali-G625", "Arm 5th Gen", get_num<64>, get_num<8>, get_num<4>, get_num<2> }, 252 | }}; 253 | 254 | static uint32_t get_gpu_id( 255 | uint32_t gpu_id 256 | ) { 257 | for (const auto& entry : PRODUCT_VERSIONS) 258 | { 259 | if (((gpu_id & entry.mask) == entry.id)) 260 | { 261 | return entry.id; 262 | } 263 | } 264 | 265 | return gpu_id; 266 | } 267 | 268 | static const char* get_gpu_name( 269 | uint32_t gpu_id, 270 | uint32_t core_count 271 | ) { 272 | for (const auto& entry : PRODUCT_VERSIONS) 273 | { 274 | if((gpu_id == entry.id) && (core_count >= entry.min_cores)) 275 | { 276 | return entry.name; 277 | } 278 | } 279 | 280 | return "Unknown"; 281 | } 282 | 283 | static const char* get_architecture_name( 284 | uint32_t gpu_id 285 | ) { 286 | for (const auto& entry : PRODUCT_VERSIONS) 287 | { 288 | if(gpu_id == entry.id) 289 | { 290 | return entry.architecture; 291 | } 292 | } 293 | 294 | return "Unknown"; 295 | } 296 | 297 | static int get_num_exec_engines( 298 | uint32_t gpu_id, 299 | uint32_t core_count, 300 | uint32_t core_features, 301 | uint32_t thread_features 302 | ) { 303 | for (const auto& entry : PRODUCT_VERSIONS) 304 | { 305 | if((gpu_id == entry.id) && (core_count >= entry.min_cores)) 306 | { 307 | return entry.get_num_exec_engines(core_count, core_features, thread_features); 308 | } 309 | } 310 | 311 | return 0; 312 | } 313 | 314 | static uint32_t get_num_fp32_fmas( 315 | uint32_t gpu_id, 316 | uint32_t core_count, 317 | uint32_t core_features, 318 | uint32_t thread_features 319 | ) { 320 | for (const auto& entry : PRODUCT_VERSIONS) 321 | { 322 | if((gpu_id == entry.id) && (core_count >= entry.min_cores)) 323 | { 324 | return entry.get_num_fp32_fmas_per_engine(core_count, core_features, thread_features) * 325 | entry.get_num_exec_engines(core_count, core_features, thread_features); 326 | } 327 | } 328 | 329 | return 0; 330 | } 331 | 332 | static uint32_t get_num_texels( 333 | uint32_t gpu_id, 334 | uint32_t core_count, 335 | uint32_t core_features, 
336 | uint32_t thread_features 337 | ) { 338 | for (const auto& entry : PRODUCT_VERSIONS) 339 | { 340 | if((gpu_id == entry.id) && (core_count >= entry.min_cores)) 341 | { 342 | return entry.get_num_texels(core_count, core_features, thread_features); 343 | } 344 | } 345 | 346 | return 0; 347 | } 348 | 349 | static uint32_t get_num_pixels( 350 | uint32_t gpu_id, 351 | uint32_t core_count, 352 | uint32_t core_features, 353 | uint32_t thread_features 354 | ) { 355 | for (const auto& entry : PRODUCT_VERSIONS) 356 | { 357 | if((gpu_id == entry.id) && (core_count >= entry.min_cores)) 358 | { 359 | return entry.get_num_pixels(core_count, core_features, thread_features); 360 | } 361 | } 362 | 363 | return 0; 364 | } 365 | 366 | /** Kbase Pre R21 ioctl interface. */ 367 | namespace kbase_pre_r21 { 368 | 369 | /** Related to mali0 ioctl interface */ 370 | enum class header_id : uint32_t { 371 | /** Version check. */ 372 | version_check = 0, 373 | /** Base Context Create Kernel Flags. */ 374 | create_kernel_flags = 2, 375 | /** Kbase Func Get Props. */ 376 | get_props = 526, 377 | /** Kbase Func Set Flags. */ 378 | set_flags = 530, 379 | }; 380 | 381 | /** Message header. */ 382 | union uk_header { 383 | /** Number identifying the called UK function. */ 384 | header_id id; 385 | /** The return code of the called UK function. */ 386 | uint32_t ret; 387 | /** Dummy to ensure type has 64-bit alignment */ 388 | uint64_t sizer; 389 | }; 390 | 391 | /** Check version compatibility between kernel and userspace. 
*/ 392 | struct version_check_t { 393 | /** UK header */ 394 | uk_header header; 395 | /** Major version number */ 396 | uint16_t major; 397 | /** Minor version number */ 398 | uint16_t minor; 399 | 400 | bool is_set() const 401 | { 402 | return major || minor; 403 | } 404 | }; 405 | 406 | /** IOCTL parameters to set flags */ 407 | struct set_flags_t { 408 | /** UK header */ 409 | uk_header header; 410 | /** Create flags */ 411 | uint32_t create_flags; 412 | /** Padding */ 413 | uint32_t padding; 414 | }; 415 | 416 | /** Base GPU Num Texture Features Registers. */ 417 | static constexpr const uint32_t base_gpu_num_texture_features_registers = 3; 418 | 419 | /** Base Max Coherent Groups. */ 420 | static constexpr const uint32_t base_max_coherent_groups = 16; 421 | 422 | /** GPU Max Job Slots. */ 423 | static constexpr const uint32_t gpu_max_job_slots = 16; 424 | 425 | /** Kbase UK GPU props. */ 426 | struct uk_gpuprops_t { 427 | /** 428 | * IOCTL parameters to probe GPU properties 429 | * 430 | * NOTE: the raw_props member in this data structure contains the register 431 | * values from which the value of the other members are derived. The derived 432 | * members exist to allow for efficient access and/or shielding the details 433 | * of the layout of the registers. 434 | * 435 | */ 436 | struct gpu_props { 437 | /** Core. */ 438 | struct core { 439 | /** Product specific value. */ 440 | uint32_t product_id; 441 | /** 442 | * Status of the GPU release. 443 | * No defined values, but starts at 0 and increases by one for each 444 | * release status (alpha, beta, EAC, etc.). 445 | * 4 bit values (0-15). 446 | */ 447 | uint16_t version_status; 448 | /** 449 | * Minor release number of the GPU. "P" part of an "RnPn" release number. 450 | * 8 bit values (0-255). 451 | */ 452 | uint16_t minor_revision; 453 | /** 454 | * Major release number of the GPU. "R" part of an "RnPn" release number. 455 | * 4 bit values (0-15). 
456 | */ 457 | uint16_t major_revision; 458 | /** Padding. */ 459 | uint16_t padding; 460 | /** 461 | * This property is deprecated since it has not contained the real current 462 | * value of GPU clock speed. It is kept here only for backwards compatibility. 463 | * For the new ioctl interface, it is ignored and is treated as a padding 464 | * to keep the structure of the same size and retain the placement of its 465 | * members. 466 | */ 467 | uint32_t gpu_speed_mhz; 468 | /** 469 | * @usecase GPU clock max speed is required for computing best case 470 | * in tasks such as job scheduling and irq_throttling. (It is not specified in the 471 | * Midgard Architecture). 472 | * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function. 473 | */ 474 | uint32_t gpu_freq_khz_max; 475 | /** 476 | * @usecase GPU clock min speed is required for computing worst case 477 | * in tasks such as job scheduling and irq_throttling. (It is not specified in the 478 | * Midgard Architecture). 479 | */ 480 | uint32_t gpu_freq_khz_min; 481 | /** Size of the shader program counter, in bits. */ 482 | uint32_t log2_program_counter_size; 483 | /** 484 | * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a 485 | * bitpattern where a set bit indicates that the format is supported. 486 | * 487 | * Before using a texture format, it is recommended that the corresponding 488 | * bit be checked. 489 | */ 490 | uint32_t texture_features[base_gpu_num_texture_features_registers]; 491 | /** 492 | * Theoretical maximum memory available to the GPU. It is unlikely that a 493 | * client will be able to allocate all of this memory for their own 494 | * purposes, but this at least provides an upper bound on the memory 495 | * available to the GPU. 496 | * 497 | * This is required for OpenCL's clGetDeviceInfo() call when 498 | * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The 499 | * client will not be expecting to allocate anywhere near this value.
500 | */ 501 | uint64_t gpu_available_memory_size; 502 | }; 503 | 504 | /** 505 | * More information is possible - but associativity and bus width are not 506 | * required by upper-level apis. 507 | */ 508 | struct l2_cache { 509 | /** Log2 Line Size. */ 510 | uint8_t log2_line_size; 511 | /** Log2 Cache Size. */ 512 | uint8_t log2_cache_size; 513 | /** Num L2 Slices. */ 514 | uint8_t num_l2_slices; 515 | /** Padding bytes. */ 516 | uint8_t padding[5]; 517 | }; 518 | 519 | /** Tiler. */ 520 | struct tiler { 521 | /** Max is 4*2^15 */ 522 | uint32_t bin_size_bytes; 523 | /** Max is 2^15 */ 524 | uint32_t max_active_levels; 525 | }; 526 | 527 | /** GPU threading system details. */ 528 | struct thread { 529 | /** Max. number of threads per core */ 530 | uint32_t max_threads; 531 | /** Max. number of threads per workgroup */ 532 | uint32_t max_workgroup_size; 533 | /** Max. number of threads that can synchronize on a simple barrier */ 534 | uint32_t max_barrier_size; 535 | /** Total size [1..65535] of the register file available per core. */ 536 | uint16_t max_registers; 537 | /** Max. tasks [1..255] which may be sent to a core before it becomes blocked. */ 538 | uint8_t max_task_queue; 539 | /** Max. allowed value [1..15] of the Thread Group Split field. */ 540 | uint8_t max_thread_group_split; 541 | /** 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */ 542 | uint8_t impl_tech; 543 | /** Padding bytes. */ 544 | uint8_t padding[7]; 545 | }; 546 | 547 | /** 548 | * A complete description of the GPU's Hardware Configuration Discovery 549 | * registers. 550 | * 551 | * The information is presented inefficiently for access. For frequent access, 552 | * the values should be better expressed in an unpacked form in the 553 | * base_gpu_props structure. 554 | * 555 | * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to 556 | * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device 557 | * behaving differently?". 
In this case, all information about the configuration is potentially useful, but it does not need to be processed 558 | * by the driver. Instead, the raw registers can be processed by the Mali 559 | * Tools software on the host PC. 560 | */ 561 | struct raw { 562 | /** Shader Present. */ 563 | uint64_t shader_present; 564 | /** Tiler Present. */ 565 | uint64_t tiler_present; 566 | /** L2 Present. */ 567 | uint64_t l2_present; 568 | /** Unused 1. */ 569 | uint64_t unused_1; 570 | /** L2 Features. */ 571 | uint32_t l2_features; 572 | /** Suspend Size. */ 573 | uint32_t suspend_size; 574 | /** Mem Features. */ 575 | uint32_t mem_features; 576 | /** Mmu Features. */ 577 | uint32_t mmu_features; 578 | /** As Present. */ 579 | uint32_t as_present; 580 | /** Js Present. */ 581 | uint32_t js_present; 582 | /** Js Features. */ 583 | uint32_t js_features[gpu_max_job_slots]; 584 | /** Tiler Features. */ 585 | uint32_t tiler_features; 586 | /** Texture Features. */ 587 | uint32_t texture_features[3]; 588 | /** GPU ID. */ 589 | uint32_t gpu_id; 590 | /** Thread Max Threads. */ 591 | uint32_t thread_max_threads; 592 | /** Thread Max Workgroup Size. */ 593 | uint32_t thread_max_workgroup_size; 594 | /** Thread Max Barrier Size. */ 595 | uint32_t thread_max_barrier_size; 596 | /** Thread Features. */ 597 | uint32_t thread_features; 598 | /** 599 | * Coherency Mode. 600 | * Note: This is the _selected_ coherency mode rather than the 601 | * available modes as exposed in the coherency_features register. 602 | */ 603 | uint32_t coherency_mode; 604 | }; 605 | 606 | /** 607 | * Coherency group information 608 | * 609 | * Note that the sizes of the members could be reduced. However, the \c group 610 | * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte 611 | * aligned, thus leading to wastage if the other members sizes were reduced. 612 | * 613 | * The groups are sorted by core mask. The core masks are non-repeating and do 614 | * not intersect.
615 | */ 616 | struct coherent_group_info { 617 | /** 618 | * descriptor for a coherent group 619 | * 620 | * \c core_mask exposes all cores in that coherent group, and \c num_cores 621 | * provides a cached population-count for that mask. 622 | * 623 | * @note Whilst all cores are exposed in the mask, not all may be available to 624 | * the application, depending on the Kernel Power policy. 625 | * 626 | * @note if u64s must be 8-byte aligned, then this structure has 32-bits of 627 | * wastage. 628 | */ 629 | struct coherent_group { 630 | /** Core restriction mask required for the group */ 631 | uint64_t core_mask; 632 | /** Number of cores in the group */ 633 | uint16_t num_cores; 634 | /** Padding bytes. */ 635 | uint16_t padding[3]; 636 | }; 637 | 638 | /** Num Groups. */ 639 | uint32_t num_groups; 640 | /** 641 | * Number of core groups (coherent or not) in the GPU. Equivalent to the number of 642 | * L2 Caches. 643 | * The GPU Counter dumping writes 2048 bytes per core group, regardless of whether 644 | * the core groups are coherent or not. Hence this member is needed to calculate 645 | * how much memory is required for dumping. 646 | * @note Do not use it to work out how many valid elements are in the group[] 647 | * member. Use num_groups instead. 648 | */ 649 | uint32_t num_core_groups; 650 | /** Coherency features of the memory, accessed by @ref gpu_mem_features methods. */ 651 | uint32_t coherency; 652 | /** Padding. */ 653 | uint32_t padding; 654 | /** Descriptors of coherent groups */ 655 | coherent_group group[base_max_coherent_groups]; 656 | }; 657 | 658 | /** Core Props. */ 659 | core core_props; 660 | /** L2 Props. */ 661 | l2_cache l2_props; 662 | /** Unused to keep for backwards compatibility. */ 663 | uint64_t unused; 664 | /** Tiler Props. */ 665 | tiler tiler_props; 666 | /** Thread Props. */ 667 | thread thread_props; 668 | /** This member is large, likely to be 128 bytes. 
*/ 669 | raw raw_props; 670 | /** This must be last member of the structure. */ 671 | coherent_group_info coherency_info; 672 | }; 673 | 674 | /** Header. */ 675 | uk_header header; 676 | /** Props. */ 677 | gpu_props props; 678 | }; 679 | 680 | constexpr auto iface_number = 0x80; 681 | 682 | /** Commands describing kbase_pre_r21 ioctl interface. */ 683 | enum command_type { 684 | /** Check version compatibility between JM kernel and userspace. */ 685 | version_check = _IOWR(iface_number, 0x0, version_check_t), 686 | /** Set kernel context creation flags. */ 687 | set_flags = _IOWR(iface_number, 0x212, set_flags_t), 688 | /** Get GPU properties. */ 689 | get_gpuprops = _IOWR(iface_number, 0x20e, uk_gpuprops_t), 690 | }; 691 | 692 | } 693 | 694 | /** Kbase Post R21 ioctl interface. */ 695 | namespace kbase_post_r21 { 696 | 697 | template 698 | class pointer64 { 699 | public: 700 | /** @return Pointer to the object. */ 701 | value_t* get() const { 702 | return reinterpret_cast(static_cast(value)); 703 | } 704 | 705 | /** 706 | * Set pointer value. 707 | * 708 | * @param ptr The new pointer value. 709 | */ 710 | void reset(value_t* ptr) { 711 | value = static_cast(reinterpret_cast(ptr)); 712 | } 713 | 714 | private: 715 | /** Pointer value as uint64_t. */ 716 | uint64_t value { 0 }; 717 | }; 718 | 719 | /** Check version compatibility between kernel and userspace. */ 720 | struct version_check_t { 721 | /** Major version number. */ 722 | uint16_t major; 723 | /** Minor version number */ 724 | uint16_t minor; 725 | 726 | bool is_set() const 727 | { 728 | return major || minor; 729 | } 730 | }; 731 | 732 | /** Set kernel context creation flags. */ 733 | struct set_flags_t { 734 | /** kernel context creation flags. */ 735 | uint32_t create_flags; 736 | }; 737 | 738 | /** 739 | * The ioctl will return the number of bytes stored into buffer or an error 740 | * on failure (e.g. size is too small). 
If size is specified as 0 then no 741 | * data will be written but the return value will be the number of bytes needed 742 | * for all the properties. 743 | * 744 | * flags may be used in the future to request a different format for the 745 | * buffer. With flags == 0 the following format is used. 746 | * 747 | * The buffer will be filled with pairs of values, a __u32 key identifying the 748 | * property followed by the value. The size of the value is identified using 749 | * the bottom bits of the key. The value then immediately follows the key and 750 | * is tightly packed (there is no padding). All keys and values are 751 | * little-endian. 752 | * 753 | * 00 = __u8 754 | * 01 = __u16 755 | * 10 = __u32 756 | * 11 = __u64 757 | */ 758 | struct get_gpuprops_t { 759 | /** GPU property size. */ 760 | enum class gpuprop_size : uint8_t { 761 | /** Property type is uint8_t. */ 762 | uint8 = 0x0, 763 | /** Property type is uint16_t. */ 764 | uint16 = 0x1, 765 | /** Property type is uint32_t. */ 766 | uint32 = 0x2, 767 | /** Property type is uint64_t. */ 768 | uint64 = 0x3 769 | }; 770 | 771 | /** GPU properties codes. */ 772 | enum class gpuprop_code : uint8_t { 773 | /** Product id. */ 774 | product_id = 1, 775 | /** L2 log2 line size. */ 776 | l2_log2_line_size = 13, 777 | /** L2 log2 cache size. */ 778 | l2_log2_cache_size = 14, 779 | /** L2 num l2 slices. */ 780 | l2_num_l2_slices = 15, 781 | /** Max threads. */ 782 | max_threads = 18, 783 | /** Max registers. */ 784 | max_registers = 21, 785 | /** Raw l2 features. */ 786 | raw_l2_features = 29, 787 | /** Raw core features. */ 788 | raw_core_features = 30, 789 | /** Raw GPU id. */ 790 | raw_gpu_id = 55, 791 | /** Raw thread max threads. */ 792 | raw_thread_max_threads = 56, 793 | /** Raw thread max workgroup size. */ 794 | raw_thread_max_workgroup_size = 57, 795 | /** Raw thread max barrier size. */ 796 | raw_thread_max_barrier_size = 58, 797 | /** Raw thread features.
*/ 798 | raw_thread_features = 59, 799 | /** Raw coherency mode. */ 800 | raw_coherency_mode = 60, 801 | /** Coherency num groups. */ 802 | coherency_num_groups = 61, 803 | /** Coherency num core groups. */ 804 | coherency_num_core_groups = 62, 805 | /** Coherency coherency. */ 806 | coherency_coherency = 63, 807 | /** Coherency group 0. */ 808 | coherency_group_0 = 64, 809 | /** Coherency group 1. */ 810 | coherency_group_1 = 65, 811 | /** Coherency group 2. */ 812 | coherency_group_2 = 66, 813 | /** Coherency group 3. */ 814 | coherency_group_3 = 67, 815 | /** Num exec engines. */ 816 | num_exec_engines = 82 817 | }; 818 | 819 | /** Pointer to the buffer to store properties into. */ 820 | pointer64 buffer; 821 | 822 | /** Size of the buffer. */ 823 | uint32_t size; 824 | 825 | /** Flags - must be zero for now. */ 826 | uint32_t flags; 827 | }; 828 | 829 | constexpr auto iface_number = 0x80; 830 | 831 | /** Commands describing kbase ioctl interface. */ 832 | enum command_type { 833 | /** Check version compatibility between JM kernel and userspace. */ 834 | version_check_jm = _IOWR(iface_number, 0x0, version_check_t), 835 | /** Check version compatibility between CSF kernel and userspace. */ 836 | version_check_csf = _IOWR(iface_number, 0x34, version_check_t), 837 | /** Set kernel context creation flags. */ 838 | set_flags = _IOW(iface_number, 0x1, set_flags_t), 839 | /** Get GPU properties. 
*/ 840 | get_gpuprops = _IOW(iface_number, 0x3, get_gpuprops_t), 841 | }; 842 | 843 | } 844 | 845 | class prop_decoder { 846 | public: 847 | prop_decoder(std::vector buffer) 848 | : buffer_{ std::move(buffer) } 849 | , data_{ buffer_.data() } 850 | , size_{ buffer_.size() } {} 851 | 852 | bool decode(gpuinfo& info) { 853 | bool success = true; 854 | 855 | uint64_t raw_gpu_id {}; 856 | uint64_t raw_core_features {}; 857 | uint64_t raw_thread_features {}; 858 | 859 | while (size_ > 0) { 860 | auto p = next(success); 861 | if (!success) { 862 | return false; 863 | } 864 | 865 | prop_id_t id = p.first; 866 | uint64_t value = p.second; 867 | 868 | switch (id) { 869 | case prop_id_t::product_id: 870 | info.gpu_id = get_gpu_id(value); 871 | break; 872 | case prop_id_t::l2_log2_cache_size: 873 | info.num_l2_bytes = 1UL << value; 874 | break; 875 | case prop_id_t::l2_num_l2_slices: 876 | info.num_l2_slices = value; 877 | break; 878 | case prop_id_t::raw_l2_features: 879 | // Bus width stored as log2(bus width) in top 8 bits 880 | info.num_bus_bits = 1UL << ((value >> 24) & 0xFF); 881 | break; 882 | case prop_id_t::raw_gpu_id: 883 | raw_gpu_id = value; 884 | break; 885 | case prop_id_t::raw_core_features: 886 | raw_core_features = value; 887 | break; 888 | case prop_id_t::raw_thread_features: 889 | raw_thread_features = value; 890 | break; 891 | case prop_id_t::coherency_num_core_groups: 892 | // Only expect 1 core group in Mali-T700 onwards 893 | assert(value == 1); 894 | break; 895 | case prop_id_t::coherency_group_0: 896 | info.num_shader_cores = __builtin_popcountll(value); // value is a 64-bit core mask; the non-ll builtin would drop bits 32-63 897 | info.shader_core_mask = value; 898 | break; 899 | default: 900 | break; 901 | } 902 | } 903 | 904 | // Decode architecture versions 905 | constexpr uint64_t bits4 { 0xF }; 906 | constexpr uint64_t bits8 { 0xFF }; 907 | 908 | constexpr uint64_t compat_shift { 28 }; 909 | constexpr uint64_t compat { 0xF }; 910 | bool is_64bit_id = ((raw_gpu_id >> compat_shift) & bits4) == compat; 911 | 912 | //
Old-style 32-bit ID 913 | if (!is_64bit_id) 914 | { 915 | constexpr uint64_t arch_major_offset { 28 }; 916 | constexpr uint64_t arch_minor_offset { 24 }; 917 | info.architecture_major = (raw_gpu_id >> arch_major_offset) & bits4; 918 | info.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits4; 919 | } 920 | // New-style 64-bit ID 921 | else 922 | { 923 | constexpr uint64_t arch_major_offset { 56 }; 924 | constexpr uint64_t arch_minor_offset { 48 }; 925 | info.architecture_major = (raw_gpu_id >> arch_major_offset) & bits8; 926 | info.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits8; 927 | } 928 | 929 | info.num_exec_engines = get_num_exec_engines( 930 | info.gpu_id, 931 | info.num_shader_cores, 932 | raw_core_features, 933 | raw_thread_features); 934 | 935 | info.num_fp32_fmas_per_cy = get_num_fp32_fmas( 936 | info.gpu_id, 937 | info.num_shader_cores, 938 | raw_core_features, 939 | raw_thread_features); 940 | 941 | info.num_fp16_fmas_per_cy = info.num_fp32_fmas_per_cy * 2; 942 | 943 | info.num_texels_per_cy = get_num_texels( 944 | info.gpu_id, 945 | info.num_shader_cores, 946 | raw_core_features, 947 | raw_thread_features); 948 | 949 | info.num_pixels_per_cy = get_num_pixels( 950 | info.gpu_id, 951 | info.num_shader_cores, 952 | raw_core_features, 953 | raw_thread_features); 954 | 955 | return true; 956 | } 957 | 958 | private: 959 | /** Property id type. */ 960 | using prop_id_t = kbase_post_r21::get_gpuprops_t::gpuprop_code; 961 | /** Property size type. 
*/ 962 | using prop_size_t = kbase_post_r21::get_gpuprops_t::gpuprop_size; 963 | 964 | static std::pair to_prop_metadata(uint32_t v) { 965 | /* Property id/size encoding is: 966 | * +--------+----------+ 967 | * | 31 2 | 1 0 | 968 | * +--------+----------+ 969 | * | PropId | PropSize | 970 | * +--------+----------+ 971 | */ 972 | static unsigned int id_shift { 2 }; 973 | static unsigned int size_mask { 0b11 }; 974 | 975 | return { static_cast(v >> id_shift), static_cast(v & size_mask) }; 976 | } 977 | 978 | std::pair next(bool& success) { 979 | success = true; 980 | auto p = to_prop_metadata(read_bytes(success)); 981 | if (success) 982 | { 983 | prop_id_t id = p.first; 984 | prop_size_t size = p.second; 985 | 986 | switch (size) { 987 | case prop_size_t::uint8: 988 | return { id, read_bytes(success) }; 989 | case prop_size_t::uint16: 990 | return { id, read_bytes(success) }; 991 | case prop_size_t::uint32: 992 | return { id, read_bytes(success) }; 993 | case prop_size_t::uint64: 994 | return { id, read_bytes(success) }; 995 | } 996 | } 997 | 998 | return {}; 999 | } 1000 | 1001 | template 1002 | T read_bytes(bool& success) { 1003 | // Check we have enough bytes in the buffer 1004 | if (size_ < sizeof(T)) { 1005 | success = false; 1006 | return 0; 1007 | } 1008 | 1009 | T ret {}; 1010 | for (size_t b = 0; b < sizeof(T); b++) 1011 | { 1012 | ret |= static_cast(static_cast(data_[b]) << (8 * b)); 1013 | } 1014 | data_ += sizeof(T); 1015 | size_ -= sizeof(T); 1016 | return ret; 1017 | } 1018 | 1019 | std::vector const buffer_; 1020 | unsigned char const *data_; 1021 | std::size_t size_; 1022 | }; 1023 | 1024 | /* See header for documentation */ 1025 | std::unique_ptr instance::create( 1026 | const uint32_t id 1027 | ) { 1028 | std::string device_path("/dev/mali" + std::to_string(id)); 1029 | 1030 | // Open the kernel driver device node 1031 | const int fd = ::open(device_path.c_str(), O_RDONLY); 1032 | if (fd < 0) { 1033 | return nullptr; 1034 | } 1035 | 1036 | // Check 
that it is a character device 1037 | struct stat s {}; 1038 | const int fs_result = fstat(fd, &s); 1039 | if ((fs_result < 0) || (S_ISCHR(s.st_mode) == 0)) { 1040 | ::close(fd); 1041 | return nullptr; 1042 | } 1043 | 1044 | // Create the instance 1045 | auto result = std::unique_ptr(new instance(fd)); 1046 | if (!result || !result->valid_) { 1047 | return nullptr; 1048 | } 1049 | 1050 | return result; 1051 | } 1052 | 1053 | /* See header for documentation */ 1054 | const gpuinfo& instance::get_info() const 1055 | { 1056 | return info_; 1057 | }; 1058 | 1059 | /* See header for documentation */ 1060 | instance::~instance() 1061 | { 1062 | ::close(fd_); 1063 | } 1064 | 1065 | /* See header for documentation */ 1066 | instance::instance(int fd): 1067 | fd_(fd) 1068 | { 1069 | if (!check_version()) { 1070 | valid_ = false; 1071 | return; 1072 | } 1073 | 1074 | if (!set_flags()) { 1075 | valid_ = false; 1076 | return; 1077 | } 1078 | 1079 | if (!init_props()) { 1080 | valid_ = false; 1081 | return; 1082 | } 1083 | } 1084 | 1085 | static bool is_supported(unsigned int major, unsigned int minor) 1086 | { 1087 | return (major > 10) || ((major == 10) && (minor >= 2)); 1088 | } 1089 | 1090 | /* See header for documentation */ 1091 | bool instance::check_version() { 1092 | // Probe pre-r21 JM kernel 1093 | // Must be first in the list because CSF reuses an old IOCTL ID 1094 | iface_ = iface_type::pre_r21; 1095 | kbase_pre_r21::version_check_t pre_r21 {}; 1096 | pre_r21.header.id = kbase_pre_r21::header_id::version_check; 1097 | ::ioctl(fd_, kbase_pre_r21::version_check, &pre_r21); 1098 | // If this is non-zero this must be pre-r21 driver, so check version 1099 | if (pre_r21.is_set()) { 1100 | return is_supported(pre_r21.major, pre_r21.minor); 1101 | } 1102 | 1103 | // Probe r21+ JM kernel 1104 | iface_ = iface_type::post_r21; 1105 | kbase_post_r21::version_check_t post_r21 {}; 1106 | ::ioctl(fd_, kbase_post_r21::version_check_jm, &post_r21); 1107 | // If this is non-zero this 
must be post-r21 JM driver, so check version 1108 | if (post_r21.is_set()) { 1109 | return is_supported(post_r21.major, post_r21.minor); 1110 | } 1111 | 1112 | // Probe r21+ CSF kernel 1113 | ::ioctl(fd_, kbase_post_r21::version_check_csf, &post_r21); 1114 | // If this is any non-zero value this is a valid CSF GPU 1115 | return post_r21.is_set(); 1116 | } 1117 | 1118 | /** Call set flags ioctl. */ 1119 | bool instance::set_flags() { 1120 | static constexpr auto system_monitor_flag_submit_disabled_bit = 1; 1121 | static constexpr auto system_monitor_flag = 1U << system_monitor_flag_submit_disabled_bit; 1122 | 1123 | // Clear errno 1124 | errno = 0; 1125 | 1126 | if (iface_ == iface_type::pre_r21) { 1127 | kbase_pre_r21::set_flags_t flags {}; 1128 | flags.header.id = kbase_pre_r21::header_id::set_flags; 1129 | flags.create_flags = system_monitor_flag; 1130 | ::ioctl(fd_, kbase_pre_r21::set_flags, &flags); 1131 | } else { 1132 | kbase_post_r21::set_flags_t flags { system_monitor_flag }; 1133 | ::ioctl(fd_, kbase_post_r21::set_flags, &flags); 1134 | } 1135 | 1136 | // Mali driver will fail if reinitialized, but it's benign 1137 | // TODO: Does this ever happen with this usage pattern 1138 | return errno == 0 || errno == EINVAL || errno == EPERM; 1139 | } 1140 | 1141 | /* See header for documentation */ 1142 | bool instance::init_props() { 1143 | bool success; 1144 | if (iface_ == iface_type::pre_r21) { 1145 | success = init_props_pre_r21(); 1146 | } else { 1147 | success = init_props_post_r21(); 1148 | } 1149 | 1150 | // Perform some common cleanup on the data 1151 | if (!success) 1152 | { 1153 | return false; 1154 | } 1155 | 1156 | info_.num_l2_bytes *= info_.num_l2_slices; 1157 | info_.gpu_name = get_gpu_name(info_.gpu_id, info_.num_shader_cores); 1158 | info_.architecture_name = get_architecture_name(info_.gpu_id); 1159 | return true; 1160 | } 1161 | 1162 | /* See header for documentation */ 1163 | bool instance::init_props_pre_r21() { 1164 | 
kbase_pre_r21::uk_gpuprops_t props {}; 1165 | props.header.id = kbase_pre_r21::header_id::get_props; 1166 | errno = 0; 1167 | ::ioctl(fd_, kbase_pre_r21::get_gpuprops, &props); 1168 | if (errno) { 1169 | return false; 1170 | } 1171 | 1172 | info_.gpu_id = get_gpu_id(props.props.core_props.product_id); 1173 | info_.num_l2_bytes = 1UL << props.props.l2_props.log2_cache_size; 1174 | info_.num_l2_slices = props.props.l2_props.num_l2_slices; 1175 | info_.num_bus_bits = 1UL << (props.props.raw_props.l2_features >> 24); 1176 | 1177 | // Old kernel driver must have 32-bit GPU ID 1178 | switch (info_.gpu_id) { 1179 | // Midgard GPUs require manual specification, as not machine readable 1180 | case 0x6956: // Mali-T600 1181 | info_.architecture_major = 4; 1182 | info_.architecture_minor = 0; 1183 | break; 1184 | case 0x0620: // Mali-T620 1185 | info_.architecture_major = 4; 1186 | info_.architecture_minor = 1; 1187 | break; 1188 | case 0x0720: // Mali-T720 1189 | info_.architecture_major = 4; 1190 | info_.architecture_minor = 2; 1191 | break; 1192 | case 0x0750: // Mali-T760 1193 | info_.architecture_major = 5; 1194 | info_.architecture_minor = 0; 1195 | break; 1196 | case 0x0820: // Mali-T820 1197 | case 0x0830: // Mali-T830 1198 | info_.architecture_major = 5; 1199 | info_.architecture_minor = 1; 1200 | break; 1201 | case 0x0860: // Mali-T860 1202 | case 0x0880: // Mali-T880 1203 | info_.architecture_major = 5; 1204 | info_.architecture_minor = 2; 1205 | break; 1206 | // Bifrost onwards report architecture version via config register 1207 | default: 1208 | { 1209 | uint32_t raw_gpu_id = props.props.raw_props.gpu_id; 1210 | constexpr unsigned int arch_major_offset { 28 }; 1211 | constexpr unsigned int arch_minor_offset { 24 }; 1212 | constexpr unsigned int bits4 { 0xF }; 1213 | info_.architecture_major = (raw_gpu_id >> arch_major_offset) & bits4; 1214 | info_.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits4; 1215 | break; 1216 | } 1217 | } 1218 | 1219 | 
info_.num_shader_cores = 0; 1220 | // Only expect 1 core group in Mali-T700 onwards 1221 | assert(props.props.coherency_info.num_core_groups == 1); 1222 | for (uint32_t i = 0; i < props.props.coherency_info.num_core_groups; i++) 1223 | { 1224 | info_.num_shader_cores = __builtin_popcount(props.props.coherency_info.group[i].core_mask); 1225 | info_.shader_core_mask = props.props.coherency_info.group[i].core_mask; 1226 | } 1227 | 1228 | info_.num_exec_engines = get_num_exec_engines( 1229 | info_.gpu_id, 1230 | info_.num_shader_cores, 1231 | 0, 0); 1232 | 1233 | info_.num_fp32_fmas_per_cy = get_num_fp32_fmas( 1234 | info_.gpu_id, 1235 | info_.num_shader_cores, 1236 | 0, 0); 1237 | 1238 | info_.num_fp16_fmas_per_cy = info_.num_fp32_fmas_per_cy * 2; 1239 | 1240 | info_.num_texels_per_cy = get_num_texels( 1241 | info_.gpu_id, 1242 | info_.num_shader_cores, 1243 | 0, 0); 1244 | 1245 | info_.num_pixels_per_cy = get_num_pixels( 1246 | info_.gpu_id, 1247 | info_.num_shader_cores, 1248 | 0, 0); 1249 | 1250 | return true; 1251 | } 1252 | 1253 | /* See header for documentation */ 1254 | bool instance::init_props_post_r21() { 1255 | errno = 0; 1256 | 1257 | kbase_post_r21::get_gpuprops_t get_props = {}; 1258 | int size = ::ioctl(fd_, kbase_post_r21::get_gpuprops, &get_props); 1259 | if (errno) { 1260 | return false; 1261 | } 1262 | 1263 | std::vector buffer(static_cast(size)); 1264 | get_props.size = static_cast(size); 1265 | get_props.buffer.reset(buffer.data()); 1266 | ::ioctl(fd_, kbase_post_r21::get_gpuprops, &get_props); 1267 | if (errno) { 1268 | return false; 1269 | } 1270 | 1271 | prop_decoder decoder { buffer }; 1272 | return decoder.decode(info_); 1273 | } 1274 | 1275 | } 1276 | -------------------------------------------------------------------------------- /source/libgpuinfo.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2021-2024 Arm Limited. 
3 | * 4 | * SPDX-License-Identifier: MIT 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a copy 7 | * of this software and associated documentation files (the "Software"), to 8 | * deal in the Software without restriction, including without limitation the 9 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 | * sell copies of the Software, and to permit persons to whom the Software is 11 | * furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice shall be included in all 14 | * copies or substantial portions of the Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | */ 24 | 25 | /** 26 | * @brief The core libGPUInfo library interface. 27 | * 28 | * This library provides developers with an easy way to query the Arm 29 | * Immortalis or Arm Mali GPU configuration in their system. This information 30 | * can be used to adjust rendering workload to match the capabilities of the 31 | * device. 32 | * 33 | * The library is simple to use: 34 | * 35 | * // Create a connection with the kernel driver ... 36 | * std::unique_ptr<libarmgpuinfo::instance> conn = libarmgpuinfo::instance::create(); 37 | * if (!conn) 38 | * { 39 | * std::cout << "ERROR: Failed to create Mali instance\n"; 40 | * return; 41 | * } 42 | * 43 | * // Fetch the information result and do something with it ...
44 | * const gpuinfo& info = conn->get_info(); 45 | * std::cout << "GPU: " << info.gpu_name << " MP" << info.num_shader_cores << "\n"; 46 | * 47 | * Note that the returned information object is returned by reference, and has 48 | * the same lifetime as the instance object. 49 | */ 50 | 51 | #pragma once 52 | 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | 60 | #include 61 | #include 62 | #include 63 | #include 64 | 65 | namespace libarmgpuinfo { 66 |

/**
 * Arm GPU information.
 *
 * A plain record of the properties queried from the kernel driver. The
 * throughput fields are expressed per clock cycle per shader core.
 */
struct gpuinfo
{
    /** GPU name */
    const char* gpu_name;

    /** GPU architecture name */
    const char* architecture_name;

    /** GPU ID */
    uint32_t gpu_id;

    /** GPU architecture major version */
    uint32_t architecture_major;

    /** GPU architecture minor version */
    uint32_t architecture_minor;

    /** Number of shader cores */
    uint32_t num_shader_cores;

    /** Shader core topology mask */
    uint64_t shader_core_mask;

    /** Number of L2 cache slices */
    uint32_t num_l2_slices;

    /** L2 cache size, summed for all slices, in bytes */
    uint32_t num_l2_bytes;

    /** GPU external bus width per cache slice, in bits */
    uint32_t num_bus_bits;

    /** Number of execution engines per core */
    uint32_t num_exec_engines;

    /** Maximum number of 32-bit floating-point FMAs per clock per core */
    uint32_t num_fp32_fmas_per_cy;

    /** Maximum number of 16-bit floating-point FMAs per clock per core */
    uint32_t num_fp16_fmas_per_cy;

    /** Maximum number of bilinear filtered texels per clock per core */
    uint32_t num_texels_per_cy;

    /** Maximum number of output pixels per clock per core */
    uint32_t num_pixels_per_cy;
};


/** Kbase ioctl interface type. */
enum class iface_type {
    /** Pre R21 kernel */
    pre_r21,
    /** Post R21 kernel (inclusive) */
    post_r21
};

/**
 * Mali device driver instance.
 *
 * Instances are obtained through create(); the constructor is private.
 */
class instance
{
public:
    /**
     * Factory function to create a device instance.
     *
     * @param id   The driver instance, e.g. 0 for /dev/mali0.
     *
     * @return The created instance, or @c nullptr on failure.
     */
    static std::unique_ptr<instance> create(const uint32_t id=0);

    /**
     * Get the GPU device property information.
     *
     * The returned reference has the same lifetime as the instance.
     *
     * @return The device property information.
     */
    const gpuinfo& get_info() const;

    /**
     * Destroy an instance.
     *
     * Any returned information references become invalid.
     */
    ~instance();

    /**
     * Instances are not copyable.
     *
     * NOTE(review): presumably the destructor releases fd_, in which case a
     * copy would release the same descriptor twice - confirm against the
     * destructor definition.
     */
    instance(const instance&) = delete;
    instance& operator=(const instance&) = delete;

private:
    /**
     * Create a new instance.
     *
     * @param fd   The opened driver file descriptor.
     */
    explicit instance(int fd);

    /** Check the Mali kernel driver interface version. */
    bool check_version();

    /** Configure Mali kernel driver connection flags. */
    bool set_flags();

    /** Query properties and store them locally. */
    bool init_props();

    /** Get device constants from the old format ioctl. */
    bool init_props_pre_r21();

    /** Get device constants from the new format ioctl. */
    bool init_props_post_r21();

    /** The queried device properties. */
    gpuinfo info_ {};

    /** The driver interface type. */
    iface_type iface_ {};

    /** The validity state of the object if initialization fails. */
    bool valid_ { true };

    /** The kernel driver file descriptor. */
    int fd_ {};
};

192 | 193 | } 194 | --------------------------------------------------------------------------------