├── .gitattributes
├── .github
    └── workflows
    │   ├── android-clang.yaml
    │   ├── linux-clang.yaml
    │   └── linux-gcc.yaml
├── .gitignore
├── CMakeLists.txt
├── LICENSE.txt
├── README.md
├── android_build.sh
├── docs
    └── changelog.md
└── source
    ├── CMakeLists.txt
    ├── arm_gpuinfo.cpp
    ├── libgpuinfo.cpp
    └── libgpuinfo.hpp


/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Set the default behavior, in case people don't have core.autocrlf set.
 2 | * text eol=lf
 3 | 
 4 | # Force these text files to normalized endings
 5 | *.c text
 6 | *.cpp text
 7 | *.h text
 8 | *.hpp text
 9 | *.md text
10 | *.py text
11 | *.sh text
12 | *.txt text
13 | 


--------------------------------------------------------------------------------
/.github/workflows/android-clang.yaml:
--------------------------------------------------------------------------------
 1 | name: Build for Android with Clang using the NDK
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-22.04
12 | 
13 |     steps:
14 |     - name: Checkout code
15 |       uses: actions/checkout@v4
16 | 
17 |     - name: Set up JDK 21 environment
18 |       run: |
19 |         echo "export JAVA_HOME=$JAVA_HOME_21_X64" >> $GITHUB_ENV
20 |         echo "export PATH=$JAVA_HOME/bin:$PATH" >> $GITHUB_ENV
21 | 
22 |     - name: Use the built-in Android NDK
23 |       run: |
24 |         echo "export ANDROID_NDK_HOME=/usr/local/lib/android/sdk/ndk/26.3.11579264" >> $GITHUB_ENV
25 |         echo "export PATH=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/linux/bin:$PATH" >> $GITHUB_ENV
26 | 
27 |     - name: Build with Clang and Android NDK
28 |       run: |
29 |         mkdir -p build
30 |         cd build
31 |         cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_HOME/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 ..
32 |         make
33 | 
34 |     - name: Upload binaries
35 |       uses: actions/upload-artifact@v4
36 |       with:
37 |         name: libgpu-linux-x86
38 |         path: |
39 |           build/source/arm_gpuinfo


--------------------------------------------------------------------------------
/.github/workflows/linux-clang.yaml:
--------------------------------------------------------------------------------
 1 | name: Build for Linux with Clang
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-22.04
12 | 
13 |     steps:
14 |     - name: Checkout code
15 |       uses: actions/checkout@v4
16 | 
17 |     - name: Build with Clang
18 |       run: |
19 |         mkdir -p build
20 |         cd build
21 |         cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ ..
22 |         make
23 | 
24 |     - name: Upload binaries
25 |       uses: actions/upload-artifact@v4
26 |       with:
27 |         name: libgpu-linux-x86
28 |         path: |
29 |           build/source/arm_gpuinfo


--------------------------------------------------------------------------------
/.github/workflows/linux-gcc.yaml:
--------------------------------------------------------------------------------
 1 | name: Build for Linux with GCC
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   pull_request:
 6 |     branches:
 7 |       - main
 8 | 
 9 | jobs:
10 |   build:
11 |     runs-on: ubuntu-22.04
12 | 
13 |     steps:
14 |     - name: Git checkout
15 |       uses: actions/checkout@v4
16 | 
17 |     - name: Build with GCC
18 |       run: |
19 |         mkdir -p build
20 |         cd build
21 |         cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ..
22 |         make
23 | 
24 |     - name: Upload binaries
25 |       uses: actions/upload-artifact@v4
26 |       with:
27 |         name: libgpu-linux-x86
28 |         path: |
29 |           build/source/arm_gpuinfo
30 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Editor config files
 2 | .vs
 3 | .vscode
 4 | 
 5 | # Build and debug output files
 6 | /.cache
 7 | /bin*
 8 | /build*
 9 | /log*
10 | /scratch*
11 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # This confidential and proprietary software may be used only as
 3 | # authorised by a licensing agreement from Arm Limited.
 4 | #    Copyright 2023-2024 Arm Ltd. All Rights Reserved.
 5 | # The entire notice above must be reproduced on all authorised
 6 | # copies and copies may only be made to the extent permitted
 7 | # by a licensing agreement from Arm Limited.
 8 | #
 9 | 
10 | cmake_minimum_required(VERSION 3.15)
11 | 
12 | set(CMAKE_CXX_STANDARD 14)
13 | 
14 | project(libGPUInfo VERSION 1.2.0)
15 | 
16 | add_subdirectory(source)
17 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021-2024 Arm Limited
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # About
  2 | 
  3 | libGPUInfo is a small utility library that allows applications to query the
  4 | configuration of the Arm® Immortalis™ or Arm Mali™ GPU present in the system.
  5 | This information allows developers to adjust application workload complexity to
  6 | match the performance capability of the current device.
  7 | 
  8 | This library is able to provide the Arm GPU hardware configuration, as well as
  9 | performance metrics for the shader cores inside the GPU. The library is unable
 10 | to provide system information, such as the available GPU clock frequencies,
 11 | because this is provided by the device manufacturer and is not part of the Arm
 12 | GPU itself.
 13 | 
 14 | For offline documentation about the capabilities of the various Arm GPUs on the
 15 | market today please refer to the [Arm GPU Datasheet][2].
 16 | 
 17 | ## Supported devices
 18 | 
 19 | This library aims to support all Arm GPU products from the Mali-T700 series
 20 | onwards, ensuring developers have coverage of the vast majority of smartphones
 21 | with Arm GPUs that are in use today. If you find a device with an Arm GPU which
 22 | does not work, or gives inaccurate results, please open an Issue on the GitHub
 23 | issue tracker.
 24 | 
 25 | This library only supports devices using the Arm commercial driver.
 26 | 
 27 | ## Recent changes
 28 | 
 29 | * Change log: [1.x series](./docs/changelog.md)
 30 | 
 31 | ## Related API extensions
 32 | 
 33 | This library is intended to support any Arm device, but some developers prefer
 34 | to use functionality within the graphics API when it is available. New devices
 35 | can report a similar set of information to this library using in-API queries.
 36 | 
 37 | We recommend using the extensions on devices where it is available. Doing so
 38 | means the application automatically gets up-to-date information for all
 39 | devices, even those released after the application binary was built.
 40 | 
 41 | For more information please refer to the extension specifications:
 42 | 
 43 | * [VK_ARM_shader_core_properties][3]
 44 | * [VK_ARM_shader_core_builtins][4]
 45 | 
 46 | 
 47 | ## License
 48 | 
 49 | This project is licensed under the MIT license. By downloading any component
 50 | from this repository you acknowledge that you accept the terms specified in
 51 | the [LICENSE.txt](LICENSE.txt) file.
 52 | 
 53 | # Available information
 54 | 
 55 | The query mechanism can report the following information about the GPU:
 56 | 
 57 | * **Name:** The product name string, e.g. "Mali-G710".
 58 | * **Architecture:** The product architecture name string, e.g. "Valhall".
 59 | * **Model number:** The product ID number, e.g. 0xa002.
 60 | * **Shader core count:** The number of shader cores in the design.
 61 | * **Shader core mask:** The shader core topology mask.
 62 | * **L2 cache count:** The number of L2 cache slices in the design.
 63 | * **L2 cache size:** The total L2 cache size, summed over all slices, in bytes.
 64 | * **Bus size:** The width of the external data bus, per cache slice, in bits.
 65 | 
 66 | The query mechanism can report the following per-core shader core performance
 67 | information:
 68 | 
 69 | * **Execution engine count:** The number of arithmetic macroblocks.
 70 | * **FP32 FMA count:** The peak fp32 FMAs per clock, summed over all engines.
 71 | * **FP16 FMA count:** The peak fp16 FMAs per clock, summed over all engines.
 72 | * **Texel count:** The peak bilinear filtered texture samples per clock.
 73 | * **Pixel count:** The peak pixels per clock.
 74 | 
 75 | # Using the library
 76 | 
 77 | The library is very simple to use:
 78 | 
 79 | ```C++
 80 | // Create a connection with the kernel driver ...
 81 | std::unique_ptr<libarmgpuinfo::instance> conn = libarmgpuinfo::instance::create();
 82 | if (!conn)
 83 | {
 84 |     std::cout << "ERROR: Failed to create Mali instance\n";
 85 |     return;
 86 | }
 87 | 
 88 | // Fetch the information result and do something with it ...
 89 | const libarmgpuinfo::gpuinfo& info = conn->get_info();
 90 | std::cout << "GPU: " << info.gpu_name << " MP" << info.num_shader_cores << "\n";
 91 | ```
 92 | 
 93 | Note that the returned instance uses a unique pointer for lifetime management,
 94 | and both the instance and the query result will be freed when the instance
 95 | drops out of scope.
 96 | 
 97 | ## Handling unknown devices
 98 | 
 99 | The library will be regularly updated to support new Arm GPU products, but it
100 | is inevitable that applications will run on new devices with GPU models that
101 | did not exist at the time they were released. For this there are two failure
102 | modes that applications must consider.
103 | 
104 | The most likely error is the case where a connection can be established with
105 | the Arm kernel driver, but the product code is unknown. In this case the call
106 | to `libarmgpuinfo::instance::create()` will succeed but return a partially
107 | populated result. It will include any information that can be determined
108 | programmatically, but will report the GPU name and architecture as "Unknown",
109 | and the per-core shader core performance metrics as zero.
110 | 
111 | For example, we can currently show the following information when the product
112 | model is not explicitly supported:
113 | 
114 | ```yaml
115 | GPU configuration:
116 |   Model number: 0xa862
117 |   Core count: 7
118 |   L2 cache count: 4
119 |   Total L2 cache size: 2097152 bytes
120 |   Bus width: 256 bits
121 | ```
122 | 
123 | If the kernel driver interface has changed and the library cannot establish a
124 | connection then we can return no useful information. In this case the
125 | `libarmgpuinfo::instance::create()` function will fail and will return a
126 | `nullptr`.
127 | 
128 | # Building
129 | 
130 | The library is provided as a single C++ source file and a single C++ header
131 | file. It is expected that developers will copy the files directly into their
132 | existing application build system, so no off-the-shelf build system is provided
133 | for the library integration.
134 | 
135 | # Sample application
136 | 
137 | The repository also contains a simple command line tool that demonstrates use of
138 | the API, and which can be used for ad hoc testing of devices. To build the
139 | Android command line tool:
140 | 
141 | * Set `ANDROID_NDK_HOME` to the path of your Android NDK install.
142 | * Run `./android_build.sh [Release|Debug]`.
143 | 
144 | The output binary will be `./bin/arm_gpuinfo`. You can run this on the device
145 | and print the results for your device to the terminal using the following
146 | commands:
147 | 
148 | ```sh
149 | adb push ./bin/arm_gpuinfo /data/local/tmp
150 | adb shell chmod u+x /data/local/tmp/arm_gpuinfo
151 | adb shell /data/local/tmp/arm_gpuinfo
152 | adb shell rm /data/local/tmp/arm_gpuinfo
153 | ```
154 | 
155 | The generated output is formatted using a YAML-like syntax, but is designed for
156 | human consumption with additional line breaks. To generate strictly compliant
157 | YAML output for use in scripts pass the `--yaml` or `-y` argument on the
158 | `arm_gpuinfo` command line.
159 | 
160 | # Support
161 | 
162 | If you have issues with the library itself, please raise them in the project's
163 | GitHub issue tracker.
164 | 
165 | If you have any questions about Arm GPUs, application development for Arm GPUs,
166 | or general mobile graphics development or technology please submit them on the
167 | [Arm Community graphics forums][1].
168 | 
169 | - - -
170 | 
171 | _Copyright © 2023-2024, Arm Limited and contributors._
172 | 
173 | [1]: https://community.arm.com/support-forums/f/graphics-gaming-and-vr-forum/
174 | [2]: https://developer.arm.com/documentation/102849/latest/
175 | [3]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_ARM_shader_core_properties.html
176 | [4]: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_ARM_shader_core_builtins.html
177 | 


--------------------------------------------------------------------------------
/android_build.sh:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023-2024 Arm Limited.
 3 | #
 4 | # SPDX-License-Identifier: MIT
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | # SOFTWARE.
23 | #
24 | 
25 | # ----------------------------------------------------------------------------
26 | # Configuration
27 | 
28 | # Exit immediately if any component command errors
29 | set -e
30 | 
31 | BUILD_DIR_64=build_arm64
32 | 
33 | # Fail early, with a clear message, if the NDK location has not been provided
34 | : "${ANDROID_NDK_HOME:?Set ANDROID_NDK_HOME to the path of your Android NDK install}"
35 | 
36 | # ----------------------------------------------------------------------------
37 | # Process command line options: optional first argument is the CMake build type
38 | BUILD_TYPE="${1:-Release}"
39 | 
40 | # ----------------------------------------------------------------------------
41 | # Build the 64-bit library
42 | mkdir -p "${BUILD_DIR_64}"
43 | # Use cd rather than the bash-only pushd/popd, as this script has no bash shebang
44 | cd "${BUILD_DIR_64}"
45 | 
46 | cmake \
47 |     -DCMAKE_SYSTEM_NAME=Android \
48 |     -DANDROID_PLATFORM=29 \
49 |     -DANDROID_ABI=arm64-v8a \
50 |     -DANDROID_TOOLCHAIN=clang \
51 |     -DANDROID_STL=c++_static \
52 |     -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
53 |     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake" \
54 |     -DCMAKE_INSTALL_PREFIX=../ \
55 |     ..
56 | 
57 | make install -j8
58 | 
59 | cd ..
60 | 


--------------------------------------------------------------------------------
/docs/changelog.md:
--------------------------------------------------------------------------------
 1 | # Release change log
 2 | 
 3 | This page summarizes the major functional changes in each release.
 4 | 
 5 | <!-- ---------------------------------------------------------------------- -->
 6 | ## 1.2.0
 7 | 
 8 | **Released:** November 2024
 9 | 
10 | This is a small feature release, adding support for reporting GPU architecture
11 | version numbers as an alternative to parsing product names.
12 | 
13 | * **General:**
14 |   * **Feature:** C++ namespace changed to `libarmgpuinfo`.
15 |   * **Feature:** Supports reporting architecture major/minor versions.
16 | 
17 | <!-- ---------------------------------------------------------------------- -->
18 | ## 1.1.0
19 | 
20 | **Released:** June 2024
21 | 
22 | This is a small feature release, adding support for new Arm GPUs and some new
23 | GPU configuration values.
24 | 
25 | * **General:**
26 |   * **Feature:** Supports Immortalis-G925 series hardware.
27 |   * **Feature:** Supports new Mali-G310 and Mali-G510 IP configurations.
28 |   * **Feature:** Supports reporting shader core topology mask.
29 | 
30 | 
31 | <!-- ---------------------------------------------------------------------- -->
32 | ## 1.0.0
33 | 
34 | **Released:** June 2023
35 | 
36 | The first release of libGPUInfo.
37 | 
38 | * **General:**
39 |   * **Feature:** Support IP from Mali-T720 (Midgard architecture) through to
40 |     Immortalis-G720 (5th Generation architecture).
41 |   * **Feature:** Supports querying GPU model number and name.
42 |   * **Feature:** Supports querying GPU shader core and cache configuration.
43 |   * **Feature:** Supports querying GPU speed-of-light performance metrics.
44 |   * **Feature:** Command line utility provided for easy device testing.
45 | 
46 | - - -
47 | 
48 | _Copyright © 2023-2024, Arm Limited and contributors._
49 | 


--------------------------------------------------------------------------------
/source/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # Copyright (c) 2023-2024 Arm Limited.
 3 | #
 4 | # SPDX-License-Identifier: MIT
 5 | #
 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | # of this software and associated documentation files (the "Software"), to deal
 8 | # in the Software without restriction, including without limitation the rights
 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | # copies of the Software, and to permit persons to whom the Software is
11 | # furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included in all
14 | # copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | # SOFTWARE.
23 | #
24 | 
25 | add_executable(
26 |     arm_gpuinfo
27 |         arm_gpuinfo.cpp
28 |         libgpuinfo.cpp)
29 | 
30 | target_include_directories(
31 |     arm_gpuinfo PUBLIC
32 |         ".")
33 | 
34 | target_compile_options(
35 |     arm_gpuinfo PRIVATE
36 | 
37 |     -Wall
38 |     -Wextra
39 |     -Wpedantic
40 |     -Werror
41 |     -Wshadow)
42 | 
43 | # PACKAGE_ROOT is the install directory, relative to CMAKE_INSTALL_PREFIX. It is
44 | # not set by any CMake file or build script in this repository, so default it
45 | # to "bin" (the location documented in README.md) unless the user passes
46 | # -DPACKAGE_ROOT=... on the command line.
47 | if(NOT PACKAGE_ROOT)
48 |     set(PACKAGE_ROOT "bin")
49 | endif()
50 | 
51 | install(TARGETS arm_gpuinfo DESTINATION "${PACKAGE_ROOT}")
52 | 


--------------------------------------------------------------------------------
/source/arm_gpuinfo.cpp:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |  * Copyright (c) 2023-2024 Arm Limited.
  4 |  *
  5 |  * SPDX-License-Identifier: MIT
  6 |  *
  7 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  8 |  * of this software and associated documentation files (the "Software"), to
  9 |  * deal in the Software without restriction, including without limitation the
 10 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 11 |  * sell copies of the Software, and to permit persons to whom the Software is
 12 |  * furnished to do so, subject to the following conditions:
 13 |  *
 14 |  * The above copyright notice and this permission notice shall be included in all
 15 |  * copies or substantial portions of the Software.
 16 |  *
 17 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 18 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 19 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 20 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 21 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 22 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 23 |  * SOFTWARE.
 24 |  */
 25 | 
 26 | /**
 27 |  * @brief An example command line application using libGPUInfo.
 28 |  *
 29 |  * This file contains a command line application that will query and print
 30 |  * key properties about your device, and the Arm GPU that it contains.
 31 |  *
 32 |  * It is primarily intended as an example of using the libGPUInfo library, but
 33 |  * the command line application itself is a useful diagnostic tool for support
 34 |  * investigations.
 35 |  *
 36 |  * On Android devices you can install and run the application from the shell:
 37 |  *
 38 |  *     adb push arm_gpuinfo /data/local/tmp
 39 |  *     adb shell chmod u+x /data/local/tmp/arm_gpuinfo
 40 |  *     adb shell /data/local/tmp/arm_gpuinfo
 41 |  *
 42 |  * The generated output is formatted using a YAML-like syntax, but by default is
 43 |  * designed for human consumption with additional line breaks. To generate
 44 |  * strictly compliant YAML output for use in scripts pass the --yaml or -y
 45 |  * argument on the arm_gpuinfo command line.
 46 |  */
 47 | 
 48 | #include <iostream>
 49 | #include <sys/utsname.h>
 50 | #include <cstring>
 51 | 
 52 | #if defined(__ANDROID__)
 53 |     #include <sys/system_properties.h>
 54 | #endif
 55 | 
 56 | #include "libgpuinfo.hpp"
 57 | 
 58 | #if defined(__ANDROID__)
 59 | // Read an Android system property, falling back to propertyB (if given) when
 60 | // propertyA reads as empty. Returns the value with its first character upper-cased.
 61 | std::string get_android_property(const char* propertyA, const char* propertyB=nullptr)
 62 | {
 63 |     char buf[PROP_VALUE_MAX] {};  // zero-filled: yields "" if nothing is written
 64 | 
 65 |     if (!__system_property_get(propertyA, buf) && propertyB) {
 66 |         __system_property_get(propertyB, buf);
 67 |     }
 68 |     std::string result { buf };
 69 |     if (!result.empty()) {  // toupper() needs an unsigned char value, not a raw char
 70 |         result[0] = static_cast<char>(toupper(static_cast<unsigned char>(result[0])));
 71 |     }
 72 |     return result;
 73 | }
 74 | #endif
 75 | 
 76 | // Kernel release string reported by uname(), or "Unknown" if the call fails.
 77 | std::string get_kernel_version() {
 78 |     struct utsname unamedata {};
 79 |     return uname(&unamedata) == 0 ? unamedata.release : "Unknown";
 80 | }
 81 | 
 82 | /**
 83 |  * @brief Print the device and Arm GPU configuration to stdout.
 84 |  *
 85 |  * Passing -y or --yaml anywhere on the command line selects strict YAML
 86 |  * output: a leading "---" marker and no blank lines between sections.
 87 |  *
 88 |  * @return 0 on success; 1 if the library instance cannot be created, or if
 89 |  *         the GPU reports zero execution engines (an unknown model).
 90 |  */
 91 | int main(int argc, char *argv[])
 92 | {
 93 |     bool emit_yaml = false;
 94 |     for (int i = 1; i < argc; i++)
 95 |     {
 96 |         if ((!strcmp(argv[i], "-y")) || (!strcmp(argv[i], "--yaml")))
 97 |         {
 98 |             emit_yaml = true;
 99 |         }
100 |     }
101 | 
102 |     auto instance = libarmgpuinfo::instance::create();
103 |     if (!instance)
104 |     {
105 |         std::cout << "ERROR: Failed to create instance\n";
106 |         return 1;
107 |     }
108 | 
109 |     // Bind by reference: avoids copying the query result, and is safe because
110 |     // the instance outlives every use of info in this function
111 |     const auto& info = instance->get_info();
112 | 
113 |     if (emit_yaml)
114 |     {
115 |         std::cout << "---\n";
116 |     }
117 | 
118 |     std::cout << "Device configuration:\n";
119 | #if defined(__ANDROID__)
120 |     std::cout << "  Manufacturer: " << get_android_property("ro.product.vendor.manufacturer", "ro.product.brand") << "\n";
121 |     std::cout << "  Model: " << get_android_property("ro.product.vendor.model", "ro.product.model") << "\n";
122 |     std::cout << "  Android version: " << get_android_property("ro.build.version.release") << "\n";
123 | #endif
124 |     std::cout << "  Kernel version: " << get_kernel_version() << "\n";
125 |     if (!emit_yaml)
126 |     {
127 |         std::cout << "\n";
128 |     }
129 | 
130 |     std::cout << "GPU configuration:\n";
131 |     std::cout << "  Name: " << info.gpu_name << "\n";
132 |     std::cout << "  Architecture: " << info.architecture_name << "\n";
133 |     std::cout << "  Architecture version: " << info.architecture_major
134 |               << "." << info.architecture_minor <<"\n";
135 |     std::cout << "  Model number: 0x" << std::hex << info.gpu_id << std::dec << "\n";
136 |     std::cout << "  Core count: " << info.num_shader_cores << "\n";
137 |     std::cout << "  Core mask: 0x" << std::hex << info.shader_core_mask << std::dec << "\n";
138 |     std::cout << "  L2 cache count: " << info.num_l2_slices << "\n";
139 |     std::cout << "  Total L2 cache size: " << info.num_l2_bytes << " bytes\n";
140 |     std::cout << "  Bus width: " << info.num_bus_bits << " bits\n";
141 |     if (!emit_yaml)
142 |     {
143 |         std::cout << "\n";
144 |     }
145 | 
146 |     if (!info.num_exec_engines)
147 |     {
148 |         std::cout << "ERROR: Detected an unknown model "
149 |                   << std::hex << info.gpu_id << std::dec << "\n";
150 |         return 1;
151 |     }
152 | 
153 |     std::cout << "Per-core statistics:\n";
154 |     std::cout << "  Engine count: " << info.num_exec_engines << "\n";
155 |     std::cout << "  FP32 FMAs: " << info.num_fp32_fmas_per_cy << "/cy\n";
156 |     std::cout << "  FP16 FMAs: " << info.num_fp16_fmas_per_cy << "/cy\n";
157 |     std::cout << "  Texels: " << info.num_texels_per_cy << "/cy\n";
158 |     std::cout << "  Pixels: " << info.num_pixels_per_cy << "/cy\n";
159 |     if (!emit_yaml)
160 |     {
161 |         std::cout << "\n";
162 |     }
163 | 
164 |     std::cout << "Per-GPU statistics:\n";
165 |     std::cout << "  FP32 FMAs: " << info.num_fp32_fmas_per_cy * info.num_shader_cores << "/cy\n";
166 |     std::cout << "  FP16 FMAs: " << info.num_fp16_fmas_per_cy * info.num_shader_cores << "/cy\n";
167 |     std::cout << "  Texels: " << info.num_texels_per_cy * info.num_shader_cores << "/cy\n";
168 |     std::cout << "  Pixels: " << info.num_pixels_per_cy * info.num_shader_cores << "/cy\n";
169 | 
170 |     return 0;
171 | }
172 | 


--------------------------------------------------------------------------------
/source/libgpuinfo.cpp:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * Copyright (c) 2021-2024 Arm Limited.
   3 |  *
   4 |  * SPDX-License-Identifier: MIT
   5 |  *
   6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 |  * of this software and associated documentation files (the "Software"), to
   8 |  * deal in the Software without restriction, including without limitation the
   9 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  10 |  * sell copies of the Software, and to permit persons to whom the Software is
  11 |  * furnished to do so, subject to the following conditions:
  12 |  *
  13 |  * The above copyright notice and this permission notice shall be included in all
  14 |  * copies or substantial portions of the Software.
  15 |  *
  16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22 |  * SOFTWARE.
  23 |  */
  24 | 
  25 | #include <array>
  26 | #include <cassert>
  27 | #include <cerrno>
  28 | #include <cstdint>
  29 | #include <functional>
  30 | #include <iostream>
  31 | #include <memory>
  32 | #include <string>
  33 | #include <vector>
  34 | 
  35 | #include <sys/ioctl.h>
  36 | #include <sys/stat.h>
  37 | #include <fcntl.h>
  38 | #include <unistd.h>
  39 | 
  40 | #include "libgpuinfo.hpp"
  41 | 
  42 | #define UNUSED(x) ((void)(x))  // fully parenthesized so expression arguments expand safely
  43 | 
  44 | namespace libarmgpuinfo {
  45 | 
  46 | struct product_entry {  // Per-product record: ID/mask, names, and per-core query callbacks
  47 |     uint32_t id;  // NOTE(review): presumably the product ID matched after masking - confirm
  48 |     uint32_t mask;  // NOTE(review): presumably MASK_OLD or MASK_NEW - confirm against the table
  49 |     uint32_t min_cores;  // NOTE(review): presumably a minimum core count for this entry - confirm
  50 |     const char* name;
  51 |     const char* architecture;  // Callbacks below take (int, uint32_t, uint32_t), like the get_num* helpers
  52 |     std::function<uint32_t(int, uint32_t, uint32_t)> get_num_fp32_fmas_per_engine;
  53 |     std::function<uint32_t(int, uint32_t, uint32_t)> get_num_texels;
  54 |     std::function<uint32_t(int, uint32_t, uint32_t)> get_num_pixels;
  55 |     std::function<uint32_t(int, uint32_t, uint32_t)> get_num_exec_engines;
  56 | };
  57 | 
  58 | static constexpr uint32_t MASK_OLD = 0xFFFF;  // keeps all of bits [15:0]
  59 | static constexpr uint32_t MASK_NEW = 0xF00F;  // keeps bits [15:12] and [3:0] only
  60 | 
  61 | /**
  62 |  * @brief Constant query with the same signature as the get_num_* helpers.
  63 |  *
  64 |  * @tparam val   The value reported; all three arguments are ignored.
  65 |  *
  66 |  * @return Always @c val.
  67 |  */
  68 | template <uint32_t val>
  69 | static uint32_t get_num(int /*core_count*/, uint32_t /*core_features*/, uint32_t /*thread_features*/)
  70 | {
  71 |     return val;
  72 | }
  73 | 
  74 | /**
  75 |  * @brief Engine count lookup (g31 flavour, going by the function name).
  76 |  *
  77 |  * @param core_count        Core count; compared against 1.
  78 |  * @param thread_features   Thread features value; only bits [15:0] are read.
  79 |  *
  80 |  * @return 1 when there is a single core and the low thread feature bits
  81 |  *         equal 0x2000, otherwise 2. The core features argument is ignored.
  82 |  */
  83 | static uint32_t get_num_eng_g31(int core_count, uint32_t /*core_features*/, uint32_t thread_features)
  84 | {
  85 |     const bool is_small_config = (core_count == 1) && ((thread_features & 0xFFFF) == 0x2000);
  86 |     return is_small_config ? 1u : 2u;
  87 | }
  88 | 
  89 | /**
  90 |  * @brief Engine count lookup (g51 flavour, going by the function name).
  91 |  *
  92 |  * @param core_count        Core count; compared against 1.
  93 |  * @param thread_features   Thread features value; only bits [15:0] are read.
  94 |  *
  95 |  * @return 1 when there is a single core and the low thread feature bits
  96 |  *         equal 0x2000, otherwise 3. The core features argument is ignored.
  97 |  */
  98 | static uint32_t get_num_eng_g51(int core_count, uint32_t /*core_features*/, uint32_t thread_features)
  99 | {
 100 |     const bool is_small_config = (core_count == 1) && ((thread_features & 0xFFFF) == 0x2000);
 101 |     return is_small_config ? 1u : 3u;
 102 | }
 103 | 
 104 | /**
 105 |  * @brief Engine count lookup (g52 flavour, going by the function name).
 106 |  * @param core_features   Core features value; only bits [3:0] are read.
 107 |  *
 108 |  * @return Bits [3:0] of @p core_features. The other arguments are ignored.
 109 |  */
 110 | static uint32_t get_num_eng_g52(int /*core_count*/, uint32_t core_features, uint32_t /*thread_features*/)
 111 | {
 112 |     return core_features & 0xF;
 113 | }
 114 | 
 115 | /**
 116 |  * @brief FMA count lookup keyed on the core variant (g510 table).
 117 |  *
 118 |  * The variant is bits [3:0] of @p core_features:
 119 |  *
 120 |  *   - variant 0:                                      16
 121 |  *   - variants 2 and 3:                               24
 122 |  *   - variants 1, 4, 5, 6 and anything unrecognized:  32
 123 |  *
 124 |  * @param core_features   Core features value holding the variant field.
 125 |  *
 126 |  * @return The count for the decoded variant; other arguments are ignored.
 127 |  */
 128 | static uint32_t get_num_fma_g510(int /*core_count*/, uint32_t core_features, uint32_t /*thread_features*/)
 129 | {
 130 |     const uint32_t variant = core_features & 0xF;
 131 |     if (variant == 0)
 132 |     {
 133 |         return 16;
 134 |     }
 135 | 
 136 |     const bool is_mid_variant = (variant == 2) || (variant == 3);
 137 |     return is_mid_variant ? 24u : 32u;
 138 | }
 139 | 
 140 | static uint32_t get_num_tex_g510(
 141 |     int core_count,
 142 |     uint32_t core_features,
 143 |     uint32_t thread_features
 144 | ) {
 145 |     UNUSED(core_count);
 146 |     UNUSED(thread_features);
 147 | 
 148 |     uint32_t variant = core_features & 0xF;
 149 |     switch(variant)
 150 |     {
 151 |         case 0:
 152 |         case 5:
 153 |             return 2;
 154 |         case 1:
 155 |         case 2:
 156 |         case 6:
 157 |             return 4;
 158 |         case 3:
 159 |         case 4:
 160 |         default:
 161 |             return 8;
 162 |     }
 163 | }
 164 | 
 165 | static uint32_t get_num_pix_g510(
 166 |     int core_count,
 167 |     uint32_t core_features,
 168 |     uint32_t thread_features
 169 | ) {
 170 |     UNUSED(core_count);
 171 |     UNUSED(thread_features);
 172 | 
 173 |     // This returns min(blend, pixel)
 174 |     // Also limits to 2 for single engine configs
 175 |     uint32_t variant = core_features & 0xF;
 176 |     switch(variant)
 177 |     {
 178 |         case 0:
 179 |         case 1:
 180 |         case 5:
 181 |         case 6:
 182 |             return 2;
 183 |         case 2:
 184 |         case 3:
 185 |         case 4:
 186 |         default:
 187 |             return 4;
 188 |     }
 189 | }
 190 | 
 191 | static uint32_t get_num_eng_g510(
 192 |     int core_count,
 193 |     uint32_t core_features,
 194 |     uint32_t thread_features
 195 | ) {
 196 |     UNUSED(core_count);
 197 |     UNUSED(thread_features);
 198 | 
 199 |     uint32_t variant = core_features & 0xF;
 200 |     switch(variant)
 201 |     {
 202 |         case 0:
 203 |         case 1:
 204 |         case 5:
 205 |         case 6:
 206 |             return 1;
 207 |         case 2:
 208 |         case 3:
 209 |         case 4:
 210 |         default:
 211 |             return 2;
 212 |     }
 213 | }
 214 | 
/**
 * Table of known GPU products.
 *
 * A product ID matches an entry when (ID & entry mask) equals the entry ID.
 * Several entries share one ID and differ only in their minimum core count.
 * The lookup functions in this file scan from the top and use the first entry
 * whose ID matches and whose minimum core count is satisfied, so entries that
 * share an ID are listed with the largest minimum core count first.
 *
 * The last four columns are query functions taking
 * (core_count, core_features, thread_features).
 */
static const std::array<product_entry, 35> PRODUCT_VERSIONS {{
    //                  ID,  ID Mask, Min cores,              Name,           Arch,      FMA/Eng,           Texels,           Pixels,          Engines
    product_entry { 0x6956, MASK_OLD,         1,       "Mali-T600",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<2> },
    product_entry { 0x0620, MASK_OLD,         1,       "Mali-T620",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<2> },
    product_entry { 0x0720, MASK_OLD,         1,       "Mali-T720",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<1> },
    product_entry { 0x0750, MASK_OLD,         1,       "Mali-T760",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<2> },
    product_entry { 0x0820, MASK_OLD,         1,       "Mali-T820",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<1> },
    product_entry { 0x0830, MASK_OLD,         1,       "Mali-T830",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<2> },
    product_entry { 0x0860, MASK_OLD,         1,       "Mali-T860",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<2> },
    product_entry { 0x0880, MASK_OLD,         1,       "Mali-T880",      "Midgard",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<3> },
    product_entry { 0x6000, MASK_NEW,         1,        "Mali-G71",      "Bifrost",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<3> },
    product_entry { 0x6001, MASK_NEW,         1,        "Mali-G72",      "Bifrost",       get_num<4>,       get_num<1>,       get_num<1>,       get_num<3> },
    product_entry { 0x7000, MASK_NEW,         1,        "Mali-G51",      "Bifrost",       get_num<4>,       get_num<2>,       get_num<2>,  get_num_eng_g51 },
    product_entry { 0x7001, MASK_NEW,         1,        "Mali-G76",      "Bifrost",       get_num<8>,       get_num<2>,       get_num<2>,       get_num<3> },
    product_entry { 0x7002, MASK_NEW,         1,        "Mali-G52",      "Bifrost",       get_num<8>,       get_num<2>,       get_num<2>,  get_num_eng_g52 },
    product_entry { 0x7003, MASK_NEW,         1,        "Mali-G31",      "Bifrost",       get_num<4>,       get_num<2>,       get_num<2>,  get_num_eng_g31 },
    product_entry { 0x9000, MASK_NEW,         1,        "Mali-G77",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0x9001, MASK_NEW,         1,        "Mali-G57",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0x9003, MASK_NEW,         1,        "Mali-G57",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0x9004, MASK_NEW,         1,        "Mali-G68",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0x9002, MASK_NEW,         1,        "Mali-G78",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0x9005, MASK_NEW,         1,      "Mali-G78AE",      "Valhall",      get_num<16>,       get_num<4>,       get_num<2>,       get_num<2> },
    product_entry { 0xa002, MASK_NEW,         1,       "Mali-G710",      "Valhall",      get_num<32>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xa007, MASK_NEW,         1,       "Mali-G610",      "Valhall",      get_num<32>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xa003, MASK_NEW,         1,       "Mali-G510",      "Valhall", get_num_fma_g510, get_num_tex_g510, get_num_pix_g510, get_num_eng_g510 },
    product_entry { 0xa004, MASK_NEW,         1,       "Mali-G310",      "Valhall", get_num_fma_g510, get_num_tex_g510, get_num_pix_g510, get_num_eng_g510 },
    // 0xb002 is shared by three products, told apart by core count (largest minimum first)
    product_entry { 0xb002, MASK_NEW,        10, "Immortalis-G715",      "Valhall",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xb002, MASK_NEW,         7,       "Mali-G715",      "Valhall",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xb002, MASK_NEW,         1,       "Mali-G615",      "Valhall",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xb003, MASK_NEW,         1,       "Mali-G615",      "Valhall",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    // 0xc000 is shared by three products, told apart by core count (largest minimum first)
    product_entry { 0xc000, MASK_NEW,        10, "Immortalis-G720", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xc000, MASK_NEW,         6,       "Mali-G720", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xc000, MASK_NEW,         1,       "Mali-G620", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xc001, MASK_NEW,         1,       "Mali-G620", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    // NOTE(review): 0xd000 has no entry with a minimum core count below 6, so
    // such a configuration falls through the core-count-aware lookups - confirm
    // this is intended
    product_entry { 0xd000, MASK_NEW,        10, "Immortalis-G925", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xd000, MASK_NEW,         6,       "Mali-G725", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
    product_entry { 0xd001, MASK_NEW,         1,       "Mali-G625", "Arm 5th Gen",      get_num<64>,       get_num<8>,       get_num<4>,       get_num<2> },
}};
 253 | 
 254 | static uint32_t get_gpu_id(
 255 |     uint32_t gpu_id
 256 | ) {
 257 |     for (const auto& entry : PRODUCT_VERSIONS)
 258 |     {
 259 |         if (((gpu_id & entry.mask) == entry.id))
 260 |         {
 261 |             return entry.id;
 262 |         }
 263 |     }
 264 | 
 265 |     return gpu_id;
 266 | }
 267 | 
 268 | static const char* get_gpu_name(
 269 |     uint32_t gpu_id,
 270 |     uint32_t core_count
 271 | ) {
 272 |     for (const auto& entry : PRODUCT_VERSIONS)
 273 |     {
 274 |         if((gpu_id == entry.id) && (core_count >= entry.min_cores))
 275 |         {
 276 |             return entry.name;
 277 |         }
 278 |     }
 279 | 
 280 |     return "Unknown";
 281 | }
 282 | 
 283 | static const char* get_architecture_name(
 284 |     uint32_t gpu_id
 285 | ) {
 286 |     for (const auto& entry : PRODUCT_VERSIONS)
 287 |     {
 288 |         if(gpu_id == entry.id)
 289 |         {
 290 |             return entry.architecture;
 291 |         }
 292 |     }
 293 | 
 294 |     return "Unknown";
 295 | }
 296 | 
 297 | static int get_num_exec_engines(
 298 |     uint32_t gpu_id,
 299 |     uint32_t core_count,
 300 |     uint32_t core_features,
 301 |     uint32_t thread_features
 302 | ) {
 303 |     for (const auto& entry : PRODUCT_VERSIONS)
 304 |     {
 305 |         if((gpu_id == entry.id) && (core_count >= entry.min_cores))
 306 |         {
 307 |             return entry.get_num_exec_engines(core_count, core_features, thread_features);
 308 |         }
 309 |     }
 310 | 
 311 |     return 0;
 312 | }
 313 | 
 314 | static uint32_t get_num_fp32_fmas(
 315 |     uint32_t gpu_id,
 316 |     uint32_t core_count,
 317 |     uint32_t core_features,
 318 |     uint32_t thread_features
 319 | ) {
 320 |     for (const auto& entry : PRODUCT_VERSIONS)
 321 |     {
 322 |         if((gpu_id == entry.id) && (core_count >= entry.min_cores))
 323 |         {
 324 |             return entry.get_num_fp32_fmas_per_engine(core_count, core_features, thread_features) *
 325 |                    entry.get_num_exec_engines(core_count, core_features, thread_features);
 326 |         }
 327 |     }
 328 | 
 329 |     return 0;
 330 | }
 331 | 
 332 | static uint32_t get_num_texels(
 333 |     uint32_t gpu_id,
 334 |     uint32_t core_count,
 335 |     uint32_t core_features,
 336 |     uint32_t thread_features
 337 | ) {
 338 |     for (const auto& entry : PRODUCT_VERSIONS)
 339 |     {
 340 |         if((gpu_id == entry.id) && (core_count >= entry.min_cores))
 341 |         {
 342 |             return entry.get_num_texels(core_count, core_features, thread_features);
 343 |         }
 344 |     }
 345 | 
 346 |     return 0;
 347 | }
 348 | 
 349 | static uint32_t get_num_pixels(
 350 |     uint32_t gpu_id,
 351 |     uint32_t core_count,
 352 |     uint32_t core_features,
 353 |     uint32_t thread_features
 354 | ) {
 355 |     for (const auto& entry : PRODUCT_VERSIONS)
 356 |     {
 357 |         if((gpu_id == entry.id) && (core_count >= entry.min_cores))
 358 |         {
 359 |             return entry.get_num_pixels(core_count, core_features, thread_features);
 360 |         }
 361 |     }
 362 | 
 363 |     return 0;
 364 | }
 365 | 
 366 | /** Kbase Pre R21 ioctl interface. */
 367 | namespace kbase_pre_r21 {
 368 | 
/**
 * Function identifiers for the mali0 ioctl interface.
 *
 * This is the type of uk_header::id, the number identifying the called UK
 * function.
 */
enum class header_id : uint32_t {
    /** Version check. */
    version_check = 0,
    /** Base Context Create Kernel Flags. */
    create_kernel_flags = 2,
    /** Kbase Func Get Props (526 == 0x20e). */
    get_props = 526,
    /** Kbase Func Set Flags (530 == 0x212). */
    set_flags = 530,
};
 380 | 
/**
 * Message header.
 *
 * This is a union, so all members share the same storage: the function ID and
 * the return code occupy the same bytes.
 */
union uk_header {
    /** Number identifying the called UK function. */
    header_id id;
    /** The return code of the called UK function. */
    uint32_t ret;
    /** Dummy to ensure type has 64-bit alignment */
    uint64_t sizer;
};
 390 | 
 391 | /** Check version compatibility between kernel and userspace. */
 392 | struct version_check_t {
 393 |     /** UK header */
 394 |     uk_header header;
 395 |     /** Major version number */
 396 |     uint16_t major;
 397 |     /** Minor version number */
 398 |     uint16_t minor;
 399 | 
 400 |     bool is_set() const
 401 |     {
 402 |         return major || minor;
 403 |     }
 404 | };
 405 | 
/**
 * IOCTL parameters to set flags.
 *
 * The member order defines the message layout and must not be changed.
 */
struct set_flags_t {
    /** UK header */
    uk_header header;
    /** Create flags */
    uint32_t create_flags;
    /** Padding */
    uint32_t padding;
};
 415 | 
 416 | /** Base GPU Num Texture Features Registers. */
 417 | static constexpr const uint32_t base_gpu_num_texture_features_registers = 3;
 418 | 
 419 | /** Base Max Coherent Groups. */
 420 | static constexpr const uint32_t base_max_coherent_groups = 16;
 421 | 
 422 | /** GPU Max Job Slots. */
 423 | static constexpr const uint32_t gpu_max_job_slots = 16;
 424 | 
/**
 * Kbase UK GPU props.
 *
 * Message consisting of a UK header followed by the GPU properties. The order
 * and types of all members, including the nested structures, define the
 * message layout and must not be changed.
 */
struct uk_gpuprops_t {
    /**
     * IOCTL parameters to probe GPU properties
     *
     * NOTE: the raw_props member in this data structure contains the register
     * values from which the values of the other members are derived. The derived
     * members exist to allow for efficient access and/or shielding the details
     * of the layout of the registers.
     */
    struct gpu_props {
        /** Core. */
        struct core {
            /** Product specific value. */
            uint32_t product_id;
            /**
             * Status of the GPU release.
             * No defined values, but starts at 0 and increases by one for each
             * release status (alpha, beta, EAC, etc.).
             * 4 bit values (0-15).
             */
            uint16_t version_status;
            /**
             * Minor release number of the GPU. "P" part of an "RnPn" release number.
             * 8 bit values (0-255).
             */
            uint16_t minor_revision;
            /**
             * Major release number of the GPU. "R" part of an "RnPn" release number.
             * 4 bit values (0-15).
             */
            uint16_t major_revision;
            /** Padding. */
            uint16_t padding;
            /**
             * This property is deprecated since it has not contained the real current
             * value of GPU clock speed. It is kept here only for backwards compatibility.
             * For the new ioctl interface, it is ignored and is treated as a padding
             * to keep the structure of the same size and retain the placement of its
             * members.
             */
            uint32_t gpu_speed_mhz;
            /**
             * @usecase GPU clock max speed is required for computing best case
             * in tasks such as job scheduling and irq_throttling. (It is not specified
             * in the Midgard Architecture).
             * Also, GPU clock max speed is used for OpenCL's clGetDeviceInfo() function.
             */
            uint32_t gpu_freq_khz_max;
            /**
             * @usecase GPU clock min speed is required for computing worst case
             * in tasks such as job scheduling and irq_throttling. (It is not specified
             * in the Midgard Architecture).
             */
            uint32_t gpu_freq_khz_min;
            /** Size of the shader program counter, in bits. */
            uint32_t log2_program_counter_size;
            /**
             * TEXTURE_FEATURES_x registers, as exposed by the GPU. This is a
             * bitpattern where a set bit indicates that the format is supported.
             *
             * Before using a texture format, it is recommended that the corresponding
             * bit be checked.
             */
            uint32_t texture_features[base_gpu_num_texture_features_registers];
            /**
             * Theoretical maximum memory available to the GPU. It is unlikely that a
             * client will be able to allocate all of this memory for their own
             * purposes, but this at least provides an upper bound on the memory
             * available to the GPU.
             *
             * This is required for OpenCL's clGetDeviceInfo() call when
             * CL_DEVICE_GLOBAL_MEM_SIZE is requested, for OpenCL GPU devices. The
             * client will not be expecting to allocate anywhere near this value.
             */
            uint64_t gpu_available_memory_size;
        };

        /**
         * More information is possible - but associativity and bus width are not
         * required by upper-level apis.
         */
        struct l2_cache {
            /** Log2 Line Size. */
            uint8_t log2_line_size;
            /** Log2 Cache Size. */
            uint8_t log2_cache_size;
            /** Num L2 Slices. */
            uint8_t num_l2_slices;
            /** Padding bytes. */
            uint8_t padding[5];
        };

        /** Tiler. */
        struct tiler {
            /** Max is 4*2^15 */
            uint32_t bin_size_bytes;
            /** Max is 2^15 */
            uint32_t max_active_levels;
        };

        /** GPU threading system details. */
        struct thread {
            /** Max. number of threads per core */
            uint32_t max_threads;
            /** Max. number of threads per workgroup */
            uint32_t max_workgroup_size;
            /** Max. number of threads that can synchronize on a simple barrier */
            uint32_t max_barrier_size;
            /** Total size [1..65535] of the register file available per core. */
            uint16_t max_registers;
            /** Max. tasks [1..255] which may be sent to a core before it becomes blocked. */
            uint8_t max_task_queue;
            /** Max. allowed value [1..15] of the Thread Group Split field. */
            uint8_t max_thread_group_split;
            /** 0 = Not specified, 1 = Silicon, 2 = FPGA, 3 = SW Model/Emulation */
            uint8_t impl_tech;
            /** Padding bytes. */
            uint8_t padding[7];
        };

        /**
         * A complete description of the GPU's Hardware Configuration Discovery
         * registers.
         *
         * The information is presented inefficiently for access. For frequent access,
         * the values should be better expressed in an unpacked form in the
         * base_gpu_props structure.
         *
         * @usecase The raw properties in @ref gpu_raw_gpu_props are necessary to
         * allow a user of the Mali Tools (e.g. PAT) to determine "Why is this device
         * behaving differently?". In this case, all information about the
         * configuration is potentially useful, but it <b>does not need to be processed
         * by the driver</b>. Instead, the raw registers can be processed by the Mali
         * Tools software on the host PC.
         */
        struct raw {
            /** Shader Present. */
            uint64_t shader_present;
            /** Tiler Present. */
            uint64_t tiler_present;
            /** L2 Present. */
            uint64_t l2_present;
            /** Unused 1. */
            uint64_t unused_1;
            /** L2 Features. */
            uint32_t l2_features;
            /** Suspend Size. */
            uint32_t suspend_size;
            /** Mem Features. */
            uint32_t mem_features;
            /** Mmu Features. */
            uint32_t mmu_features;
            /** As Present. */
            uint32_t as_present;
            /** Js Present. */
            uint32_t js_present;
            /** Js Features. */
            uint32_t js_features[gpu_max_job_slots];
            /** Tiler Features. */
            uint32_t tiler_features;
            /** Texture Features. */
            uint32_t texture_features[3];
            /** GPU ID. */
            uint32_t gpu_id;
            /** Thread Max Threads. */
            uint32_t thread_max_threads;
            /** Thread Max Workgroup Size. */
            uint32_t thread_max_workgroup_size;
            /** Thread Max Barrier Size. */
            uint32_t thread_max_barrier_size;
            /** Thread Features. */
            uint32_t thread_features;
            /**
             * Coherency Mode.
             * Note: This is the _selected_ coherency mode rather than the
             * available modes as exposed in the coherency_features register.
             */
            uint32_t coherency_mode;
        };

        /**
         * Coherency group information
         *
         * Note that the sizes of the members could be reduced. However, the \c group
         * member might be 8-byte aligned to ensure the u64 core_mask is 8-byte
         * aligned, thus leading to wastage if the other members sizes were reduced.
         *
         * The groups are sorted by core mask. The core masks are non-repeating and do
         * not intersect.
         */
        struct coherent_group_info {
            /**
             * Descriptor for a coherent group
             *
             * \c core_mask exposes all cores in that coherent group, and \c num_cores
             * provides a cached population-count for that mask.
             *
             * @note Whilst all cores are exposed in the mask, not all may be available to
             * the application, depending on the Kernel Power policy.
             *
             * @note if u64s must be 8-byte aligned, then this structure has 32-bits of
             * wastage.
             */
            struct coherent_group {
                /** Core restriction mask required for the group */
                uint64_t core_mask;
                /** Number of cores in the group */
                uint16_t num_cores;
                /** Padding bytes. */
                uint16_t padding[3];
            };

            /** Num Groups. */
            uint32_t num_groups;
            /**
             * Number of core groups (coherent or not) in the GPU. Equivalent to the number of
             * L2 Caches.
             * The GPU Counter dumping writes 2048 bytes per core group, regardless of whether
             * the core groups are coherent or not. Hence this member is needed to calculate
             * how much memory is required for dumping.
             * @note Do not use it to work out how many valid elements are in the group[]
             * member. Use num_groups instead.
             */
            uint32_t num_core_groups;
            /** Coherency features of the memory, accessed by @ref gpu_mem_features methods. */
            uint32_t coherency;
            /** Padding. */
            uint32_t padding;
            /** Descriptors of coherent groups */
            coherent_group group[base_max_coherent_groups];
        };

        /** Core Props. */
        core core_props;
        /** L2 Props. */
        l2_cache l2_props;
        /** Unused to keep for backwards compatibility. */
        uint64_t unused;
        /** Tiler Props. */
        tiler tiler_props;
        /** Thread Props. */
        thread thread_props;
        /** This member is large: the fields declared in raw add up to 160 bytes. */
        raw raw_props;
        /** This must be last member of the structure. */
        coherent_group_info coherency_info;
    };

    /** Header. */
    uk_header header;
    /** Props. */
    gpu_props props;
};
 679 | 
/** Interface number passed as the first argument of every ioctl command below. */
constexpr auto iface_number = 0x80;

/**
 * Commands describing kbase_pre_r21 ioctl interface.
 *
 * Each command is built with _IOWR from the interface number, a command
 * number and the message structure passed with the call.
 */
enum command_type {
    /** Check version compatibility between JM kernel and userspace. */
    version_check = _IOWR(iface_number, 0x0, version_check_t),
    /** Set kernel context creation flags (0x212 == 530, the value of header_id::set_flags). */
    set_flags = _IOWR(iface_number, 0x212, set_flags_t),
    /** Get GPU properties (0x20e == 526, the value of header_id::get_props). */
    get_gpuprops = _IOWR(iface_number, 0x20e, uk_gpuprops_t),
};
 691 | 
 692 | }
 693 | 
 694 | /** Kbase Post R21 ioctl interface. */
 695 | namespace kbase_post_r21 {
 696 | 
 697 | template <typename value_t>
 698 | class pointer64 {
 699 |   public:
 700 |     /** @return Pointer to the object. */
 701 |     value_t* get() const {
 702 |         return reinterpret_cast<value_t*>(static_cast<uintptr_t>(value));
 703 |     }
 704 | 
 705 |     /**
 706 |      * Set pointer value.
 707 |      *
 708 |      * @param ptr   The new pointer value.
 709 |      */
 710 |     void reset(value_t* ptr) {
 711 |         value = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(ptr));
 712 |     }
 713 | 
 714 |   private:
 715 |     /** Pointer value as uint64_t. */
 716 |     uint64_t value { 0 };
 717 | };
 718 | 
 719 | /** Check version compatibility between kernel and userspace. */
 720 | struct version_check_t {
 721 |     /** Major version number. */
 722 |     uint16_t major;
 723 |     /** Minor version number */
 724 |     uint16_t minor;
 725 | 
 726 |     bool is_set() const
 727 |     {
 728 |         return major || minor;
 729 |     }
 730 | };
 731 | 
/**
 * Set kernel context creation flags.
 *
 * Unlike the pre-R21 message of the same name, this one carries no header or
 * padding: the flags word is the whole payload.
 */
struct set_flags_t {
    /** Kernel context creation flags. */
    uint32_t create_flags;
};
 737 | 
/**
 * Parameters of the get GPU properties ioctl.
 *
 * The ioctl will return the number of bytes stored into buffer or an error
 * on failure (e.g. size is too small). If size is specified as 0 then no
 * data will be written but the return value will be the number of bytes needed
 * for all the properties.
 *
 * flags may be used in the future to request a different format for the
 * buffer. With flags == 0 the following format is used.
 *
 * The buffer will be filled with pairs of values, a __u32 key identifying the
 * property followed by the value. The size of the value is identified using
 * the bottom bits of the key. The value then immediately follows the key and
 * is tightly packed (there is no padding). All keys and values are
 * little-endian.
 *
 * 00 = __u8
 * 01 = __u16
 * 10 = __u32
 * 11 = __u64
 */
struct get_gpuprops_t {
    /** GPU property size; the values match the two-bit size encoding listed above. */
    enum class gpuprop_size : uint8_t {
        /** Property type is uint8_t. */
        uint8 = 0x0,
        /** Property type is uint16_t. */
        uint16 = 0x1,
        /** Property type is uint32_t. */
        uint32 = 0x2,
        /** Property type is uint64_t. */
        uint64 = 0x3
    };

    /** GPU properties codes. */
    enum class gpuprop_code : uint8_t {
        /** Product id. */
        product_id = 1,
        /** L2 log2 line size. */
        l2_log2_line_size = 13,
        /** L2 log2 cache size. */
        l2_log2_cache_size = 14,
        /** L2 num l2 slices. */
        l2_num_l2_slices = 15,
        /** Max threads. */
        max_threads = 18,
        /** Max registers. */
        max_registers = 21,
        /** Raw l2 features. */
        raw_l2_features = 29,
        /** Raw core features. */
        raw_core_features = 30,
        /** Raw GPU id. */
        raw_gpu_id = 55,
        /** Raw thread max threads. */
        raw_thread_max_threads = 56,
        /** Raw thread max workgroup size. */
        raw_thread_max_workgroup_size = 57,
        /** Raw thread max barrier size. */
        raw_thread_max_barrier_size = 58,
        /** Raw thread features. */
        raw_thread_features = 59,
        /** Raw coherency mode. */
        raw_coherency_mode = 60,
        /** Coherency num groups. */
        coherency_num_groups = 61,
        /** Coherency num core groups. */
        coherency_num_core_groups = 62,
        /** Coherency coherency. */
        coherency_coherency = 63,
        /** Coherency group 0. */
        coherency_group_0 = 64,
        /** Coherency group 1. */
        coherency_group_1 = 65,
        /** Coherency group 2. */
        coherency_group_2 = 66,
        /** Coherency group 3. */
        coherency_group_3 = 67,
        /** Num exec engines. */
        num_exec_engines = 82
    };

    /** Pointer to the buffer to store properties into. */
    pointer64<uint8_t> buffer;

    /** Size of the buffer. */
    uint32_t size;

    /** Flags - must be zero for now. */
    uint32_t flags;
};
 828 | 
/** Interface number passed as the first argument of every ioctl command below. */
constexpr auto iface_number = 0x80;

/**
 * Commands describing kbase ioctl interface.
 *
 * The two version checks are built with _IOWR; set_flags and get_gpuprops are
 * built with _IOW.
 */
enum command_type {
    /** Check version compatibility between JM kernel and userspace. */
    version_check_jm = _IOWR(iface_number, 0x0, version_check_t),
    /** Check version compatibility between CSF kernel and userspace. */
    version_check_csf = _IOWR(iface_number, 0x34, version_check_t),
    /** Set kernel context creation flags. */
    set_flags = _IOW(iface_number, 0x1, set_flags_t),
    /** Get GPU properties. */
    get_gpuprops = _IOW(iface_number, 0x3, get_gpuprops_t),
};
 842 | 
 843 | }
 844 | 
 845 | class prop_decoder {
 846 |   public:
 847 |     prop_decoder(std::vector<unsigned char> buffer)
 848 |         : buffer_{ std::move(buffer) }
 849 |         , data_{ buffer_.data() }
 850 |         , size_{ buffer_.size() } {}
 851 | 
 852 |     bool decode(gpuinfo& info) {
 853 |         bool success = true;
 854 | 
 855 |         uint64_t raw_gpu_id {};
 856 |         uint64_t raw_core_features {};
 857 |         uint64_t raw_thread_features {};
 858 | 
 859 |         while (size_ > 0) {
 860 |             auto p = next(success);
 861 |             if (!success) {
 862 |                 return false;
 863 |             }
 864 | 
 865 |             prop_id_t id = p.first;
 866 |             uint64_t value = p.second;
 867 | 
 868 |             switch (id) {
 869 |             case prop_id_t::product_id:
 870 |                 info.gpu_id = get_gpu_id(value);
 871 |                 break;
 872 |             case prop_id_t::l2_log2_cache_size:
 873 |                 info.num_l2_bytes = 1UL << value;
 874 |                 break;
 875 |             case prop_id_t::l2_num_l2_slices:
 876 |                 info.num_l2_slices = value;
 877 |                 break;
 878 |             case prop_id_t::raw_l2_features:
 879 |                 // Bus width stored as log2(bus width) in top 8 bits
 880 |                 info.num_bus_bits = 1UL << ((value >> 24) & 0xFF);
 881 |                 break;
 882 |             case prop_id_t::raw_gpu_id:
 883 |                 raw_gpu_id = value;
 884 |                 break;
 885 |             case prop_id_t::raw_core_features:
 886 |                 raw_core_features = value;
 887 |                 break;
 888 |             case prop_id_t::raw_thread_features:
 889 |                 raw_thread_features = value;
 890 |                 break;
 891 |             case prop_id_t::coherency_num_core_groups:
 892 |                 // Only expect 1 core group in Mali-T700 onwards
 893 |                 assert(value == 1);
 894 |                 break;
 895 |             case prop_id_t::coherency_group_0:
 896 |                 info.num_shader_cores = __builtin_popcount(value);
 897 |                 info.shader_core_mask = value;
 898 |                 break;
 899 |             default:
 900 |                 break;
 901 |             }
 902 |         }
 903 | 
 904 |         // Decode architecture versions
 905 |         constexpr uint64_t bits4 { 0xF };
 906 |         constexpr uint64_t bits8 { 0xFF };
 907 | 
 908 |         constexpr uint64_t compat_shift { 28 };
 909 |         constexpr uint64_t compat { 0xF };
 910 |         bool is_64bit_id = ((raw_gpu_id >> compat_shift) & bits4) == compat;
 911 | 
 912 |         // Old-style 32-bit ID
 913 |         if (!is_64bit_id)
 914 |         {
 915 |             constexpr uint64_t arch_major_offset { 28 };
 916 |             constexpr uint64_t arch_minor_offset { 24 };
 917 |             info.architecture_major = (raw_gpu_id >> arch_major_offset) & bits4;
 918 |             info.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits4;
 919 |         }
 920 |         // New-style 64-bit ID
 921 |         else
 922 |         {
 923 |             constexpr uint64_t arch_major_offset { 56 };
 924 |             constexpr uint64_t arch_minor_offset { 48 };
 925 |             info.architecture_major = (raw_gpu_id >> arch_major_offset) & bits8;
 926 |             info.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits8;
 927 |         }
 928 | 
 929 |         info.num_exec_engines = get_num_exec_engines(
 930 |             info.gpu_id,
 931 |             info.num_shader_cores,
 932 |             raw_core_features,
 933 |             raw_thread_features);
 934 | 
 935 |         info.num_fp32_fmas_per_cy = get_num_fp32_fmas(
 936 |             info.gpu_id,
 937 |             info.num_shader_cores,
 938 |             raw_core_features,
 939 |             raw_thread_features);
 940 | 
 941 |         info.num_fp16_fmas_per_cy = info.num_fp32_fmas_per_cy * 2;
 942 | 
 943 |         info.num_texels_per_cy = get_num_texels(
 944 |             info.gpu_id,
 945 |             info.num_shader_cores,
 946 |             raw_core_features,
 947 |             raw_thread_features);
 948 | 
 949 |         info.num_pixels_per_cy = get_num_pixels(
 950 |             info.gpu_id,
 951 |             info.num_shader_cores,
 952 |             raw_core_features,
 953 |             raw_thread_features);
 954 | 
 955 |         return true;
 956 |     }
 957 | 
 958 |   private:
 959 |     /** Property id type. */
 960 |     using prop_id_t = kbase_post_r21::get_gpuprops_t::gpuprop_code;
 961 |     /** Property size type. */
 962 |     using prop_size_t = kbase_post_r21::get_gpuprops_t::gpuprop_size;
 963 | 
 964 |     static std::pair<prop_id_t, prop_size_t> to_prop_metadata(uint32_t v)  {
 965 |         /* Property id/size encoding is:
 966 |          * +--------+----------+
 967 |          * | 31   2 | 1      0 |
 968 |          * +--------+----------+
 969 |          * | PropId | PropSize |
 970 |          * +--------+----------+
 971 |          */
 972 |         static unsigned int id_shift { 2 };
 973 |         static unsigned int size_mask { 0b11 };
 974 | 
 975 |         return { static_cast<prop_id_t>(v >> id_shift), static_cast<prop_size_t>(v & size_mask) };
 976 |     }
 977 | 
 978 |     std::pair<prop_id_t, uint64_t> next(bool& success)  {
 979 |         success = true;
 980 |         auto p = to_prop_metadata(read_bytes<uint32_t>(success));
 981 |         if (success)
 982 |         {
 983 |             prop_id_t id = p.first;
 984 |             prop_size_t size = p.second;
 985 | 
 986 |             switch (size) {
 987 |             case prop_size_t::uint8:
 988 |                 return { id, read_bytes<uint8_t>(success) };
 989 |             case prop_size_t::uint16:
 990 |                 return { id, read_bytes<uint16_t>(success) };
 991 |             case prop_size_t::uint32:
 992 |                 return { id, read_bytes<uint32_t>(success) };
 993 |             case prop_size_t::uint64:
 994 |                 return { id, read_bytes<uint64_t>(success) };
 995 |             }
 996 |         }
 997 | 
 998 |         return {};
 999 |     }
1000 | 
1001 |     template <typename T>
1002 |     T read_bytes(bool& success)  {
1003 |         // Check we have enough bytes in the buffer
1004 |         if (size_ < sizeof(T)) {
1005 |             success = false;
1006 |             return 0;
1007 |         }
1008 | 
1009 |         T ret {};
1010 |         for (size_t b = 0; b < sizeof(T); b++)
1011 |         {
1012 |             ret |= static_cast<T>(static_cast<uint64_t>(data_[b]) << (8 * b));
1013 |         }
1014 |         data_ += sizeof(T);
1015 |         size_ -= sizeof(T);
1016 |         return ret;
1017 |     }
1018 | 
1019 |     std::vector<unsigned char> const buffer_;
1020 |     unsigned char const *data_;
1021 |     std::size_t size_;
1022 | };
1023 | 
1024 | /* See header for documentation */
1025 | std::unique_ptr<instance> instance::create(
1026 |     const uint32_t id
1027 | ) {
1028 |     std::string device_path("/dev/mali" + std::to_string(id));
1029 | 
1030 |     // Open the kernel driver device node
1031 |     const int fd = ::open(device_path.c_str(), O_RDONLY);
1032 |     if (fd < 0) {
1033 |         return nullptr;
1034 |     }
1035 | 
1036 |     // Check that it is a character device
1037 |     struct stat s {};
1038 |     const int fs_result = fstat(fd, &s);
1039 |     if ((fs_result < 0) || (S_ISCHR(s.st_mode) == 0)) {
1040 |         ::close(fd);
1041 |         return nullptr;
1042 |     }
1043 | 
1044 |     // Create the instance
1045 |     auto result = std::unique_ptr<instance>(new instance(fd));
1046 |     if (!result || !result->valid_) {
1047 |         return nullptr;
1048 |     }
1049 | 
1050 |     return result;
1051 | }
1052 | 
1053 | /* See header for documentation */
1054 | const gpuinfo& instance::get_info() const
1055 | {
1056 |     return info_;
1057 | };
1058 | 
/* See header for documentation */
instance::~instance()
{
    // Release the driver file descriptor handed to the constructor
    ::close(fd_);
}
1064 | 
1065 | /* See header for documentation */
1066 | instance::instance(int fd):
1067 |     fd_(fd)
1068 | {
1069 |     if (!check_version()) {
1070 |         valid_ = false;
1071 |         return;
1072 |     }
1073 | 
1074 |     if (!set_flags()) {
1075 |         valid_ = false;
1076 |         return;
1077 |     }
1078 | 
1079 |     if (!init_props()) {
1080 |         valid_ = false;
1081 |         return;
1082 |     }
1083 | }
1084 | 
/**
 * Test whether a reported interface version is one this library supports.
 *
 * @param major   The major version.
 * @param minor   The minor version.
 *
 * @return @c true for version 10.2 or newer, @c false otherwise.
 */
static bool is_supported(unsigned int major, unsigned int minor)
{
    constexpr unsigned int min_major { 10 };
    constexpr unsigned int min_minor { 2 };

    // Any other major version decides the answer on its own
    if (major != min_major) {
        return major > min_major;
    }

    return minor >= min_minor;
}
1089 | 
1090 | /* See header for documentation */
1091 | bool instance::check_version() {
1092 |     // Probe pre-r21 JM kernel
1093 |     // Must be first in the list because CSF reuses an old IOCTL ID
1094 |     iface_ = iface_type::pre_r21;
1095 |     kbase_pre_r21::version_check_t pre_r21 {};
1096 |     pre_r21.header.id = kbase_pre_r21::header_id::version_check;
1097 |     ::ioctl(fd_, kbase_pre_r21::version_check, &pre_r21);
1098 |     // If this is non-zero this must be pre-r21 driver, so check version
1099 |     if (pre_r21.is_set()) {
1100 |         return is_supported(pre_r21.major, pre_r21.minor);
1101 |     }
1102 | 
1103 |     // Probe r21+ JM kernel
1104 |     iface_ = iface_type::post_r21;
1105 |     kbase_post_r21::version_check_t post_r21 {};
1106 |     ::ioctl(fd_, kbase_post_r21::version_check_jm, &post_r21);
1107 |     // If this is non-zero this must be post-r21 JM driver, so check version
1108 |     if (post_r21.is_set()) {
1109 |         return is_supported(post_r21.major, post_r21.minor);
1110 |     }
1111 | 
1112 |     // Probe r21+ CSF kernel
1113 |     ::ioctl(fd_, kbase_post_r21::version_check_csf, &post_r21);
1114 |     // If this is any non-zero value this is a valid CSF GPU
1115 |     return post_r21.is_set();
1116 | }
1117 | 
1118 | /** Call set flags ioctl. */
1119 | bool instance::set_flags() {
1120 |     static constexpr auto system_monitor_flag_submit_disabled_bit = 1;
1121 |     static constexpr auto system_monitor_flag = 1U << system_monitor_flag_submit_disabled_bit;
1122 | 
1123 |     // Clear errno
1124 |     errno = 0;
1125 | 
1126 |     if (iface_ == iface_type::pre_r21) {
1127 |         kbase_pre_r21::set_flags_t flags {};
1128 |         flags.header.id = kbase_pre_r21::header_id::set_flags;
1129 |         flags.create_flags = system_monitor_flag;
1130 |         ::ioctl(fd_, kbase_pre_r21::set_flags, &flags);
1131 |     } else {
1132 |         kbase_post_r21::set_flags_t flags { system_monitor_flag };
1133 |         ::ioctl(fd_, kbase_post_r21::set_flags, &flags);
1134 |     }
1135 | 
1136 |     // Mali driver will fail if reinitialized, but it's benign
1137 |     // TODO: Does this ever happen with this usage pattern
1138 |     return errno == 0 || errno == EINVAL || errno == EPERM;
1139 | }
1140 | 
1141 | /* See header for documentation */
1142 | bool instance::init_props() {
1143 |     bool success;
1144 |     if (iface_ == iface_type::pre_r21) {
1145 |         success = init_props_pre_r21();
1146 |     } else {
1147 |         success = init_props_post_r21();
1148 |     }
1149 | 
1150 |     // Perform some common cleanup on the data
1151 |     if (!success)
1152 |     {
1153 |         return false;
1154 |     }
1155 | 
1156 |     info_.num_l2_bytes *= info_.num_l2_slices;
1157 |     info_.gpu_name = get_gpu_name(info_.gpu_id, info_.num_shader_cores);
1158 |     info_.architecture_name = get_architecture_name(info_.gpu_id);
1159 |     return true;
1160 | }
1161 | 
1162 | /* See header for documentation */
1163 | bool instance::init_props_pre_r21() {
1164 |     kbase_pre_r21::uk_gpuprops_t props {};
1165 |     props.header.id = kbase_pre_r21::header_id::get_props;
1166 |     errno = 0;
1167 |     ::ioctl(fd_, kbase_pre_r21::get_gpuprops, &props);
1168 |     if (errno) {
1169 |         return false;
1170 |     }
1171 | 
1172 |     info_.gpu_id = get_gpu_id(props.props.core_props.product_id);
1173 |     info_.num_l2_bytes = 1UL << props.props.l2_props.log2_cache_size;
1174 |     info_.num_l2_slices = props.props.l2_props.num_l2_slices;
1175 |     info_.num_bus_bits = 1UL << (props.props.raw_props.l2_features >> 24);
1176 | 
1177 |     // Old kernel driver must have 32-bit GPU ID
1178 |     switch (info_.gpu_id) {
1179 |         // Midgard GPUs require manual specification, as not machine readable
1180 |         case 0x6956: // Mali-T600
1181 |             info_.architecture_major = 4;
1182 |             info_.architecture_minor = 0;
1183 |             break;
1184 |         case 0x0620: // Mali-T620
1185 |             info_.architecture_major = 4;
1186 |             info_.architecture_minor = 1;
1187 |             break;
1188 |         case 0x0720: // Mali-T720
1189 |             info_.architecture_major = 4;
1190 |             info_.architecture_minor = 2;
1191 |             break;
1192 |         case 0x0750: // Mali-T760
1193 |             info_.architecture_major = 5;
1194 |             info_.architecture_minor = 0;
1195 |             break;
1196 |         case 0x0820: // Mali-T820
1197 |         case 0x0830: // Mali-T830
1198 |             info_.architecture_major = 5;
1199 |             info_.architecture_minor = 1;
1200 |             break;
1201 |         case 0x0860: // Mali-T860
1202 |         case 0x0880: // Mali-T880
1203 |             info_.architecture_major = 5;
1204 |             info_.architecture_minor = 2;
1205 |             break;
1206 |         // Bifrost onwards report architecture version via config register
1207 |         default:
1208 |         {
1209 |             uint32_t raw_gpu_id = props.props.raw_props.gpu_id;
1210 |             constexpr unsigned int arch_major_offset { 28 };
1211 |             constexpr unsigned int arch_minor_offset { 24 };
1212 |             constexpr unsigned int bits4 { 0xF };
1213 |             info_.architecture_major = (raw_gpu_id >> arch_major_offset) & bits4;
1214 |             info_.architecture_minor = (raw_gpu_id >> arch_minor_offset) & bits4;
1215 |             break;
1216 |         }
1217 |     }
1218 | 
1219 |     info_.num_shader_cores = 0;
1220 |     // Only expect 1 core group in Mali-T700 onwards
1221 |     assert(props.props.coherency_info.num_core_groups == 1);
1222 |     for (uint32_t i = 0; i < props.props.coherency_info.num_core_groups; i++)
1223 |     {
1224 |         info_.num_shader_cores = __builtin_popcount(props.props.coherency_info.group[i].core_mask);
1225 |         info_.shader_core_mask = props.props.coherency_info.group[i].core_mask;
1226 |     }
1227 | 
1228 |     info_.num_exec_engines = get_num_exec_engines(
1229 |         info_.gpu_id,
1230 |         info_.num_shader_cores,
1231 |         0, 0);
1232 | 
1233 |     info_.num_fp32_fmas_per_cy = get_num_fp32_fmas(
1234 |         info_.gpu_id,
1235 |         info_.num_shader_cores,
1236 |         0, 0);
1237 | 
1238 |     info_.num_fp16_fmas_per_cy = info_.num_fp32_fmas_per_cy * 2;
1239 | 
1240 |     info_.num_texels_per_cy = get_num_texels(
1241 |         info_.gpu_id,
1242 |         info_.num_shader_cores,
1243 |         0, 0);
1244 | 
1245 |     info_.num_pixels_per_cy = get_num_pixels(
1246 |         info_.gpu_id,
1247 |         info_.num_shader_cores,
1248 |         0, 0);
1249 | 
1250 |     return true;
1251 | }
1252 | 
1253 | /* See header for documentation */
1254 | bool instance::init_props_post_r21() {
1255 |     errno = 0;
1256 | 
1257 |     kbase_post_r21::get_gpuprops_t get_props = {};
1258 |     int size = ::ioctl(fd_, kbase_post_r21::get_gpuprops, &get_props);
1259 |     if (errno) {
1260 |         return false;
1261 |     }
1262 | 
1263 |     std::vector<unsigned char> buffer(static_cast<std::size_t>(size));
1264 |     get_props.size = static_cast<uint32_t>(size);
1265 |     get_props.buffer.reset(buffer.data());
1266 |     ::ioctl(fd_, kbase_post_r21::get_gpuprops, &get_props);
1267 |     if (errno) {
1268 |         return false;
1269 |     }
1270 | 
1271 |     prop_decoder decoder { buffer };
1272 |     return decoder.decode(info_);
1273 | }
1274 | 
1275 | }
1276 | 


--------------------------------------------------------------------------------
/source/libgpuinfo.hpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2021-2024 Arm Limited.
  3 |  *
  4 |  * SPDX-License-Identifier: MIT
  5 |  *
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to
  8 |  * deal in the Software without restriction, including without limitation the
  9 |  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10 |  * sell copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  *
 13 |  * The above copyright notice and this permission notice shall be included in all
 14 |  * copies or substantial portions of the Software.
 15 |  *
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 22 |  * SOFTWARE.
 23 |  */
 24 | 
 25 | /**
 26 |  * @brief The core libGPUInfo library interface.
 27 |  *
 28 |  * This library provides developers with an easy way to query the Arm
 29 |  * Immortalis or Arm Mali GPU configuration in their system.  This information
 30 |  * can be used to adjust rendering workload to match the capabilities of the
 31 |  * device.
 32 |  *
 33 |  * The library is simple to use:
 34 |  *
 35 |  *     // Create a connection with the kernel driver ...
 *     std::unique_ptr<instance> conn = libarmgpuinfo::instance::create();
 37 |  *     if (!conn)
 38 |  *     {
 39 |  *         std::cout << "ERROR: Failed to create Mali instance\n";
 40 |  *         return;
 41 |  *     }
 42 |  *
 43 |  *     // Fetch the information result and do something with it ...
 44 |  *     const gpuinfo& info = conn->get_info();
 45 |  *     std::cout << "GPU: " << info.gpu_name << " MP" << info.num_shader_cores << "\n";
 46 |  *
 47 |  * Note that the returned information object is returned by reference, and has
 48 |  * the same lifetime as the instance object.
 49 |  */
 50 | 
 51 | #pragma once
 52 | 
 53 | #include <array>
 54 | #include <cerrno>
 55 | #include <cstdint>
 56 | #include <vector>
 57 | #include <string>
 58 | #include <memory>
 59 | 
 60 | #include <sys/ioctl.h>
 61 | #include <sys/stat.h>
 62 | #include <fcntl.h>
 63 | #include <unistd.h>
 64 | 
 65 | namespace libarmgpuinfo {
 66 | 
/**
 * Arm GPU information.
 *
 * Populated by an instance when it is created, and returned by reference
 * from instance::get_info().
 */
struct gpuinfo
{
    /** GPU name */
    const char* gpu_name;

    /** GPU architecture name */
    const char* architecture_name;

    /** GPU ID */
    uint32_t gpu_id;

    /** GPU architecture major version */
    uint32_t architecture_major;

    /** GPU architecture minor version */
    uint32_t architecture_minor;

    /** Number of shader cores, counted from the bits set in the core mask */
    uint32_t num_shader_cores;

    /** Shader core topology mask */
    uint64_t shader_core_mask;

    /** Number of L2 cache slices */
    uint32_t num_l2_slices;

    /** L2 cache size, summed for all slices, in bytes */
    uint32_t num_l2_bytes;

    /** GPU external bus width per cache slice, in bits */
    uint32_t num_bus_bits;

    /** Number of execution engines per core */
    uint32_t num_exec_engines;

    /** Maximum number of 32-bit floating-point FMAs per clock per core */
    uint32_t num_fp32_fmas_per_cy;

    /**
     * Maximum number of 16-bit floating-point FMAs per clock per core.
     * Reported as twice the 32-bit rate.
     */
    uint32_t num_fp16_fmas_per_cy;

    /** Maximum number of bilinear filtered texels per clock per core */
    uint32_t num_texels_per_cy;

    /** Maximum number of output pixels per clock per core */
    uint32_t num_pixels_per_cy;
};
115 | 
116 | 
/**
 * Kbase ioctl interface type.
 *
 * Selects which of the two ioctl formats an instance uses to talk to the
 * kernel driver.
 */
enum class iface_type {
    /** Pre R21 kernel */
    pre_r21,
    /** Post R21 kernel (inclusive) */
    post_r21
};
124 | 
125 | /**
126 |  * Mali device driver instance.
127 |  */
128 | class instance
129 | {
130 | public:
131 |     /**
132 |      * Factory function to create a device instance.
133 |      *
134 |      * @param id   The driver instance, e.g. 0 for /dev/mali0.
135 |      *
136 |      * @return The created instance, or @c nullptr on failure.
137 |      */
138 |     static std::unique_ptr<instance> create(const uint32_t id=0);
139 | 
140 |     /**
141 |      * Get the GPU device property information.
142 |      *
143 |      * The returned reference has the same lifetime as the instance.
144 |      *
145 |      * @return The device property information.
146 |      */
147 |     const gpuinfo& get_info() const;
148 | 
149 |     /**
150 |      * Destroy an instance.
151 |      *
152 |      * Any returned information references become invalid.
153 |      */
154 |     ~instance();
155 | 
156 | private:
157 |     /**
158 |      * Create a new instance.
159 |      *
160 |      * @param fd   The opened driver file descriptor.
161 |      *
162 |      */
163 |     instance(int fd);
164 | 
165 |     /** Check the Mali kernel driver interface version. */
166 |     bool check_version();
167 | 
168 |     /** Configure Mali kernel driver connection flags. */
169 |     bool set_flags();
170 | 
171 |     /** Query properties and store them locally. */
172 |     bool init_props();
173 | 
174 |     /** Get device constants from the old format ioctl. */
175 |     bool init_props_pre_r21();
176 | 
177 |     /** Get device constants from the new format ioctl. */
178 |     bool init_props_post_r21();
179 | 
180 |     /** The queries device properties. */
181 |     gpuinfo info_ {};
182 | 
183 |     /** The driver interface type. */
184 |     iface_type iface_ {};
185 | 
186 |     /** The validity state of the object if initialization fails. */
187 |     bool valid_ { true };
188 | 
189 |     /** The kernel driver file descriptor. */
190 |     int fd_ {};
191 | };
192 | 
193 | }
194 | 


--------------------------------------------------------------------------------