├── .gitignore ├── LICENSE ├── README.md ├── autogen_stubs.sh ├── pyproject.toml ├── src └── fuzzyHSA │ ├── __init__.py │ ├── fuzzer.py │ ├── kfd │ ├── __init__.py │ ├── ops.py │ └── utils.py │ └── utils.py └── test └── kfd_ops.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | build/ 6 | src/fuzzyHSA.egg* 7 | src/fuzzyHSA/kfd/autogen/* 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuzzyHSA 2 | 3 | Testing and Fuzzying Framework for HSA and AMD low level software. 4 | 5 | Status report of various issues reported by Tinycorp and status of fixes are tracked [here](https://github.com/nod-ai/fuzzyHSA/wiki/Tinygrad-AMD-Linux-Driver-Crash---Hang-tracker-and-updates) 6 | 7 | Analysis of [Tinygrad KFD and HSA backends](https://gist.github.com/fxkamd/ffd02d66a2863e444ec208ea4f3adc48) 8 | 9 | ## Installation 10 | 11 | 1. pip install . 12 | 2. bash autogen_stubs.sh generate 13 | 14 | ## Uninstalling 15 | 16 | 1. bash autogen_stubs.sh clean 17 | 2. pip uninstall fuzzyHSA 18 | 19 | ## Testing 20 | 21 | 1. pip install -e '.[testing]' 22 | 2. python -m pytest test/ 23 | 24 | ## TODO 25 | 26 | * Use kfd_ioctl to create kfd operations in kfd/ops.py. 27 | * Utilize the kfd/ops.py in default fuzz tests. 28 | * Have ability to pass in user defined config for a dynamic fuzz test. 29 | 30 | ## Acknowledgments 31 | 32 | This project would like to thank the [tinycorp](https://tinygrad.org/) for their efforts that push the boundaries. Please go checkout their deep-learning framework [tinygrad](https://github.com/tinygrad/tinygrad) and give it a star! 33 | 34 | ## License 35 | 36 | fuzzyHSA is licensed under the terms of the Apache 2.0 License. 37 | See [LICENSE](LICENSE) for more information. 38 | -------------------------------------------------------------------------------- /autogen_stubs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -euo pipefail 4 | trap "echo 'Error: Script failed.'" ERR 5 | 6 | find_package_path() { 7 | python3 -c "import importlib.util; from pathlib import Path; spec = importlib.util.find_spec('$1'); print(Path(spec.origin).parent if spec and spec.origin else exit(1))" 8 | } 9 | 10 | PACKAGE_NAME="fuzzyHSA" 11 | BASE=$(find_package_path $PACKAGE_NAME)/kfd/autogen 12 | 13 | # setup instructions for clang2py 14 | if [[ ! $(clang2py -V) ]]; then 15 | pushd . 16 | cd /tmp 17 | sudo apt-get install -y --no-install-recommends clang 18 | pip install --upgrade pip setuptools 19 | pip install clang==14.0.6 20 | git clone https://github.com/geohot/ctypeslib.git 21 | cd ctypeslib 22 | pip install --user . 23 | clang2py -V 24 | popd 25 | fi 26 | 27 | fixup() { 28 | sed -i '1s/^/# mypy: ignore-errors\n/' "$1" 29 | sed -i 's/ *$//' "$1" 30 | grep FIXME_STUB "$1" || true 31 | } 32 | 33 | function check_generated_file_existence() { 34 | local filename=$1 35 | local full_path="$BASE/$filename" 36 | if [[ -f "$full_path" ]]; then 37 | return 0 38 | else 39 | return 1 40 | fi 41 | } 42 | 43 | function generate_amd_gpu() { 44 | local filename="amd_gpu.py" 45 | if check_generated_file_existence "$filename"; then 46 | echo "$filename already exists. Skipping generation." 47 | else 48 | local rocm_path="/opt/rocm" 49 | local include_path="$rocm_path/include" 50 | 51 | wget https://raw.githubusercontent.com/ROCm/ROCR-Runtime/201228c4fbd343cebdb6457ded7cb4d55637d60d/src/core/inc/sdma_registers.h -O $BASE/sdma_registers.h 52 | clang2py $BASE/sdma_registers.h --clang-args="-I$include_path -x c++" -o "$BASE/amd_gpu.py" -l /opt/rocm/lib/libhsa-runtime64.so 53 | 54 | NVD_HEADER=$(find /usr/src -name nvd.h | grep 'amdgpu') 55 | [ -f "NVD_HEADER" ] && echo "Couldn't find nvd.h on the system" && exit 1 56 | 57 | sed 's/^\(.*\)\(\s*\/\*\)\(.*\)$/\1 #\2\3/; s/^\(\s*\*\)\(.*\)$/#\1\2/' $NVD_HEADER >>$BASE/amd_gpu.py # comments 58 | sed -i 's/#\s*define\s*\([^ \t]*\)(\([^)]*\))\s*\(.*\)/def \1(\2): return \3/' $BASE/amd_gpu.py # #define name(x) (smth) -> def name(x): return (smth) 59 | sed -i '/#\s*define\s\+\([^ \t]\+\)\s\+\([^ ]\+\)/s//\1 = \2/' $BASE/amd_gpu.py # #define name val -> name = val 60 | 61 | fixup "$BASE/amd_gpu.py" 62 | 63 | echo "Installed $filename at $BASE" 64 | fi 65 | } 66 | 67 | function generate_hsa() { 68 | local filename="hsa.py" 69 | if check_generated_file_existence "$filename"; then 70 | echo "$filename already exists. Skipping generation." 71 | else 72 | local include_path="/opt/rocm/include" 73 | local lib_path="/opt/rocm/lib" 74 | 75 | clang2py \ 76 | $include_path/hsa/hsa.h \ 77 | $include_path/hsa/hsa_ext_amd.h \ 78 | $include_path/hsa/amd_hsa_signal.h \ 79 | $include_path/hsa/amd_hsa_queue.h \ 80 | $include_path/hsa/amd_hsa_kernel_code.h \ 81 | $include_path/hsa/hsa_ext_finalize.h \ 82 | $include_path/hsa/hsa_ext_image.h \ 83 | $include_path/hsa/hsa_ven_amd_aqlprofile.h \ 84 | --clang-args=-I"$include_path" -o "$BASE/$filename" -l "$lib_path"/libhsa-runtime64.so 85 | 86 | sed -i "s\import ctypes\import ctypes, os\g" $BASE/$filename 87 | sed -i "s\'/opt/rocm/\os.getenv('ROCM_PATH', '/opt/rocm/')+'/\g" $BASE/$filename 88 | 89 | fixup "$BASE/$filename" 90 | 91 | echo "Installed $filename at $BASE" 92 | fi 93 | } 94 | 95 | function generate_kfd() { 96 | local filename="kfd.py" 97 | if check_generated_file_existence "$filename"; then 98 | echo "$filename already exists. Skipping generation." 99 | else 100 | clang2py \ 101 | "/usr/include/linux/kfd_ioctl.h" \ 102 | -o "$BASE"/"$filename" \ 103 | -k cdefstum 104 | 105 | sed -i "s\import ctypes\import ctypes, os\g" "$BASE"/"$filename" 106 | 107 | fixup "$BASE"/"$filename" 108 | 109 | echo "Installed $filename at $BASE" 110 | fi 111 | } 112 | 113 | function generate() { 114 | generate_amd_gpu 115 | generate_hsa 116 | generate_kfd 117 | } 118 | 119 | function delete_generated_file() { 120 | local filename="$1" 121 | if check_generated_file_existence "$filename"; then 122 | echo "Cleaning up: removing $filename" 123 | rm "$BASE"/"$filename" 124 | else 125 | echo "No cleanup needed. $filename does not exist." 126 | fi 127 | } 128 | 129 | function clean() { 130 | delete_generated_file "kfd.py" 131 | delete_generated_file "hsa.py" 132 | delete_generated_file "amd_gpu.py" 133 | delete_generated_file "sdma_registers.h" 134 | } 135 | 136 | case "$1" in 137 | generate) 138 | generate 139 | ;; 140 | clean) 141 | clean 142 | ;; 143 | *) 144 | echo "Usage: $0 [generate|clean]" 145 | echo "generate: Prepares and creates bindings for kfd and HSA, converting system headers into usable Python modules." 146 | echo "clean: Removes any previously generated files to ensure a clean state for re-generation." 147 | exit 1 148 | ;; 149 | esac 150 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "fuzzyHSA" 7 | version = "0.1.0" 8 | description = "A Python HSA/KFD Fuzzer" 9 | authors = [{name = "Zachary Streeter"}] 10 | readme = "README.md" 11 | license = {file = "LICENSE"} 12 | 13 | [project.optional-dependencies] 14 | testing = [ 15 | "pytest>=6.0", 16 | ] 17 | 18 | [project.entry-points.console_scripts] 19 | fuzzyHSA = "fuzzyHSA.fuzzer:main" 20 | -------------------------------------------------------------------------------- /src/fuzzyHSA/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amd/fuzzyHSA/351c36ed9d383a4b3d15a24b5e5399ea9a72bcd1/src/fuzzyHSA/__init__.py -------------------------------------------------------------------------------- /src/fuzzyHSA/fuzzer.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | from .utils import check_generated_files 14 | from fuzzyHSA.kfd.ops import KFDDevice 15 | 16 | REQUIRED_FILES = ["kfd.py", "hsa.py", "amd_gpu.py"] 17 | 18 | 19 | def main(): 20 | try: 21 | check_generated_files(REQUIRED_FILES) 22 | print("All required files are present. Continuing with main execution.") 23 | # TODO: continue main execution here 24 | except RuntimeError as e: 25 | print(f"Startup Error: {e}") 26 | return 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /src/fuzzyHSA/kfd/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/amd/fuzzyHSA/351c36ed9d383a4b3d15a24b5e5399ea9a72bcd1/src/fuzzyHSA/kfd/__init__.py -------------------------------------------------------------------------------- /src/fuzzyHSA/kfd/ops.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import os 14 | import fcntl 15 | import ctypes, mmap 16 | import pathlib 17 | from posix import O_RDWR 18 | from typing import Dict, List, Any, Optional 19 | 20 | import fuzzyHSA.kfd.autogen.kfd as kfd # importing generated files via the fuzzyHSA package 21 | from .utils import ioctls_from_header, is_usable_gpu 22 | 23 | 24 | class MemoryManager: 25 | """A class to encapsulate memory mapping functionality using libc.""" 26 | 27 | def __init__(self): 28 | """Load libc and set up mmap and munmap function prototypes.""" 29 | self.libc = ctypes.CDLL("libc.so.6") 30 | self.libc.mmap.argtypes = [ 31 | ctypes.c_void_p, 32 | ctypes.c_size_t, 33 | ctypes.c_int, 34 | ctypes.c_int, 35 | ctypes.c_int, 36 | ctypes.c_long, 37 | ] 38 | self.libc.mmap.restype = ctypes.c_void_p 39 | self.libc.munmap.argtypes = [ctypes.c_void_p, ctypes.c_size_t] 40 | self.libc.munmap.restype = ctypes.c_int 41 | 42 | def mmap( 43 | self, 44 | size: int, 45 | prot: int, 46 | flags: int, 47 | fd: int, 48 | start_addr: ctypes.c_void_p = None, 49 | offset: int = 0, 50 | ) -> ctypes.c_void_p: 51 | """ 52 | Memory map a file or device. 53 | 54 | Args: 55 | size: The size of the mapping. 56 | prot: Desired memory protection of the mapping (e.g., PROT_READ | PROT_WRITE). 57 | flags: Determines the visibility of the updates to the mapping (e.g., MAP_SHARED). 58 | fd: File descriptor of the file or device to map. 59 | start_addr: The desired starting address for the mapping. If None, the kernel chooses the address. 60 | offset: Offset from the beginning of the file/device to start the mapping. 61 | 62 | Returns: 63 | A pointer to the mapped memory region. 64 | """ 65 | addr = self.libc.mmap(start_addr, size, prot, flags, fd, offset) 66 | if addr == ctypes.c_void_p(-1).value: 67 | errno = ctypes.get_errno() 68 | raise OSError(errno, os.strerror(errno)) 69 | return addr 70 | 71 | def munmap(self, addr: ctypes.c_void_p, size: int) -> None: 72 | """ 73 | Unmap a previously mapped memory region. 74 | 75 | Args: 76 | addr: The starting address of the memory region to unmap. 77 | size: The size of the memory region. 78 | 79 | Raises: 80 | OSError: If munmap fails. 81 | """ 82 | ret = self.libc.munmap(addr, size) 83 | if ret != 0: 84 | raise OSError("munmap failed") 85 | 86 | 87 | class KFDDevice(MemoryManager): 88 | """ 89 | Represents a Kernel Fusion Driver (KFD) device, providing a high-level interface 90 | for interacting with the device through IOCTL commands and memory management operations. 91 | 92 | Attributes: 93 | KFD_IOCTL (object): An object containing dynamically created IOCTL operations. 94 | fd (int): File descriptor for the /dev/kfd device, allowing direct communication with the device. 95 | node_id (int): The unique identifier for the KFD device node. 96 | 97 | Methods: 98 | __enter__, __exit__: Enable resource management using the 'with' statement. 99 | close(): Closes the device file descriptor. 100 | ioctl(cmd, arg): Performs an IOCTL operation on the device. 101 | create_queue(): Creates a queue on the KFD device using specific IOCTL commands. 102 | allocate_memory(size): Allocates memory on the device (placeholder method). 103 | print_ioctl_functions(): Prints the names of all generated IOCTL functions. 104 | """ 105 | 106 | # class attributes 107 | kfd: int = -1 108 | event_page: Any = ( 109 | None # TODO: fix types in kfd, Optional[kfd.struct_kfd_ioctl_alloc_memory_of_gpu_args] 110 | ) 111 | signals_page: Any = None 112 | signal_number: int = 16 113 | gpus: List[pathlib.Path] = [] 114 | 115 | @classmethod 116 | def initialize_class(cls): 117 | if cls.kfd == -1: 118 | try: 119 | cls.kfd = os.open("/dev/kfd", os.O_RDWR | os.O_CLOEXEC) 120 | cls.gpus = [ 121 | g.parent 122 | for g in pathlib.Path( 123 | "/sys/devices/virtual/kfd/kfd/topology/nodes" 124 | ).glob("*/gpu_id") 125 | if is_usable_gpu(g) 126 | ] 127 | except Exception as e: 128 | cls.kfd = -1 129 | raise RuntimeError( 130 | f"Failed to initialize KFDDevice class with error: {e}" 131 | ) from e 132 | 133 | def __init__(self, device: str): 134 | super().__init__() 135 | self.__class__.initialize_class() 136 | 137 | self.KFD_IOCTL = ioctls_from_header() 138 | self.device_id = int(device.split(":")[1]) if ":" in device else 0 139 | try: 140 | gpu_path = self.__class__.gpus[self.device_id] 141 | self.gpu_id = int((gpu_path / "gpu_id").read_text().strip()) 142 | properties = (gpu_path / "properties").read_text().strip().split("\n") 143 | self.properties = { 144 | line.split()[0]: int(line.split()[1]) for line in properties 145 | } 146 | self.drm_fd = os.open( 147 | f"/dev/dri/renderD{self.properties['drm_render_minor']}", os.O_RDWR 148 | ) 149 | target = self.properties["gfx_target_version"] 150 | self.arch = ( 151 | f"gfx{target // 10000}{(target // 100) % 100:02x}{target % 100:02x}" 152 | ) 153 | except Exception as e: 154 | raise RuntimeError( 155 | f"Failed to initialize KFDDevice instance with error: {e}" 156 | ) from e 157 | 158 | def __enter__(self): 159 | """ 160 | Enables the use of 'with' statement for this class, allowing for automatic 161 | resource management. 162 | 163 | Returns: 164 | self (KFDDevice): The instance itself. 165 | """ 166 | return self 167 | 168 | def __exit__(self, exc_type, exc_val, exc_tb): 169 | """ 170 | Ensures the device is properly closed when exiting a 'with' block. 171 | 172 | Args: 173 | exc_type: Exception type. 174 | exc_val: Exception value. 175 | exc_tb: Exception traceback. 176 | """ 177 | self.close() 178 | 179 | def close(self): 180 | """Closes the device file descriptor, freeing up system resources.""" 181 | os.close(self.__class__.kfd) 182 | 183 | # TODO: not sure I need this since I'm getting the actual ioctls from the headers 184 | def ioctl(self, cmd: int, arg: ctypes.Structure) -> ctypes.Structure: 185 | """ 186 | Performs an IOCTL operation using the device's file descriptor. 187 | 188 | Args: 189 | cmd (int): The IOCTL command to execute. 190 | arg (ctypes.Structure): The argument structure passed to the IOCTL command. 191 | 192 | Returns: 193 | ctypes.Structure: The potentially modified argument structure after the IOCTL call. 194 | 195 | Raises: 196 | OSError: If the IOCTL operation fails. 197 | """ 198 | try: 199 | ret = fcntl.ioctl(self.__class__.kfd, cmd, arg) 200 | return arg 201 | except IOError as e: 202 | raise OSError(f"IOCTL operation failed: {e}") 203 | 204 | def create_queue(self): 205 | """ 206 | Creates a compute queue on the KFD device, utilizing ioctl commands. 207 | """ 208 | # TODO: setup all the necessary instance attributes to create this queue 209 | # queue_args = { 210 | # "ring_base_address": self.aql_ring.va_addr, 211 | # "ring_size": self.aql_ring.size, 212 | # "gpu_id": self.gpu_id, 213 | # "queue_type": kfd.KFD_IOC_QUEUE_TYPE_COMPUTE_AQL, 214 | # "queue_percentage": kfd.KFD_MAX_QUEUE_PERCENTAGE, 215 | # "queue_priority": kfd.KFD_MAX_QUEUE_PRIORITY, 216 | # "eop_buffer_address": self.eop_buffer.va_addr, 217 | # "eop_buffer_size": self.eop_buffer.size, 218 | # "ctx_save_restore_address": self.ctx_save_restore_address.va_addr, 219 | # "ctx_save_restore_size": self.ctx_save_restore_address.size, 220 | # "ctl_stack_size": 0xA000, 221 | # "write_pointer_address": self.gart_aql.va_addr 222 | # + getattr(hsa.amd_queue_t, "write_dispatch_id").offset, 223 | # "read_pointer_address": self.gart_aql.va_addr 224 | # + getattr(hsa.amd_queue_t, "read_dispatch_id").offset, 225 | # } 226 | # size = 0x1000 227 | # addr_flags = mmap.MAP_SHARED | mmap.MAP_ANONYMOUS 228 | # 229 | # addr = kfd_device.mmap(size=size, prot=0, flags=addr_flags, fd=-1, offset=0) 230 | # 231 | # flags = kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 232 | # mem = kfd_device.KFD_IOCTL.alloc_memory_of_gpu( 233 | # kfd_device.kfd, 234 | # va_addr=addr, 235 | # size=size, 236 | # gpu_id=kfd_device.gpu_id, 237 | # flags=flags, 238 | # mmap_offset=0, 239 | # ) 240 | # 241 | # mem.__setattr__( 242 | # "mapped_gpu_ids", getattr(mem, "mapped_gpu_ids", []) + [kfd_device.gpu_id] 243 | # ) 244 | # c_gpus = (ctypes.c_int32 * len(mem.mapped_gpu_ids))(*mem.mapped_gpu_ids) 245 | # 246 | # stm = kfd_device.KFD_IOCTL.map_memory_to_gpu( 247 | # kfd_device.kfd, 248 | # handle=mem.handle, 249 | # device_ids_array_ptr=ctypes.addressof(c_gpus), 250 | # n_devices=len(mem.mapped_gpu_ids), 251 | # ) 252 | # assert stm.n_success == len(mem.mapped_gpu_ids) 253 | # 254 | # self.aql_queue = self.KFD_IOCTL.create_queue(KFDDevice.kfd, **queue_args) 255 | 256 | def allocate_memory( 257 | self, size: int, memory_flags: Dict[str, int], map_to_gpu: Optional[bool] = None 258 | ) -> Any: 259 | """ 260 | Allocates memory on the KFD device using configuration flags for both mmap and KFD. 261 | 262 | Args: 263 | size (int): The size of the memory to allocate in bytes. 264 | memory_flags (Dict[str, int]): Configuration dictionary containing mmap and KFD flags. 265 | map_to_gpu (Optional[bool], optional): If set to True, maps the allocated memory to the GPU after allocation. 266 | 267 | Returns: 268 | The allocated memory object with optional GPU mapping. 269 | """ 270 | # TODO: should create this function first from the gpu_allocation tests that passes 271 | # then can use in the subsequent test that need it like create_queue 272 | # Still need to test this new function, just wanted to get what I was thinking down first 273 | 274 | mmap_prot = memory_flags["mmap_prot"] 275 | mmap_flags = memory_flags["mmap_flags"] 276 | kfd_flags = memory_flags["kfd_flags"] 277 | 278 | addr = self.mmap(size=size, prot=mmap_prot, flags=mmap_flags, fd=-1, offset=0) 279 | 280 | mem = self.KFD_IOCTL.alloc_memory_of_gpu( 281 | self.kfd, 282 | va_addr=addr, 283 | size=size, 284 | gpu_id=self.gpu_id, 285 | flags=kfd_flags, 286 | mmap_offset=0, 287 | ) 288 | if map_to_gpu: 289 | self.map_memory_to_gpu(mem) 290 | return mem 291 | 292 | def map_memory_to_gpu(self, mem: Any) -> None: 293 | """ 294 | Maps memory to GPU using IOCTL commands. 295 | 296 | Args: 297 | mem: Memory object with GPU memory details. 298 | """ 299 | mem.__setattr__( 300 | "mapped_gpu_ids", getattr(mem, "mapped_gpu_ids", []) + [self.gpu_id] 301 | ) 302 | 303 | c_gpus = (ctypes.c_int32 * len(mem.mapped_gpu_ids))(*mem.mapped_gpu_ids) 304 | stm = self.KFD_IOCTL.map_memory_to_gpu( 305 | self.kfd, 306 | handle=mem.handle, 307 | device_ids_array_ptr=ctypes.addressof(c_gpus), 308 | n_devices=len(mem.mapped_gpu_ids), 309 | ) 310 | assert stm.n_success == len( 311 | mem.mapped_gpu_ids 312 | ), "Not all GPUs were mapped successfully" 313 | 314 | def free_gpu_memory(self, memory: Any) -> None: 315 | """ 316 | Unmaps memory from the GPUs and frees it. 317 | 318 | Parameters: 319 | memory (Any): An object containing GPU memory information, 320 | including mapped GPU IDs and memory addresses. 321 | 322 | Raises: 323 | Exception: If the number of successfully unmapped devices does not match the expected count. 324 | """ 325 | try: 326 | # Unmap memory from GPUs if any GPUs are mapped 327 | gpu_ids = getattr(memory, "mapped_gpu_ids", []) 328 | if gpu_ids: 329 | # Prepare the array of device IDs for the C library call 330 | gpu_ids_array = (ctypes.c_int32 * len(gpu_ids))(*gpu_ids) 331 | result = self.KFD_IOCTL.unmap_memory_from_gpu( 332 | self.kfd, 333 | handle=memory.handle, 334 | device_ids_array_ptr=ctypes.addressof(gpu_ids_array), 335 | n_devices=len(gpu_ids), 336 | ) 337 | 338 | if result.n_success != len(gpu_ids): 339 | raise Exception( 340 | f"Failed to unmap memory from all GPUs. Success count: {result.n_success}" 341 | ) 342 | 343 | # Unmap virtual address and free memory 344 | self.munmap(memory.va_addr, memory.size) 345 | self.KFD_IOCTL.free_memory_of_gpu(self.kfd, handle=memory.handle) 346 | 347 | except Exception as e: 348 | raise OSError(f"Error freeing GPU memeory: {e}") 349 | 350 | def create_sdma_packets(): 351 | structs = {} 352 | packet_definitions = { 353 | name: struct 354 | for name, struct in amd_gpu.__dict__.items() 355 | if name.startswith("struct_SDMA_PKT_") and name.endswith("_TAG") 356 | } 357 | 358 | for name, pkt in packet_definitions.items(): 359 | fields, names = [], set() 360 | 361 | for field_name, field_type in pkt._fields_: 362 | if field_name.endswith("_UNION"): 363 | handle_union_field(fields, field_name, field_type, names) 364 | else: 365 | fields.append((field_name, field_type)) 366 | 367 | # Structure renaming for consistency and readability 368 | new_name = name[16:-4].lower() 369 | struct_type = init_c_struct_t(tuple(fields)) 370 | structs[new_name] = struct_type 371 | 372 | assert_size_matches(struct_type, pkt) 373 | 374 | return type("SDMA_PKTS", (object,), structs) 375 | -------------------------------------------------------------------------------- /src/fuzzyHSA/kfd/utils.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | import ctypes 14 | import pathlib 15 | import re 16 | import functools 17 | import fcntl 18 | import os 19 | from typing import Type, Any 20 | 21 | # importing generated files via the fuzzyHSA package 22 | import fuzzyHSA.kfd.autogen.kfd as kfd 23 | import fuzzyHSA.kfd.autogen.amd_gpu as amd_gpu 24 | 25 | 26 | def is_usable_gpu(gpu_id): 27 | try: 28 | with gpu_id.open() as f: 29 | return int(f.read()) != 0 30 | except OSError: 31 | return False 32 | 33 | 34 | def kfd_ioctl( 35 | idir: int, 36 | nr: int, 37 | user_struct: Type[ctypes.Structure], 38 | fd: int, 39 | made_struct: ctypes.Structure = None, 40 | **kwargs, 41 | ) -> ctypes.Structure: 42 | """ 43 | Execute an ioctl command on a KFD device. 44 | 45 | Args: 46 | idir: The direction of data transfer. 47 | nr: The number associated with the ioctl command. 48 | user_struct: The structure type for the ioctl command. 49 | fd: The file descriptor of the KFD device. 50 | made_struct: An instance of the structure to be used (optional). 51 | **kwargs: Additional arguments to initialize `user_struct` if `made_struct` is not provided. 52 | 53 | Returns: 54 | The structure filled with the results of the ioctl call. 55 | """ 56 | # TODO: ADD if DEBUG env flag here to print this 57 | # print(f"Debug Info - FD: {fd}, IDIR: {idir}, NR: {nr}, GPU ID: {kwargs.get('gpu_id')}, Size: {kwargs.get('size')}") 58 | made = made_struct or user_struct(**kwargs) 59 | if fd < 0 or os.fstat(fd).st_nlink == 0: 60 | raise ValueError("Invalid or closed file descriptor") 61 | try: 62 | ret = fcntl.ioctl( 63 | fd, (idir << 30) | (ctypes.sizeof(made) << 16) | (ord("K") << 8) | nr, made 64 | ) 65 | except OSError as e: 66 | raise RuntimeError( 67 | f"IOCTL operation failed with system error: {os.strerror(e.errno)}" 68 | ) from e 69 | return made 70 | 71 | 72 | def ioctls_from_header() -> Any: 73 | """ 74 | Dynamically create ioctl functions from header definitions in kfd.py. 75 | 76 | Returns: 77 | A dynamically created class instance with ioctl functions as methods. 78 | """ 79 | pattern = r"# (AMDKFD_IOC_[A-Z0-9_]+) = (_IOWR?)\('K',\s*nr,\s*type\) \(\s*(0x[0-9a-fA-F]+)\s*,\s*struct\s+([A-Za-z0-9_]+)\s*\) # macro" 80 | matches = re.findall(pattern, pathlib.Path(kfd.__file__).read_text(), re.MULTILINE) 81 | idirs = {"_IOW": 1, "_IOR": 2, "_IOWR": 3} 82 | fxns = { 83 | name.replace("AMDKFD_IOC_", "").lower(): functools.partial( 84 | kfd_ioctl, idirs[idir], int(nr, 16), getattr(kfd, f"struct_{sname}") 85 | ) 86 | for name, idir, nr, sname in matches 87 | } 88 | return type("KFD_IOCTL", (object,), fxns)() 89 | 90 | 91 | def assert_size_matches(new_struct, original_struct): 92 | assert ctypes.sizeof(new_struct) == ctypes.sizeof( 93 | original_struct 94 | ), f"{ctypes.sizeof(new_struct)} != {ctypes.sizeof(original_struct)}" 95 | 96 | 97 | def handle_union_field(fields, field_name, field_type, names): 98 | assert field_type._fields_[0][0] == "_0", "Union first field must be named '_0'" 99 | union_base = field_type._fields_[0][1] 100 | 101 | for union_field in union_base._fields_: 102 | union_field_name = union_field[0] 103 | if union_field_name in names: 104 | union_field_name = field_name + union_field_name 105 | names.add(union_field_name) 106 | merge_64bit_fields(fields, union_field_name, union_field) 107 | 108 | 109 | def merge_64bit_fields(fields, field_name, union_field): 110 | if field_name.endswith("_63_32") and fields[-1][0].endsuffix("_31_0"): 111 | last_field_name = fields[-1][0][:-6] # Remove the '_31_0' part 112 | fields[-1] = (last_field_name, ctypes.c_ulong, 64) 113 | else: 114 | fields.append((field_name, *union_field[1:])) 115 | 116 | 117 | def print_ioctl_functions(self) -> None: 118 | """ 119 | Prints the names of all IOCTL functions generated by the ioctls_from_header function. 120 | """ 121 | print("Available IOCTL Functions:") 122 | for func_name in dir(self.KFD_IOCTL): 123 | if callable(getattr(self.KFD_IOCTL, func_name)) and not func_name.startswith( 124 | "__" 125 | ): 126 | print(f" - {func_name}") 127 | -------------------------------------------------------------------------------- /src/fuzzyHSA/utils.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | 13 | from pathlib import Path 14 | from typing import Dict, Any, List 15 | import inspect 16 | import importlib.util 17 | 18 | 19 | # TODO: need to have this check for general "filename" 20 | def check_generated_files(filenames: List[str]) -> None: 21 | """ 22 | Checks if the specified autogenerated files exist in the specified package directory. 23 | 24 | Args: 25 | filenames (List[str]): List of filenames to check under the 'autogen' subdirectory. 26 | 27 | Raises: 28 | RuntimeError: If any specified file does not exist. 29 | """ 30 | package_path = Path(importlib.util.find_spec("fuzzyHSA").origin).parent 31 | autogen_path = package_path / "kfd" / "autogen" 32 | 33 | for filename in filenames: 34 | file_path = autogen_path / filename 35 | if not file_path.exists(): 36 | raise RuntimeError( 37 | f"{filename} not found. Please run autogen_stub.sh to generate it." 38 | ) 39 | 40 | 41 | def create_cache_directory(): 42 | cache_dir = Path.home() / ".cache" / "fuzzyHSA" 43 | cache_dir.mkdir(parents=True, exist_ok=True) 44 | 45 | 46 | def query_attributes(obj: Any) -> Dict[str, Any]: 47 | """ 48 | Retrieves all attributes of an object with their current values. 49 | 50 | Args: 51 | obj (Any): The object to inspect. 52 | 53 | Returns: 54 | Dict[str, Any]: A dictionary containing attribute names and their values. 55 | """ 56 | members_dict = { 57 | name: value for name, value in inspect.getmembers(obj) if not callable(value) 58 | } 59 | return members_dict 60 | -------------------------------------------------------------------------------- /test/kfd_ops.py: -------------------------------------------------------------------------------- 1 | import ctypes, mmap 2 | import pprint 3 | import pytest 4 | from fuzzyHSA.utils import query_attributes 5 | from fuzzyHSA.kfd.ops import KFDDevice 6 | import fuzzyHSA.kfd.autogen.kfd as kfd # importing generated files via the fuzzyHSA package 7 | 8 | 9 | @pytest.fixture(scope="module") 10 | def kfd_device(): 11 | """ 12 | Fixture to create and return a KFDDevice instance for testing, 13 | with automatic cleanup to close the device after tests are completed. 14 | """ 15 | device = None 16 | try: 17 | device = KFDDevice("KFD:0") # TODO: should handle multiple devices 18 | yield device 19 | finally: 20 | if device: 21 | device.close() 22 | 23 | 24 | MAP_FIXED, MAP_NORESERVE = 0x10, 0x400 25 | 26 | 27 | class TestKFDDeviceHardwareIntegration: 28 | def test_memory_management(self, kfd_device): 29 | """ 30 | Test memory mapping and unmapping functionality with actual hardware. 31 | """ 32 | # NOTE: using fd=-1 in combination with mmap.MAP_ANONYMOUS is appropriate 33 | # for creating memory mappings that are independent of the file system, 34 | # offering a simple and effective way to manage memory for temporary or 35 | # internal application needs. 36 | 37 | size = 4096 38 | flags = mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS | MAP_NORESERVE 39 | fd = -1 # Indicates no file descriptor 40 | 41 | addr = kfd_device.mmap(size=size, prot=0, flags=flags, fd=fd, offset=0) 42 | 43 | assert addr is not None, "Failed to mmap memory." 44 | 45 | kfd_device.munmap(addr, size) 46 | 47 | def test_ioctl_operations(self, kfd_device): 48 | """ 49 | Test IOCTL operations. 50 | 51 | TODO: 52 | - Implement individual tests for each IOCTL operation listed below. 53 | - Each test should: 54 | 1. Properly set up any required parameters or structures. 55 | 2. Call the ioctl operation. 56 | 3. Verify the operation's success by checking return values and any changes in state or outputs. 57 | 4. Handle and log errors appropriately. 58 | 5. Clean up any resources used during the test. 59 | 60 | List of IOCTL operations to test: 61 | - acquire_vm: Test VM acquisition functionalities. 62 | - alloc_memory_of_gpu: Test GPU memory allocation and ensure proper handling of memory addresses. 63 | - alloc_queue_gws: Test allocation of queue gateways. 64 | - create_event: Verify event creation and correct initialization of event properties. 65 | - create_queue: Ensure queues are created with correct configurations. 66 | - dbg_address_watch: Test debug functionalities related to address watching. 67 | - dbg_register: Test registration of debug instances. 68 | - dbg_unregister: Verify proper unregistration and cleanup of debug instances. 69 | - dbg_wave_control: Test controls for debug wavefront management. 70 | - destroy_event: Ensure events are cleanly destroyed without residual state. 71 | - destroy_queue: Test queue destruction and resource deallocation. 72 | - free_memory_of_gpu: Ensure GPU memory is freed and that no leaks are present. 73 | - get_clock_counters: Verify retrieval of clock counter data. 74 | - get_dmabuf_info: Test DMA buffer information retrieval. 75 | - get_process_apertures_new: Check acquisition of new process apertures. 76 | - get_queue_wave_state: Verify state retrieval of queue wavefronts. 77 | - get_tile_config: Test configuration retrieval of GPU tiles. 78 | - import_dmabuf: Verify DMA buffer import functionality. 79 | - map_memory_to_gpu: Ensure memory mapping to GPU is handled correctly. 80 | - reset_event: Test resetting of events and state cleanup. 81 | - set_cu_mask: Verify setting of compute unit masks. 82 | - set_event: Test configuration of event parameters. 83 | - set_memory_policy: Verify memory policy settings apply correctly. 84 | - set_scratch_backing_va: Test setting of scratch backing virtual addresses. 85 | - set_trap_handler: Verify trap handler settings are applied. 86 | - set_xnack_mode: Test XNACK mode settings. 87 | - smi_events: Verify handling of system management interface events. 88 | - svm: Test shared virtual memory operations. 89 | - unmap_memory_from_gpu: Ensure memory unmapping is clean and complete. 90 | - update_queue: Test updating queue parameters and effects. 91 | - wait_events: Verify event waiting mechanisms work as expected. 92 | """ 93 | operations = [ 94 | ("acquire_vm", self._test_acquire_vm), 95 | ("alloc_memory_of_gpu", self._test_alloc_memory_of_gpu), 96 | ("map_memory_to_gpu", self._test_map_memory_to_gpu), 97 | ("create_event", self._test_create_event), 98 | ("create_queue", self._test_create_queue), 99 | # TODO: List above 100 | ] 101 | results = {} 102 | for name, operation in operations: 103 | try: 104 | operation(kfd_device) 105 | results[name] = "Passed" 106 | except Exception as e: 107 | results[name] = f"Failed with error: {str(e)}" 108 | 109 | for operation, result in results.items(): 110 | print(f"{operation}: {result}") 111 | 112 | failed_tests = {op: res for op, res in results.items() if "Failed" in res} 113 | if failed_tests: 114 | error_messages = "\n".join( 115 | f"{op}: {res}" for op, res in failed_tests.items() 116 | ) 117 | assert False, f"Some IOCTL operations failed:\n{error_messages}" 118 | 119 | def _test_acquire_vm(self, kfd_device): 120 | kfd_device.KFD_IOCTL.acquire_vm( 121 | kfd_device.kfd, drm_fd=kfd_device.drm_fd, gpu_id=kfd_device.gpu_id 122 | ) 123 | 124 | def _test_alloc_memory_of_gpu(self, kfd_device): 125 | size = 0x1000 126 | addr_flags = mmap.MAP_SHARED | mmap.MAP_ANONYMOUS 127 | 128 | addr = kfd_device.mmap(size=size, prot=0, flags=addr_flags, fd=-1, offset=0) 129 | 130 | flags = kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 131 | mem = kfd_device.KFD_IOCTL.alloc_memory_of_gpu( 132 | kfd_device.kfd, 133 | va_addr=addr, 134 | size=size, 135 | gpu_id=kfd_device.gpu_id, 136 | flags=flags, 137 | mmap_offset=0, 138 | ) 139 | 140 | def _test_map_memory_to_gpu(self, kfd_device): 141 | size = 0x1000 142 | addr_flags = mmap.MAP_SHARED | mmap.MAP_ANONYMOUS 143 | 144 | addr = kfd_device.mmap(size=size, prot=0, flags=addr_flags, fd=-1, offset=0) 145 | 146 | flags = kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 147 | mem = kfd_device.KFD_IOCTL.alloc_memory_of_gpu( 148 | kfd_device.kfd, 149 | va_addr=addr, 150 | size=size, 151 | gpu_id=kfd_device.gpu_id, 152 | flags=flags, 153 | mmap_offset=0, 154 | ) 155 | 156 | mem.__setattr__( 157 | "mapped_gpu_ids", getattr(mem, "mapped_gpu_ids", []) + [kfd_device.gpu_id] 158 | ) 159 | c_gpus = (ctypes.c_int32 * len(mem.mapped_gpu_ids))(*mem.mapped_gpu_ids) 160 | 161 | stm = kfd_device.KFD_IOCTL.map_memory_to_gpu( 162 | kfd_device.kfd, 163 | handle=mem.handle, 164 | device_ids_array_ptr=ctypes.addressof(c_gpus), 165 | n_devices=len(mem.mapped_gpu_ids), 166 | ) 167 | assert stm.n_success == len(mem.mapped_gpu_ids) 168 | 169 | def _test_create_event(self, kfd_device): 170 | memory_flags_config = { 171 | "mmap_prot": mmap.PROT_READ | mmap.PROT_WRITE, 172 | "mmap_flags": mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 173 | "kfd_flags": kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 174 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT 175 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED 176 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE 177 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE 178 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE, 179 | } 180 | 181 | memory_size = 0x8000 182 | KFDDevice.event_page = kfd_device.allocate_memory( 183 | memory_size, memory_flags_config, map_to_gpu=True 184 | ) 185 | sync_event = kfd_device.KFD_IOCTL.create_event( 186 | KFDDevice.kfd, event_page_offset=KFDDevice.event_page.handle, auto_reset=1 187 | ) 188 | assert sync_event is not None, "Failed to create event." 189 | 190 | def _test_create_queue(self, kfd_device): 191 | """ 192 | Test the functionality of creating a queue on the KFD device. 193 | """ 194 | 195 | sdma_ring_flags_config = { 196 | "mmap_prot": 0, 197 | "mmap_flags": mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 198 | "kfd_flags": kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 199 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE 200 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE 201 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE 202 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT 203 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED, 204 | } 205 | 206 | sdma_ring = kfd_device.allocate_memory( 207 | 0x100000, sdma_ring_flags_config, map_to_gpu=True 208 | ) 209 | 210 | gart_flags_config = { 211 | "mmap_prot": mmap.PROT_READ | mmap.PROT_WRITE, 212 | "mmap_flags": mmap.MAP_SHARED | mmap.MAP_ANONYMOUS, 213 | "kfd_flags": kfd.KFD_IOC_ALLOC_MEM_FLAGS_GTT 214 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE 215 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE 216 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE 217 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_COHERENT 218 | | kfd.KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED, 219 | } 220 | gart_sdma = kfd_device.allocate_memory( 221 | 0x1000, gart_flags_config, map_to_gpu=True 222 | ) 223 | 224 | sdma_queue = kfd_device.KFD_IOCTL.create_queue( 225 | KFDDevice.kfd, 226 | ring_base_address=sdma_ring.va_addr, 227 | ring_size=sdma_ring.size, 228 | gpu_id=kfd_device.gpu_id, 229 | queue_type=kfd.KFD_IOC_QUEUE_TYPE_SDMA, 230 | queue_percentage=kfd.KFD_MAX_QUEUE_PERCENTAGE, 231 | queue_priority=kfd.KFD_MAX_QUEUE_PRIORITY, 232 | write_pointer_address=gart_sdma.va_addr, 233 | read_pointer_address=gart_sdma.va_addr + 8, 234 | ) 235 | 236 | try: 237 | assert sdma_queue.queue_id >= 0, "Queue ID should be a non-negative integer" 238 | assert ( 239 | sdma_queue.ring_size > 0 240 | ), "Ring size should be greater than zero if active" 241 | assert ( 242 | sdma_queue.ring_base_address != 0 243 | ), "Ring base address should not be zero if active" 244 | print(f"Queue {sdma_queue.queue_id} created successfully and is active.") 245 | except AssertionError as error: 246 | print(f"Failed to verify queue active status: {error}") 247 | 248 | 249 | if __name__ == "__main__": 250 | pytest.main([__file__]) 251 | --------------------------------------------------------------------------------