├── README.md ├── apps ├── MetalBitonicSort │ ├── MetalBitonicSort │ └── default.metallib └── README └── corrupt_gpumem ├── va4_screen_glitch_bitonic ├── README ├── flip.dylib └── flip_bufs.c └── var4_random_hang_compute ├── GPU_1607388107_0x100654000_10000 ├── GPU_1607388107_0x100664000_10000 ├── README ├── flip.dylib └── flip_bufs.c /README.md: -------------------------------------------------------------------------------- 1 | # Introduction 2 | This code demonstrates how to corrupt GPU buffers (data/code) 3 | on macOS on both X86 (which uses IOAccelResource) and 4 | ARM (which uses IOGPUResource). 5 | 6 | See examples in `corrupt_gpumem`. 7 | 8 | It works by preloading libraries with `DYLD_INSERT_LIBRARIES` and running 9 | an app, in this case a compute one. 10 | -------------------------------------------------------------------------------- /apps/MetalBitonicSort/MetalBitonicSort: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/apps/MetalBitonicSort/MetalBitonicSort -------------------------------------------------------------------------------- /apps/MetalBitonicSort/default.metallib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/apps/MetalBitonicSort/default.metallib -------------------------------------------------------------------------------- /apps/README: -------------------------------------------------------------------------------- 1 | MetalBitonicSort is from below 2 | https://github.com/astarasikov/MetalBitonicSort 3 | -------------------------------------------------------------------------------- /corrupt_gpumem/va4_screen_glitch_bitonic/README: -------------------------------------------------------------------------------- 1 | clang -Wall -dynamiclib -o flip.dylib flip_bufs.c -framework IOKit -arch arm64 -F /System/Library/PrivateFrameworks/ -framework IOAccelerator -framework IOGPU 2 | export DYLD_INSERT_LIBRARIES=./flip.dylib 3 | 4 | #Running 5 | ../../apps/MetalBitonicSort/MetalBitonicSort 6 | 7 | #Description 8 | This sometimes would display parts of other windows, or certain areas 9 | (title bar, menu bar) would flash purple, when running this purely 10 | compute, non-graphic app 11 | -------------------------------------------------------------------------------- /corrupt_gpumem/va4_screen_glitch_bitonic/flip.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/corrupt_gpumem/va4_screen_glitch_bitonic/flip.dylib -------------------------------------------------------------------------------- /corrupt_gpumem/va4_screen_glitch_bitonic/flip_bufs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct BufferDesc { 9 | void* start; 10 | size_t length; 11 | void* cache; 12 | }; 13 | 14 | #define NUM_BUFFERS_TO_STORE 15 15 | 16 | static struct BufferDesc gBufferDescs[NUM_BUFFERS_TO_STORE] = {}; 17 | static size_t gBuffersStored; 18 | 19 | static void dumpGPUData(void) 20 | { 21 | fprintf(stderr, "%s: DUMPING GPU BUFFERS total=%zu\n", 22 | __func__, gBuffersStored); 23 | for (size_t i = 0; i < gBuffersStored; i++) { 24 | struct BufferDesc desc = gBufferDescs[i]; 25 | 26 | char fname[256] = {}; 27 | fprintf(stderr, "GPU_%ld_%p_%zx", time(NULL), desc.start, desc.length); 28 | sprintf(fname, "GPU_%ld_%p_%zx", time(NULL), desc.start, desc.length); 29 | FILE* fout = NULL; 30 | fout = fopen(fname, "wb"); 31 | if (!fout) { 32 | fprintf(stderr, "failed to open %s\n", fname); 33 | continue; 34 | } 35 | fwrite(desc.start, desc.length, 1, fout); 36 | fclose(fout); 37 | } 38 | } 39 | 40 | static void corruptGPUData(void) 41 | { 42 | static int count = 0; 43 | count++; 44 | if (count < 10 || count % 4 != 0 || !gBuffersStored) { 45 | //wait until at least one buffer is ready 46 | //avoid locking up the machine for too long 47 | 48 | if (count == 1) { 49 | srand(time(NULL)); 50 | } 51 | return; 52 | } 53 | 54 | if (0) { 55 | dumpGPUData(); 56 | return; 57 | } 58 | 59 | fprintf(stderr, "%s: CORRUPTING GPU BUFFERS total=%zu\n", 60 | __func__, gBuffersStored); 61 | for (size_t i = 0; i < gBuffersStored; i++) { 62 | struct BufferDesc desc = gBufferDescs[i]; 63 | //most apps seem to have two buffers: one with GPU code/data 64 | //and the other one with what looks like GPU VAs. I think 65 | //the second one is the ring buffer structures, and corrupting them 66 | //locks up the machine without producing any glitches 67 | if (((uint32_t*)desc.start)[0x10] < 0x10000) { 68 | //we can instead check "gpu_va" when resources are created 69 | //and skip the buffers which are persistently mapped 70 | //continue; 71 | } 72 | 73 | if (!desc.cache) { 74 | //save a good buffer 75 | desc.cache = malloc(desc.length); 76 | memcpy(desc.cache, desc.start, desc.length); 77 | } 78 | else { 79 | //restore a good buffer after the previous 80 | //fuzz iteration 81 | memcpy(desc.start, desc.cache, desc.length); 82 | } 83 | 84 | for (size_t j = 0; j < 32; j++) { 85 | size_t offset = 0x0; 86 | size_t max = (0x9000 - offset) / sizeof(int); 87 | size_t idx = (rand() % max) + offset; 88 | int fuzz = 1 << (rand() % 24); 89 | ((int*)desc.start)[idx] ^= fuzz; 90 | } 91 | } 92 | } 93 | 94 | extern void* IOAccelResourceCreate( 95 | void* io_accelerator, 96 | void* args, 97 | size_t size); 98 | 99 | extern void* IOAccelResourceGetDataBytes(void* io_accel_resource); 100 | extern size_t IOAccelResourceGetDataSize(void* io_accel_resource); 101 | 102 | extern void* fake_IOAccelResourceCreate( 103 | void* io_accelerator, 104 | void* args, 105 | size_t size) 106 | { 107 | void* ret = IOAccelResourceCreate(io_accelerator, args, size); 108 | 109 | if (ret) { 110 | struct BufferDesc desc = {}; 111 | 112 | if (1) { 113 | size_t* ptr = (size_t*)(((size_t)ret) + 0x20); 114 | desc.start = (void*)ptr[0]; 115 | desc.length = ptr[1]; 116 | } else { 117 | desc.start = IOAccelResourceGetDataBytes(ret); 118 | desc.length = IOAccelResourceGetDataSize(ret); 119 | } 120 | if (!desc.start || !desc.length || desc.length > 0x100000) 121 | { 122 | return ret; 123 | } 124 | fprintf(stderr, "%s: registered rsrc base %p size %zx\n", 125 | __func__, desc.start, desc.length); 126 | 127 | if (gBuffersStored < NUM_BUFFERS_TO_STORE) { 128 | gBufferDescs[gBuffersStored] = desc; 129 | gBuffersStored++; 130 | } 131 | } 132 | 133 | return ret; 134 | } 135 | 136 | //these are used on iOS and M1 Macs 137 | typedef void* my_IOGPU_Resource; 138 | extern void * IOGPUResourceGetDataBytes( 139 | my_IOGPU_Resource); 140 | extern size_t IOGPUResourceGetGPUVirtualAddress( 141 | my_IOGPU_Resource); 142 | extern size_t IOGPUResourceGetDataSize( 143 | my_IOGPU_Resource); 144 | extern my_IOGPU_Resource IOGPUResourceCreate( 145 | void *arg1, 146 | void *arg2, 147 | size_t arg3); 148 | 149 | static my_IOGPU_Resource fake_IOGPUResourceCreate( 150 | void *arg1, 151 | void *arg2, 152 | size_t arg3) 153 | { 154 | fprintf(stderr, "%s: arg1=%p arg2=%p arg3=%zx\n", 155 | __func__, arg1, arg2, arg3); 156 | void *ret = IOGPUResourceCreate(arg1, arg2, arg3); 157 | if (!ret) { 158 | return NULL; 159 | } 160 | size_t gpu_va = IOGPUResourceGetGPUVirtualAddress(ret); 161 | size_t gpu_size = IOGPUResourceGetDataSize(ret); 162 | void *gpu_data = IOGPUResourceGetDataBytes(ret); 163 | fprintf(stderr, "%s: ret=%p gpu_va=%zx gpu_data=%p size=%zx\n", 164 | __func__, ret, gpu_va, gpu_data, gpu_size); 165 | 166 | if (gpu_va || !gpu_data || !gpu_size) { 167 | return ret; 168 | } 169 | if (gBuffersStored < NUM_BUFFERS_TO_STORE) { 170 | gBufferDescs[gBuffersStored] = (struct BufferDesc) { 171 | .start = gpu_data, 172 | .length = gpu_size, 173 | }; 174 | gBuffersStored++; 175 | } 176 | return ret; 177 | } 178 | 179 | extern kern_return_t fake_IOConnectCallMethod(mach_port_t connection, // rdi 180 | uint32_t selector, // rsi 181 | uint64_t* input, // rdx 182 | uint32_t inputCnt, // rcx 183 | void* inputStruct, // r8 184 | size_t inputStructCnt, // r9 185 | uint64_t* output, 186 | uint32_t* outputCnt, 187 | void* outputStruct, 188 | size_t* outputStructCntP) 189 | { 190 | kern_return_t ret; 191 | 192 | if (0) { 193 | fprintf(stderr, 194 | "%s: connection=%x, selector=%x, input=%p" 195 | ", inputCnt=%d, inputStruct=%p, inputStructCnt=%lx" 196 | ", output=%p outputCnt=%x outputStruct=%p outputStructCntP=%p\n", 197 | __func__, connection, selector, input, inputCnt, inputStruct, 198 | inputStructCnt, output, outputCnt ? *outputCnt : 0, outputStruct, 199 | outputStructCntP); 200 | } 201 | 202 | corruptGPUData(); 203 | 204 | ret = IOConnectCallMethod( 205 | connection, selector, input, inputCnt, 206 | inputStruct, inputStructCnt, output, 207 | outputCnt, outputStruct, outputStructCntP); 208 | 209 | return ret; 210 | } 211 | 212 | typedef struct interposer { 213 | void* replacement; 214 | void* original; 215 | } interpose_t; 216 | 217 | __attribute__((used)) static const interpose_t interposers[] 218 | __attribute__((section("__DATA, __interpose"))) 219 | = { 220 | { .replacement = (void*)fake_IOConnectCallMethod, 221 | .original = (void*)IOConnectCallMethod }, 222 | { 223 | .replacement = (void*)fake_IOAccelResourceCreate, 224 | .original = (void*)IOAccelResourceCreate, 225 | }, 226 | { 227 | .replacement = (void*)fake_IOGPUResourceCreate, 228 | .original = (void*)IOGPUResourceCreate, 229 | }, 230 | }; 231 | 232 | /* 233 | clang -Wall -dynamiclib -o flip.dylib flip_bufs.c -framework IOKit -F /System/Library/PrivateFrameworks/ -framework IOAccelerator -framework IOGPU -arch x86_64 234 | */ 235 | -------------------------------------------------------------------------------- /corrupt_gpumem/var4_random_hang_compute/GPU_1607388107_0x100654000_10000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/corrupt_gpumem/var4_random_hang_compute/GPU_1607388107_0x100654000_10000 -------------------------------------------------------------------------------- /corrupt_gpumem/var4_random_hang_compute/GPU_1607388107_0x100664000_10000: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/corrupt_gpumem/var4_random_hang_compute/GPU_1607388107_0x100664000_10000 -------------------------------------------------------------------------------- /corrupt_gpumem/var4_random_hang_compute/README: -------------------------------------------------------------------------------- 1 | clang -Wall -dynamiclib -o flip.dylib flip_bufs.c -framework IOKit -arch arm64 -F /System/Library/PrivateFrameworks/ -framework IOAccelerator -framework IOGPU 2 | export DYLD_INSERT_LIBRARIES=./flip.dylib 3 | 4 | #Running 5 | ../../apps/MetalBitonicSort/MetalBitonicSort 6 | 7 | #Description 8 | This sometimes would display parts of other windows, or certain areas 9 | (title bar, menu bar) would flash purple, when running this purely 10 | compute, non-graphic app 11 | -------------------------------------------------------------------------------- /corrupt_gpumem/var4_random_hang_compute/flip.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astarasikov/macos-gpu-fuzzing-public/32fe60389217977111b64bce14270b3090a7ef59/corrupt_gpumem/var4_random_hang_compute/flip.dylib -------------------------------------------------------------------------------- /corrupt_gpumem/var4_random_hang_compute/flip_bufs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct BufferDesc { 9 | void* start; 10 | size_t length; 11 | }; 12 | 13 | #define NUM_BUFFERS_TO_STORE 15 14 | 15 | static struct BufferDesc gBufferDescs[NUM_BUFFERS_TO_STORE] = {}; 16 | static size_t gBuffersStored; 17 | 18 | static void dumpGPUData(void) 19 | { 20 | fprintf(stderr, "%s: DUMPING GPU BUFFERS total=%zu\n", 21 | __func__, gBuffersStored); 22 | for (size_t i = 0; i < gBuffersStored; i++) { 23 | struct BufferDesc desc = gBufferDescs[i]; 24 | 25 | char fname[256] = {}; 26 | fprintf(stderr, "GPU_%ld_%p_%zx", time(NULL), desc.start, desc.length); 27 | sprintf(fname, "GPU_%ld_%p_%zx", time(NULL), desc.start, desc.length); 28 | FILE* fout = NULL; 29 | fout = fopen(fname, "wb"); 30 | if (!fout) { 31 | fprintf(stderr, "failed to open %s\n", fname); 32 | continue; 33 | } 34 | fwrite(desc.start, desc.length, 1, fout); 35 | fclose(fout); 36 | } 37 | } 38 | 39 | static void corruptGPUData(void) 40 | { 41 | static int count = 0; 42 | count++; 43 | if (count < 10 || count > 14) { 44 | //avoid locking up the machine for too long 45 | srand(time(NULL)); 46 | return; 47 | } 48 | 49 | #if 0 50 | dumpGPUData(); 51 | return; 52 | #endif 53 | 54 | fprintf(stderr, "%s: CORRUPTING GPU BUFFERS total=%zu\n", 55 | __func__, gBuffersStored); 56 | for (size_t i = 0; i < gBuffersStored; i++) { 57 | struct BufferDesc desc = gBufferDescs[i]; 58 | if (desc.length > 0x100000) { 59 | continue; 60 | } 61 | 62 | for (size_t j = 0; j < 128; j++) { 63 | size_t max = (0x1900 / sizeof(int)); 64 | size_t idx = rand() % max; 65 | int fuzz = 1 << (rand() % 24); 66 | ((int*)desc.start)[idx] ^= fuzz; 67 | } 68 | } 69 | } 70 | 71 | extern void* IOAccelResourceCreate( 72 | void* io_accelerator, 73 | void* args, 74 | size_t size); 75 | 76 | extern void* IOAccelResourceGetDataBytes(void* io_accel_resource); 77 | extern size_t IOAccelResourceGetDataSize(void* io_accel_resource); 78 | 79 | static void fhd(FILE* f, const char* label, unsigned char* ptr, size_t len) 80 | { 81 | if (!f) { 82 | return; 83 | } 84 | if (!ptr) { 85 | return; 86 | } 87 | if (!len) { 88 | return; 89 | } 90 | 91 | fprintf(f, "%s\n", label); 92 | for (size_t i = 0; i < len; i++) { 93 | fprintf(f, "%02x ", ptr[i]); 94 | if (i && ((i % 32) == 0)) { 95 | fputs("\n", f); 96 | } 97 | } 98 | fputs("\n", f); 99 | } 100 | 101 | extern void* fake_IOAccelResourceCreate( 102 | void* io_accelerator, 103 | void* args, 104 | size_t size) 105 | { 106 | void* ret = IOAccelResourceCreate(io_accelerator, args, size); 107 | 108 | if (ret) { 109 | struct BufferDesc desc = {}; 110 | 111 | if (1) { 112 | size_t* ptr = (size_t*)(((size_t)ret) + 0x20); 113 | desc.start = (void*)ptr[0]; 114 | desc.length = ptr[1]; 115 | } else { 116 | desc.start = IOAccelResourceGetDataBytes(ret); 117 | desc.length = IOAccelResourceGetDataSize(ret); 118 | } 119 | if (!desc.start || !desc.length) 120 | { 121 | return ret; 122 | } 123 | fprintf(stderr, "%s: registered rsrc base %p size %zx\n", 124 | __func__, desc.start, desc.length); 125 | 126 | if (gBuffersStored < NUM_BUFFERS_TO_STORE) { 127 | gBufferDescs[gBuffersStored] = desc; 128 | gBuffersStored++; 129 | } 130 | } 131 | 132 | return ret; 133 | } 134 | 135 | //these are used on iOS and M1 Macs 136 | typedef void* my_IOGPU_Resource; 137 | extern void * IOGPUResourceGetDataBytes( 138 | my_IOGPU_Resource); 139 | extern size_t IOGPUResourceGetGPUVirtualAddress( 140 | my_IOGPU_Resource); 141 | extern size_t IOGPUResourceGetDataSize( 142 | my_IOGPU_Resource); 143 | extern my_IOGPU_Resource IOGPUResourceCreate( 144 | void *arg1, 145 | void *arg2, 146 | size_t arg3); 147 | 148 | static my_IOGPU_Resource fake_IOGPUResourceCreate( 149 | void *arg1, 150 | void *arg2, 151 | size_t arg3) 152 | { 153 | fprintf(stderr, "%s: arg1=%p arg2=%p arg3=%zx\n", 154 | __func__, arg1, arg2, arg3); 155 | void *ret = IOGPUResourceCreate(arg1, arg2, arg3); 156 | if (!ret) { 157 | return NULL; 158 | } 159 | size_t gpu_va = IOGPUResourceGetGPUVirtualAddress(ret); 160 | size_t gpu_size = IOGPUResourceGetDataSize(ret); 161 | void *gpu_data = IOGPUResourceGetDataBytes(ret); 162 | fprintf(stderr, "%s: ret=%p gpu_va=%zx gpu_data=%p size=%zx\n", 163 | __func__, ret, gpu_va, gpu_data, gpu_size); 164 | 165 | if (!gpu_data || !gpu_size) { 166 | return ret; 167 | } 168 | if (gBuffersStored < NUM_BUFFERS_TO_STORE) { 169 | gBufferDescs[gBuffersStored] = (struct BufferDesc) { 170 | .start = gpu_data, 171 | .length = gpu_size, 172 | }; 173 | gBuffersStored++; 174 | } 175 | return ret; 176 | } 177 | 178 | extern kern_return_t fake_IOConnectCallMethod(mach_port_t connection, // rdi 179 | uint32_t selector, // rsi 180 | uint64_t* input, // rdx 181 | uint32_t inputCnt, // rcx 182 | void* inputStruct, // r8 183 | size_t inputStructCnt, // r9 184 | uint64_t* output, 185 | uint32_t* outputCnt, 186 | void* outputStruct, 187 | size_t* outputStructCntP) 188 | { 189 | kern_return_t ret; 190 | 191 | if (0) { 192 | fprintf(stderr, 193 | "%s: connection=%x, selector=%x, input=%p" 194 | ", inputCnt=%d, inputStruct=%p, inputStructCnt=%lx" 195 | ", output=%p outputCnt=%x outputStruct=%p outputStructCntP=%p\n", 196 | __func__, connection, selector, input, inputCnt, inputStruct, 197 | inputStructCnt, output, outputCnt ? *outputCnt : 0, outputStruct, 198 | outputStructCntP); 199 | } 200 | 201 | switch (selector) { 202 | //case 0xd: 203 | //case 0x2c: 204 | //case 0x20: 205 | //case 0x8: 206 | //case 0x11: 207 | //case 0x1d: 208 | //case 0x17: 209 | //case 0xf: 210 | //case 0x1b: 211 | case 0x12: 212 | case 0x1e: 213 | case 0: 214 | case 9: 215 | case 0xa: 216 | case 0xb: 217 | //this is SubmitCommandBuffers 218 | corruptGPUData(); 219 | default: 220 | break; 221 | } 222 | 223 | ret = IOConnectCallMethod( 224 | connection, selector, input, inputCnt, 225 | inputStruct, inputStructCnt, output, 226 | outputCnt, outputStruct, outputStructCntP); 227 | 228 | return ret; 229 | } 230 | 231 | typedef struct interposer { 232 | void* replacement; 233 | void* original; 234 | } interpose_t; 235 | 236 | __attribute__((used)) static const interpose_t interposers[] 237 | __attribute__((section("__DATA, __interpose"))) 238 | = { 239 | { .replacement = (void*)fake_IOConnectCallMethod, 240 | .original = (void*)IOConnectCallMethod }, 241 | { 242 | .replacement = (void*)fake_IOAccelResourceCreate, 243 | .original = (void*)IOAccelResourceCreate, 244 | }, 245 | { 246 | .replacement = (void*)fake_IOGPUResourceCreate, 247 | .original = (void*)IOGPUResourceCreate, 248 | }, 249 | }; 250 | 251 | /* 252 | clang -Wall -dynamiclib -o flip.dylib flip_bufs.c -framework IOKit -F /System/Library/PrivateFrameworks/ -framework IOAccelerator -framework IOGPU -arch x86_64 253 | */ 254 | --------------------------------------------------------------------------------