├── .gitignore
├── Makefile
├── README.md
├── demo
│   ├── demo.c
│   ├── demo.h
│   ├── iokit.c
│   ├── shaders.c
│   └── slowfb.c
├── disasm-driver.c
├── disasm
│   └── disasm.c
├── docs
│   ├── Codenames.md
│   └── table.py
├── lib
│   ├── cmdbuf.xml
│   ├── cmdstream.h
│   ├── decode.c
│   ├── decode.h
│   ├── gen_pack.py
│   ├── io.c
│   ├── io.h
│   ├── selectors.h
│   ├── tiling.c
│   ├── tiling.h
│   └── util.h
└── wrap
    ├── APPLE_LICENSE
    └── wrap.c

/.gitignore:
--------------------------------------------------------------------------------
1 | wrap.dylib*
2 | demo-bin*
3 | fb.bin
4 | disasm-bin*
5 | agx_pack.h
6 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: wrap.dylib demo-bin disasm-bin
2 | .PHONY: clean all
3 | .SUFFIXES:
4 | 
5 | clean:
6 | 	rm -f wrap.dylib demo-bin disasm-bin agx_pack.h
7 | 
8 | CFLAGS := -g -Wall -Werror -Wextra -Wno-unused-variable -Wno-unused-function -Wno-unused-parameter
9 | WRAP_HDRS := $(wildcard lib/*.h)\
10 | 
11 | WRAP_SRCS := $(wildcard lib/*.c)\
12 | 	$(wildcard wrap/*.c)\
13 | 	$(wildcard disasm/*.c)\
14 | 
15 | wrap.dylib: $(WRAP_SRCS) $(WRAP_HDRS) Makefile agx_pack.h
16 | 	clang -o $@ $(WRAP_SRCS) -I lib/ -I . -dynamiclib -framework IOKit $(CFLAGS)
17 | 
18 | DEMO_SRCS := $(wildcard lib/*.c)\
19 | 	$(wildcard demo/*.c)\
20 | 	$(wildcard disasm/*.c)
21 | 
22 | DEMO_HDRS := $(wildcard lib/*.h)\
23 | 
24 | demo-bin: $(DEMO_SRCS) $(DEMO_HDRS) Makefile agx_pack.h
25 | 	clang -o $@ $(DEMO_SRCS) -I lib/ -I . -I /opt/X11/include -L /opt/X11/lib/ -lX11 -framework IOKit $(CFLAGS)
26 | 
27 | agx_pack.h: lib/gen_pack.py lib/cmdbuf.xml Makefile
28 | 	python3 lib/gen_pack.py lib/cmdbuf.xml > agx_pack.h
29 | 
30 | DISASM_SRCS := $(wildcard disasm/*.c)\
31 | 	disasm-driver.c
32 | 
33 | disasm-bin: $(DISASM_SRCS) Makefile
34 | 	clang -o $@ $(DISASM_SRCS) $(CFLAGS)
35 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Asahi GPU
2 | 
3 | Research for an open source graphics stack for Apple M1.
4 | 
5 | As development of a Mesa driver has begun, work has moved in-tree to [Mesa](https://gitlab.freedesktop.org/mesa/mesa/), and this repository is no longer in use.
6 | 
7 | ## wrap
8 | 
9 | Build the library with the included Makefile (`make wrap.dylib`), then inject it into any Metal application by setting the environment variable `DYLD_INSERT_LIBRARIES=/Users/bloom/gpu/wrap.dylib` (adjust the path to your checkout). See the example session at the end of this README.
10 | 
11 | ## Contributors
12 | 
13 | * Alyssa Rosenzweig (`bloom` on IRC), working on the command stream and ISA
14 | * marcan, working on the kernel side
15 | 
16 | ## Contributing
17 | 
18 | All contributors are expected to abide by our [Code of Conduct](https://asahilinux.org/code-of-conduct) and our [Copyright and Reverse Engineering Policy](https://asahilinux.org/copyright).
19 | 
20 | For more information, please see our [Contributing](https://asahilinux.org/contribute/) page.
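## Example: running wrap

A minimal end-to-end session sketching the workflow from the `wrap` section above. It assumes you run from the repository checkout, and `./my-metal-test` is a hypothetical stand-in for whatever Metal binary you want to trace (macOS only honours `DYLD_INSERT_LIBRARIES` for binaries that allow library injection, such as your own test programs):

```sh
# Generate agx_pack.h and build the interposer library
make wrap.dylib

# Launch a Metal workload with the wrapper injected, so it can observe the
# IOKit calls the userspace GPU driver makes on the application's behalf
DYLD_INSERT_LIBRARIES=$PWD/wrap.dylib ./my-metal-test
```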
21 | 22 | -------------------------------------------------------------------------------- /demo/demo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "tiling.h" 8 | #include "demo.h" 9 | #include "util.h" 10 | #include "../agx_pack.h" 11 | 12 | #define WIDTH 1311 13 | #define HEIGHT 717 14 | 15 | static uint64_t 16 | demo_zero(struct agx_allocator *allocator, size_t count) 17 | { 18 | struct agx_ptr ptr = agx_allocate(allocator, count); 19 | memset(ptr.map, 0, count); 20 | return ptr.gpu_va; 21 | } 22 | 23 | /* Upload vertex attribtues */ 24 | 25 | float t = 0.0; 26 | 27 | static uint64_t 28 | demo_attributes(struct agx_allocator *allocator) 29 | { 30 | float attributes1[] = { 31 | t++ , -250.0 , 0.0f, 0.0f, 32 | 1.0f , 0.0f , 0.0f, 1.0f, 33 | -250.0f, -250.0f, 0.0f, 0.0f, 34 | 0.0f , 1.0f , 0.0f, 1.0f, 35 | 0.0f , 250.0f , 0.0f, 0.0f, 36 | 0.0f , 0.0f , 1.0f, 1.0f, 37 | 250.0f , 250.0f , 0.0f, 0.0f, 38 | 0.0f , 0.0f , 1.0f, 1.0f, 39 | }; 40 | 41 | uint32_t attributes2[] = { WIDTH, HEIGHT }; 42 | 43 | uint64_t attribs[2] = { 44 | agx_upload(allocator, attributes1, sizeof(attributes1)), 45 | agx_upload(allocator, attributes2, sizeof(attributes2)) 46 | }; 47 | 48 | return agx_upload(allocator, attribs, sizeof(attribs)); 49 | } 50 | 51 | static uint64_t 52 | demo_viewport(struct agx_allocator *allocator) 53 | { 54 | struct agx_ptr t = agx_allocate(allocator, AGX_VIEWPORT_LENGTH); 55 | bl_pack(t.map, VIEWPORT, cfg) { 56 | cfg.translate_x = WIDTH / 2; 57 | cfg.scale_x = WIDTH / 2; 58 | cfg.translate_y = HEIGHT / 2; 59 | cfg.scale_y = -(HEIGHT / 2); 60 | cfg.near_z = 0.0f; 61 | cfg.far_z = 1.0f; 62 | }; 63 | 64 | return t.gpu_va; 65 | } 66 | 67 | /* FP16 */ 68 | static uint64_t 69 | demo_clear_color(struct agx_allocator *allocator) 70 | { 71 | __fp16 colour[] = { 72 | 0.99f, 0.75f, 0.53f, 1.0f 73 | }; 74 | 75 | return agx_upload(allocator, colour, sizeof(colour)); 76 | } 77 | 78 | static uint64_t 79 | demo_render_target(struct agx_allocator *allocator, struct agx_allocation *framebuffer) 80 | { 81 | struct agx_ptr t = agx_allocate(allocator, AGX_RENDER_TARGET_LENGTH); 82 | bl_pack(t.map, RENDER_TARGET, cfg) { 83 | cfg.unk_0 = 0xa22; 84 | cfg.swizzle_r = AGX_CHANNEL_B; 85 | cfg.swizzle_g = AGX_CHANNEL_G; 86 | cfg.swizzle_b = AGX_CHANNEL_R; 87 | cfg.swizzle_a = AGX_CHANNEL_A; 88 | cfg.width = WIDTH; 89 | cfg.height = WIDTH; 90 | cfg.buffer = framebuffer->gpu_va; 91 | cfg.unk_100 = 0x1000000; 92 | }; 93 | 94 | return t.gpu_va; 95 | } 96 | 97 | /* Fed into fragment writeout */ 98 | static uint64_t 99 | demo_unk0_5(struct agx_allocator *allocator) 100 | { 101 | uint32_t unk[] = { 0, ~0 }; 102 | return agx_upload(allocator, unk, sizeof(unk)); 103 | } 104 | 105 | static uint64_t 106 | make_ptr40(uint8_t tag0, uint8_t tag1, uint8_t tag2, uint64_t ptr) 107 | { 108 | assert(ptr < (1ull << 40)); 109 | 110 | return (tag0 << 0) | (tag1 << 8) | (tag2 << 16) | (ptr << 24); 111 | } 112 | 113 | static uint64_t 114 | demo_launch_fragment(struct agx_allocator *allocator, struct agx_allocation *fsbuf) 115 | { 116 | uint32_t unk[] = { 117 | 0x800000, 118 | 0x1002, // XXX: blob sets 0x10000 bit and adds an extra pointer to unknown data 119 | fsbuf->gpu_va + 0xC0, // XXX: dynalloc -- fragment shader 120 | 0x1440, 121 | 0x0, 122 | }; 123 | 124 | return agx_upload(allocator, unk, sizeof(unk)); 125 | } 126 | 127 | static uint64_t 128 | demo_unk8(struct agx_allocator *allocator) 129 | { 130 | 
uint32_t unk[] = { 131 | 0x100c0000, 0x100, 0x0, 0x0, 0x0, 132 | }; 133 | 134 | return agx_upload(allocator, unk, sizeof(unk)); 135 | } 136 | 137 | static uint64_t 138 | demo_unk9(struct agx_allocator *allocator) 139 | { 140 | uint8_t unk[] = { 141 | 0x00, 0x00, 0x02, 0x0c, 142 | 0x00, 0x00, 0x01, 0x00, 143 | 0x00, 0x00, 0x00, 0x00, 144 | 0x05, 0x00, 0x00, 0x00 145 | }; 146 | 147 | return agx_upload(allocator, unk, sizeof(unk)); 148 | } 149 | 150 | static uint64_t 151 | demo_unk10(struct agx_allocator *allocator) 152 | { 153 | uint32_t unk[] = { 154 | 0x10000b5, 155 | 0x40200, 156 | 0x7200f00, 157 | 0xe000000, 158 | 0x7200f00, 159 | 0x0e000000, 160 | 0, 161 | }; 162 | 163 | return agx_upload(allocator, unk, sizeof(unk)); 164 | } 165 | 166 | static uint64_t 167 | demo_unk11(struct agx_allocator *allocator) 168 | { 169 | uint32_t unk[] = { 170 | 0x200004a, 171 | 0x200, 172 | 0x7e00000, 173 | 0x7e00000, 174 | 0x1ffff 175 | }; 176 | 177 | return agx_upload(allocator, unk, sizeof(unk)); 178 | } 179 | 180 | static uint64_t 181 | demo_unk12(struct agx_allocator *allocator) 182 | { 183 | uint32_t unk[] = { 184 | 0x410000, 185 | 0x1e3ce508, 186 | 0xa0 187 | }; 188 | 189 | return agx_upload(allocator, unk, sizeof(unk)); 190 | } 191 | 192 | static uint64_t 193 | demo_unk13(struct agx_allocator *allocator) 194 | { 195 | uint32_t unk[] = { 196 | 0x200000, 0x480, 197 | }; 198 | 199 | return agx_upload(allocator, unk, sizeof(unk)); 200 | } 201 | 202 | static uint64_t 203 | demo_unk14(struct agx_allocator *allocator) 204 | { 205 | uint32_t unk[] = { 206 | 0x100, 0x0, 207 | }; 208 | 209 | return agx_upload(allocator, unk, sizeof(unk)); 210 | } 211 | 212 | /* TODO: there appears to be hidden support for line loops/triangle fans/quads 213 | * but still need to confirm on a more substantive workload, also I can't get 214 | * points/lines to work yet.. */ 215 | 216 | static uint64_t 217 | demo_unk2(struct agx_allocator *allocator, struct agx_allocation *vsbuf, struct agx_allocation *fsbuf) 218 | { 219 | struct agx_ptr ptr = agx_allocate(allocator, 0x800); 220 | uint8_t *out = ptr.map; 221 | uint64_t temp = 0; 222 | 223 | assert(vsbuf->gpu_va < (1ull << 32)); 224 | assert(fsbuf->gpu_va < (1ull << 32)); 225 | 226 | // Bind vertex pipeline and start queueing commands 227 | uint32_t bind_vertex[] = { 228 | 0x4000002e, 229 | 0x1002, 230 | vsbuf->gpu_va, 231 | 0x0505, 232 | }; 233 | 234 | memcpy(out, bind_vertex, sizeof(bind_vertex)); 235 | out += sizeof(bind_vertex); 236 | 237 | /* yes, really unaligned */ 238 | *(out++) = 0x0; 239 | 240 | /* Remark: the first argument to each ptr40 is the number of 32-bit 241 | * words pointed to. The data type is inferred at the source. In theory 242 | * this means we can reorder blocks. We can also duplicate blocks. 243 | * Exception: the first block which is tagged 0? Duplication means 244 | * this isn't by length, instead a special record at the end indicates 245 | * the end. 
*/ 246 | 247 | temp = make_ptr40(0x00, 0x00, 0x00, demo_zero(allocator, 16)); 248 | memcpy(out, &temp, 8); 249 | out += 8; 250 | 251 | temp = make_ptr40(0x05, 0x00, 0x00, demo_unk8(allocator)); 252 | memcpy(out, &temp, 8); 253 | out += 8; 254 | 255 | temp = make_ptr40(0x05, 0x00, 0x00, demo_launch_fragment(allocator, fsbuf)); 256 | memcpy(out, &temp, 8); 257 | out += 8; 258 | 259 | temp = make_ptr40(0x04, 0x00, 0x00, demo_unk9(allocator)); 260 | memcpy(out, &temp, 8); 261 | out += 8; 262 | 263 | temp = make_ptr40(0x07, 0x00, 0x00, demo_unk10(allocator)); 264 | memcpy(out, &temp, 8); 265 | out += 8; 266 | 267 | temp = make_ptr40(0x05, 0x00, 0x00, demo_unk11(allocator)); 268 | memcpy(out, &temp, 8); 269 | out += 8; 270 | 271 | temp = make_ptr40(0x0a, 0x00, 0x00, demo_viewport(allocator)); 272 | memcpy(out, &temp, 8); 273 | out += 8; 274 | 275 | temp = make_ptr40(0x03, 0x00, 0x00, demo_unk12(allocator)); 276 | memcpy(out, &temp, 8); 277 | out += 8; 278 | 279 | temp = make_ptr40(0x02, 0x00, 0x00, demo_unk13(allocator)); 280 | memcpy(out, &temp, 8); 281 | out += 8; 282 | 283 | temp = make_ptr40(0x02, 0x00, 0x00, demo_unk14(allocator)); 284 | memcpy(out, &temp, 8); 285 | out += 8; 286 | 287 | /* Must be after the rest */ 288 | 289 | bl_pack(out, DRAW, cfg) { 290 | cfg.primitive = AGX_PRIMITIVE_TRIANGLE_STRIP; 291 | cfg.vertex_start = 0; 292 | cfg.vertex_count = 4; 293 | cfg.instance_count = 1; 294 | }; 295 | 296 | out += AGX_DRAW_LENGTH; 297 | 298 | uint8_t stop[] = { 299 | 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, // Stop 300 | }; 301 | 302 | memcpy(out, stop, sizeof(stop)); 303 | out += sizeof(stop); 304 | 305 | return ptr.gpu_va; 306 | } 307 | 308 | /* Odd pattern */ 309 | static uint64_t 310 | demo_unk6(struct agx_allocator *allocator) 311 | { 312 | struct agx_ptr ptr = agx_allocate(allocator, 0x4000 * sizeof(uint64_t)); 313 | uint64_t *buf = ptr.map; 314 | memset(buf, 0, sizeof(*buf)); 315 | 316 | for (unsigned i = 1; i < 0x3ff; ++i) 317 | buf[i] = (i + 1); 318 | 319 | return ptr.gpu_va; 320 | } 321 | 322 | #define PTR40(a, b, c, ptr) make_ptr40(0x ## a, 0x ## b, 0x ##c, ptr) 323 | 324 | /* Set arguments to a vertex/compute shader (attribute table or 325 | * kernel arguments respectively). 
start/sz are word-sized */ 326 | 327 | static uint64_t 328 | demo_bind_arg_words(uint64_t gpu_va, unsigned start, unsigned sz) 329 | { 330 | assert(sz < 8); 331 | assert(gpu_va < (1ull << 40)); 332 | assert(start < 0x80); /* TODO: oliver */ 333 | 334 | return 0x1d | (start << 9) | (sz << 21) | (gpu_va << 24); 335 | } 336 | 337 | static void 338 | demo_vsbuf(uint64_t *buf, struct agx_allocator *allocator, struct agx_allocator *shader_pool) 339 | { 340 | uint32_t vs_offs = demo_vertex_shader(shader_pool); 341 | uint32_t aux0 = demo_vertex_pre(shader_pool); 342 | 343 | uint64_t gpu_va = demo_attributes(allocator); 344 | buf[0] = demo_bind_arg_words(gpu_va, 0, 2); 345 | buf[1] = demo_bind_arg_words(gpu_va + 8, 2, 2); 346 | buf[2] = 0x0000904d | (0x80dull << 32) | ((uint64_t) (vs_offs & 0xFFFF) << 48); 347 | buf[3] = (vs_offs >> 16) | (0x028d << 16) | (0x00380100ull << 32); 348 | buf[4] = (0xc080) | ((uint64_t) aux0 << 16); 349 | } 350 | 351 | static void 352 | demo_fsbuf(uint64_t *buf, struct agx_allocator *allocator, struct agx_allocation *framebuffer, struct agx_allocator *shader_pool) 353 | { 354 | uint32_t clear_offs = demo_clear(shader_pool); 355 | uint32_t aux3_offs = demo_frag_aux3(shader_pool); 356 | uint32_t fs_offs = demo_fragment_shader(shader_pool); 357 | 358 | memset(buf, 0, 128 * 8); 359 | 360 | /* Clear shader */ 361 | buf[ 8] = demo_bind_arg_words(demo_clear_color(allocator), 6, 2); 362 | buf[ 9] = 0x2010bd4d | (0x40dull << 32) | ((uint64_t) (clear_offs & 0xFFFF) << 48); 363 | buf[10] = ((uint64_t) clear_offs >> 16) | (0x18d << 16) | (0x00880100ull << 32); 364 | buf[11] = 0; 365 | buf[12] = 0; 366 | buf[13] = 0; 367 | buf[14] = 0; 368 | buf[15] = 0; 369 | 370 | /* AUX3 */ 371 | buf[16] = PTR40(dd, 00, 10, demo_render_target(allocator, framebuffer)); 372 | buf[17] = demo_bind_arg_words(demo_unk0_5(allocator), 2, 2); 373 | buf[18] = 0x2010bd4d | (0x000dull << 32) | ((uint64_t) (aux3_offs & 0xFFFF) << 48); 374 | buf[19] = ((uint64_t) aux3_offs >> 16) | (0x18d << 16) | (0x00880100ull << 32); 375 | buf[20] = 0; 376 | buf[21] = 0; 377 | buf[22] = 0; 378 | buf[23] = 0; 379 | 380 | /* Fragment shader */ 381 | buf[24] = demo_bind_arg_words(demo_zero(allocator, 8), 2, 2); 382 | buf[25] = 0x2010bd4d | (0x50dull << 32) | ((uint64_t) (fs_offs & 0xFFFF) << 48); 383 | buf[26] = (fs_offs >> 16) | (0x208d << 16) | (0xf3580100ull << 32); 384 | buf[27] = 0x00880002 | (0xc080ull << 32); 385 | buf[28] = 0; 386 | buf[29] = 0; 387 | buf[30] = 0; 388 | buf[31] = 0; 389 | } 390 | 391 | struct cmdbuf { 392 | uint32_t *map; 393 | unsigned offset; 394 | }; 395 | 396 | static void 397 | EMIT32(struct cmdbuf *cmdbuf, uint32_t val) 398 | { 399 | cmdbuf->map[cmdbuf->offset++] = val; 400 | } 401 | 402 | static void 403 | EMIT64(struct cmdbuf *cmdbuf, uint64_t val) 404 | { 405 | EMIT32(cmdbuf, (val & 0xFFFFFFFF)); 406 | EMIT32(cmdbuf, (val >> 32)); 407 | } 408 | 409 | static void 410 | EMIT_WORDS(struct cmdbuf *cmdbuf, uint8_t *buf, size_t count) 411 | { 412 | assert((count & 0x3) == 0); 413 | 414 | for (unsigned i = 0; i < count; i += 4) { 415 | uint32_t u32 = 416 | (buf[i + 3] << 24) | 417 | (buf[i + 2] << 16) | 418 | (buf[i + 1] << 8) | 419 | (buf[i + 0] << 0); 420 | 421 | EMIT32(cmdbuf, u32); 422 | } 423 | } 424 | 425 | static void 426 | EMIT_ZERO_WORDS(struct cmdbuf *cmdbuf, size_t words) 427 | { 428 | memset(cmdbuf->map + cmdbuf->offset, 0, words * 4); 429 | cmdbuf->offset += words; 430 | } 431 | 432 | static void 433 | demo_cmdbuf(uint64_t *buf, struct agx_allocator *allocator, 434 | struct 
agx_allocation *vsbuf, 435 | struct agx_allocation *fsbuf, 436 | struct agx_allocation *framebuffer, 437 | struct agx_allocator *shaders) 438 | { 439 | demo_vsbuf((uint64_t *) vsbuf->map, allocator, shaders); 440 | demo_fsbuf((uint64_t *) fsbuf->map, allocator, framebuffer, shaders); 441 | 442 | struct cmdbuf _cmdbuf = { 443 | .map = (uint32_t *) buf, 444 | .offset = 0 445 | }; 446 | 447 | struct cmdbuf *cmdbuf = &_cmdbuf; 448 | 449 | /* Vertex stuff */ 450 | EMIT32(cmdbuf, 0x10000); 451 | EMIT32(cmdbuf, 0x780); // Compute: 0x188 452 | EMIT32(cmdbuf, 0x7); 453 | EMIT_ZERO_WORDS(cmdbuf, 5); 454 | EMIT32(cmdbuf, 0x758); // Compute: 0x180 455 | EMIT32(cmdbuf, 0x18); // Compute: 0x0 456 | EMIT32(cmdbuf, 0x758); // Compute: 0x0 457 | EMIT32(cmdbuf, 0x728); // Compute: 0x150 458 | 459 | EMIT32(cmdbuf, 0x30); /* 0x30 */ 460 | EMIT32(cmdbuf, 0x01); /* 0x34. Compute: 0x03 */ 461 | 462 | /* Pointer to data about the vertex and fragment shaders */ 463 | EMIT64(cmdbuf, demo_unk2(allocator, vsbuf, fsbuf)); 464 | 465 | EMIT_ZERO_WORDS(cmdbuf, 20); 466 | 467 | EMIT64(cmdbuf, 0); /* 0x90, compute blob - some zero */ 468 | EMIT64(cmdbuf, 0); // blob - 0x540 bytes of zero, compute blob - null 469 | EMIT64(cmdbuf, 0); // blob - 0x280 bytes of zero 470 | EMIT64(cmdbuf, 0); // a8, compute blob - zero pointer 471 | 472 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 473 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 474 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 475 | 476 | // while zero for vertex, used to include the odd unk6 pattern for compute 477 | EMIT64(cmdbuf, 0); // compute blob - 0x1 478 | EMIT64(cmdbuf, 0); // d0, ompute blob - pointer to odd pattern, compare how it's done later for frag 479 | 480 | // compute 8 bytes of zero, then reconverge at * 481 | 482 | EMIT32(cmdbuf, 0x6b0003); // d8 483 | EMIT32(cmdbuf, 0x3a0012); // dc 484 | 485 | /* Possibly the funny pattern but not actually pointed to for vertex */ 486 | EMIT64(cmdbuf, 1); // e0 487 | EMIT64(cmdbuf, 0); // e8 488 | 489 | EMIT_ZERO_WORDS(cmdbuf, 44); 490 | 491 | EMIT64(cmdbuf, 0); // blob - 0x20 bytes of zero 492 | EMIT64(cmdbuf, 1); // 1a8 493 | 494 | // * compute reconverges here at 0xe0 in my trace 495 | EMIT32(cmdbuf, 0x1c); // 1b0 496 | 497 | // compute 0xe4: [encoder ID -- from selector6 + 2 with blob], 0, 0, 0xffffffff, done for a while 498 | // compute 0x120: 0x9 | 0x128: 0x40 499 | 500 | EMIT32(cmdbuf, 0); // 1b0 - compute: 0x10000 501 | EMIT64(cmdbuf, 0x0); // 1b8 -- compute 0x10000 502 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! 503 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! compute 0 504 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! 
compute 0 505 | EMIT32(cmdbuf, 0); 506 | 507 | EMIT_ZERO_WORDS(cmdbuf, 40); 508 | 509 | EMIT32(cmdbuf, 0xffff8002); // 0x270 510 | EMIT32(cmdbuf, 0); 511 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x44);// clear -- XXX: dynalloc 512 | EMIT32(cmdbuf, 0); 513 | EMIT32(cmdbuf, 0); 514 | EMIT32(cmdbuf, 0); 515 | EMIT32(cmdbuf, 0x12); 516 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x84); // AUX3 -- 0x290 -- XXX: dynalloc 517 | EMIT64(cmdbuf, demo_zero(allocator, 0x1000)); 518 | EMIT64(cmdbuf, demo_zero(allocator, 0x1000)); 519 | EMIT64(cmdbuf, 0); 520 | 521 | EMIT_ZERO_WORDS(cmdbuf, 48); 522 | 523 | EMIT64(cmdbuf, 4); 524 | EMIT64(cmdbuf, 0xc000); 525 | 526 | /* Note: making these smallers scissors polygons but not clear colour */ 527 | EMIT32(cmdbuf, WIDTH); 528 | EMIT32(cmdbuf, HEIGHT); 529 | EMIT64(cmdbuf, demo_zero(allocator, 0x8000)); 530 | 531 | EMIT_ZERO_WORDS(cmdbuf, 48); 532 | 533 | EMIT64(cmdbuf, 0); // 0x450 534 | EMIT32(cmdbuf, fui(1.0)); // fui(1.0f) 535 | EMIT32(cmdbuf, 0x300); 536 | EMIT64(cmdbuf, 0); 537 | EMIT64(cmdbuf, 0x1000000); 538 | EMIT32(cmdbuf, 0xffffffff); 539 | EMIT32(cmdbuf, 0xffffffff); 540 | EMIT32(cmdbuf, 0xffffffff); 541 | EMIT32(cmdbuf, 0); 542 | 543 | EMIT_ZERO_WORDS(cmdbuf, 8); 544 | 545 | EMIT64(cmdbuf, 0); // 0x4a0 546 | EMIT32(cmdbuf, 0xffff8212); 547 | EMIT32(cmdbuf, 0); 548 | 549 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x4);// XXX: dynalloc -- not referenced 550 | EMIT64(cmdbuf, 0); 551 | 552 | EMIT32(cmdbuf, 0); 553 | EMIT32(cmdbuf, 0x12); 554 | EMIT32(cmdbuf, fsbuf->gpu_va + 0x84); // AUX3 555 | EMIT32(cmdbuf, 0); 556 | 557 | EMIT_ZERO_WORDS(cmdbuf, 44); 558 | 559 | EMIT64(cmdbuf, 1); // 0x580 560 | EMIT64(cmdbuf, 0); 561 | EMIT_ZERO_WORDS(cmdbuf, 4); 562 | 563 | /* Compare compute case ,which has a bit of reordering, but we can swap */ 564 | EMIT32(cmdbuf, 0x1c); // 0x5a0 565 | EMIT32(cmdbuf, 0); 566 | EMIT64(cmdbuf, 0xCAFECAFE); // encoder ID XXX: don't fix 567 | EMIT32(cmdbuf, 0); 568 | EMIT32(cmdbuf, 0xffffffff); 569 | 570 | // remark: opposite order for compute, but we can swap the orders 571 | EMIT32(cmdbuf, 1); 572 | EMIT32(cmdbuf, 0); 573 | EMIT64(cmdbuf, 0); 574 | EMIT64(cmdbuf, 0 /* demo_unk6(allocator) */); 575 | 576 | /* note: width/height act like scissor, but changing the 0s doesn't 577 | * seem to affect (maybe scissor enable bit missing), _and this affects 578 | * the clear_ .. 
bbox maybe */ 579 | EMIT32(cmdbuf, 0); 580 | EMIT32(cmdbuf, 0); 581 | EMIT32(cmdbuf, WIDTH); // can increase up to 16384 582 | EMIT32(cmdbuf, HEIGHT); 583 | 584 | EMIT32(cmdbuf, 1); 585 | EMIT32(cmdbuf, 8); 586 | EMIT32(cmdbuf, 8); 587 | EMIT32(cmdbuf, 0); 588 | 589 | EMIT_ZERO_WORDS(cmdbuf, 12); 590 | 591 | EMIT32(cmdbuf, 0); // 0x620 592 | EMIT32(cmdbuf, 8); 593 | EMIT32(cmdbuf, 0x20); 594 | EMIT32(cmdbuf, 0x20); 595 | EMIT32(cmdbuf, 0x1); 596 | EMIT32(cmdbuf, 0); 597 | EMIT64(cmdbuf, 0); 598 | 599 | EMIT_ZERO_WORDS(cmdbuf, 72); 600 | 601 | EMIT32(cmdbuf, 0); // 0x760 602 | EMIT32(cmdbuf, 0x1); 603 | EMIT64(cmdbuf, 0x100 | (framebuffer->gpu_va << 16)); 604 | 605 | EMIT32(cmdbuf, 0xa0000); 606 | EMIT32(cmdbuf, 0x4c000000); 607 | EMIT32(cmdbuf, 0x0c001d); 608 | 609 | EMIT32(cmdbuf, 0x640000); 610 | } 611 | 612 | static struct agx_map_entry 613 | demo_map_entry(struct agx_allocation *alloc) 614 | { 615 | return (struct agx_map_entry) { 616 | .unkAAA = 0x20, 617 | .unkBBB = 0x1, 618 | .unka = 0x1ffff, 619 | .index = alloc->index, 620 | }; 621 | } 622 | 623 | static struct agx_map_header 624 | demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned count) 625 | { 626 | return (struct agx_map_header) { 627 | .cmdbuf_id = cmdbuf_id, 628 | .unk2 = 0x1, 629 | .unk3 = 0x528, // 1320 630 | .encoder_id = encoder_id, 631 | .unk6 = 0x0, 632 | .unk7 = 0x780, // 1920 633 | 634 | /* +1 for the sentinel ending */ 635 | .nr_entries_1 = count + 1, 636 | .nr_entries_2 = count + 1, 637 | .unka = 0x0b, 638 | }; 639 | } 640 | 641 | static void 642 | demo_mem_map(void *map, struct agx_allocation *allocs, unsigned count, 643 | uint64_t cmdbuf_id, uint64_t encoder_id) 644 | { 645 | struct agx_map_header *header = map; 646 | struct agx_map_entry *entries = (struct agx_map_entry *) (map + 0x40); 647 | 648 | /* Header precedes the entry */ 649 | *header = demo_map_header(cmdbuf_id, encoder_id, count); 650 | 651 | /* Add an entry for each BO mapped */ 652 | for (unsigned i = 0; i < count; ++i) { 653 | if (allocs[i].type != AGX_ALLOC_REGULAR) 654 | continue; 655 | 656 | entries[i] = (struct agx_map_entry) { 657 | .unkAAA = 0x20, 658 | .unkBBB = 0x1, 659 | .unka = 0x1ffff, 660 | .index = allocs[i].index 661 | }; 662 | } 663 | 664 | /* Final entry is a sentinel */ 665 | entries[count] = (struct agx_map_entry) { 666 | .unkAAA = 0x40, 667 | .unkBBB = 0x1, 668 | .unka = 0x1ffff, 669 | .index = 0 670 | }; 671 | } 672 | 673 | void demo(mach_port_t connection, bool offscreen) 674 | { 675 | struct agx_command_queue command_queue = agx_create_command_queue(connection); 676 | 677 | // XXX: why do BO ids below 6 mess things up..? 
678 | for (unsigned i = 0; i < 6; ++i) { 679 | struct agx_allocation dummy = agx_alloc_mem(connection, 4096, AGX_MEMORY_TYPE_FRAMEBUFFER, false); 680 | } 681 | 682 | struct agx_allocation shader = agx_alloc_mem(connection, 0x10000, AGX_MEMORY_TYPE_SHADER, false); 683 | 684 | struct agx_allocator shader_pool = { .backing = shader, }; 685 | 686 | struct agx_allocation bo = agx_alloc_mem(connection, 1920*1080*4*2, AGX_MEMORY_TYPE_FRAMEBUFFER, false); 687 | struct agx_allocator allocator = { .backing = bo }; 688 | 689 | struct agx_allocation vsbuf = agx_alloc_mem(connection, 0x8000, AGX_MEMORY_TYPE_CMDBUF_32, false); 690 | struct agx_allocation fsbuf = agx_alloc_mem(connection, 0x8000, AGX_MEMORY_TYPE_CMDBUF_32, false); 691 | struct agx_allocation framebuffer = agx_alloc_mem(connection, 692 | ALIGN_POT(WIDTH, 64) * ALIGN_POT(HEIGHT, 64) * 4, 693 | AGX_MEMORY_TYPE_FRAMEBUFFER, false); 694 | 695 | struct agx_allocation cmdbuf = agx_alloc_cmdbuf(connection, 0x4000, true); 696 | 697 | struct agx_allocation memmap = agx_alloc_cmdbuf(connection, 0x4000, false); 698 | 699 | uint64_t global_ids = agx_cmdbuf_global_ids(connection); 700 | 701 | struct agx_allocation allocs[] = { 702 | shader, 703 | bo, 704 | vsbuf, 705 | fsbuf, 706 | framebuffer 707 | }; 708 | 709 | demo_mem_map(memmap.map, allocs, sizeof(allocs) / sizeof(allocs[0]), 710 | 0xDEADBEEF, 0xCAFECAFE); // (unk6 + 1, unk6 + 2) but it doesn't really matter 711 | 712 | uint32_t *linear = malloc(WIDTH * HEIGHT * 4); 713 | 714 | if (!offscreen) 715 | slowfb_init((uint8_t *) linear, WIDTH, HEIGHT); 716 | 717 | for (;;) { 718 | demo_cmdbuf(cmdbuf.map, &allocator, &vsbuf, &fsbuf, &framebuffer, &shader_pool); 719 | agx_submit_cmdbuf(connection, &cmdbuf, &memmap, command_queue.id); 720 | 721 | /* Block until it's done */ 722 | IOReturn ret = IODataQueueWaitForAvailableData(command_queue.notif.queue, command_queue.notif.port); 723 | while (IODataQueueDataAvailable(command_queue.notif.queue)) 724 | ret = IODataQueueDequeue(command_queue.notif.queue, NULL, 0); 725 | 726 | /* Dump the framebuffer */ 727 | ash_detile(framebuffer.map, linear, 728 | WIDTH, 32, WIDTH, 729 | 0, 0, WIDTH, HEIGHT); 730 | 731 | shader_pool.offset = 0; 732 | allocator.offset = 0; 733 | 734 | if (offscreen) { 735 | FILE *fp = fopen("fb.bin", "wb"); 736 | fwrite(linear, 1, WIDTH * HEIGHT * 4, fp); 737 | fclose(fp); 738 | 739 | break; 740 | } else { 741 | slowfb_update(WIDTH, HEIGHT); 742 | } 743 | } 744 | } 745 | -------------------------------------------------------------------------------- /demo/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef __DEMO_H 2 | #define __DEMO_H 3 | 4 | #include 5 | #include "io.h" 6 | #include "cmdstream.h" 7 | 8 | /* Dumb watermark allocator for demo purposes */ 9 | 10 | struct agx_allocator { 11 | struct agx_allocation backing; 12 | unsigned offset; 13 | }; 14 | 15 | struct agx_ptr { 16 | void *map; 17 | uint64_t gpu_va; 18 | }; 19 | 20 | static struct agx_ptr 21 | agx_allocate(struct agx_allocator *allocator, size_t size) 22 | { 23 | allocator->offset = (allocator->offset & ~127) + 128; 24 | assert(size < (allocator->backing.size - allocator->offset)); 25 | 26 | struct agx_ptr ptr = { 27 | .map = allocator->backing.map + allocator->offset, 28 | .gpu_va = allocator->backing.gpu_va + allocator->offset, 29 | }; 30 | 31 | allocator->offset += size; 32 | return ptr; 33 | } 34 | 35 | static uint64_t 36 | agx_upload(struct agx_allocator *allocator, void *data, size_t size) 37 | { 38 | struct agx_ptr ptr 
= agx_allocate(allocator, size); 39 | memcpy(ptr.map, data, size); 40 | return ptr.gpu_va; 41 | } 42 | 43 | void demo(mach_port_t connection, bool offscreen); 44 | uint32_t demo_vertex_shader(struct agx_allocator *allocator); 45 | uint32_t demo_fragment_shader(struct agx_allocator *allocator); 46 | uint32_t demo_vertex_pre(struct agx_allocator *allocator); 47 | uint32_t demo_clear(struct agx_allocator *allocator); 48 | uint32_t demo_frag_aux3(struct agx_allocator *allocator); 49 | 50 | void slowfb_init(uint8_t *framebuffer, int width, int height); 51 | void slowfb_update(int width, int height); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /demo/iokit.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "selectors.h" 29 | #include "demo.h" 30 | 31 | /* Sample code for opening a connection to the AGX kernel module via IOKit */ 32 | 33 | #define AGX_SERVICE_TYPE 0x100005 34 | 35 | int main(int argc, char **argv) 36 | { 37 | (void) argc; 38 | (void) argv; 39 | 40 | kern_return_t ret; 41 | 42 | /* TODO: Support other models */ 43 | CFDictionaryRef matching = IOServiceNameMatching("AGXAcceleratorG13G_B0"); 44 | 45 | io_service_t service = 46 | IOServiceGetMatchingService(kIOMasterPortDefault, matching); 47 | 48 | if (!service) { 49 | fprintf(stderr, "G13 (B0) accelerator not found\n"); 50 | return 1; 51 | } 52 | 53 | io_connect_t connection = 0; 54 | ret = IOServiceOpen(service, mach_task_self(), AGX_SERVICE_TYPE, &connection); 55 | 56 | if (ret) { 57 | fprintf(stderr, "Error from IOServiceOpen: %u\n", ret); 58 | return 1; 59 | } 60 | 61 | const char *api = "Equestria"; 62 | char in[16] = { 0 }; 63 | assert(strlen(api) < sizeof(in)); 64 | memcpy(in, api, strlen(api)); 65 | 66 | ret = IOConnectCallStructMethod(connection, AGX_SELECTOR_SET_API, in, 67 | sizeof(in), NULL, NULL); 68 | 69 | /* Oddly, the return codes are flipped for SET_API */ 70 | if (ret != 1) { 71 | fprintf(stderr, "Error setting API: %u\n", ret); 72 | return 1; 73 | } 74 | 75 | char version[456] = { 0 }; 76 | size_t version_len = sizeof(version); 77 | 78 | ret = IOConnectCallStructMethod(connection, AGX_SELECTOR_GET_VERSION, NULL, 0, 79 | version, &version_len); 80 | 81 | if (ret) { 82 | fprintf(stderr, "Error getting version: %u\n", ret); 83 | /* TODO: why? */ 84 | } 85 | 86 | assert(version_len == sizeof(version)); 87 | printf("Kext build date: %s\n", version + (25 * 8)); 88 | 89 | demo(connection, getenv("DISPLAY") == NULL); 90 | 91 | ret = IOServiceClose(connection); 92 | 93 | if (ret) { 94 | fprintf(stderr, "Error from IOServiceClose: %u\n", ret); 95 | return 1; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /demo/shaders.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "demo.h" 3 | 4 | void agx_disassemble(void *_code, size_t maxlen, FILE *fp); 5 | 6 | #define AGX_STOP \ 7 | 0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \ 8 | 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \ 9 | 10 | #define AGX_BLEND \ 11 | 0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03 12 | 13 | /* Minimal vertex shader, where u4/u5 are preloaded by the paired compute 14 | * shader's uniform_store 15 | 16 | 0: 9e034a0202800100 imadd $r0_r1, r5, 32, u0 17 | 8: 0e05c22218000000 iadd r1, r1.discard, u1 18 | 10: 0501000500c43200 device_load 1, 0, 0, 4, 0, i32, pair, r0_r1, r0_r1, 0, signed, lsl 1 19 | 18: 3800 wait 0 20 | 1a: 1a89c0821800 fmul r2, r0.discard, u4 21 | 20: 1a81c2a21800 fmul r0, r1.discard, u5 22 | 26: 621100000000 mov r4, 0 23 | 2c: 62050000803f mov r1, 1065353216 24 | 32: 11108280 TODO.st_var 1, r4, 2 25 | 36: 11048380 TODO.st_var 1, r1, 3 26 | 3a: 11088080 TODO.st_var 1, r2, 0 27 | 3e: 91008180 TODO.st_var_final 1, r0, 1 28 | */ 29 | 30 | uint8_t vertex_shader[] = { 31 | 0x9e, 0x03, 0x4a, 0x02, 0x02, 0x80, 0x01, 0x00, 32 | 0x0e, 0x05, 0xc2, 0x22, 0x18, 0x00, 0x00, 0x00, 33 | 0x05, 0x01, 0x00, 0x05, 0x00, 0xc4, 0x32, 0x00, 34 | 0x38, 0x00, 35 | 0x1a, 0x89, 0xc0, 0x82, 0x18, 0x00, 36 | 0x1a, 0x81, 0xc2, 0xa2, 0x18, 0x00, 37 | 0x62, 0x11, 0x00, 0x00, 0x00, 0x00, 38 | 0x62, 0x05, 0x00, 0x00, 0x80, 0x3f, 39 | 0x11, 0x10, 0x82, 0x80, 40 | 
0x11, 0x04, 0x83, 0x80, 41 | 0x11, 0x08, 0x80, 0x80, 42 | 0x91, 0x00, 0x81, 0x80, 43 | AGX_STOP 44 | }; 45 | 46 | /* Custom solid colour frag shader 47 | 0: 6200873a mov r0l, 14983 48 | 4: 62020531 mov r0h, 12549 49 | 8: 62040531 mov r1l, 12549 50 | c: 6206003c mov r1h, 15360 51 | 10: 4800c200 TODO.writeout 512, 3 52 | 14: 480c0000 TODO.writeout 12, 0 53 | 18: 09000004f0fc8003 TODO.blend 54 | */ 55 | 56 | uint8_t fragment_shader[] = { 57 | 0x62, 0x00, 0x87, 0x3A, 58 | 0x62, 0x02, 0x05, 0x31, 59 | 0x62, 0x04, 0x05, 0x31, 60 | 0x62, 0x06, 0x00, 0x3c, 61 | 0x48, 0x00, 0xc2, 0x00, 62 | 0x48, 0x0c, 0x00, 0x00, 63 | AGX_BLEND, 64 | AGX_STOP 65 | }; 66 | 67 | 68 | /* 69 | Compute shader implementing (float2) (1.0 / (dims * 0.5)), where dimensions 70 | is the ivec2 of width, height of the framebuffer (the address of which is 71 | preloadeded as u2_u3), since this shows up in our minimal vertex shader... 72 | I've seen Mali do this optimization before, but never so aggressively. 73 | 74 | 0: 0501040d00c43200 device_load 1, 0, 0, 4, 0, i32, pair, r0_r1, u2_u3, 0, signed, lsl 1 75 | 8: 3800 wait 0 76 | a: be890a042c00 convert u32_to_f, $r2, r0.discard, 1 77 | 10: be810a242c00 convert u32_to_f, $r0, r1.discard, 1 78 | 16: 9a85c4020200 fmul $r1, r2.discard, 0.5 79 | 1c: 0a05c282 rcp r1, r1.discard 80 | 20: 9a81c0020200 fmul $r0, r0.discard, 0.5 81 | 26: 0a01c082 rcp r0, r0.discard 82 | 2a: c508803d00803000 uniform_store 2, i16, pair, 0, r1l_r1h, 8 83 | 32: c500a03d00803000 uniform_store 2, i16, pair, 0, r0l_r0h, 10 84 | */ 85 | 86 | uint8_t vertex_pre[] = { 87 | 0x05, 0x01, 0x04, 0x0d, 0x00, 0xc4, 0x32, 0x00, 88 | 0x38, 0x00, 89 | 0xbe, 0x89, 0x0a, 0x04, 0x2c, 0x00, 90 | 0xbe, 0x81, 0x0a, 0x24, 0x2c, 0x00, 91 | 0x9a, 0x85, 0xc4, 0x02, 0x02, 0x00, 92 | 0x0a, 0x05, 0xc2, 0x82, 0x9a, 0x81, 0xc0, 0x02, 0x02, 0x00, 0x0a, 0x01, 93 | 0xc0, 0x82, 0xc5, 0x08, 0x80, 0x3d, 0x00, 0x80, 0x30, 0x00, 0xc5, 0x00, 94 | 0xa0, 0x3d, 0x00, 0x80, 0x30, 0x00, 95 | AGX_STOP 96 | }; 97 | 98 | /* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour 99 | * by the paired compute shader AUX2 100 | 101 | 0: 7e018c098040 bitop_mov r0, u6 102 | 6: 7e058e098000 bitop_mov r1, u7 103 | c: 09000004f0fc8003 TODO.blend 104 | */ 105 | 106 | uint8_t clear[] = { 107 | 0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40, 108 | 0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00, 109 | AGX_BLEND, 110 | AGX_STOP 111 | }; 112 | 113 | uint8_t frag_aux3[] = { 114 | 0x7e, 0x00, 0x04, 0x09, 0x80, 0x00, 115 | 0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00, 116 | AGX_STOP 117 | }; 118 | 119 | uint32_t 120 | demo_upload_shader(const char *label, struct agx_allocator *allocator, uint8_t *code, size_t sz) 121 | { 122 | #if 0 123 | printf("%s:\n", label); 124 | agx_disassemble(code, sz, stdout); 125 | printf("\n"); 126 | #endif 127 | (void) label; 128 | 129 | return agx_upload(allocator, code, sz); 130 | } 131 | 132 | uint32_t 133 | demo_vertex_shader(struct agx_allocator *allocator) 134 | { 135 | return demo_upload_shader("vs", allocator, vertex_shader, sizeof(vertex_shader)); 136 | } 137 | 138 | uint32_t 139 | demo_fragment_shader(struct agx_allocator *allocator) 140 | { 141 | return demo_upload_shader("fs", allocator, fragment_shader, sizeof(fragment_shader)); 142 | } 143 | 144 | uint32_t 145 | demo_vertex_pre(struct agx_allocator *allocator) 146 | { 147 | return demo_upload_shader("vertex_pre", allocator, vertex_pre, sizeof(vertex_pre)); 148 | } 149 | 150 | uint32_t 151 | demo_clear(struct agx_allocator *allocator) 152 | { 153 | return 
demo_upload_shader("clear", allocator, clear, sizeof(clear)); 154 | } 155 | 156 | uint32_t 157 | demo_frag_aux3(struct agx_allocator *allocator) 158 | { 159 | return demo_upload_shader("frag_aux3", allocator, frag_aux3, sizeof(frag_aux3)); 160 | } 161 | -------------------------------------------------------------------------------- /demo/slowfb.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | Display *d; 30 | Window w; 31 | XImage *image; 32 | GC gc; 33 | 34 | void slowfb_init(uint8_t *framebuffer, int width, int height) { 35 | d = XOpenDisplay(NULL); 36 | assert(d != NULL); 37 | int black = BlackPixel(d, DefaultScreen(d)); 38 | w = XCreateSimpleWindow(d, DefaultRootWindow(d), 0, 0, width, height, 0, black, black); 39 | XSelectInput(d, w, StructureNotifyMask); 40 | XMapWindow(d, w); 41 | gc = XCreateGC(d, w, 0, NULL); 42 | for (;;) { 43 | XEvent e; 44 | XNextEvent(d, &e); 45 | if (e.type == MapNotify) break; 46 | } 47 | image = XCreateImage(d, DefaultVisual(d, 0), 24, ZPixmap, 0, (void *) framebuffer, width, height, 32, 0); 48 | } 49 | 50 | void slowfb_update(int width, int height) { 51 | XPutImage(d, w, gc, image, 0, 0, 0, 0, width, height); 52 | } 53 | -------------------------------------------------------------------------------- /disasm-driver.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | extern void 7 | agx_disassemble(void *_code, size_t maxlen, FILE *fp); 8 | 9 | int main(int argc, char **argv) 10 | { 11 | --argc; 12 | ++argv; 13 | if (argc != 2) 14 | errx(1, "usage: disasm-bin FILE hex-offset"); 15 | 16 | FILE *f = fopen(argv[0], "rb"); 17 | if (!f) 18 | err(2, "input file"); 19 | 20 | off_t offset = strtol(argv[1], NULL, 16); 21 | fseek(f, offset, SEEK_SET); 22 | 23 | char buf[4096]; 24 | int n = fread(buf, 1, sizeof(buf), f); 25 | fclose(f); 26 | 27 | agx_disassemble(buf, n, stdout); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /disasm/disasm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby 
granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /* Opcode table? Speculative since I don't know the opcode size yet, but this 32 | * should help bootstrap... These opcodes correspond to the bottom 7-bits of 33 | * the first byte, with the 8th bit from the 8th bit of the *second* byte. This 34 | * is still a guess. */ 35 | 36 | enum agx_opcodes { 37 | OPC_FFMA_CMPCT_16 = 0x36, 38 | OPC_FFMA_CMPCT_SAT_16 = 0x76, 39 | OPC_FMUL_16 = 0x96, 40 | OPC_FADD_16 = 0xA6, 41 | OPC_FFMA_16 = 0xB6, 42 | OPC_FMUL_SAT_16 = 0xD6, 43 | OPC_FADD_SAT_16 = 0xE6, 44 | OPC_FFMA_SAT_16 = 0xF6, 45 | 46 | OPC_FROUND_32 = 0x0A, 47 | OPC_FFMA_CMPCT_32 = 0x3A, 48 | OPC_FFMA_CMPCT_SAT_32 = 0x7A, 49 | OPC_FMUL_32 = 0x9A, 50 | OPC_FADD_32 = 0xAA, 51 | OPC_FFMA_32 = 0xBA, 52 | OPC_FMUL_SAT_32 = 0xDA, 53 | OPC_FADD_SAT_32 = 0xEA, 54 | OPC_FFMA_SAT_32 = 0xFA, 55 | 56 | OPC_IADD = 0x0E, 57 | OPC_IMAD = 0x1E, 58 | OPC_ISHL = 0x2E, 59 | /* 0x3e seen with reverse_bits, and used in clz */ 60 | OPC_IADDSAT = 0x4E, 61 | OPC_ISHR = 0xAE, 62 | OPC_I2F = 0xBE, 63 | 64 | OPC_LOAD = 0x05, // todo 65 | OPC_STORE = 0x45, // todo 66 | OPC_FCSEL = 0x02, 67 | OPC_ICSEL = 0x12, 68 | OPC_MOVI = 0x62, 69 | OPC_LD_COMPUTE = 0x72, 70 | OPC_BITOP = 0x7E, 71 | OPC_WAIT = 0x38, // seen after loads? 
72 | OPC_STOP = 0x08, 73 | 74 | OPC_LD_VAR_NO_PERSPECTIVE = 0xA1, 75 | OPC_LD_VAR = 0xE1, // perspective 76 | OPC_ST_VAR = 0x11, 77 | OPC_UNKB1 = 0xB1, // seen in aux frag shader 78 | OPC_UNK48 = 0x48, // seen before blending 79 | OPC_BLEND = 0x09, 80 | 81 | // branching instructions, not understood 82 | OPC_UNKD2 = 0xD2, 83 | OPC_UNK42 = 0x42, 84 | OPC_UNK52 = 0x52, 85 | 86 | // not sure what this does, but appears to be 4 bytes 87 | OPC_UNK80 = 0x80, 88 | }; 89 | 90 | #define I 0 91 | #define C 1 92 | 93 | static struct { 94 | const char *name; 95 | unsigned size; 96 | bool complete; 97 | } agx_opcode_table[256] = { 98 | [OPC_FADD_16] = { "fadd.16", 6, I }, 99 | [OPC_FADD_SAT_16] = { "fadd.sat.16", 6, I }, 100 | [OPC_FMUL_16] = { "fmul.16", 6, I }, 101 | [OPC_FMUL_SAT_16] = { "fmul.sat.16", 6, I }, 102 | [OPC_FFMA_CMPCT_16] = { "ffma.cmpct.16", 6, I }, 103 | [OPC_FFMA_CMPCT_SAT_16] = { "ffma.cmpct.sat.16", 6, I }, 104 | [OPC_FFMA_16] = { "ffma.16", 8, I }, 105 | [OPC_FFMA_SAT_16] = { "ffma.sat.16", 8, I }, 106 | 107 | [OPC_FROUND_32] = { "fround.32", 6, I }, 108 | [OPC_FADD_32] = { "fadd.32", 6, C }, 109 | [OPC_FADD_SAT_32] = { "fadd.sat.32", 6, C }, 110 | [OPC_FMUL_32] = { "fmul.32", 6, C }, 111 | [OPC_FMUL_SAT_32] = { "fmul.sat.32", 6, C }, 112 | [OPC_FFMA_32] = { "ffma.32", 8, I }, 113 | [OPC_FFMA_SAT_32] = { "ffma.sat.32", 8, I }, 114 | [OPC_FFMA_CMPCT_32] = { "ffma.cmpct.32", 6, I }, 115 | [OPC_FFMA_CMPCT_SAT_32] = { "ffma.cmpct.sat.32", 6, I }, 116 | 117 | [OPC_I2F] = { "i2f", 6, I }, 118 | [OPC_IADD] = { "iadd", 8, I }, 119 | [OPC_IMAD] = { "imad", 8, I }, 120 | [OPC_ISHL] = { "ishl", 8, I }, 121 | [OPC_IADDSAT] = { "iaddsat", 8, I }, 122 | [OPC_ISHR] = { "ishr", 8, I }, 123 | 124 | [OPC_LOAD] = { "load", 8, I }, 125 | [OPC_LD_VAR_NO_PERSPECTIVE] = { "ld_var.no_perspective", 8, I }, 126 | [OPC_LD_VAR] = { "ld_var", 8, I }, 127 | [OPC_UNKB1] = { "unkb1", 10, I }, 128 | [OPC_STORE] = { "store", 8, I }, 129 | [OPC_ST_VAR] = { "st_var", 4, C }, 130 | [OPC_FCSEL] = { "fcsel", 8, I }, 131 | [OPC_ICSEL] = { "icsel", 8, I }, 132 | [OPC_MOVI] = { "movi", 4, C }, 133 | [OPC_LD_COMPUTE] = { "ld_compute", 4, C }, 134 | [OPC_BITOP] = { "bitop", 6, I }, 135 | [OPC_BLEND] = { "blend", 8, I }, 136 | [OPC_STOP] = { "stop", 4, I }, 137 | 138 | [OPC_WAIT] = { "wait", 2, I }, 139 | [OPC_UNK48] = { "unk48", 4, I }, 140 | [OPC_UNK42] = { "unk42", 6, I }, 141 | [OPC_UNK52] = { "unk52", 6, I }, 142 | [OPC_UNK80] = { "unk80", 4, I }, 143 | [OPC_UNKD2] = { "unkD2", 12, I }, 144 | }; 145 | 146 | #undef I 147 | #undef C 148 | 149 | static unsigned 150 | agx_instr_bytes(uint8_t opc, uint8_t reg) 151 | { 152 | /* For immediate moves, 32-bit immediates are larger */ 153 | if (opc == OPC_MOVI && (reg & 0x1)) 154 | return 6; 155 | else 156 | return agx_opcode_table[opc].size ?: 2; 157 | } 158 | 159 | /* Print float src, including modifiers */ 160 | 161 | struct agx_src { 162 | unsigned type : 2; 163 | unsigned reg; 164 | bool size32; 165 | bool abs; 166 | bool neg; 167 | unsigned unk; 168 | }; 169 | 170 | static void 171 | agx_print_src(FILE *fp, struct agx_src s) 172 | { 173 | /* Known source types: immediates (8-bit only?), constant memory 174 | * (indexing 64-bits at a time from preloaded memory), and general 175 | * purpose registers */ 176 | const char *types[] = { "#", "unk1:", "u", "" }; 177 | 178 | fprintf(fp, ", %s%u%s%s%s%s", 179 | types[s.type], s.reg, 180 | (s.size32 || s.type == 0) ? "" : ((s.reg & 1) ? "h" : "l"), 181 | s.abs ? ".abs" : "", s.neg ? ".neg" : "", 182 | s.unk ? 
".unk" : ""); 183 | } 184 | 185 | static void 186 | agx_print_float_src(FILE *fp, unsigned type, unsigned reg, bool size32, bool abs, bool neg) 187 | { 188 | assert(type <= 3); 189 | agx_print_src(fp, (struct agx_src) { 190 | .type = type, .reg = reg, .size32 = size32, 191 | .abs = abs, .neg = neg 192 | }); 193 | } 194 | 195 | /* Decode 12-bit packed float source */ 196 | static struct agx_src 197 | agx_decode_float_src(uint16_t packed) 198 | { 199 | return (struct agx_src) { 200 | .reg = (packed & 0x3F), 201 | .type = (packed & 0xC0) >> 6, 202 | .unk = (packed & 0x100), 203 | .size32 = (packed & 0x200), 204 | .abs = (packed & 0x400), 205 | .neg = (packed & 0x800), 206 | }; 207 | } 208 | 209 | /* When we know more how the encodings relate to each other, these 210 | * per-instruction prints will hopefully disappear, assuming things are 211 | * sufficiently regular. 212 | * 213 | * fadd.f32 is 6 bytes. First two bytes are used for opcode/destination, so we 214 | * have 32-bits to decode here, or 16-bits per source. Since a register is at 215 | * least 6-bits, 2-bit type, 3-bits widen, that leaves only 10-bits unaccounted 216 | * for in the instruction. 217 | * 218 | * Byte 0: [2 - src0 type][6 - src0 value] 219 | * Byte 1: [4 - src1 value lo][1 - neg][1 - abs][1 - size][1 - unk] 220 | * Byte 2: [1 - neg][1 - abs][1 - size][1 - unk][2 - src1 type][2 - src1 value hi] 221 | * Byte 3: [8 - zero] 222 | * 223 | */ 224 | 225 | static void 226 | agx_print_fadd_f32(FILE *fp, uint8_t *code) 227 | { 228 | agx_print_src(fp, agx_decode_float_src(code[2] | ((code[3] & 0xF) << 8))); 229 | agx_print_src(fp, agx_decode_float_src((code[3] >> 4) | (code[4] << 4))); 230 | 231 | if (code[5]) 232 | fprintf(fp, " /* unk5 = %02X */", code[5]); 233 | } 234 | 235 | static void 236 | agx_print_ld_compute(uint8_t *code, FILE *fp) 237 | { 238 | /* 4 bytes, first 2 used for opcode and dest reg, next few bits for the 239 | * component, the rest is a selector for what to load */ 240 | uint16_t arg = code[2] | (code[3] << 8); 241 | 242 | unsigned component = arg & 0x3; 243 | uint16_t selector = arg >> 2; 244 | 245 | fprintf(fp, ", "); 246 | 247 | switch (selector) { 248 | case 0x00: 249 | fprintf(fp, "[threadgroup_position_in_grid]"); 250 | break; 251 | case 0x0c: 252 | fprintf(fp, "[thread_position_in_threadgroup]"); 253 | break; 254 | case 0x0d: 255 | fprintf(fp, "[thread_position_in_simdgroup]"); 256 | break; 257 | case 0x104: 258 | fprintf(fp, "[thread_position_in_grid]"); 259 | break; 260 | default: 261 | fprintf(fp, "[unk_%X]", selector); 262 | break; 263 | } 264 | 265 | fprintf(fp, ".%c", "xyzw"[component]); 266 | } 267 | 268 | static void 269 | agx_print_bitop_src(uint16_t value, FILE *fp) 270 | { 271 | /* different encoding from float srcs -- slightly smaller */ 272 | uint16_t mode = (value >> 6) & 0x0f; 273 | uint16_t v = (value & 0x3f) | ((value >> 4) & 0xc0); 274 | 275 | switch (mode) { 276 | case 0x0: 277 | // 8-bit immediate 278 | fprintf(fp, "#0x%x", v); 279 | break; 280 | case 0x3: 281 | // 16b register 282 | fprintf(fp, "h%d", v); 283 | break; 284 | case 0xb: 285 | // 32b register 286 | assert((v&1) == 0); 287 | fprintf(fp, "w%d", v >> 1); 288 | break; 289 | default: 290 | fprintf(fp, "unk_%x", value); 291 | break; 292 | } 293 | } 294 | 295 | static void 296 | agx_print_bitop(uint8_t *code, FILE *fp) 297 | { 298 | /* 6 bytes */ 299 | /* Universal bitop instruction. 
Control bits express operation as 300 | * sum-of-products: a&b, ~a&b, a&~b, ~a&~b */ 301 | 302 | /* XXX: dst encoding may not be quite correct either, but is done 303 | * in common code before this point */ 304 | /* XXX: disassemble to "friendly" pseudoop ? */ 305 | 306 | uint8_t control = (code[3] >> 2) & 0x3; 307 | control |= (code[4] >> 4) & 0xc; 308 | fprintf(fp, ", #0x%x, ", control); 309 | 310 | uint16_t src1_bits = code[2] | ((uint16_t)(code[3]&3) << 8) | 311 | ((uint16_t)code[5]&0xc)<<8; 312 | uint16_t src2_bits = (code[3] >> 4) | (((uint16_t)code[4]&0x3f)<<4) | 313 | (((uint16_t)code[5]&0x3)<<10); 314 | 315 | agx_print_bitop_src(src1_bits, fp); 316 | fprintf(fp, ", "); 317 | agx_print_bitop_src(src2_bits, fp); 318 | } 319 | 320 | static float 321 | agx_decode_float_imm8(uint16_t src) 322 | { 323 | float sign = (src & 0x80) ? -1.0f : 1.0f; 324 | int e = ((src & 0x70) >> 4); 325 | 326 | if (e == 0) { 327 | /* denorm */ 328 | return sign * (src & 0x0f) / 64.0f; 329 | } 330 | else { 331 | return sign * ldexpf((src & 0x0f) | 0x10, e - 7); 332 | } 333 | } 334 | 335 | static void 336 | agx_print_fp16_src(uint16_t src, uint16_t type, FILE *fp) 337 | { 338 | /* XXX: type&2 bit may be something odd like code[0]&0x80 */ 339 | 340 | switch (type & 5) { 341 | case 0x0: 342 | /* packed float8 immediate */ 343 | fprintf(fp, "#%ff", agx_decode_float_imm8(src)); 344 | break; 345 | case 0x1: 346 | /* half register */ 347 | fprintf(fp, "h%d", src); 348 | break; 349 | case 0x4: 350 | case 0x5: 351 | /* constant space; extra bit packed in 352 | * bottom bit of type */ 353 | fprintf(fp, "const_%d", ((type&1)<<8) | src); 354 | break; 355 | default: 356 | fprintf(fp, "unk_%x:%x", type, src); 357 | break; 358 | } 359 | 360 | if (type & 0x8) 361 | fprintf(fp, ".abs"); 362 | if (type & 0x10) 363 | fprintf(fp, ".neg"); 364 | 365 | } 366 | 367 | static void 368 | agx_print_fadd16(uint8_t *code, FILE *fp) 369 | { 370 | /* 6 bytes */ 371 | uint16_t src1 = (code[2] & 0x3f) | ((code[5] & 0x0c)<<4); 372 | uint16_t type1 = (code[2] >> 6) | ((code[3] & 0x0f)<<2); 373 | 374 | uint16_t src2 = (code[3] >> 4) | ((code[4] & 0x3)<<4) | ((code[5] & 0x3)<<6); 375 | uint16_t type2 = (code[4] >> 2); 376 | 377 | fprintf(fp, ", "); 378 | agx_print_fp16_src(src1, type1, fp); 379 | fprintf(fp, ", "); 380 | agx_print_fp16_src(src2, type2, fp); 381 | } 382 | 383 | static void 384 | agx_print_st_var(uint8_t *code, FILE *fp) 385 | { 386 | /* 4 bytes, first for opcode. Second for source register third 387 | * indicates the destination, fourth unknown */ 388 | if (code[1] & 0x1) 389 | fprintf(fp, ".unk"); 390 | 391 | fprintf(fp, ", index:%u", code[2] & 0xF); 392 | 393 | if ((code[2] & 0xF0) != 0x80) 394 | fprintf(fp, ", unk2=%X", code[2] >> 4); 395 | 396 | if (code[3] != 0x80) 397 | fprintf(fp, ", unk3=%X", code[3]); 398 | } 399 | 400 | /* Disassembles a single instruction */ 401 | 402 | unsigned 403 | agx_disassemble_instr(uint8_t *code, bool *stop, bool verbose, FILE *fp) 404 | { 405 | /* Decode the opcode first, requires 2 bytes */ 406 | uint8_t opc = (code[0] & 0x7F) | (code[1] & 0x80); 407 | 408 | /* Guess the size */ 409 | unsigned bytes = agx_instr_bytes(opc, code[1]); 410 | 411 | /* Hexdump the instruction */ 412 | 413 | if (verbose || !agx_opcode_table[opc].complete) { 414 | fprintf(fp, "#"); 415 | for (unsigned i = 0; i < bytes; ++i) 416 | fprintf(fp, " %02X", code[i]); 417 | fprintf(fp, "\n"); 418 | } 419 | 420 | unsigned op_unk80 = code[0] & 0x80; /* XXX: what is this? */ 421 | fprintf(fp, "%c", op_unk80 ? 
'+' : '-'); /* Stay concise.. */ 422 | 423 | if (agx_opcode_table[opc].name) 424 | fputs(agx_opcode_table[opc].name, fp); 425 | else 426 | fprintf(fp, "op_%02X", opc); 427 | 428 | if (opc == OPC_ICSEL) { 429 | unsigned mode = (code[7] & 0xF0) >> 4; 430 | if (mode == 0x1) 431 | fprintf(fp, ".eq"); // output 16-bit bool 432 | else if (mode == 0x2) 433 | fprintf(fp, ".imin"); 434 | else if (mode == 0x3) 435 | fprintf(fp, ".ult"); // output 16-bit bool 436 | else if (mode == 0x4) 437 | fprintf(fp, ".imax"); 438 | else if (mode == 0x5) 439 | fprintf(fp, ".ugt"); // output 16-bit bool 440 | else 441 | fprintf(fp, ".unk%X", mode); 442 | } else if (opc == OPC_FCSEL) { 443 | unsigned mode = (code[7] & 0xF0) >> 4; 444 | 445 | if (mode == 0x6) 446 | fprintf(fp, ".fmin"); 447 | else if (mode == 0xE) 448 | fprintf(fp, ".fmax"); 449 | else 450 | fprintf(fp, ".unk%X", mode); 451 | } 452 | 453 | /* Decode destination register, common to all ALUs (and maybe more?) */ 454 | uint8_t dest = code[1]; 455 | bool dest_32 = dest & 0x1; /* clear for 16-bit */ 456 | unsigned dest_reg = (dest >> 1) & 0x3F; 457 | 458 | /* Maybe it's a 32-bit opcode */ 459 | if (opc == OPC_ST_VAR) 460 | dest_32 = !dest_32; 461 | 462 | fprintf(fp, " %s%u", 463 | dest_32 ? "w" : "h", 464 | dest_reg); 465 | 466 | /* Decode other stuff, TODO */ 467 | switch (opc) { 468 | case OPC_ST_VAR: 469 | agx_print_st_var(code, fp); 470 | break; 471 | case OPC_LD_COMPUTE: 472 | agx_print_ld_compute(code, fp); 473 | break; 474 | case OPC_BITOP: 475 | agx_print_bitop(code, fp); 476 | break; 477 | case OPC_FADD_16: 478 | case OPC_FADD_SAT_16: 479 | case OPC_FMUL_16: 480 | case OPC_FMUL_SAT_16: 481 | agx_print_fadd16(code, fp); 482 | break; 483 | case OPC_MOVI: { 484 | uint32_t imm = code[2] | (code[3] << 8); 485 | 486 | if (dest_32) 487 | imm |= (code[4] << 16) | (code[5] << 24); 488 | 489 | fprintf(fp, ", #0x%X", imm); 490 | break; 491 | } 492 | case OPC_FADD_32: 493 | case OPC_FADD_SAT_32: 494 | case OPC_FMUL_32: 495 | case OPC_FMUL_SAT_32: 496 | agx_print_fadd_f32(fp, code); 497 | break; 498 | default: { 499 | /* Make some guesses */ 500 | bool iadd = opc == OPC_IADD; 501 | 502 | if (bytes > 2) { 503 | agx_print_float_src(fp, 504 | (code[2] & 0xC0) >> 6, 505 | (code[2] & 0x3F) | 506 | (iadd ? ((code[5] & 0x0C) << 4) : 0), 507 | // TODO: why overlap? 
508 | code[3] & 0x20, 509 | code[3] & 0x04, 510 | code[3] & 0x08); 511 | 512 | agx_print_float_src(fp, 513 | (code[4] & 0x0C) >> 2, 514 | ((code[3] >> 4) & 0xF) | ((code[4] & 0x3) << 4) | ((code[7] & 0x3) << 6), 515 | code[4] & 0x20, 516 | code[4] & 0x40, 517 | code[4] & 0x80); 518 | } 519 | 520 | if (bytes > 6 && !iadd) { 521 | agx_print_float_src(fp, 522 | (code[5] & 0xC0) >> 6, 523 | (code[5] & 0x3F) | (code[6] & 0xC0), 524 | code[6] & 0x20, 525 | code[6] & 0x04, 526 | code[6] & 0x08); 527 | } 528 | 529 | break; 530 | } 531 | } 532 | 533 | fprintf(fp, "\n"); 534 | 535 | if (code[0] == (OPC_STOP | 0x80)) 536 | *stop = true; 537 | 538 | return bytes; 539 | } 540 | 541 | /* Disassembles a shader */ 542 | 543 | void 544 | agx_disassemble(void *_code, size_t maxlen, FILE *fp) 545 | { 546 | if (maxlen > 256) 547 | maxlen = 256; 548 | 549 | uint8_t *code = _code; 550 | 551 | bool stop = false; 552 | unsigned bytes = 0; 553 | bool verbose = getenv("ASAHI_VERBOSE") != NULL; 554 | 555 | while((bytes + 8) < maxlen && !stop) 556 | bytes += agx_disassemble_instr(code + bytes, &stop, verbose, fp); 557 | 558 | if (!stop) 559 | fprintf(fp, "// error: stop instruction not found\n"); 560 | } 561 | -------------------------------------------------------------------------------- /docs/Codenames.md: -------------------------------------------------------------------------------- 1 | On an Mac Mini M1 (2020): 2 | 3 | * ioreg gives AGXAcceleratorG13G_B0 (with clients of type AGXDeviceUserClient), parent type sgx@4000000 4 | * Also has gfx-asc@6400000 -> AppleASCWrapV4 -> ... -> AGXFirmwareKextG13RTBuddy 5 | * Metal dispatches to AGXMetal13_3 6 | 7 | All in all, looks like this is a **G13** chip. 8 | -------------------------------------------------------------------------------- /docs/table.py: -------------------------------------------------------------------------------- 1 | # Correspondance of bytes spilled to bitfield in question 2 | 3 | table = [ 4 | (340, 6), 5 | (132, 5), 6 | (420, 7), 7 | (548, 8), 8 | (740, 8), 9 | (1140, 9), 10 | (500, 7), 11 | (516, 8), 12 | (356, 6), 13 | (192, 5), 14 | (112, 4), 15 | (52, 2), 16 | (36, 2), 17 | (20, 1), 18 | (4, 1), 19 | (68, 3), 20 | (212, 5), 21 | (372, 6), 22 | (404, 7), 23 | (388, 7) 24 | ] 25 | -------------------------------------------------------------------------------- /lib/cmdbuf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /lib/cmdstream.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and 
associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __CMDSTREAM_H 25 | #define __CMDSTREAM_H 26 | 27 | #include 28 | 29 | struct agx_map_header { 30 | uint64_t cmdbuf_id; // GUID 31 | uint32_t unk2; // 01 00 00 00 32 | uint32_t unk3; // 28 05 00 80 33 | uint64_t encoder_id; // GUID 34 | uint32_t unk6; // 00 00 00 00 35 | uint32_t unk7; // 80 07 00 00 36 | uint32_t nr_entries_1; 37 | uint32_t nr_entries_2; 38 | uint32_t unka; // 0b 00 00 00 39 | uint32_t padding[4]; 40 | } __attribute__((packed)); 41 | 42 | struct agx_map_entry { 43 | uint32_t unkAAA; // 20 00 00 00 44 | uint32_t unk2; // 00 00 00 00 45 | uint32_t unk3; // 00 00 00 00 46 | uint32_t unk4; // 00 00 00 00 47 | uint32_t unk5; // 00 00 00 00 48 | uint32_t unk6; // 00 00 00 00 49 | uint32_t unkBBB; // 01 00 00 00 50 | uint32_t unk8; // 00 00 00 00 51 | uint32_t unk9; // 00 00 00 00 52 | uint32_t unka; // ff ff 01 00 53 | uint32_t index; 54 | uint32_t padding[5]; 55 | } __attribute__((packed)); 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /lib/decode.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017-2019 Alyssa Rosenzweig 3 | * Copyright (C) 2017-2019 Connor Abbott 4 | * Copyright (C) 2019 Collabora, Ltd. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice (including the next 14 | * paragraph) shall be included in all copies or substantial portions of the 15 | * Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "decode.h" 36 | #include "io.h" 37 | 38 | extern void agx_disassemble(void *_code, size_t maxlen, FILE *fp); 39 | 40 | FILE *pandecode_dump_stream; 41 | 42 | /* Memory handling, this can't pull in proper data structures so hardcode some 43 | * things, it should be "good enough" for most use cases */ 44 | 45 | #define MAX_MAPPINGS 4096 46 | 47 | struct agx_allocation mmap_array[MAX_MAPPINGS]; 48 | unsigned mmap_count = 0; 49 | 50 | struct agx_allocation *ro_mappings[MAX_MAPPINGS]; 51 | unsigned ro_mapping_count = 0; 52 | 53 | static struct agx_allocation * 54 | pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr) 55 | { 56 | for (unsigned i = 0; i < mmap_count; ++i) { 57 | if (addr >= mmap_array[i].gpu_va && (addr - mmap_array[i].gpu_va) < mmap_array[i].size) 58 | return mmap_array + i; 59 | } 60 | 61 | return NULL; 62 | } 63 | 64 | struct agx_allocation * 65 | pandecode_find_mapped_gpu_mem_containing(uint64_t addr) 66 | { 67 | struct agx_allocation *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr); 68 | 69 | if (mem && mem->map && !mem->ro) { 70 | mprotect(mem->map, mem->size, PROT_READ); 71 | mem->ro = true; 72 | ro_mappings[ro_mapping_count++] = mem; 73 | assert(ro_mapping_count < MAX_MAPPINGS); 74 | } 75 | 76 | return mem; 77 | } 78 | 79 | static inline void * 80 | __pandecode_fetch_gpu_mem(const struct agx_allocation *mem, 81 | uint64_t gpu_va, size_t size, 82 | int line, const char *filename) 83 | { 84 | if (!mem) 85 | mem = pandecode_find_mapped_gpu_mem_containing(gpu_va); 86 | 87 | if (!mem) { 88 | fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", 89 | gpu_va, filename, line); 90 | fflush(pandecode_dump_stream); 91 | assert(0); 92 | } 93 | 94 | assert(mem); 95 | assert(size + (gpu_va - mem->gpu_va) <= mem->size); 96 | 97 | return mem->map + gpu_va - mem->gpu_va; 98 | } 99 | 100 | #define pandecode_fetch_gpu_mem(gpu_va, size) \ 101 | __pandecode_fetch_gpu_mem(NULL, gpu_va, size, __LINE__, __FILE__) 102 | 103 | static void 104 | pandecode_map_read_write(void) 105 | { 106 | for (unsigned i = 0; i < ro_mapping_count; ++i) { 107 | ro_mappings[i]->ro = false; 108 | mprotect(ro_mappings[i]->map, ro_mappings[i]->size, 109 | PROT_READ | PROT_WRITE); 110 | } 111 | 112 | ro_mapping_count = 0; 113 | } 114 | 115 | /* Helpers for parsing the cmdstream */ 116 | 117 | #define DUMP_UNPACKED(T, var, str) { \ 118 | pandecode_log(str); \ 119 | bl_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \ 120 | } 121 | 122 | #define DUMP_CL(T, cl, str) {\ 123 | bl_unpack(cl, T, temp); \ 124 | DUMP_UNPACKED(T, temp, str "\n"); \ 125 | } 126 | 127 | #define pandecode_log(str) fputs(str, pandecode_dump_stream) 128 | #define pandecode_msg(str) fprintf(pandecode_dump_stream, "// %s", str) 129 | 130 | unsigned pandecode_indent = 0; 131 | 132 | /* To check for memory safety issues, validates that the given pointer in GPU 133 | * memory is valid, containing at least sz bytes. The goal is to detect 134 | * GPU-side memory bugs (NULL pointer dereferences, buffer overflows, or buffer 135 | * overruns) by statically validating pointers. 
136 | */ 137 | 138 | static void 139 | pandecode_validate_buffer(uint64_t addr, size_t sz) 140 | { 141 | if (!addr) { 142 | pandecode_msg("XXX: null pointer deref"); 143 | return; 144 | } 145 | 146 | /* Find a BO */ 147 | 148 | struct agx_allocation *bo = 149 | pandecode_find_mapped_gpu_mem_containing(addr); 150 | 151 | if (!bo) { 152 | pandecode_msg("XXX: invalid memory dereference\n"); 153 | return; 154 | } 155 | 156 | /* Bounds check */ 157 | 158 | unsigned offset = addr - bo->gpu_va; 159 | unsigned total = offset + sz; 160 | 161 | if (total > bo->size) { 162 | fprintf(pandecode_dump_stream, "// XXX: buffer overrun. " 163 | "Chunk of size %zu at offset %d in buffer of size %zu. " 164 | "Overrun by %zu bytes. \n", 165 | sz, offset, bo->size, total - bo->size); 166 | return; 167 | } 168 | } 169 | 170 | static struct agx_allocation * 171 | pandecode_find_cmdbuf(unsigned cmdbuf_index) 172 | { 173 | for (unsigned i = 0; i < mmap_count; ++i) { 174 | if (mmap_array[i].type != AGX_ALLOC_CMDBUF) 175 | continue; 176 | 177 | if (mmap_array[i].index != cmdbuf_index) 178 | continue; 179 | 180 | return &mmap_array[i]; 181 | } 182 | 183 | return NULL; 184 | } 185 | 186 | static void 187 | pandecode_dump_bo(struct agx_allocation *bo, const char *name) 188 | { 189 | fprintf(pandecode_dump_stream, "%s %s (%u)\n", name, bo->name ?: "", bo->index); 190 | hexdump(pandecode_dump_stream, bo->map, bo->size, false); 191 | } 192 | 193 | /* Abstraction for command stream parsing */ 194 | typedef unsigned (*decode_cmd)(const uint8_t *map, bool verbose); 195 | 196 | #define STATE_DONE (0xFFFFFFFFu) 197 | 198 | static void 199 | pandecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verbose) 200 | { 201 | struct agx_allocation *alloc = pandecode_find_mapped_gpu_mem_containing(va); 202 | assert(alloc != NULL && "nonexistant object"); 203 | fprintf(pandecode_dump_stream, "%s\n", label); 204 | 205 | uint8_t *map = pandecode_fetch_gpu_mem(va, 64); 206 | uint8_t *end = map + alloc->size; 207 | 208 | if (verbose) 209 | pandecode_dump_bo(alloc, label); 210 | 211 | while (map < end) { 212 | unsigned count = decoder(map, verbose); 213 | 214 | /* If we fail to decode, default to a hexdump (don't hang) */ 215 | if (count == 0) { 216 | hexdump(pandecode_dump_stream, map, 8, false); 217 | count = 8; 218 | } 219 | 220 | map += count; 221 | 222 | if (count == STATE_DONE) 223 | break; 224 | } 225 | } 226 | 227 | static unsigned 228 | pandecode_pipeline(const uint8_t *map, UNUSED bool verbose) 229 | { 230 | uint8_t zeroes[16] = { 0 }; 231 | 232 | if (map[0] == 0x4D && map[1] == 0xbd) { 233 | /* TODO: Disambiguation for extended is a guess */ 234 | bl_unpack(map, SET_SHADER_EXTENDED, cmd); 235 | DUMP_UNPACKED(SET_SHADER_EXTENDED, cmd, "Set shader\n"); 236 | 237 | if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { 238 | pandecode_log("Preshader\n"); 239 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.preshader_code, 8192), 240 | 8192, pandecode_dump_stream); 241 | pandecode_log("\n---\n"); 242 | } 243 | 244 | pandecode_log("\n"); 245 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.code, 8192), 246 | 8192, pandecode_dump_stream); 247 | pandecode_log("\n"); 248 | 249 | return AGX_SET_SHADER_EXTENDED_LENGTH; 250 | } else if (map[0] == 0x4D) { 251 | bl_unpack(map, SET_SHADER, cmd); 252 | DUMP_UNPACKED(SET_SHADER, cmd, "Set shader\n"); 253 | 254 | if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { 255 | pandecode_log("Preshader\n"); 256 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.preshader_code, 8192), 257 
| 8192, pandecode_dump_stream); 258 | pandecode_log("\n---\n"); 259 | } 260 | 261 | pandecode_log("\n"); 262 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.code, 8192), 263 | 8192, pandecode_dump_stream); 264 | FILE *fp = fopen("vertex.bin", "wb"); 265 | fwrite(pandecode_fetch_gpu_mem(cmd.code, 8192), 1, 8192, fp); 266 | fclose(fp); 267 | pandecode_log("\n"); 268 | 269 | return AGX_SET_SHADER_LENGTH; 270 | } else if (map[0] == 0x1D) { 271 | DUMP_CL(BIND_UNIFORM, map, "Bind uniform"); 272 | return AGX_BIND_UNIFORM_LENGTH; 273 | } else if (memcmp(map, zeroes, 16) == 0) { 274 | /* TODO: Termination */ 275 | return STATE_DONE; 276 | } else { 277 | return 0; 278 | } 279 | } 280 | 281 | static void 282 | pandecode_record(uint64_t va, size_t size, bool verbose) 283 | { 284 | uint8_t *map = pandecode_fetch_gpu_mem(va, size); 285 | uint32_t tag = 0; 286 | memcpy(&tag, map, 4); 287 | 288 | if (tag == 0x00000C00) { 289 | assert(size == AGX_VIEWPORT_LENGTH); 290 | DUMP_CL(VIEWPORT, map, "Viewport"); 291 | } else if (tag == 0x0C020000) { 292 | assert(size == AGX_LINKAGE_LENGTH); 293 | DUMP_CL(LINKAGE, map, "Linkage"); 294 | } else if (tag == 0x800000) { 295 | assert(size == (AGX_BIND_PIPELINE_LENGTH + 4)); 296 | // XXX: why does this raise a bus error? 297 | // uint32_t unk = 0; 298 | // memcpy(map + AGX_BIND_PIPELINE_LENGTH, &unk, 4); 299 | 300 | bl_unpack(map, BIND_PIPELINE, cmd); 301 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 302 | DUMP_UNPACKED(BIND_PIPELINE, cmd, "Bind fragment pipeline\n"); 303 | // fprintf(pandecode_dump_stream, "Unk: %X\n", unk); 304 | } else { 305 | fprintf(pandecode_dump_stream, "Record %" PRIx64 "\n", va); 306 | hexdump(pandecode_dump_stream, map, size, false); 307 | } 308 | } 309 | 310 | static unsigned 311 | pandecode_cmd(const uint8_t *map, bool verbose) 312 | { 313 | if (map[0] == 0x02 && map[1] == 0x10 && map[2] == 0x00 && map[3] == 0x00) { 314 | bl_unpack(map, LAUNCH, cmd); 315 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 316 | DUMP_UNPACKED(LAUNCH, cmd, "Launch\n"); 317 | return AGX_LAUNCH_LENGTH; 318 | } else if (map[0] == 0x2E && map[1] == 0x00 && map[2] == 0x00 && map[3] == 0x40) { 319 | bl_unpack(map, BIND_PIPELINE, cmd); 320 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 321 | DUMP_UNPACKED(BIND_PIPELINE, cmd, "Bind vertex pipeline\n"); 322 | 323 | /* Random unaligned null byte, it's pretty awful.. 
*/ 324 | assert(map[AGX_BIND_PIPELINE_LENGTH] == 0); 325 | return AGX_BIND_PIPELINE_LENGTH + 1; 326 | } else if (map[1] == 0xc0 && map[2] == 0x61) { 327 | DUMP_CL(DRAW, map, "Draw"); 328 | return AGX_DRAW_LENGTH; 329 | } else if (map[0] == 0x00 && map[1] == 0x00 && map[2] == 0x00 && map[3] == 0xc0) { 330 | return STATE_DONE; 331 | } else if (map[1] == 0x00 && map[2] == 0x00) { 332 | /* No need to explicitly dump the record */ 333 | bl_unpack(map, RECORD, cmd); 334 | struct agx_allocation *mem = pandecode_find_mapped_gpu_mem_containing(cmd.data); 335 | 336 | if (mem) 337 | pandecode_record(cmd.data, cmd.size_words * 4, verbose); 338 | else 339 | DUMP_UNPACKED(RECORD, cmd, "Non-existant record (XXX)\n"); 340 | 341 | return AGX_RECORD_LENGTH; 342 | } else if (map[0] == 0 && map[1] == 0 && map[2] == 0xC0 && map[3] == 0x00) { 343 | unsigned zero[16] = { 0 }; 344 | assert(memcmp(map + 4, zero, sizeof(zero)) == 0); 345 | return STATE_DONE; 346 | } else { 347 | return 0; 348 | } 349 | } 350 | 351 | void 352 | pandecode_cmdstream(unsigned cmdbuf_index, bool verbose) 353 | { 354 | pandecode_dump_file_open(); 355 | 356 | struct agx_allocation *cmdbuf = pandecode_find_cmdbuf(cmdbuf_index); 357 | assert(cmdbuf != NULL && "nonexistant command buffer"); 358 | 359 | if (verbose) 360 | pandecode_dump_bo(cmdbuf, "Command buffer"); 361 | 362 | /* TODO: What else is in here? */ 363 | uint64_t *encoder = ((uint64_t *) cmdbuf->map) + 7; 364 | pandecode_stateful(*encoder, "Encoder", pandecode_cmd, verbose); 365 | 366 | pandecode_map_read_write(); 367 | } 368 | 369 | void 370 | pandecode_dump_mappings(void) 371 | { 372 | pandecode_dump_file_open(); 373 | 374 | for (unsigned i = 0; i < mmap_count; ++i) { 375 | if (!mmap_array[i].map || !mmap_array[i].size) 376 | continue; 377 | 378 | assert(mmap_array[i].type < AGX_NUM_ALLOC); 379 | 380 | fprintf(pandecode_dump_stream, "Buffer: type %s, gpu %llx, index %u.bin:\n\n", 381 | agx_alloc_types[mmap_array[i].type], 382 | mmap_array[i].gpu_va, mmap_array[i].index); 383 | 384 | hexdump(pandecode_dump_stream, mmap_array[i].map, mmap_array[i].size, false); 385 | fprintf(pandecode_dump_stream, "\n"); 386 | } 387 | } 388 | 389 | 390 | 391 | static void 392 | pandecode_add_name(struct agx_allocation *mem, uint64_t gpu_va, const char *name) 393 | { 394 | if (!name) { 395 | /* If we don't have a name, assign one */ 396 | 397 | snprintf(mem->name, sizeof(mem->name) - 1, 398 | "memory_%" PRIx64, gpu_va); 399 | } else { 400 | assert((strlen(name) + 1) < sizeof(mem->name)); 401 | memcpy(mem->name, name, strlen(name) + 1); 402 | } 403 | } 404 | 405 | void 406 | pandecode_track_alloc(struct agx_allocation alloc) 407 | { 408 | assert((mmap_count + 1) < MAX_MAPPINGS); 409 | mmap_array[mmap_count++] = alloc; 410 | } 411 | 412 | static char * 413 | pointer_as_memory_reference(uint64_t ptr) 414 | { 415 | struct agx_allocation *mapped; 416 | char *out = malloc(128); 417 | 418 | /* Try to find the corresponding mapped zone */ 419 | 420 | mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr); 421 | 422 | if (mapped) { 423 | snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va)); 424 | return out; 425 | } 426 | 427 | /* Just use the raw address if other options are exhausted */ 428 | 429 | snprintf(out, 128, "0x%" PRIx64, ptr); 430 | return out; 431 | 432 | } 433 | 434 | static int pandecode_dump_frame_count = 0; 435 | 436 | void 437 | pandecode_dump_file_open(void) 438 | { 439 | if (pandecode_dump_stream) 440 | return; 441 | 442 | /* This does a getenv every frame, so it 
is possible to use 443 | * setenv to change the base at runtime. 444 | */ 445 | const char *dump_file_base = getenv("PANDECODE_DUMP_FILE") ?: "pandecode.dump"; 446 | if (!strcmp(dump_file_base, "stderr")) 447 | pandecode_dump_stream = stderr; 448 | else { 449 | char buffer[1024]; 450 | snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count); 451 | printf("pandecode: dump command stream to file %s\n", buffer); 452 | pandecode_dump_stream = fopen(buffer, "w"); 453 | if (!pandecode_dump_stream) 454 | fprintf(stderr, 455 | "pandecode: failed to open command stream log file %s\n", 456 | buffer); 457 | } 458 | } 459 | 460 | static void 461 | pandecode_dump_file_close(void) 462 | { 463 | if (pandecode_dump_stream && pandecode_dump_stream != stderr) { 464 | fclose(pandecode_dump_stream); 465 | pandecode_dump_stream = NULL; 466 | } 467 | } 468 | 469 | void 470 | pandecode_next_frame(void) 471 | { 472 | pandecode_dump_file_close(); 473 | pandecode_dump_frame_count++; 474 | } 475 | 476 | void 477 | pandecode_close(void) 478 | { 479 | pandecode_dump_file_close(); 480 | } 481 | -------------------------------------------------------------------------------- /lib/decode.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017-2019 Lyude Paul 3 | * Copyright (C) 2017-2019 Alyssa Rosenzweig 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice (including the next 13 | * paragraph) shall be included in all copies or substantial portions of the 14 | * Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef __PAN_DECODE_H__ 27 | #define __PAN_DECODE_H__ 28 | 29 | #include "io.h" 30 | 31 | void pandecode_next_frame(void); 32 | 33 | void pandecode_close(void); 34 | 35 | void pandecode_cmdstream(unsigned cmdbuf_index, bool verbose); 36 | 37 | void pandecode_dump_file_open(void); 38 | 39 | void pandecode_track_alloc(struct agx_allocation alloc); 40 | 41 | void pandecode_dump_mappings(void); 42 | 43 | #endif /* __MMAP_TRACE_H__ */ 44 | -------------------------------------------------------------------------------- /lib/gen_pack.py: -------------------------------------------------------------------------------- 1 | #encoding=utf-8 2 | 3 | # Copyright (C) 2016 Intel Corporation 4 | # Copyright (C) 2016 Broadcom 5 | # Copyright (C) 2020 Collabora, Ltd. 
6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice (including the next 15 | # paragraph) shall be included in all copies or substantial portions of the 16 | # Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | # IN THE SOFTWARE. 25 | 26 | import xml.parsers.expat 27 | import sys 28 | import operator 29 | from functools import reduce 30 | 31 | global_prefix = "agx" 32 | 33 | pack_header = """ 34 | /* Generated code, see midgard.xml and gen_pack_header.py 35 | * 36 | * Packets, enums and structures for Panfrost. 37 | * 38 | * This file has been generated, do not hand edit. 39 | */ 40 | 41 | #ifndef BL_PACK_H 42 | #define BL_PACK_H 43 | 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include "lib/util.h" 51 | 52 | #define __gen_unpack_float(x, y, z) uif(__gen_unpack_uint(x, y, z)) 53 | 54 | static inline uint64_t 55 | __gen_uint(uint64_t v, uint32_t start, uint32_t end) 56 | { 57 | #ifndef NDEBUG 58 | const int width = end - start + 1; 59 | if (width < 64) { 60 | const uint64_t max = (1ull << width) - 1; 61 | assert(v <= max); 62 | } 63 | #endif 64 | 65 | return v << start; 66 | } 67 | 68 | static inline uint32_t 69 | __gen_sint(int32_t v, uint32_t start, uint32_t end) 70 | { 71 | #ifndef NDEBUG 72 | const int width = end - start + 1; 73 | if (width < 64) { 74 | const int64_t max = (1ll << (width - 1)) - 1; 75 | const int64_t min = -(1ll << (width - 1)); 76 | assert(min <= v && v <= max); 77 | } 78 | #endif 79 | 80 | return (((uint32_t) v) << start) & ((2ll << end) - 1); 81 | } 82 | 83 | static inline uint64_t 84 | __gen_unpack_uint(const uint8_t *restrict cl, uint32_t start, uint32_t end) 85 | { 86 | uint64_t val = 0; 87 | const int width = end - start + 1; 88 | const uint64_t mask = (width == 64 ? ~0 : (1ull << width) - 1 ); 89 | 90 | for (unsigned byte = start / 8; byte <= end / 8; byte++) { 91 | val |= ((uint64_t) cl[byte]) << ((byte - start / 8) * 8); 92 | } 93 | 94 | return (val >> (start % 8)) & mask; 95 | } 96 | 97 | static inline uint64_t 98 | __gen_unpack_sint(const uint8_t *restrict cl, uint32_t start, uint32_t end) 99 | { 100 | int size = end - start + 1; 101 | int64_t val = __gen_unpack_uint(cl, start, end); 102 | 103 | /* Get the sign bit extended. 
*/ 104 | return (val << (64 - size)) >> (64 - size); 105 | } 106 | 107 | #define bl_prepare(dst, T) \\ 108 | *(dst) = (struct AGX_ ## T){ AGX_ ## T ## _header } 109 | 110 | #define bl_pack(dst, T, name) \\ 111 | for (struct AGX_ ## T name = { AGX_ ## T ## _header }, \\ 112 | *_loop_terminate = (void *) (dst); \\ 113 | __builtin_expect(_loop_terminate != NULL, 1); \\ 114 | ({ AGX_ ## T ## _pack((uint32_t *) (dst), &name); \\ 115 | _loop_terminate = NULL; })) 116 | 117 | #define bl_unpack(src, T, name) \\ 118 | struct AGX_ ## T name; \\ 119 | AGX_ ## T ## _unpack((uint8_t *)(src), &name) 120 | 121 | #define bl_print(fp, T, var, indent) \\ 122 | AGX_ ## T ## _print(fp, &(var), indent) 123 | 124 | """ 125 | 126 | def to_alphanum(name): 127 | substitutions = { 128 | ' ': '_', 129 | '/': '_', 130 | '[': '', 131 | ']': '', 132 | '(': '', 133 | ')': '', 134 | '-': '_', 135 | ':': '', 136 | '.': '', 137 | ',': '', 138 | '=': '', 139 | '>': '', 140 | '#': '', 141 | '&': '', 142 | '*': '', 143 | '"': '', 144 | '+': '', 145 | '\'': '', 146 | } 147 | 148 | for i, j in substitutions.items(): 149 | name = name.replace(i, j) 150 | 151 | return name 152 | 153 | def safe_name(name): 154 | name = to_alphanum(name) 155 | if not name[0].isalpha(): 156 | name = '_' + name 157 | 158 | return name 159 | 160 | def prefixed_upper_name(prefix, name): 161 | if prefix: 162 | name = prefix + "_" + name 163 | return safe_name(name).upper() 164 | 165 | def enum_name(name): 166 | return "{}_{}".format(global_prefix, safe_name(name)).lower() 167 | 168 | def num_from_str(num_str): 169 | if num_str.lower().startswith('0x'): 170 | return int(num_str, base=16) 171 | else: 172 | assert(not num_str.startswith('0') and 'octals numbers not allowed') 173 | return int(num_str) 174 | 175 | MODIFIERS = ["shr", "minus", "align", "log2"] 176 | 177 | def parse_modifier(modifier): 178 | if modifier is None: 179 | return None 180 | 181 | for mod in MODIFIERS: 182 | if modifier[0:len(mod)] == mod: 183 | if mod == "log2": 184 | assert(len(mod) == len(modifier)) 185 | return [mod] 186 | 187 | if modifier[len(mod)] == '(' and modifier[-1] == ')': 188 | ret = [mod, int(modifier[(len(mod) + 1):-1])] 189 | if ret[0] == 'align': 190 | align = ret[1] 191 | # Make sure the alignment is a power of 2 192 | assert(align > 0 and not(align & (align - 1))); 193 | 194 | return ret 195 | 196 | print("Invalid modifier") 197 | assert(False) 198 | 199 | class Field(object): 200 | def __init__(self, parser, attrs): 201 | self.parser = parser 202 | if "name" in attrs: 203 | self.name = safe_name(attrs["name"]).lower() 204 | self.human_name = attrs["name"] 205 | 206 | if ":" in str(attrs["start"]): 207 | (word, bit) = attrs["start"].split(":") 208 | self.start = (int(word) * 32) + int(bit) 209 | else: 210 | self.start = int(attrs["start"]) 211 | 212 | self.end = self.start + int(attrs["size"]) - 1 213 | self.type = attrs["type"] 214 | 215 | if self.type == 'bool' and self.start != self.end: 216 | print("#error Field {} has bool type but more than one bit of size".format(self.name)); 217 | 218 | if "prefix" in attrs: 219 | self.prefix = safe_name(attrs["prefix"]).upper() 220 | else: 221 | self.prefix = None 222 | 223 | if "exact" in attrs: 224 | self.exact = int(attrs["exact"]) 225 | else: 226 | self.exact = None 227 | 228 | self.default = attrs.get("default") 229 | 230 | # Map enum values 231 | if self.type in self.parser.enums and self.default is not None: 232 | self.default = safe_name('{}_{}_{}'.format(global_prefix, self.type, self.default)).upper() 233 | 234 
| self.modifier = parse_modifier(attrs.get("modifier")) 235 | 236 | def emit_template_struct(self, dim): 237 | if self.type == 'address': 238 | type = 'uint64_t' 239 | elif self.type == 'bool': 240 | type = 'bool' 241 | elif self.type == 'float': 242 | type = 'float' 243 | elif self.type in ['uint', 'hex'] and self.end - self.start > 32: 244 | type = 'uint64_t' 245 | elif self.type == 'int': 246 | type = 'int32_t' 247 | elif self.type in ['uint', 'uint/float', 'hex']: 248 | type = 'uint32_t' 249 | elif self.type in self.parser.structs: 250 | type = 'struct ' + self.parser.gen_prefix(safe_name(self.type.upper())) 251 | elif self.type in self.parser.enums: 252 | type = 'enum ' + enum_name(self.type) 253 | else: 254 | print("#error unhandled type: %s" % self.type) 255 | type = "uint32_t" 256 | 257 | print(" %-36s %s%s;" % (type, self.name, dim)) 258 | 259 | for value in self.values: 260 | name = prefixed_upper_name(self.prefix, value.name) 261 | print("#define %-40s %d" % (name, value.value)) 262 | 263 | def overlaps(self, field): 264 | return self != field and max(self.start, field.start) <= min(self.end, field.end) 265 | 266 | class Group(object): 267 | def __init__(self, parser, parent, start, count, label): 268 | self.parser = parser 269 | self.parent = parent 270 | self.start = start 271 | self.count = count 272 | self.label = label 273 | self.size = 0 274 | self.length = 0 275 | self.fields = [] 276 | 277 | def get_length(self): 278 | # Determine number of bytes in this group. 279 | calculated = max(field.end // 8 for field in self.fields) + 1 if len(self.fields) > 0 else 0 280 | if self.length > 0: 281 | assert(self.length >= calculated) 282 | else: 283 | self.length = calculated 284 | return self.length 285 | 286 | 287 | def emit_template_struct(self, dim): 288 | if self.count == 0: 289 | print(" /* variable length fields follow */") 290 | else: 291 | if self.count > 1: 292 | dim = "%s[%d]" % (dim, self.count) 293 | 294 | if len(self.fields) == 0: 295 | print(" int dummy;") 296 | 297 | for field in self.fields: 298 | if field.exact is not None: 299 | continue 300 | 301 | field.emit_template_struct(dim) 302 | 303 | class Word: 304 | def __init__(self): 305 | self.size = 32 306 | self.contributors = [] 307 | 308 | class FieldRef: 309 | def __init__(self, field, path, start, end): 310 | self.field = field 311 | self.path = path 312 | self.start = start 313 | self.end = end 314 | 315 | def collect_fields(self, fields, offset, path, all_fields): 316 | for field in fields: 317 | field_path = '{}{}'.format(path, field.name) 318 | field_offset = offset + field.start 319 | 320 | if field.type in self.parser.structs: 321 | sub_struct = self.parser.structs[field.type] 322 | self.collect_fields(sub_struct.fields, field_offset, field_path + '.', all_fields) 323 | continue 324 | 325 | start = field_offset 326 | end = offset + field.end 327 | all_fields.append(self.FieldRef(field, field_path, start, end)) 328 | 329 | def collect_words(self, fields, offset, path, words): 330 | for field in fields: 331 | field_path = '{}{}'.format(path, field.name) 332 | start = offset + field.start 333 | 334 | if field.type in self.parser.structs: 335 | sub_fields = self.parser.structs[field.type].fields 336 | self.collect_words(sub_fields, start, field_path + '.', words) 337 | continue 338 | 339 | end = offset + field.end 340 | contributor = self.FieldRef(field, field_path, start, end) 341 | first_word = contributor.start // 32 342 | last_word = contributor.end // 32 343 | for b in range(first_word, last_word + 1): 
344 | if not b in words: 345 | words[b] = self.Word() 346 | words[b].contributors.append(contributor) 347 | 348 | def emit_pack_function(self): 349 | self.get_length() 350 | 351 | words = {} 352 | self.collect_words(self.fields, 0, '', words) 353 | 354 | # Validate the modifier is lossless 355 | for field in self.fields: 356 | if field.modifier is None: 357 | continue 358 | 359 | assert(field.exact is None) 360 | 361 | if field.modifier[0] == "shr": 362 | shift = field.modifier[1] 363 | mask = hex((1 << shift) - 1) 364 | print(" assert((values->{} & {}) == 0);".format(field.name, mask)) 365 | elif field.modifier[0] == "minus": 366 | print(" assert(values->{} >= {});".format(field.name, field.modifier[1])) 367 | elif field.modifier[0] == "log2": 368 | print(" assert(util_is_power_of_two_nonzero(values->{}));".format(field.name)) 369 | 370 | for index in range(self.length // 4): 371 | # Handle MBZ words 372 | if not index in words: 373 | print(" cl[%2d] = 0;" % index) 374 | continue 375 | 376 | word = words[index] 377 | 378 | word_start = index * 32 379 | 380 | v = None 381 | prefix = " cl[%2d] =" % index 382 | 383 | for contributor in word.contributors: 384 | field = contributor.field 385 | name = field.name 386 | start = contributor.start 387 | end = contributor.end 388 | contrib_word_start = (start // 32) * 32 389 | start -= contrib_word_start 390 | end -= contrib_word_start 391 | 392 | value = str(field.exact) if field.exact is not None else "values->{}".format(contributor.path) 393 | if field.modifier is not None: 394 | if field.modifier[0] == "shr": 395 | value = "{} >> {}".format(value, field.modifier[1]) 396 | elif field.modifier[0] == "minus": 397 | value = "{} - {}".format(value, field.modifier[1]) 398 | elif field.modifier[0] == "align": 399 | value = "ALIGN_POT({}, {})".format(value, field.modifier[1]) 400 | elif field.modifier[0] == "log2": 401 | value = "util_logbase2({})".format(value) 402 | 403 | if field.type in ["uint", "hex", "address"]: 404 | s = "__gen_uint(%s, %d, %d)" % \ 405 | (value, start, end) 406 | elif field.type in self.parser.enums: 407 | s = "__gen_uint(%s, %d, %d)" % \ 408 | (value, start, end) 409 | elif field.type == "int": 410 | s = "__gen_sint(%s, %d, %d)" % \ 411 | (value, start, end) 412 | elif field.type == "bool": 413 | s = "__gen_uint(%s, %d, %d)" % \ 414 | (value, start, end) 415 | elif field.type == "float": 416 | assert(start == 0 and end == 31) 417 | s = "__gen_uint(fui({}), 0, 32)".format(value) 418 | else: 419 | s = "#error unhandled field {}, type {}".format(contributor.path, field.type) 420 | 421 | if not s == None: 422 | shift = word_start - contrib_word_start 423 | if shift: 424 | s = "%s >> %d" % (s, shift) 425 | 426 | if contributor == word.contributors[-1]: 427 | print("%s %s;" % (prefix, s)) 428 | else: 429 | print("%s %s |" % (prefix, s)) 430 | prefix = " " 431 | 432 | continue 433 | 434 | # Given a field (start, end) contained in word `index`, generate the 32-bit 435 | # mask of present bits relative to the word 436 | def mask_for_word(self, index, start, end): 437 | field_word_start = index * 32 438 | start -= field_word_start 439 | end -= field_word_start 440 | # Cap multiword at one word 441 | start = max(start, 0) 442 | end = min(end, 32 - 1) 443 | count = (end - start + 1) 444 | return (((1 << count) - 1) << start) 445 | 446 | def emit_unpack_function(self): 447 | # First, verify there is no garbage in unused bits 448 | words = {} 449 | self.collect_words(self.fields, 0, '', words) 450 | 451 | for index in range(self.length // 
4): 452 | base = index * 32 453 | word = words.get(index, self.Word()) 454 | masks = [self.mask_for_word(index, c.start, c.end) for c in word.contributors] 455 | mask = reduce(lambda x,y: x | y, masks, 0) 456 | 457 | ALL_ONES = 0xffffffff 458 | 459 | if mask != ALL_ONES: 460 | TMPL = ' if (((const uint32_t *) cl)[{}] & {}) fprintf(stderr, "XXX: Invalid field of {} unpacked at word {}\\n");' 461 | print(TMPL.format(index, hex(mask ^ ALL_ONES), self.label, index)) 462 | 463 | fieldrefs = [] 464 | self.collect_fields(self.fields, 0, '', fieldrefs) 465 | for fieldref in fieldrefs: 466 | field = fieldref.field 467 | convert = None 468 | 469 | args = [] 470 | args.append('cl') 471 | args.append(str(fieldref.start)) 472 | args.append(str(fieldref.end)) 473 | 474 | if field.type in set(["uint", "uint/float", "address", "hex"]) | self.parser.enums: 475 | convert = "__gen_unpack_uint" 476 | elif field.type == "int": 477 | convert = "__gen_unpack_sint" 478 | elif field.type == "bool": 479 | convert = "__gen_unpack_uint" 480 | elif field.type == "float": 481 | convert = "__gen_unpack_float" 482 | else: 483 | s = "/* unhandled field %s, type %s */\n" % (field.name, field.type) 484 | 485 | suffix = "" 486 | prefix = "" 487 | if field.modifier: 488 | if field.modifier[0] == "minus": 489 | suffix = " + {}".format(field.modifier[1]) 490 | elif field.modifier[0] == "shr": 491 | suffix = " << {}".format(field.modifier[1]) 492 | if field.modifier[0] == "log2": 493 | prefix = "1 << " 494 | 495 | decoded = '{}{}({}){}'.format(prefix, convert, ', '.join(args), suffix) 496 | 497 | print(' values->{} = {};'.format(fieldref.path, decoded)) 498 | if field.modifier and field.modifier[0] == "align": 499 | mask = hex(field.modifier[1] - 1) 500 | print(' assert(!(values->{} & {}));'.format(fieldref.path, mask)) 501 | 502 | def emit_print_function(self): 503 | for field in self.fields: 504 | convert = None 505 | name, val = field.human_name, 'values->{}'.format(field.name) 506 | 507 | if field.type in self.parser.structs: 508 | pack_name = self.parser.gen_prefix(safe_name(field.type)).upper() 509 | print(' fprintf(fp, "%*s{}:\\n", indent, "");'.format(field.human_name)) 510 | print(" {}_print(fp, &values->{}, indent + 2);".format(pack_name, field.name)) 511 | elif field.type == "address": 512 | # TODO resolve to name 513 | print(' fprintf(fp, "%*s{}: 0x%" PRIx64 "\\n", indent, "", {});'.format(name, val)) 514 | elif field.type in self.parser.enums: 515 | print(' fprintf(fp, "%*s{}: %s\\n", indent, "", {}_as_str({}));'.format(name, enum_name(field.type), val)) 516 | elif field.type == "int": 517 | print(' fprintf(fp, "%*s{}: %d\\n", indent, "", {});'.format(name, val)) 518 | elif field.type == "bool": 519 | print(' fprintf(fp, "%*s{}: %s\\n", indent, "", {} ? 
"true" : "false");'.format(name, val)) 520 | elif field.type == "float": 521 | print(' fprintf(fp, "%*s{}: %f\\n", indent, "", {});'.format(name, val)) 522 | elif field.type in ["uint", "hex"] and (field.end - field.start) >= 32: 523 | print(' fprintf(fp, "%*s{}: 0x%" PRIx64 "\\n", indent, "", {});'.format(name, val)) 524 | elif field.type == "hex": 525 | print(' fprintf(fp, "%*s{}: 0x%" PRIx32 "\\n", indent, "", {});'.format(name, val)) 526 | elif field.type == "uint/float": 527 | print(' fprintf(fp, "%*s{}: 0x%X (%f)\\n", indent, "", {}, uif({}));'.format(name, val, val)) 528 | else: 529 | print(' fprintf(fp, "%*s{}: %u\\n", indent, "", {});'.format(name, val)) 530 | 531 | class Value(object): 532 | def __init__(self, attrs): 533 | self.name = attrs["name"] 534 | self.value = int(attrs["value"], 0) 535 | 536 | class Parser(object): 537 | def __init__(self): 538 | self.parser = xml.parsers.expat.ParserCreate() 539 | self.parser.StartElementHandler = self.start_element 540 | self.parser.EndElementHandler = self.end_element 541 | 542 | self.struct = None 543 | self.structs = {} 544 | # Set of enum names we've seen. 545 | self.enums = set() 546 | 547 | def gen_prefix(self, name): 548 | return '{}_{}'.format(global_prefix.upper(), name) 549 | 550 | def start_element(self, name, attrs): 551 | if name == "blxml": 552 | print(pack_header) 553 | elif name == "struct": 554 | name = attrs["name"] 555 | self.no_direct_packing = attrs.get("no-direct-packing", False) 556 | object_name = self.gen_prefix(safe_name(name.upper())) 557 | self.struct = object_name 558 | 559 | self.group = Group(self, None, 0, 1, name) 560 | if "size" in attrs: 561 | self.group.length = int(attrs["size"]) 562 | self.group.align = int(attrs["align"]) if "align" in attrs else None 563 | self.structs[attrs["name"]] = self.group 564 | elif name == "field": 565 | self.group.fields.append(Field(self, attrs)) 566 | self.values = [] 567 | elif name == "enum": 568 | self.values = [] 569 | self.enum = safe_name(attrs["name"]) 570 | self.enums.add(attrs["name"]) 571 | if "prefix" in attrs: 572 | self.prefix = attrs["prefix"] 573 | else: 574 | self.prefix= None 575 | elif name == "value": 576 | self.values.append(Value(attrs)) 577 | 578 | def end_element(self, name): 579 | if name == "struct": 580 | self.emit_struct() 581 | self.struct = None 582 | self.group = None 583 | elif name == "field": 584 | self.group.fields[-1].values = self.values 585 | elif name == "enum": 586 | self.emit_enum() 587 | self.enum = None 588 | elif name == "blxml": 589 | print('#endif') 590 | 591 | def emit_header(self, name): 592 | default_fields = [] 593 | for field in self.group.fields: 594 | if not type(field) is Field: 595 | continue 596 | if field.default is not None: 597 | default_fields.append(" .{} = {}".format(field.name, field.default)) 598 | elif field.type in self.structs: 599 | default_fields.append(" .{} = {{ {}_header }}".format(field.name, self.gen_prefix(safe_name(field.type.upper())))) 600 | 601 | print('#define %-40s\\' % (name + '_header')) 602 | if default_fields: 603 | print(", \\\n".join(default_fields)) 604 | else: 605 | print(' 0') 606 | print('') 607 | 608 | def emit_template_struct(self, name, group): 609 | print("struct %s {" % name) 610 | group.emit_template_struct("") 611 | print("};\n") 612 | 613 | def emit_pack_function(self, name, group): 614 | print("static inline void\n%s_pack(uint32_t * restrict cl,\n%sconst struct %s * restrict values)\n{" % 615 | (name, ' ' * (len(name) + 6), name)) 616 | 617 | group.emit_pack_function() 
618 | 619 | print("}\n\n") 620 | 621 | print('#define {} {}'.format (name + "_LENGTH", self.group.length)) 622 | if self.group.align != None: 623 | print('#define {} {}'.format (name + "_ALIGN", self.group.align)) 624 | print('struct {}_packed {{ uint32_t opaque[{}]; }};'.format(name.lower(), self.group.length // 4)) 625 | 626 | def emit_unpack_function(self, name, group): 627 | print("static inline void") 628 | print("%s_unpack(const uint8_t * restrict cl,\n%sstruct %s * restrict values)\n{" % 629 | (name.upper(), ' ' * (len(name) + 8), name)) 630 | 631 | group.emit_unpack_function() 632 | 633 | print("}\n") 634 | 635 | def emit_print_function(self, name, group): 636 | print("static inline void") 637 | print("{}_print(FILE *fp, const struct {} * values, unsigned indent)\n{{".format(name.upper(), name)) 638 | 639 | group.emit_print_function() 640 | 641 | print("}\n") 642 | 643 | def emit_struct(self): 644 | name = self.struct 645 | 646 | self.emit_template_struct(self.struct, self.group) 647 | self.emit_header(name) 648 | if self.no_direct_packing == False: 649 | self.emit_pack_function(self.struct, self.group) 650 | self.emit_unpack_function(self.struct, self.group) 651 | self.emit_print_function(self.struct, self.group) 652 | 653 | def enum_prefix(self, name): 654 | return 655 | 656 | def emit_enum(self): 657 | e_name = enum_name(self.enum) 658 | prefix = e_name if self.enum != 'Format' else global_prefix 659 | print('enum {} {{'.format(e_name)) 660 | 661 | for value in self.values: 662 | name = '{}_{}'.format(prefix, value.name) 663 | name = safe_name(name).upper() 664 | print(' % -36s = %6d,' % (name, value.value)) 665 | print('};\n') 666 | 667 | print("static inline const char *") 668 | print("{}_as_str(enum {} imm)\n{{".format(e_name.lower(), e_name)) 669 | print(" switch (imm) {") 670 | for value in self.values: 671 | name = '{}_{}'.format(prefix, value.name) 672 | name = safe_name(name).upper() 673 | print(' case {}: return "{}";'.format(name, value.name)) 674 | print(' default: return "XXX: INVALID";') 675 | print(" }") 676 | print("}\n") 677 | 678 | def parse(self, filename): 679 | file = open(filename, "rb") 680 | self.parser.ParseFile(file) 681 | file.close() 682 | 683 | if len(sys.argv) < 2: 684 | print("No input xml file specified") 685 | sys.exit(1) 686 | 687 | input_file = sys.argv[1] 688 | 689 | p = Parser() 690 | p.parse(input_file) 691 | -------------------------------------------------------------------------------- /lib/io.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include "io.h" 28 | #include "selectors.h" 29 | #include "util.h" 30 | 31 | struct agx_allocation 32 | agx_alloc_mem(mach_port_t connection, size_t size, enum agx_memory_type type, bool write_combine) 33 | { 34 | uint32_t mode = 0x430; // shared, ? 35 | uint32_t cache = write_combine ? 0x400 : 0x0; 36 | 37 | uint32_t args_in[24] = { 0 }; 38 | args_in[1] = write_combine ? 0x400 : 0x0; 39 | args_in[2] = 0x2580320; //0x18000; // unk 40 | args_in[3] = 0x1; // unk; 41 | args_in[4] = 0x4000101; //0x1000101; // unk 42 | args_in[5] = mode; 43 | args_in[16] = size; 44 | args_in[20] = type; 45 | args_in[21] = 0x3; 46 | 47 | uint64_t out[10] = { 0 }; 48 | size_t out_sz = sizeof(out); 49 | 50 | kern_return_t ret = IOConnectCallMethod(connection, 51 | AGX_SELECTOR_ALLOCATE_MEM, NULL, 0, args_in, 52 | sizeof(args_in), NULL, 0, out, &out_sz); 53 | 54 | assert(ret == 0); 55 | assert(out_sz == sizeof(out)); 56 | 57 | return (struct agx_allocation) { 58 | .type = AGX_ALLOC_REGULAR, 59 | .guid = out[5], 60 | .index = (out[3] >> 32ull), 61 | .gpu_va = out[0], 62 | .map = (void *) out[1], 63 | .size = size 64 | }; 65 | } 66 | 67 | struct agx_allocation 68 | agx_alloc_cmdbuf(mach_port_t connection, size_t size, bool cmdbuf) 69 | { 70 | struct agx_create_cmdbuf_resp out = {}; 71 | size_t out_sz = sizeof(out); 72 | 73 | uint64_t inputs[2] = { 74 | size, 75 | cmdbuf ? 1 : 0 76 | }; 77 | 78 | kern_return_t ret = IOConnectCallMethod(connection, 79 | AGX_SELECTOR_CREATE_CMDBUF, inputs, 2, NULL, 0, NULL, 80 | NULL, &out, &out_sz); 81 | 82 | assert(ret == 0); 83 | assert(out_sz == sizeof(out)); 84 | assert(out.size == size); 85 | 86 | return (struct agx_allocation) { 87 | .type = cmdbuf ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP, 88 | .index = out.id, 89 | .map = out.map, 90 | .size = out.size, 91 | .guid = 0, /* TODO? 
*/ 92 | }; 93 | } 94 | 95 | uint64_t 96 | agx_cmdbuf_global_ids(mach_port_t connection) 97 | { 98 | uint32_t out[4] = {}; 99 | size_t out_sz = sizeof(out); 100 | 101 | kern_return_t ret = IOConnectCallStructMethod(connection, 102 | 0x6, 103 | NULL, 0, &out, &out_sz); 104 | 105 | assert(ret == 0); 106 | assert(out_sz == sizeof(out)); 107 | assert(out[2] == (out[0] + 0x1000000)); 108 | 109 | /* Returns a 32-bit but is 64-bit in Instruments, extend with the 110 | * missing high bit */ 111 | return (out[0]) | (1ull << 32ull); 112 | } 113 | 114 | void 115 | agx_submit_cmdbuf(mach_port_t connection, struct agx_allocation *cmdbuf, struct agx_allocation *mappings, uint64_t scalar) 116 | { 117 | struct agx_submit_cmdbuf_req req = { 118 | .unk0 = 0x10, 119 | .unk1 = 0x1, 120 | .cmdbuf = cmdbuf->index, 121 | .mappings = mappings->index, 122 | .unk2 = 0x0, 123 | .unk3 = 0x1, 124 | }; 125 | 126 | assert(sizeof(req) == 40); 127 | 128 | kern_return_t ret = IOConnectCallMethod(connection, 129 | AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS, 130 | &scalar, 1, 131 | &req, sizeof(req), 132 | NULL, 0, NULL, 0); 133 | 134 | assert(ret == 0); 135 | return; 136 | } 137 | 138 | struct agx_notification_queue 139 | agx_create_notification_queue(mach_port_t connection) 140 | { 141 | struct agx_create_notification_queue_resp resp; 142 | size_t resp_size = sizeof(resp); 143 | assert(resp_size == 0x10); 144 | 145 | kern_return_t ret = IOConnectCallStructMethod(connection, 146 | AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE, 147 | NULL, 0, &resp, &resp_size); 148 | 149 | assert(resp_size == sizeof(resp)); 150 | assert(ret == 0); 151 | 152 | mach_port_t notif_port = IODataQueueAllocateNotificationPort(); 153 | IOConnectSetNotificationPort(connection, 0, notif_port, resp.unk2); 154 | 155 | return (struct agx_notification_queue) { 156 | .port = notif_port, 157 | .queue = resp.queue, 158 | .id = resp.unk2 159 | }; 160 | } 161 | 162 | struct agx_command_queue 163 | agx_create_command_queue(mach_port_t connection) 164 | { 165 | struct agx_command_queue queue = {}; 166 | 167 | { 168 | uint8_t buffer[1024 + 8] = { 0 }; 169 | const char *path = "/tmp/a.out"; 170 | assert(strlen(path) < 1022); 171 | memcpy(buffer + 0, path, strlen(path)); 172 | 173 | /* Copy to the end */ 174 | unsigned END_LEN = MIN2(strlen(path), 1024 - strlen(path)); 175 | unsigned SKIP = strlen(path) - END_LEN; 176 | unsigned OFFS = 1024 - END_LEN; 177 | memcpy(buffer + OFFS, path + SKIP, END_LEN); 178 | 179 | buffer[1024] = 0x2; 180 | 181 | struct agx_create_command_queue_resp out = {}; 182 | size_t out_sz = sizeof(out); 183 | 184 | kern_return_t ret = IOConnectCallStructMethod(connection, 185 | AGX_SELECTOR_CREATE_COMMAND_QUEUE, 186 | buffer, sizeof(buffer), 187 | &out, &out_sz); 188 | 189 | assert(ret == 0); 190 | assert(out_sz == sizeof(out)); 191 | 192 | queue.id = out.id; 193 | assert(queue.id); 194 | } 195 | 196 | queue.notif = agx_create_notification_queue(connection); 197 | 198 | { 199 | uint64_t scalars[2] = { 200 | queue.id, 201 | queue.notif.id 202 | }; 203 | 204 | kern_return_t ret = IOConnectCallScalarMethod(connection, 205 | 0x1D, 206 | scalars, 2, NULL, NULL); 207 | 208 | assert(ret == 0); 209 | } 210 | 211 | { 212 | uint64_t scalars[2] = { 213 | queue.id, 214 | 0x1ffffffffull 215 | }; 216 | 217 | kern_return_t ret = IOConnectCallScalarMethod(connection, 218 | 0x29, 219 | scalars, 2, NULL, NULL); 220 | 221 | assert(ret == 0); 222 | } 223 | 224 | return queue; 225 | } 226 | -------------------------------------------------------------------------------- 
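The helpers above wrap the accelerator user client's external methods. The following is a minimal, hypothetical sketch (not part of the repository) of how they might be strung together: open a connection to the GPU service through IOKit, allocate GPU memory, create the kernel-side command buffer and memory map, and create a command queue. The IOServiceMatching name and the user-client type passed to IOServiceOpen are assumptions here, not values taken from this code, and the sketch assumes it is compiled inside this source tree so that io.h and selectors.h resolve.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <mach/mach.h>
#include <IOKit/IOKitLib.h>
#include "io.h"

int main(void)
{
   /* Assumption: matching on the AGXAccelerator class finds the GPU service
    * (ioreg reports AGXAcceleratorG13G_B0 on an M1 Mac mini). */
   io_service_t service = IOServiceGetMatchingService(kIOMasterPortDefault,
         IOServiceMatching("AGXAccelerator"));
   assert(service != IO_OBJECT_NULL);

   /* Assumption: user-client type 0; the real type is whatever a traced Metal
    * process passes to IOServiceOpen (observable via wrap/wrap.c). */
   io_connect_t connection;
   kern_return_t ret = IOServiceOpen(service, mach_task_self(), 0, &connection);
   assert(ret == KERN_SUCCESS);

   /* Regular GPU memory for shaders, plus the kernel-managed command buffer
    * and memory-map allocations, then a command queue. */
   struct agx_allocation shader = agx_alloc_mem(connection, 0x10000,
         AGX_MEMORY_TYPE_SHADER, false);
   struct agx_allocation cmdbuf = agx_alloc_cmdbuf(connection, 0x10000, true);
   struct agx_allocation memmap = agx_alloc_cmdbuf(connection, 0x4000, false);
   struct agx_command_queue queue = agx_create_command_queue(connection);

   printf("shader VA 0x%llx, cmdbuf index %u, queue id %u\n",
          (unsigned long long) shader.gpu_va, cmdbuf.index, queue.id);

   /* Once the command buffer and memory map are filled in, submission would
    * go through agx_submit_cmdbuf(connection, &cmdbuf, &memmap, scalar). */
   return 0;
}

--------------------------------------------------------------------------------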
/lib/io.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __AGX_IO_H 25 | #define __AGX_IO_H 26 | 27 | #include 28 | #include 29 | #include "selectors.h" 30 | 31 | enum agx_alloc_type { 32 | AGX_ALLOC_REGULAR = 0, 33 | AGX_ALLOC_MEMMAP = 1, 34 | AGX_ALLOC_CMDBUF = 2, 35 | AGX_NUM_ALLOC, 36 | }; 37 | 38 | static const char *agx_alloc_types[AGX_NUM_ALLOC] = { "mem", "map", "cmd" }; 39 | 40 | struct agx_allocation { 41 | enum agx_alloc_type type; 42 | size_t size; 43 | 44 | /* Index unique only up to type, process-local */ 45 | unsigned index; 46 | 47 | /* Globally unique value (system wide) for tracing. Exists for 48 | * resources, command buffers, GPU submissions, segments, segment lists, 49 | * encoders, accelerators, and channels. Corresponds to Instruments' 50 | * magic table metal-gpu-submission-to-command-buffer-id */ 51 | uint64_t guid; 52 | 53 | /* If CPU mapped, CPU address.
NULL if not mapped */ 54 | void *map; 55 | 56 | /* If type REGULAR, mapped GPU address */ 57 | uint64_t gpu_va; 58 | 59 | /* Human-readable label, or NULL if none */ 60 | char *name; 61 | 62 | /* Used while decoding, marked read-only */ 63 | bool ro; 64 | }; 65 | 66 | struct agx_notification_queue { 67 | mach_port_t port; 68 | IODataQueueMemory *queue; 69 | unsigned id; 70 | }; 71 | 72 | struct agx_command_queue { 73 | unsigned id; 74 | struct agx_notification_queue notif; 75 | }; 76 | 77 | struct agx_allocation agx_alloc_mem(mach_port_t connection, size_t size, enum agx_memory_type type, bool write_combine); 78 | struct agx_allocation agx_alloc_cmdbuf(mach_port_t connection, size_t size, bool cmdbuf); 79 | void agx_submit_cmdbuf(mach_port_t connection, struct agx_allocation *cmdbuf, struct agx_allocation *mappings, uint64_t scalar); 80 | struct agx_command_queue agx_create_command_queue(mach_port_t connection); 81 | uint64_t agx_cmdbuf_global_ids(mach_port_t connection); 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /lib/selectors.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #ifndef __AGX_SELECTOR_H 25 | #define __AGX_SELECTOR_H 26 | 27 | #include 28 | 29 | enum agx_selector { 30 | AGX_SELECTOR_GET_GLOBAL_IDS = 0x6, 31 | AGX_SELECTOR_SET_API = 0x7, 32 | AGX_SELECTOR_CREATE_COMMAND_QUEUE = 0x8, 33 | AGX_SELECTOR_FREE_COMMAND_QUEUE = 0x9, 34 | AGX_SELECTOR_ALLOCATE_MEM = 0xA, 35 | AGX_SELECTOR_FREE_MEM = 0xB, 36 | AGX_SELECTOR_CREATE_CMDBUF = 0xF, 37 | AGX_SELECTOR_FREE_CMDBUF = 0x10, 38 | AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE = 0x11, 39 | AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12, 40 | AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E, 41 | AGX_SELECTOR_GET_VERSION = 0x23, 42 | AGX_NUM_SELECTORS = 0x30 43 | }; 44 | 45 | static const char *selector_table[AGX_NUM_SELECTORS] = { 46 | "unk0", 47 | "unk1", 48 | "unk2", 49 | "unk3", 50 | "unk4", 51 | "unk5", 52 | "GET_GLOBAL_IDS", 53 | "SET_API", 54 | "CREATE_COMMAND_QUEUE", 55 | "FREE_COMMAND_QUEUE", 56 | "ALLOCATE_MEM", 57 | "FREE_MEM", 58 | "unkC", 59 | "unkD", 60 | "unkE", 61 | "CREATE_CMDBUF", 62 | "FREE_CMDBUF", 63 | "CREATE_NOTIFICATION_QUEUE", 64 | "FREE_NOTIFICATION_QUEUE", 65 | "unk13", 66 | "unk14", 67 | "unk15", 68 | "unk16", 69 | "unk17", 70 | "unk18", 71 | "unk19", 72 | "unk1A", 73 | "unk1B", 74 | "unk1C", 75 | "unk1D", 76 | "SUBMIT_COMMAND_BUFFERS", 77 | "unk1F", 78 | "unk20", 79 | "unk21", 80 | "unk22", 81 | "GET_VERSION", 82 | "unk24", 83 | "unk25", 84 | "unk26", 85 | "unk27", 86 | "unk28", 87 | "unk29", 88 | "unk2A", 89 | "unk2B", 90 | "unk2C", 91 | "unk2D", 92 | "unk2E", 93 | "unk2F" 94 | }; 95 | 96 | static inline const char * 97 | wrap_selector_name(uint32_t selector) 98 | { 99 | return (selector < AGX_NUM_SELECTORS) ? selector_table[selector] : "unk??"; 100 | } 101 | 102 | struct agx_create_command_queue_resp { 103 | uint64_t id; 104 | uint32_t unk2; // 90 0A 08 27 105 | uint32_t unk3; // 0 106 | } __attribute__((packed)); 107 | 108 | struct agx_create_cmdbuf_resp { 109 | void *map; 110 | uint32_t size; 111 | uint32_t id; 112 | } __attribute__((packed)); 113 | 114 | struct agx_create_notification_queue_resp { 115 | IODataQueueMemory *queue; 116 | uint32_t unk2; // 1 117 | uint32_t unk3; // 0 118 | } __attribute__((packed)); 119 | 120 | struct agx_submit_cmdbuf_req { 121 | uint32_t unk0; 122 | uint32_t unk1; 123 | uint32_t cmdbuf; 124 | uint32_t mappings; 125 | void *user_0; 126 | void *user_1; 127 | uint32_t unk2; 128 | uint32_t unk3; 129 | } __attribute__((packed)); 130 | 131 | /* Memory allocation isn't really understood yet. 
By comparing SHADER/CMDBUF_32 132 | * vs everything else, it appears the 0x40000000 bit indicates the GPU VA must 133 | * be be in the first 4GiB */ 134 | 135 | enum agx_memory_type { 136 | AGX_MEMORY_TYPE_NORMAL = 0x00000000, /* used for user allocations */ 137 | AGX_MEMORY_TYPE_UNK = 0x08000000, /* unknown */ 138 | AGX_MEMORY_TYPE_CMDBUF_64 = 0x18000000, /* used for command buffer storage */ 139 | AGX_MEMORY_TYPE_SHADER = 0x48000000, /* used for shader memory, with VA = 0 */ 140 | AGX_MEMORY_TYPE_CMDBUF_32 = 0x58000000, /* used for command buffers, with VA < 32-bit */ 141 | AGX_MEMORY_TYPE_FRAMEBUFFER = 0x00888F00, /* used for framebuffer backing */ 142 | }; 143 | 144 | static inline const char * 145 | agx_memory_type_name(uint32_t type) 146 | { 147 | switch (type) { 148 | case AGX_MEMORY_TYPE_NORMAL: return "normal"; 149 | case AGX_MEMORY_TYPE_UNK: return "unk"; 150 | case AGX_MEMORY_TYPE_CMDBUF_64: return "cmdbuf_64"; 151 | case AGX_MEMORY_TYPE_SHADER: return "shader"; 152 | case AGX_MEMORY_TYPE_CMDBUF_32: return "cmdbuf_32"; 153 | case AGX_MEMORY_TYPE_FRAMEBUFFER: return "framebuffer"; 154 | default: return NULL; 155 | } 156 | } 157 | 158 | #endif 159 | -------------------------------------------------------------------------------- /lib/tiling.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | /* Z-order with 64x64 tiles: 30 | * 31 | * [y5][x5][y4][x4][y3][x3][y2][x2][y1][x1][y0][x0] 32 | * 33 | * Efficient tiling algorithm described in 34 | * https://fgiesen.wordpress.com/2011/01/17/texture-tiling-and-swizzling/ but 35 | * for posterity, we split into X and Y parts, and are faced with the problem 36 | * of incrementing: 37 | * 38 | * 0 [x5] 0 [x4] 0 [x3] 0 [x2] 0 [x1] 0 [x0] 39 | * 40 | * To do so, we fill in the "holes" with 1's by adding the bitwise inverse of 41 | * the mask of bits we care about 42 | * 43 | * 0 [x5] 0 [x4] 0 [x3] 0 [x2] 0 [x1] 0 [x0] 44 | * + 1 0 1 0 1 0 1 0 1 0 1 0 45 | * ------------------------------------------ 46 | * 1 [x5] 1 [x4] 1 [x3] 1 [x2] 1 [x1] 1 [x0] 47 | * 48 | * Then when we add one, the holes are passed over by forcing carry bits high. 
49 | * Finally, we need to zero out the holes, by ANDing with the mask of bits we 50 | * care about. In total, we get the expression (X + ~mask + 1) & mask, and 51 | * applying the two's complement identity, we are left with (X - mask) & mask 52 | */ 53 | 54 | #define TILE_WIDTH 64 55 | #define TILE_HEIGHT 64 56 | #define TILE_SHIFT 6 57 | #define TILE_MASK ((1 << TILE_SHIFT) - 1) 58 | 59 | /* mask of bits used for X coordinate in a tile */ 60 | #define SPACE_MASK 0x555 // 0b010101010101 61 | 62 | #define MAX2(x, y) (((x) > (y)) ? (x) : (y)) 63 | #define MIN2(x, y) (((x) < (y)) ? (x) : (y)) 64 | 65 | static uint32_t 66 | ash_space_bits(unsigned x) 67 | { 68 | assert(x < TILE_WIDTH); 69 | return ((x & 1) << 0) | ((x & 2) << 1) | ((x & 4) << 2) | 70 | ((x & 8) << 3) | ((x & 16) << 4) | ((x & 32) << 5); 71 | } 72 | 73 | static void 74 | ash_detile_unaligned_32(uint32_t *tiled, uint32_t *linear, 75 | unsigned width, unsigned linear_pitch, 76 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 77 | { 78 | unsigned tiles_per_row = (width + TILE_WIDTH - 1) >> TILE_SHIFT; 79 | unsigned y_offs = ash_space_bits(sy & TILE_MASK); 80 | unsigned x_offs_start = ash_space_bits(sx & TILE_MASK); 81 | 82 | for (unsigned y = sy; y < smaxy; ++y) { 83 | unsigned tile_y = (y >> TILE_SHIFT); 84 | unsigned tile_row = tile_y * tiles_per_row; 85 | unsigned x_offs = x_offs_start; 86 | 87 | uint32_t *linear_row = linear; 88 | 89 | for (unsigned x = sx; x < smaxx; ++x) { 90 | unsigned tile_x = (x >> TILE_SHIFT); 91 | unsigned tile_idx = (tile_row + tile_x); 92 | unsigned tile_base = tile_idx * (TILE_WIDTH * TILE_HEIGHT); 93 | 94 | *(linear_row++) = tiled[tile_base + y_offs + x_offs]; 95 | x_offs = (x_offs - SPACE_MASK) & SPACE_MASK; 96 | } 97 | 98 | y_offs = (((y_offs >> 1) - SPACE_MASK) & SPACE_MASK) << 1; 99 | linear += linear_pitch; 100 | } 101 | } 102 | 103 | /* Assumes sx, smaxx are both aligned to TILE_WIDTH */ 104 | static void 105 | ash_detile_aligned_32(uint32_t *tiled, uint32_t *linear, 106 | unsigned width, unsigned linear_pitch, 107 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 108 | { 109 | unsigned tiles_per_row = (width + TILE_WIDTH - 1) >> TILE_SHIFT; 110 | unsigned y_offs = 0; 111 | 112 | for (unsigned y = sy; y < smaxy; ++y) { 113 | unsigned tile_y = (y >> TILE_SHIFT); 114 | unsigned tile_row = tile_y * tiles_per_row; 115 | unsigned x_offs = 0; 116 | 117 | uint32_t *linear_row = linear; 118 | 119 | for (unsigned x = sx; x < smaxx; x += TILE_WIDTH) { 120 | unsigned tile_x = (x >> TILE_SHIFT); 121 | unsigned tile_idx = (tile_row + tile_x); 122 | unsigned tile_base = tile_idx * (TILE_WIDTH * TILE_HEIGHT); 123 | uint32_t *tile = tiled + tile_base + y_offs; 124 | 125 | for (unsigned j = 0; j < TILE_WIDTH; ++j) { 126 | /* Written in a funny way to avoid inner shift, 127 | * do it free as part of x_offs instead */ 128 | uint32_t *in = (uint32_t *) (((uint8_t *) tile) + x_offs); 129 | *(linear_row++) = *in; 130 | x_offs = (x_offs - (SPACE_MASK << 2)) & (SPACE_MASK << 2); 131 | } 132 | } 133 | 134 | y_offs = (((y_offs >> 1) - SPACE_MASK) & SPACE_MASK) << 1; 135 | linear += linear_pitch; 136 | } 137 | } 138 | 139 | static void 140 | ash_detile_32(uint32_t *tiled, uint32_t *linear, 141 | unsigned width, unsigned linear_pitch, 142 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 143 | { 144 | if (sx & TILE_MASK) { 145 | ash_detile_unaligned_32(tiled, linear, width, linear_pitch, sx, sy, 146 | MIN2(TILE_WIDTH - (sx & TILE_MASK), smaxx - sx), smaxy); 147 | sx = (sx & ~TILE_MASK) + 
1; 148 | } 149 | 150 | if ((smaxx & TILE_MASK) && (smaxx > sx)) { 151 | ash_detile_unaligned_32(tiled, linear, width, linear_pitch, 152 | MAX2(sx, smaxx & ~TILE_MASK), sy, 153 | smaxx, smaxy); 154 | smaxx = (smaxx & ~TILE_MASK); 155 | } 156 | 157 | if (smaxx > sx) { 158 | ash_detile_aligned_32(tiled, linear, width, linear_pitch, 159 | sx, sy, smaxx, smaxy); 160 | } 161 | } 162 | 163 | void 164 | ash_detile(uint32_t *tiled, uint32_t *linear, 165 | unsigned width, unsigned bpp, unsigned linear_pitch, 166 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 167 | { 168 | /* TODO: parametrize with macro magic */ 169 | assert(bpp == 32); 170 | 171 | ash_detile_32(tiled, linear, width, linear_pitch, sx, sy, smaxx, smaxy); 172 | } 173 | -------------------------------------------------------------------------------- /lib/tiling.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __ASH_DETILE_H 25 | #define __ASH_DETILE_H 26 | 27 | void ash_detile(uint32_t *tiled, uint32_t *linear, 28 | unsigned width, unsigned bpp, unsigned linear_pitch, 29 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /lib/util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __UTIL_H 25 | #define __UTIL_H 26 | 27 | #include 28 | 29 | #define UNUSED __attribute__((unused)) 30 | #define MAX2(x, y) (((x) > (y)) ? (x) : (y)) 31 | #define MIN2(x, y) (((x) < (y)) ? (x) : (y)) 32 | #define ALIGN_POT(v, pot) (((v) + ((pot) - 1)) & ~((pot) - 1)) 33 | 34 | static uint32_t 35 | fui(float f) 36 | { 37 | uint32_t u = 0; 38 | memcpy(&u, &f, 4); 39 | return u; 40 | } 41 | 42 | static float 43 | uif(uint32_t u) 44 | { 45 | float f = 0; 46 | memcpy(&f, &u, 4); 47 | return f; 48 | } 49 | 50 | /* Pretty-printer */ 51 | static void 52 | hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings) 53 | { 54 | unsigned zero_count = 0; 55 | 56 | for (unsigned i = 0; i < cnt; ++i) { 57 | if ((i & 0xF) == 0) 58 | fprintf(fp, "%06X ", i); 59 | 60 | uint8_t v = hex[i]; 61 | 62 | if (v == 0 && (i & 0xF) == 0) { 63 | /* Check if we're starting an aligned run of zeroes */ 64 | unsigned zero_count = 0; 65 | 66 | for (unsigned j = i; j < cnt; ++j) { 67 | if (hex[j] == 0) 68 | zero_count++; 69 | else 70 | break; 71 | } 72 | 73 | if (zero_count >= 32) { 74 | fprintf(fp, "*\n"); 75 | i += (zero_count & ~0xF) - 1; 76 | continue; 77 | } 78 | } 79 | 80 | fprintf(fp, "%02X ", hex[i]); 81 | if ((i & 0xF) == 0xF && with_strings) { 82 | fprintf(fp, " | "); 83 | for (unsigned j = i & ~0xF; j <= i; ++j) { 84 | uint8_t c = hex[j]; 85 | fputc((c < 32 || c > 128) ? '.' : c, fp); 86 | } 87 | } 88 | 89 | if ((i & 0xF) == 0xF) 90 | fprintf(fp, "\n"); 91 | } 92 | 93 | fprintf(fp, "\n"); 94 | } 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /wrap/APPLE_LICENSE: -------------------------------------------------------------------------------- 1 | APPLE PUBLIC SOURCE LICENSE 2 | Version 2.0 - August 6, 2003 3 | 4 | Please read this License carefully before downloading this software. By 5 | downloading or using this software, you are agreeing to be bound by the terms 6 | of this License. If you do not or cannot agree to the terms of this License, 7 | please do not download or use the software. 8 | 9 | Apple Note: In January 2007, Apple changed its corporate name from "Apple 10 | Computer, Inc." to "Apple Inc." This change has been reflected below and 11 | copyright years updated, but no other changes have been made to the APSL 2.0. 12 | 13 | 1. General; Definitions. This License applies to any program or other 14 | work which Apple Inc. ("Apple") makes publicly available and which contains a 15 | notice placed by Apple identifying such program or work as "Original Code" and 16 | stating that it is subject to the terms of this Apple Public Source License 17 | version 2.0 ("License"). 
As used in this License: 18 | 19 | 1.1 "Applicable Patent Rights" mean: (a) in the case where Apple is the 20 | grantor of rights, (i) claims of patents that are now or hereafter acquired, 21 | owned by or assigned to Apple and (ii) that cover subject matter contained in 22 | the Original Code, but only to the extent necessary to use, reproduce and/or 23 | distribute the Original Code without infringement; and (b) in the case where 24 | You are the grantor of rights, (i) claims of patents that are now or hereafter 25 | acquired, owned by or assigned to You and (ii) that cover subject matter in 26 | Your Modifications, taken alone or in combination with Original Code. 27 | 28 | 1.2 "Contributor" means any person or entity that creates or contributes to 29 | the creation of Modifications. 30 | 31 | 1.3 "Covered Code" means the Original Code, Modifications, the combination 32 | of Original Code and any Modifications, and/or any respective portions thereof. 33 | 34 | 1.4 "Externally Deploy" means: (a) to sublicense, distribute or otherwise 35 | make Covered Code available, directly or indirectly, to anyone other than You; 36 | and/or (b) to use Covered Code, alone or as part of a Larger Work, in any way 37 | to provide a service, including but not limited to delivery of content, through 38 | electronic communication with a client other than You. 39 | 40 | 1.5 "Larger Work" means a work which combines Covered Code or portions 41 | thereof with code not governed by the terms of this License. 42 | 43 | 1.6 "Modifications" mean any addition to, deletion from, and/or change to, 44 | the substance and/or structure of the Original Code, any previous 45 | Modifications, the combination of Original Code and any previous Modifications, 46 | and/or any respective portions thereof. When code is released as a series of 47 | files, a Modification is: (a) any addition to or deletion from the contents of 48 | a file containing Covered Code; and/or (b) any new file or other representation 49 | of computer program statements that contains any part of Covered Code. 50 | 51 | 1.7 "Original Code" means (a) the Source Code of a program or other work as 52 | originally made available by Apple under this License, including the Source 53 | Code of any updates or upgrades to such programs or works made available by 54 | Apple under this License, and that has been expressly identified by Apple as 55 | such in the header file(s) of such work; and (b) the object code compiled from 56 | such Source Code and originally made available by Apple under this License 57 | 58 | 1.8 "Source Code" means the human readable form of a program or other work 59 | that is suitable for making modifications to it, including all modules it 60 | contains, plus any associated interface definition files, scripts used to 61 | control compilation and installation of an executable (object code). 62 | 63 | 1.9 "You" or "Your" means an individual or a legal entity exercising rights 64 | under this License. For legal entities, "You" or "Your" includes any entity 65 | which controls, is controlled by, or is under common control with, You, where 66 | "control" means (a) the power, direct or indirect, to cause the direction or 67 | management of such entity, whether by contract or otherwise, or (b) ownership 68 | of fifty percent (50%) or more of the outstanding shares or beneficial 69 | ownership of such entity. 70 | 71 | 2. Permitted Uses; Conditions & Restrictions. 
Subject to the terms and 72 | conditions of this License, Apple hereby grants You, effective on the date You 73 | accept this License and download the Original Code, a world-wide, royalty-free, 74 | non-exclusive license, to the extent of Apple's Applicable Patent Rights and 75 | copyrights covering the Original Code, to do the following: 76 | 77 | 2.1 Unmodified Code. You may use, reproduce, display, perform, internally 78 | distribute within Your organization, and Externally Deploy verbatim, unmodified 79 | copies of the Original Code, for commercial or non-commercial purposes, 80 | provided that in each instance: 81 | 82 | (a) You must retain and reproduce in all copies of Original Code the 83 | copyright and other proprietary notices and disclaimers of Apple as they appear 84 | in the Original Code, and keep intact all notices in the Original Code that 85 | refer to this License; and 86 | 87 | (b) You must include a copy of this License with every copy of Source Code 88 | of Covered Code and documentation You distribute or Externally Deploy, and You 89 | may not offer or impose any terms on such Source Code that alter or restrict 90 | this License or the recipients' rights hereunder, except as permitted under 91 | Section 6. 92 | 93 | 2.2 Modified Code. You may modify Covered Code and use, reproduce, 94 | display, perform, internally distribute within Your organization, and 95 | Externally Deploy Your Modifications and Covered Code, for commercial or 96 | non-commercial purposes, provided that in each instance You also meet all of 97 | these conditions: 98 | 99 | (a) You must satisfy all the conditions of Section 2.1 with respect to the 100 | Source Code of the Covered Code; 101 | 102 | (b) You must duplicate, to the extent it does not already exist, the notice 103 | in Exhibit A in each file of the Source Code of all Your Modifications, and 104 | cause the modified files to carry prominent notices stating that You changed 105 | the files and the date of any change; and 106 | 107 | (c) If You Externally Deploy Your Modifications, You must make Source Code 108 | of all Your Externally Deployed Modifications either available to those to whom 109 | You have Externally Deployed Your Modifications, or publicly available. Source 110 | Code of Your Externally Deployed Modifications must be released under the terms 111 | set forth in this License, including the license grants set forth in Section 3 112 | below, for as long as you Externally Deploy the Covered Code or twelve (12) 113 | months from the date of initial External Deployment, whichever is longer. You 114 | should preferably distribute the Source Code of Your Externally Deployed 115 | Modifications electronically (e.g. download from a web site). 116 | 117 | 2.3 Distribution of Executable Versions. In addition, if You Externally 118 | Deploy Covered Code (Original Code and/or Modifications) in object code, 119 | executable form only, You must include a prominent notice, in the code itself 120 | as well as in related documentation, stating that Source Code of the Covered 121 | Code is available under the terms of this License with information on how and 122 | where to obtain such Source Code. 123 | 124 | 2.4 Third Party Rights. 
You expressly acknowledge and agree that although 125 | Apple and each Contributor grants the licenses to their respective portions of 126 | the Covered Code set forth herein, no assurances are provided by Apple or any 127 | Contributor that the Covered Code does not infringe the patent or other 128 | intellectual property rights of any other entity. Apple and each Contributor 129 | disclaim any liability to You for claims brought by any other entity based on 130 | infringement of intellectual property rights or otherwise. As a condition to 131 | exercising the rights and licenses granted hereunder, You hereby assume sole 132 | responsibility to secure any other intellectual property rights needed, if any. 133 | For example, if a third party patent license is required to allow You to 134 | distribute the Covered Code, it is Your responsibility to acquire that license 135 | before distributing the Covered Code. 136 | 137 | 3. Your Grants. In consideration of, and as a condition to, the licenses 138 | granted to You under this License, You hereby grant to any person or entity 139 | receiving or distributing Covered Code under this License a non-exclusive, 140 | royalty-free, perpetual, irrevocable license, under Your Applicable Patent 141 | Rights and other intellectual property rights (other than patent) owned or 142 | controlled by You, to use, reproduce, display, perform, modify, sublicense, 143 | distribute and Externally Deploy Your Modifications of the same scope and 144 | extent as Apple's licenses under Sections 2.1 and 2.2 above. 145 | 146 | 4. Larger Works. You may create a Larger Work by combining Covered Code 147 | with other code not governed by the terms of this License and distribute the 148 | Larger Work as a single product. In each such instance, You must make sure the 149 | requirements of this License are fulfilled for the Covered Code or any portion 150 | thereof. 151 | 152 | 5. Limitations on Patent License. Except as expressly stated in Section 153 | 2, no other patent rights, express or implied, are granted by Apple herein. 154 | Modifications and/or Larger Works may require additional patent licenses from 155 | Apple which Apple may grant in its sole discretion. 156 | 157 | 6. Additional Terms. You may choose to offer, and to charge a fee for, 158 | warranty, support, indemnity or liability obligations and/or other rights 159 | consistent with the scope of the license granted herein ("Additional Terms") to 160 | one or more recipients of Covered Code. However, You may do so only on Your own 161 | behalf and as Your sole responsibility, and not on behalf of Apple or any 162 | Contributor. You must obtain the recipient's agreement that any such Additional 163 | Terms are offered by You alone, and You hereby agree to indemnify, defend and 164 | hold Apple and every Contributor harmless for any liability incurred by or 165 | claims asserted against Apple or such Contributor by reason of any such 166 | Additional Terms. 167 | 168 | 7. Versions of the License. Apple may publish revised and/or new versions 169 | of this License from time to time. Each version will be given a distinguishing 170 | version number. Once Original Code has been published under a particular 171 | version of this License, You may continue to use it under the terms of that 172 | version. You may also choose to use such Original Code under the terms of any 173 | subsequent version of this License published by Apple. 
No one other than Apple 174 | has the right to modify the terms applicable to Covered Code created under this 175 | License. 176 | 177 | 8. NO WARRANTY OR SUPPORT. The Covered Code may contain in whole or in 178 | part pre-release, untested, or not fully tested works. The Covered Code may 179 | contain errors that could cause failures or loss of data, and may be incomplete 180 | or contain inaccuracies. You expressly acknowledge and agree that use of the 181 | Covered Code, or any portion thereof, is at Your sole and entire risk. THE 182 | COVERED CODE IS PROVIDED "AS IS" AND WITHOUT WARRANTY, UPGRADES OR SUPPORT OF 183 | ANY KIND AND APPLE AND APPLE'S LICENSOR(S) (COLLECTIVELY REFERRED TO AS "APPLE" 184 | FOR THE PURPOSES OF SECTIONS 8 AND 9) AND ALL CONTRIBUTORS EXPRESSLY DISCLAIM 185 | ALL WARRANTIES AND/OR CONDITIONS, EXPRESS OR IMPLIED, INCLUDING, BUT NOT 186 | LIMITED TO, THE IMPLIED WARRANTIES AND/OR CONDITIONS OF MERCHANTABILITY, OF 187 | SATISFACTORY QUALITY, OF FITNESS FOR A PARTICULAR PURPOSE, OF ACCURACY, OF 188 | QUIET ENJOYMENT, AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. APPLE AND EACH 189 | CONTRIBUTOR DOES NOT WARRANT AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE 190 | COVERED CODE, THAT THE FUNCTIONS CONTAINED IN THE COVERED CODE WILL MEET YOUR 191 | REQUIREMENTS, THAT THE OPERATION OF THE COVERED CODE WILL BE UNINTERRUPTED OR 192 | ERROR-FREE, OR THAT DEFECTS IN THE COVERED CODE WILL BE CORRECTED. NO ORAL OR 193 | WRITTEN INFORMATION OR ADVICE GIVEN BY APPLE, AN APPLE AUTHORIZED 194 | REPRESENTATIVE OR ANY CONTRIBUTOR SHALL CREATE A WARRANTY. You acknowledge 195 | that the Covered Code is not intended for use in the operation of nuclear 196 | facilities, aircraft navigation, communication systems, or air traffic control 197 | machines in which case the failure of the Covered Code could lead to death, 198 | personal injury, or severe physical or environmental damage. 199 | 200 | 9. LIMITATION OF LIABILITY. TO THE EXTENT NOT PROHIBITED BY LAW, IN NO 201 | EVENT SHALL APPLE OR ANY CONTRIBUTOR BE LIABLE FOR ANY INCIDENTAL, SPECIAL, 202 | INDIRECT OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR 203 | YOUR USE OR INABILITY TO USE THE COVERED CODE, OR ANY PORTION THEREOF, WHETHER 204 | UNDER A THEORY OF CONTRACT, WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCTS 205 | LIABILITY OR OTHERWISE, EVEN IF APPLE OR SUCH CONTRIBUTOR HAS BEEN ADVISED OF 206 | THE POSSIBILITY OF SUCH DAMAGES AND NOTWITHSTANDING THE FAILURE OF ESSENTIAL 207 | PURPOSE OF ANY REMEDY. SOME JURISDICTIONS DO NOT ALLOW THE LIMITATION OF 208 | LIABILITY OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS LIMITATION MAY NOT 209 | APPLY TO YOU. In no event shall Apple's total liability to You for all damages 210 | (other than as may be required by applicable law) under this License exceed the 211 | amount of fifty dollars ($50.00). 212 | 213 | 10. Trademarks. This License does not grant any rights to use the 214 | trademarks or trade names "Apple", "Mac", "Mac OS", "QuickTime", "QuickTime 215 | Streaming Server" or any other trademarks, service marks, logos or trade names 216 | belonging to Apple (collectively "Apple Marks") or to any trademark, service 217 | mark, logo or trade name belonging to any Contributor. 
You agree not to use 218 | any Apple Marks in or as part of the name of products derived from the Original 219 | Code or to endorse or promote products derived from the Original Code other 220 | than as expressly permitted by and in strict compliance at all times with 221 | Apple's third party trademark usage guidelines which are posted at 222 | http://www.apple.com/legal/guidelinesfor3rdparties.html. 223 | 224 | 11. Ownership. Subject to the licenses granted under this License, each 225 | Contributor retains all rights, title and interest in and to any Modifications 226 | made by such Contributor. Apple retains all rights, title and interest in and 227 | to the Original Code and any Modifications made by or on behalf of Apple 228 | ("Apple Modifications"), and such Apple Modifications will not be automatically 229 | subject to this License. Apple may, at its sole discretion, choose to license 230 | such Apple Modifications under this License, or on different terms from those 231 | contained in this License or may choose not to license them at all. 232 | 233 | 12. Termination. 234 | 235 | 12.1 Termination. This License and the rights granted hereunder will 236 | terminate: 237 | 238 | (a) automatically without notice from Apple if You fail to comply with any 239 | term(s) of this License and fail to cure such breach within 30 days of becoming 240 | aware of such breach; 241 | (b) immediately in the event of the circumstances described in Section 242 | 13.5(b); or 243 | (c) automatically without notice from Apple if You, at any time during the 244 | term of this License, commence an action for patent infringement against Apple; 245 | provided that Apple did not first commence an action for patent infringement 246 | against You in that instance. 247 | 248 | 12.2 Effect of Termination. Upon termination, You agree to immediately stop 249 | any further use, reproduction, modification, sublicensing and distribution of 250 | the Covered Code. All sublicenses to the Covered Code which have been properly 251 | granted prior to termination shall survive any termination of this License. 252 | Provisions which, by their nature, should remain in effect beyond the 253 | termination of this License shall survive, including but not limited to 254 | Sections 3, 5, 8, 9, 10, 11, 12.2 and 13. No party will be liable to any other 255 | for compensation, indemnity or damages of any sort solely as a result of 256 | terminating this License in accordance with its terms, and termination of this 257 | License will be without prejudice to any other right or remedy of any party. 258 | 259 | 13. Miscellaneous. 260 | 261 | 13.1 Government End Users. The Covered Code is a "commercial item" as 262 | defined in FAR 2.101. Government software and technical data rights in the 263 | Covered Code include only those rights customarily provided to the public as 264 | defined in this License. This customary commercial license in technical data 265 | and software is provided in accordance with FAR 12.211 (Technical Data) and 266 | 12.212 (Computer Software) and, for Department of Defense purchases, DFAR 267 | 252.227-7015 (Technical Data -- Commercial Items) and 227.7202-3 (Rights in 268 | Commercial Computer Software or Computer Software Documentation). Accordingly, 269 | all U.S. Government End Users acquire Covered Code with only those rights set 270 | forth herein. 271 | 272 | 13.2 Relationship of Parties. 
This License will not be construed as 273 | creating an agency, partnership, joint venture or any other form of legal 274 | association between or among You, Apple or any Contributor, and You will not 275 | represent to the contrary, whether expressly, by implication, appearance or 276 | otherwise. 277 | 278 | 13.3 Independent Development. Nothing in this License will impair Apple's 279 | right to acquire, license, develop, have others develop for it, market and/or 280 | distribute technology or products that perform the same or similar functions 281 | as, or otherwise compete with, Modifications, Larger Works, technology or 282 | products that You may develop, produce, market or distribute. 283 | 284 | 13.4 Waiver; Construction. Failure by Apple or any Contributor to enforce 285 | any provision of this License will not be deemed a waiver of future enforcement 286 | of that or any other provision. Any law or regulation which provides that the 287 | language of a contract shall be construed against the drafter will not apply to 288 | this License. 289 | 290 | 13.5 Severability. (a) If for any reason a court of competent jurisdiction 291 | finds any provision of this License, or portion thereof, to be unenforceable, 292 | that provision of the License will be enforced to the maximum extent 293 | permissible so as to effect the economic benefits and intent of the parties, 294 | and the remainder of this License will continue in full force and effect. (b) 295 | Notwithstanding the foregoing, if applicable law prohibits or restricts You 296 | from fully and/or specifically complying with Sections 2 and/or 3 or prevents 297 | the enforceability of either of those Sections, this License will immediately 298 | terminate and You must immediately discontinue any use of the Covered Code and 299 | destroy all copies of it that are in your possession or control. 300 | 301 | 13.6 Dispute Resolution. Any litigation or other dispute resolution between 302 | You and Apple relating to this License shall take place in the Northern 303 | District of California, and You and Apple hereby consent to the personal 304 | jurisdiction of, and venue in, the state and federal courts within that 305 | District with respect to this License. The application of the United Nations 306 | Convention on Contracts for the International Sale of Goods is expressly 307 | excluded. 308 | 309 | 13.7 Entire Agreement; Governing Law. This License constitutes the entire 310 | agreement between the parties with respect to the subject matter hereof. This 311 | License shall be governed by the laws of the United States and the State of 312 | California, except that body of California law concerning conflicts of law. 313 | 314 | Where You are located in the province of Quebec, Canada, the following clause 315 | applies: The parties hereby confirm that they have requested that this License 316 | and all related documents be drafted in English. Les parties ont exigé que le 317 | présent contrat et tous les documents connexes soient rédigés en anglais. 318 | 319 | EXHIBIT A. 320 | 321 | "Portions Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. 322 | 323 | This file contains Original Code and/or Modifications of Original Code as 324 | defined in and that are subject to the Apple Public Source License Version 2.0 325 | (the 'License'). You may not use this file except in compliance with the 326 | License. Please obtain a copy of the License at 327 | http://www.opensource.apple.com/apsl/ and read it before using this file. 
328 | 329 | The Original Code and all software distributed under the License are 330 | distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS 331 | OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT 332 | LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 333 | PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the 334 | specific language governing rights and limitations under the License." 335 | 336 | -------------------------------------------------------------------------------- /wrap/wrap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 Asahi Linux contributors 3 | * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. 4 | * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 5 | * 6 | * IOKit prototypes and stub implementations from upstream IOKitLib sources. 7 | * DYLD_INTERPOSE macro from dyld source code. All other code in the file is 8 | * by Asahi Linux contributors. 9 | * 10 | * @APPLE_LICENSE_HEADER_START@ 11 | * 12 | * This file contains Original Code and/or Modifications of Original Code 13 | * as defined in and that are subject to the Apple Public Source License 14 | * Version 2.0 (the 'License'). You may not use this file except in 15 | * compliance with the License. Please obtain a copy of the License at 16 | * http://www.opensource.apple.com/apsl/ and read it before using this 17 | * file. 18 | * 19 | * The Original Code and all software distributed under the License are 20 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 21 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 22 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 23 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 24 | * Please see the License for the specific language governing rights and 25 | * limitations under the License. 
26 | * 27 | * @APPLE_LICENSE_HEADER_END@ 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include "selectors.h" 41 | #include "cmdstream.h" 42 | #include "io.h" 43 | #include "decode.h" 44 | #include "util.h" 45 | 46 | /* Apple macro */ 47 | 48 | #define DYLD_INTERPOSE(_replacment,_replacee) \ 49 | __attribute__((used)) static struct{ const void* replacment; const void* replacee; } _interpose_##_replacee \ 50 | __attribute__ ((section ("__DATA,__interpose"))) = { (const void*)(unsigned long)&_replacment, (const void*)(unsigned long)&_replacee }; 51 | 52 | mach_port_t metal_connection = 0; 53 | 54 | kern_return_t 55 | wrap_IOConnectCallMethod( 56 | mach_port_t connection, // In 57 | uint32_t selector, // In 58 | const uint64_t *input, // In 59 | uint32_t inputCnt, // In 60 | const void *inputStruct, // In 61 | size_t inputStructCnt, // In 62 | uint64_t *output, // Out 63 | uint32_t *outputCnt, // In/Out 64 | void *outputStruct, // Out 65 | size_t *outputStructCntP) // In/Out 66 | { 67 | /* Heuristic guess which connection is Metal, skip over I/O from everything else */ 68 | bool bail = false; 69 | 70 | if (metal_connection == 0) { 71 | if (selector == AGX_SELECTOR_SET_API) 72 | metal_connection = connection; 73 | else 74 | bail = true; 75 | } else if (metal_connection != connection) 76 | bail = true; 77 | 78 | if (bail) 79 | return IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 80 | 81 | /* Check the arguments make sense */ 82 | assert((input != NULL) == (inputCnt != 0)); 83 | assert((inputStruct != NULL) == (inputStructCnt != 0)); 84 | assert((output != NULL) == (outputCnt != 0)); 85 | assert((outputStruct != NULL) == (outputStructCntP != 0)); 86 | 87 | /* Dump inputs */ 88 | switch (selector) { 89 | case AGX_SELECTOR_SET_API: 90 | assert(input == NULL && output == NULL && outputStruct == NULL); 91 | assert(inputStruct != NULL && inputStructCnt == 16); 92 | assert(((uint8_t *) inputStruct)[15] == 0x0); 93 | 94 | printf("%X: SET_API(%s)\n", connection, (const char *) inputStruct); 95 | break; 96 | 97 | case AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS: 98 | assert(output == NULL && outputStruct == NULL); 99 | assert(inputStructCnt == 40); 100 | assert(inputCnt == 1); 101 | 102 | printf("%X: SUBMIT_COMMAND_BUFFERS command queue id:%llx %p\n", connection, input[0], inputStruct); 103 | 104 | const struct agx_submit_cmdbuf_req *req = inputStruct; 105 | 106 | pandecode_cmdstream(req->cmdbuf, false); 107 | 108 | if (getenv("ASAHI_DUMP")) 109 | pandecode_dump_mappings(); 110 | 111 | /* fallthrough */ 112 | default: 113 | printf("%X: call %s (out %p, %zu)", connection, wrap_selector_name(selector), outputStructCntP, outputStructCntP ? 
*outputStructCntP : 0); 114 | 115 | for (uint64_t u = 0; u < inputCnt; ++u) 116 | printf(" %llx", input[u]); 117 | 118 | if(inputStructCnt) { 119 | printf(", struct:\n"); 120 | hexdump(stdout, inputStruct, inputStructCnt, true); 121 | } else { 122 | printf("\n"); 123 | } 124 | 125 | break; 126 | } 127 | 128 | /* Invoke the real method */ 129 | kern_return_t ret = IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 130 | 131 | printf("return %u", ret); 132 | 133 | /* Dump the outputs */ 134 | if(outputCnt) { 135 | printf("%u scalars: ", *outputCnt); 136 | 137 | for (uint64_t u = 0; u < *outputCnt; ++u) 138 | printf("%llx ", output[u]); 139 | 140 | printf("\n"); 141 | } 142 | 143 | if(outputStructCntP) { 144 | printf(" struct\n"); 145 | hexdump(stdout, outputStruct, *outputStructCntP, true); 146 | 147 | if (selector == 2) { 148 | /* Dump linked buffer as well */ 149 | void **o = outputStruct; 150 | hexdump(stdout, *o, 64, true); 151 | } 152 | } 153 | 154 | printf("\n"); 155 | 156 | /* Track allocations for later analysis (dumping, disassembly, etc) */ 157 | switch (selector) { 158 | case AGX_SELECTOR_CREATE_CMDBUF: { 159 | assert(inputCnt == 2); 160 | assert((*outputStructCntP) == 0x10); 161 | uint64_t *inp = (uint64_t *) input; 162 | assert(inp[1] == 1 || inp[1] == 0); 163 | uint64_t *ptr = (uint64_t *) outputStruct; 164 | uint32_t *words = (uint32_t *) (ptr + 1); 165 | 166 | pandecode_track_alloc((struct agx_allocation) { 167 | .index = words[1], 168 | .map = (void *) *ptr, 169 | .size = words[0], 170 | .type = inp[1] ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP 171 | }); 172 | break; 173 | } 174 | 175 | case AGX_SELECTOR_ALLOCATE_MEM: { 176 | assert((*outputStructCntP) == 0x50); 177 | uint64_t *iptrs = (uint64_t *) inputStruct; 178 | uint64_t *ptrs = (uint64_t *) outputStruct; 179 | uint64_t gpu_va = ptrs[0]; 180 | uint64_t cpu = ptrs[1]; 181 | uint64_t cpu_fixed_1 = iptrs[6]; 182 | uint64_t cpu_fixed_2 = iptrs[7]; /* xxx what's the diff? */ 183 | if (cpu && cpu_fixed_1) 184 | assert(cpu == cpu_fixed_1); 185 | #if 0 186 | /* TODO: what about this case? 
*/ 187 | else if (cpu == 0) 188 | cpu = cpu_fixed_1; 189 | #endif 190 | uint64_t size = ptrs[4]; 191 | uint32_t *iwords = (uint32_t *) inputStruct; 192 | const char *type = agx_memory_type_name(iwords[20]); 193 | printf("allocate gpu va %llx, cpu %llx, 0x%llx bytes ", gpu_va, cpu, size); 194 | if (type) 195 | printf(" %s\n", type); 196 | else 197 | printf(" unknown type %08X\n", iwords[20]); 198 | 199 | pandecode_track_alloc((struct agx_allocation) { 200 | .type = AGX_ALLOC_REGULAR, 201 | .size = size, 202 | .index = ptrs[3] >> 32ull, 203 | .gpu_va = gpu_va, 204 | .map = (void *) cpu, 205 | }); 206 | } 207 | 208 | default: 209 | break; 210 | } 211 | 212 | return ret; 213 | } 214 | 215 | kern_return_t 216 | wrap_IOConnectCallAsyncMethod( 217 | mach_port_t connection, // In 218 | uint32_t selector, // In 219 | mach_port_t wakePort, // In 220 | uint64_t *reference, // In 221 | uint32_t referenceCnt, // In 222 | const uint64_t *input, // In 223 | uint32_t inputCnt, // In 224 | const void *inputStruct, // In 225 | size_t inputStructCnt, // In 226 | uint64_t *output, // Out 227 | uint32_t *outputCnt, // In/Out 228 | void *outputStruct, // Out 229 | size_t *outputStructCntP) // In/Out 230 | { 231 | /* Check the arguments make sense */ 232 | assert((input != NULL) == (inputCnt != 0)); 233 | assert((inputStruct != NULL) == (inputStructCnt != 0)); 234 | assert((output != NULL) == (outputCnt != 0)); 235 | assert((outputStruct != NULL) == (outputStructCntP != 0)); 236 | 237 | printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector, wakePort, outputStructCntP, outputStructCntP ? *outputStructCntP : 0); 238 | 239 | for (uint64_t u = 0; u < inputCnt; ++u) 240 | printf(" %llx", input[u]); 241 | 242 | if(inputStructCnt) { 243 | printf(", struct:\n"); 244 | hexdump(stdout, inputStruct, inputStructCnt, true); 245 | } else { 246 | printf("\n"); 247 | } 248 | 249 | printf(", references: "); 250 | for (unsigned i = 0; i < referenceCnt; ++i) 251 | printf(" %llx", reference[i]); 252 | printf("\n"); 253 | 254 | kern_return_t ret = IOConnectCallAsyncMethod(connection, selector, wakePort, reference, referenceCnt, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 255 | 256 | printf("return %u", ret); 257 | 258 | if(outputCnt) { 259 | printf("%u scalars: ", *outputCnt); 260 | 261 | for (uint64_t u = 0; u < *outputCnt; ++u) 262 | printf("%llx ", output[u]); 263 | 264 | printf("\n"); 265 | } 266 | 267 | if(outputStructCntP) { 268 | printf(" struct\n"); 269 | hexdump(stdout, outputStruct, *outputStructCntP, true); 270 | 271 | if (selector == 2) { 272 | /* Dump linked buffer as well */ 273 | void **o = outputStruct; 274 | hexdump(stdout, *o, 64, true); 275 | } 276 | } 277 | 278 | printf("\n"); 279 | return ret; 280 | } 281 | 282 | kern_return_t 283 | wrap_IOConnectCallStructMethod( 284 | mach_port_t connection, // In 285 | uint32_t selector, // In 286 | const void *inputStruct, // In 287 | size_t inputStructCnt, // In 288 | void *outputStruct, // Out 289 | size_t *outputStructCntP) // In/Out 290 | { 291 | return wrap_IOConnectCallMethod(connection, selector, NULL, 0, inputStruct, inputStructCnt, NULL, NULL, outputStruct, outputStructCntP); 292 | } 293 | 294 | kern_return_t 295 | wrap_IOConnectCallAsyncStructMethod( 296 | mach_port_t connection, // In 297 | uint32_t selector, // In 298 | mach_port_t wakePort, // In 299 | uint64_t *reference, // In 300 | uint32_t referenceCnt, // In 301 | const void *inputStruct, // In 302 | size_t inputStructCnt, // In 303 | 
void *outputStruct, // Out 304 | size_t *outputStructCnt) // In/Out 305 | { 306 | return wrap_IOConnectCallAsyncMethod(connection, selector, wakePort, 307 | reference, referenceCnt, 308 | NULL, 0, 309 | inputStruct, inputStructCnt, 310 | NULL, NULL, 311 | outputStruct, outputStructCnt); 312 | } 313 | 314 | kern_return_t 315 | wrap_IOConnectCallScalarMethod( 316 | mach_port_t connection, // In 317 | uint32_t selector, // In 318 | const uint64_t *input, // In 319 | uint32_t inputCnt, // In 320 | uint64_t *output, // Out 321 | uint32_t *outputCnt) // In/Out 322 | { 323 | return wrap_IOConnectCallMethod(connection, selector, 324 | input, inputCnt, 325 | NULL, 0, 326 | output, outputCnt, 327 | NULL, NULL); 328 | } 329 | 330 | kern_return_t 331 | wrap_IOConnectCallAsyncScalarMethod( 332 | mach_port_t connection, // In 333 | uint32_t selector, // In 334 | mach_port_t wakePort, // In 335 | uint64_t *reference, // In 336 | uint32_t referenceCnt, // In 337 | const uint64_t *input, // In 338 | uint32_t inputCnt, // In 339 | uint64_t *output, // Out 340 | uint32_t *outputCnt) // In/Out 341 | { 342 | return wrap_IOConnectCallAsyncMethod(connection, selector, wakePort, 343 | reference, referenceCnt, 344 | input, inputCnt, 345 | NULL, 0, 346 | output, outputCnt, 347 | NULL, NULL); 348 | } 349 | 350 | kern_return_t 351 | wrap_IOConnectSetNotificationPort( 352 | io_connect_t connect, 353 | uint32_t type, 354 | mach_port_t port, 355 | uintptr_t reference ) 356 | { 357 | printf("connect %X, type %X, to notification port %X, with reference %lx\n", connect, type, port, reference); 358 | kern_return_t ret = IOConnectSetNotificationPort(connect, type, port, reference); 359 | printf("return %u\n", ret); 360 | return ret; 361 | } 362 | 363 | kern_return_t 364 | wrap_IOSetNotificationPort( 365 | mach_port_t connect, 366 | uint32_t type, 367 | mach_port_t port ) 368 | { 369 | return wrap_IOConnectSetNotificationPort(connect, type, port, 0); 370 | } 371 | 372 | IONotificationPortRef 373 | wrap_IONotificationPortCreate( 374 | mach_port_t masterPort ) 375 | { 376 | IONotificationPortRef ref = IONotificationPortCreate(masterPort); 377 | printf("creating notification port from master %X --> %p\n", masterPort, ref); 378 | return ref; 379 | } 380 | 381 | void 382 | wrap_IONotificationPortSetDispatchQueue(IONotificationPortRef notify, dispatch_queue_t queue) 383 | { 384 | printf("set dispatch queue %p to queue %p\n", notify, queue); 385 | IONotificationPortSetDispatchQueue(notify, queue); 386 | } 387 | 388 | mach_port_t 389 | wrap_IODataQueueAllocateNotificationPort() 390 | { 391 | mach_port_t ret = IODataQueueAllocateNotificationPort(); 392 | printf("data queue notif port %X\n", ret); 393 | return ret; 394 | } 395 | 396 | IOReturn 397 | wrap_IODataQueueSetNotificationPort(IODataQueueMemory *dataQueue, mach_port_t notifyPort) 398 | { 399 | IOReturn ret = IODataQueueSetNotificationPort(dataQueue, notifyPort); 400 | printf("data queue %p set notif port %X -> %X\n", dataQueue, notifyPort, ret); 401 | return ret; 402 | } 403 | 404 | DYLD_INTERPOSE(wrap_IOConnectCallMethod, IOConnectCallMethod); 405 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncMethod, IOConnectCallAsyncMethod); 406 | DYLD_INTERPOSE(wrap_IOConnectCallStructMethod, IOConnectCallStructMethod); 407 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncStructMethod, IOConnectCallAsyncStructMethod); 408 | DYLD_INTERPOSE(wrap_IOConnectCallScalarMethod, IOConnectCallScalarMethod); 409 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncScalarMethod, IOConnectCallAsyncScalarMethod); 410 | 
DYLD_INTERPOSE(wrap_IOConnectSetNotificationPort, IOConnectSetNotificationPort); 411 | //DYLD_INTERPOSE(wrap_IOSetNotificationPort, IOSetNotificationPort); 412 | DYLD_INTERPOSE(wrap_IONotificationPortCreate, IONotificationPortCreate); 413 | DYLD_INTERPOSE(wrap_IONotificationPortSetDispatchQueue, IONotificationPortSetDispatchQueue); 414 | DYLD_INTERPOSE(wrap_IODataQueueAllocateNotificationPort, IODataQueueAllocateNotificationPort); 415 | DYLD_INTERPOSE(wrap_IODataQueueSetNotificationPort, IODataQueueSetNotificationPort); 416 | --------------------------------------------------------------------------------