├── .gitignore
├── Makefile
├── README.md
├── demo
│   ├── demo.c
│   ├── demo.h
│   ├── iokit.c
│   ├── shaders.c
│   └── slowfb.c
├── disasm-driver.c
├── disasm
│   └── disasm.c
├── docs
│   ├── Codenames.md
│   └── table.py
├── lib
│   ├── cmdbuf.xml
│   ├── cmdstream.h
│   ├── decode.c
│   ├── decode.h
│   ├── gen_pack.py
│   ├── io.c
│   ├── io.h
│   ├── selectors.h
│   ├── tiling.c
│   ├── tiling.h
│   └── util.h
└── wrap
    ├── APPLE_LICENSE
    └── wrap.c

/.gitignore:
--------------------------------------------------------------------------------
1 | wrap.dylib*
2 | demo-bin*
3 | fb.bin
4 | disasm-bin*
5 | agx_pack.h
6 | 
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | all: wrap.dylib demo-bin disasm-bin
2 | .PHONY: clean all
3 | .SUFFIXES:
4 | 
5 | clean:
6 | 	rm -f wrap.dylib demo-bin disasm-bin agx_pack.h
7 | 
8 | CFLAGS := -g -Wall -Werror -Wextra -Wno-unused-variable -Wno-unused-function -Wno-unused-parameter
9 | WRAP_HDRS := $(wildcard lib/*.h)\
10 | 
11 | WRAP_SRCS := $(wildcard lib/*.c)\
12 | 	$(wildcard wrap/*.c)\
13 | 	$(wildcard disasm/*.c)\
14 | 
15 | wrap.dylib: $(WRAP_SRCS) $(WRAP_HDRS) Makefile agx_pack.h
16 | 	clang -o $@ $(WRAP_SRCS) -I lib/ -I . -dynamiclib -framework IOKit $(CFLAGS)
17 | 
18 | DEMO_SRCS := $(wildcard lib/*.c)\
19 | 	$(wildcard demo/*.c)\
20 | 	$(wildcard disasm/*.c)
21 | 
22 | DEMO_HDRS := $(wildcard lib/*.h)\
23 | 
24 | demo-bin: $(DEMO_SRCS) $(DEMO_HDRS) Makefile agx_pack.h
25 | 	clang -o $@ $(DEMO_SRCS) -I lib/ -I . -I /opt/X11/include -L /opt/X11/lib/ -lX11 -framework IOKit $(CFLAGS)
26 | 
27 | agx_pack.h: lib/gen_pack.py lib/cmdbuf.xml Makefile
28 | 	python3 lib/gen_pack.py lib/cmdbuf.xml > agx_pack.h
29 | 
30 | DISASM_SRCS := $(wildcard disasm/*.c)\
31 | 	disasm-driver.c
32 | 
33 | disasm-bin: $(DISASM_SRCS) Makefile
34 | 	clang -o $@ $(DISASM_SRCS) $(CFLAGS)
35 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Asahi GPU
2 | 
3 | Research for an open source graphics stack for Apple M1.
4 | 
5 | As development of a Mesa driver has begun, work has moved in-tree to [Mesa](https://gitlab.freedesktop.org/mesa/mesa/), and this repository is no longer in use.
6 | 
7 | ## wrap
8 | 
9 | Build the library with the included Makefile (`make wrap.dylib`), then inject it into any Metal application by setting the environment variable `DYLD_INSERT_LIBRARIES=/Users/bloom/gpu/wrap.dylib` (adjust the path to your checkout). See the example session at the end of this README.
10 | 
11 | ## Contributors
12 | 
13 | * Alyssa Rosenzweig (`bloom` on IRC), working on the command stream and ISA
14 | * marcan, working on the kernel side
15 | 
16 | ## Contributing
17 | 
18 | All contributors are expected to abide by our [Code of Conduct](https://asahilinux.org/code-of-conduct) and our [Copyright and Reverse Engineering Policy](https://asahilinux.org/copyright).
19 | 
20 | For more information, please see our [Contributing](https://asahilinux.org/contribute/) page.
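## Example: running wrap

A minimal end-to-end session sketching the workflow from the `wrap` section above. It assumes you run from the repository checkout, and `./my-metal-test` is a hypothetical stand-in for whatever Metal binary you want to trace (macOS only honours `DYLD_INSERT_LIBRARIES` for binaries that allow library injection, such as your own test programs):

```sh
# Generate agx_pack.h and build the interposer library
make wrap.dylib

# Launch a Metal workload with the wrapper injected, so it can observe the
# IOKit calls the userspace GPU driver makes on the application's behalf
DYLD_INSERT_LIBRARIES=$PWD/wrap.dylib ./my-metal-test
```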
21 | 22 | -------------------------------------------------------------------------------- /demo/demo.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "tiling.h" 8 | #include "demo.h" 9 | #include "util.h" 10 | #include "../agx_pack.h" 11 | 12 | #define WIDTH 1311 13 | #define HEIGHT 717 14 | 15 | static uint64_t 16 | demo_zero(struct agx_allocator *allocator, size_t count) 17 | { 18 | struct agx_ptr ptr = agx_allocate(allocator, count); 19 | memset(ptr.map, 0, count); 20 | return ptr.gpu_va; 21 | } 22 | 23 | /* Upload vertex attribtues */ 24 | 25 | float t = 0.0; 26 | 27 | static uint64_t 28 | demo_attributes(struct agx_allocator *allocator) 29 | { 30 | float attributes1[] = { 31 | t++ , -250.0 , 0.0f, 0.0f, 32 | 1.0f , 0.0f , 0.0f, 1.0f, 33 | -250.0f, -250.0f, 0.0f, 0.0f, 34 | 0.0f , 1.0f , 0.0f, 1.0f, 35 | 0.0f , 250.0f , 0.0f, 0.0f, 36 | 0.0f , 0.0f , 1.0f, 1.0f, 37 | 250.0f , 250.0f , 0.0f, 0.0f, 38 | 0.0f , 0.0f , 1.0f, 1.0f, 39 | }; 40 | 41 | uint32_t attributes2[] = { WIDTH, HEIGHT }; 42 | 43 | uint64_t attribs[2] = { 44 | agx_upload(allocator, attributes1, sizeof(attributes1)), 45 | agx_upload(allocator, attributes2, sizeof(attributes2)) 46 | }; 47 | 48 | return agx_upload(allocator, attribs, sizeof(attribs)); 49 | } 50 | 51 | static uint64_t 52 | demo_viewport(struct agx_allocator *allocator) 53 | { 54 | struct agx_ptr t = agx_allocate(allocator, AGX_VIEWPORT_LENGTH); 55 | bl_pack(t.map, VIEWPORT, cfg) { 56 | cfg.translate_x = WIDTH / 2; 57 | cfg.scale_x = WIDTH / 2; 58 | cfg.translate_y = HEIGHT / 2; 59 | cfg.scale_y = -(HEIGHT / 2); 60 | cfg.near_z = 0.0f; 61 | cfg.far_z = 1.0f; 62 | }; 63 | 64 | return t.gpu_va; 65 | } 66 | 67 | /* FP16 */ 68 | static uint64_t 69 | demo_clear_color(struct agx_allocator *allocator) 70 | { 71 | __fp16 colour[] = { 72 | 0.99f, 0.75f, 0.53f, 1.0f 73 | }; 74 | 75 | return agx_upload(allocator, colour, sizeof(colour)); 76 | } 77 | 78 | static uint64_t 79 | demo_render_target(struct agx_allocator *allocator, struct agx_allocation *framebuffer) 80 | { 81 | struct agx_ptr t = agx_allocate(allocator, AGX_RENDER_TARGET_LENGTH); 82 | bl_pack(t.map, RENDER_TARGET, cfg) { 83 | cfg.unk_0 = 0xa22; 84 | cfg.swizzle_r = AGX_CHANNEL_B; 85 | cfg.swizzle_g = AGX_CHANNEL_G; 86 | cfg.swizzle_b = AGX_CHANNEL_R; 87 | cfg.swizzle_a = AGX_CHANNEL_A; 88 | cfg.width = WIDTH; 89 | cfg.height = WIDTH; 90 | cfg.buffer = framebuffer->gpu_va; 91 | cfg.unk_100 = 0x1000000; 92 | }; 93 | 94 | return t.gpu_va; 95 | } 96 | 97 | /* Fed into fragment writeout */ 98 | static uint64_t 99 | demo_unk0_5(struct agx_allocator *allocator) 100 | { 101 | uint32_t unk[] = { 0, ~0 }; 102 | return agx_upload(allocator, unk, sizeof(unk)); 103 | } 104 | 105 | static uint64_t 106 | make_ptr40(uint8_t tag0, uint8_t tag1, uint8_t tag2, uint64_t ptr) 107 | { 108 | assert(ptr < (1ull << 40)); 109 | 110 | return (tag0 << 0) | (tag1 << 8) | (tag2 << 16) | (ptr << 24); 111 | } 112 | 113 | static uint64_t 114 | demo_launch_fragment(struct agx_allocator *allocator, struct agx_allocation *fsbuf) 115 | { 116 | uint32_t unk[] = { 117 | 0x800000, 118 | 0x1002, // XXX: blob sets 0x10000 bit and adds an extra pointer to unknown data 119 | fsbuf->gpu_va + 0xC0, // XXX: dynalloc -- fragment shader 120 | 0x1440, 121 | 0x0, 122 | }; 123 | 124 | return agx_upload(allocator, unk, sizeof(unk)); 125 | } 126 | 127 | static uint64_t 128 | demo_unk8(struct agx_allocator *allocator) 129 | { 130 | 
uint32_t unk[] = { 131 | 0x100c0000, 0x100, 0x0, 0x0, 0x0, 132 | }; 133 | 134 | return agx_upload(allocator, unk, sizeof(unk)); 135 | } 136 | 137 | static uint64_t 138 | demo_unk9(struct agx_allocator *allocator) 139 | { 140 | uint8_t unk[] = { 141 | 0x00, 0x00, 0x02, 0x0c, 142 | 0x00, 0x00, 0x01, 0x00, 143 | 0x00, 0x00, 0x00, 0x00, 144 | 0x05, 0x00, 0x00, 0x00 145 | }; 146 | 147 | return agx_upload(allocator, unk, sizeof(unk)); 148 | } 149 | 150 | static uint64_t 151 | demo_unk10(struct agx_allocator *allocator) 152 | { 153 | uint32_t unk[] = { 154 | 0x10000b5, 155 | 0x40200, 156 | 0x7200f00, 157 | 0xe000000, 158 | 0x7200f00, 159 | 0x0e000000, 160 | 0, 161 | }; 162 | 163 | return agx_upload(allocator, unk, sizeof(unk)); 164 | } 165 | 166 | static uint64_t 167 | demo_unk11(struct agx_allocator *allocator) 168 | { 169 | uint32_t unk[] = { 170 | 0x200004a, 171 | 0x200, 172 | 0x7e00000, 173 | 0x7e00000, 174 | 0x1ffff 175 | }; 176 | 177 | return agx_upload(allocator, unk, sizeof(unk)); 178 | } 179 | 180 | static uint64_t 181 | demo_unk12(struct agx_allocator *allocator) 182 | { 183 | uint32_t unk[] = { 184 | 0x410000, 185 | 0x1e3ce508, 186 | 0xa0 187 | }; 188 | 189 | return agx_upload(allocator, unk, sizeof(unk)); 190 | } 191 | 192 | static uint64_t 193 | demo_unk13(struct agx_allocator *allocator) 194 | { 195 | uint32_t unk[] = { 196 | 0x200000, 0x480, 197 | }; 198 | 199 | return agx_upload(allocator, unk, sizeof(unk)); 200 | } 201 | 202 | static uint64_t 203 | demo_unk14(struct agx_allocator *allocator) 204 | { 205 | uint32_t unk[] = { 206 | 0x100, 0x0, 207 | }; 208 | 209 | return agx_upload(allocator, unk, sizeof(unk)); 210 | } 211 | 212 | /* TODO: there appears to be hidden support for line loops/triangle fans/quads 213 | * but still need to confirm on a more substantive workload, also I can't get 214 | * points/lines to work yet.. */ 215 | 216 | static uint64_t 217 | demo_unk2(struct agx_allocator *allocator, struct agx_allocation *vsbuf, struct agx_allocation *fsbuf) 218 | { 219 | struct agx_ptr ptr = agx_allocate(allocator, 0x800); 220 | uint8_t *out = ptr.map; 221 | uint64_t temp = 0; 222 | 223 | assert(vsbuf->gpu_va < (1ull << 32)); 224 | assert(fsbuf->gpu_va < (1ull << 32)); 225 | 226 | // Bind vertex pipeline and start queueing commands 227 | uint32_t bind_vertex[] = { 228 | 0x4000002e, 229 | 0x1002, 230 | vsbuf->gpu_va, 231 | 0x0505, 232 | }; 233 | 234 | memcpy(out, bind_vertex, sizeof(bind_vertex)); 235 | out += sizeof(bind_vertex); 236 | 237 | /* yes, really unaligned */ 238 | *(out++) = 0x0; 239 | 240 | /* Remark: the first argument to each ptr40 is the number of 32-bit 241 | * words pointed to. The data type is inferred at the source. In theory 242 | * this means we can reorder blocks. We can also duplicate blocks. 243 | * Exception: the first block which is tagged 0? Duplication means 244 | * this isn't by length, instead a special record at the end indicates 245 | * the end. 
*/ 246 | 247 | temp = make_ptr40(0x00, 0x00, 0x00, demo_zero(allocator, 16)); 248 | memcpy(out, &temp, 8); 249 | out += 8; 250 | 251 | temp = make_ptr40(0x05, 0x00, 0x00, demo_unk8(allocator)); 252 | memcpy(out, &temp, 8); 253 | out += 8; 254 | 255 | temp = make_ptr40(0x05, 0x00, 0x00, demo_launch_fragment(allocator, fsbuf)); 256 | memcpy(out, &temp, 8); 257 | out += 8; 258 | 259 | temp = make_ptr40(0x04, 0x00, 0x00, demo_unk9(allocator)); 260 | memcpy(out, &temp, 8); 261 | out += 8; 262 | 263 | temp = make_ptr40(0x07, 0x00, 0x00, demo_unk10(allocator)); 264 | memcpy(out, &temp, 8); 265 | out += 8; 266 | 267 | temp = make_ptr40(0x05, 0x00, 0x00, demo_unk11(allocator)); 268 | memcpy(out, &temp, 8); 269 | out += 8; 270 | 271 | temp = make_ptr40(0x0a, 0x00, 0x00, demo_viewport(allocator)); 272 | memcpy(out, &temp, 8); 273 | out += 8; 274 | 275 | temp = make_ptr40(0x03, 0x00, 0x00, demo_unk12(allocator)); 276 | memcpy(out, &temp, 8); 277 | out += 8; 278 | 279 | temp = make_ptr40(0x02, 0x00, 0x00, demo_unk13(allocator)); 280 | memcpy(out, &temp, 8); 281 | out += 8; 282 | 283 | temp = make_ptr40(0x02, 0x00, 0x00, demo_unk14(allocator)); 284 | memcpy(out, &temp, 8); 285 | out += 8; 286 | 287 | /* Must be after the rest */ 288 | 289 | bl_pack(out, DRAW, cfg) { 290 | cfg.primitive = AGX_PRIMITIVE_TRIANGLE_STRIP; 291 | cfg.vertex_start = 0; 292 | cfg.vertex_count = 4; 293 | cfg.instance_count = 1; 294 | }; 295 | 296 | out += AGX_DRAW_LENGTH; 297 | 298 | uint8_t stop[] = { 299 | 0x00, 0x00, 0x00, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, // Stop 300 | }; 301 | 302 | memcpy(out, stop, sizeof(stop)); 303 | out += sizeof(stop); 304 | 305 | return ptr.gpu_va; 306 | } 307 | 308 | /* Odd pattern */ 309 | static uint64_t 310 | demo_unk6(struct agx_allocator *allocator) 311 | { 312 | struct agx_ptr ptr = agx_allocate(allocator, 0x4000 * sizeof(uint64_t)); 313 | uint64_t *buf = ptr.map; 314 | memset(buf, 0, sizeof(*buf)); 315 | 316 | for (unsigned i = 1; i < 0x3ff; ++i) 317 | buf[i] = (i + 1); 318 | 319 | return ptr.gpu_va; 320 | } 321 | 322 | #define PTR40(a, b, c, ptr) make_ptr40(0x ## a, 0x ## b, 0x ##c, ptr) 323 | 324 | /* Set arguments to a vertex/compute shader (attribute table or 325 | * kernel arguments respectively). 
start/sz are word-sized */ 326 | 327 | static uint64_t 328 | demo_bind_arg_words(uint64_t gpu_va, unsigned start, unsigned sz) 329 | { 330 | assert(sz < 8); 331 | assert(gpu_va < (1ull << 40)); 332 | assert(start < 0x80); /* TODO: oliver */ 333 | 334 | return 0x1d | (start << 9) | (sz << 21) | (gpu_va << 24); 335 | } 336 | 337 | static void 338 | demo_vsbuf(uint64_t *buf, struct agx_allocator *allocator, struct agx_allocator *shader_pool) 339 | { 340 | uint32_t vs_offs = demo_vertex_shader(shader_pool); 341 | uint32_t aux0 = demo_vertex_pre(shader_pool); 342 | 343 | uint64_t gpu_va = demo_attributes(allocator); 344 | buf[0] = demo_bind_arg_words(gpu_va, 0, 2); 345 | buf[1] = demo_bind_arg_words(gpu_va + 8, 2, 2); 346 | buf[2] = 0x0000904d | (0x80dull << 32) | ((uint64_t) (vs_offs & 0xFFFF) << 48); 347 | buf[3] = (vs_offs >> 16) | (0x028d << 16) | (0x00380100ull << 32); 348 | buf[4] = (0xc080) | ((uint64_t) aux0 << 16); 349 | } 350 | 351 | static void 352 | demo_fsbuf(uint64_t *buf, struct agx_allocator *allocator, struct agx_allocation *framebuffer, struct agx_allocator *shader_pool) 353 | { 354 | uint32_t clear_offs = demo_clear(shader_pool); 355 | uint32_t aux3_offs = demo_frag_aux3(shader_pool); 356 | uint32_t fs_offs = demo_fragment_shader(shader_pool); 357 | 358 | memset(buf, 0, 128 * 8); 359 | 360 | /* Clear shader */ 361 | buf[ 8] = demo_bind_arg_words(demo_clear_color(allocator), 6, 2); 362 | buf[ 9] = 0x2010bd4d | (0x40dull << 32) | ((uint64_t) (clear_offs & 0xFFFF) << 48); 363 | buf[10] = ((uint64_t) clear_offs >> 16) | (0x18d << 16) | (0x00880100ull << 32); 364 | buf[11] = 0; 365 | buf[12] = 0; 366 | buf[13] = 0; 367 | buf[14] = 0; 368 | buf[15] = 0; 369 | 370 | /* AUX3 */ 371 | buf[16] = PTR40(dd, 00, 10, demo_render_target(allocator, framebuffer)); 372 | buf[17] = demo_bind_arg_words(demo_unk0_5(allocator), 2, 2); 373 | buf[18] = 0x2010bd4d | (0x000dull << 32) | ((uint64_t) (aux3_offs & 0xFFFF) << 48); 374 | buf[19] = ((uint64_t) aux3_offs >> 16) | (0x18d << 16) | (0x00880100ull << 32); 375 | buf[20] = 0; 376 | buf[21] = 0; 377 | buf[22] = 0; 378 | buf[23] = 0; 379 | 380 | /* Fragment shader */ 381 | buf[24] = demo_bind_arg_words(demo_zero(allocator, 8), 2, 2); 382 | buf[25] = 0x2010bd4d | (0x50dull << 32) | ((uint64_t) (fs_offs & 0xFFFF) << 48); 383 | buf[26] = (fs_offs >> 16) | (0x208d << 16) | (0xf3580100ull << 32); 384 | buf[27] = 0x00880002 | (0xc080ull << 32); 385 | buf[28] = 0; 386 | buf[29] = 0; 387 | buf[30] = 0; 388 | buf[31] = 0; 389 | } 390 | 391 | struct cmdbuf { 392 | uint32_t *map; 393 | unsigned offset; 394 | }; 395 | 396 | static void 397 | EMIT32(struct cmdbuf *cmdbuf, uint32_t val) 398 | { 399 | cmdbuf->map[cmdbuf->offset++] = val; 400 | } 401 | 402 | static void 403 | EMIT64(struct cmdbuf *cmdbuf, uint64_t val) 404 | { 405 | EMIT32(cmdbuf, (val & 0xFFFFFFFF)); 406 | EMIT32(cmdbuf, (val >> 32)); 407 | } 408 | 409 | static void 410 | EMIT_WORDS(struct cmdbuf *cmdbuf, uint8_t *buf, size_t count) 411 | { 412 | assert((count & 0x3) == 0); 413 | 414 | for (unsigned i = 0; i < count; i += 4) { 415 | uint32_t u32 = 416 | (buf[i + 3] << 24) | 417 | (buf[i + 2] << 16) | 418 | (buf[i + 1] << 8) | 419 | (buf[i + 0] << 0); 420 | 421 | EMIT32(cmdbuf, u32); 422 | } 423 | } 424 | 425 | static void 426 | EMIT_ZERO_WORDS(struct cmdbuf *cmdbuf, size_t words) 427 | { 428 | memset(cmdbuf->map + cmdbuf->offset, 0, words * 4); 429 | cmdbuf->offset += words; 430 | } 431 | 432 | static void 433 | demo_cmdbuf(uint64_t *buf, struct agx_allocator *allocator, 434 | struct 
agx_allocation *vsbuf, 435 | struct agx_allocation *fsbuf, 436 | struct agx_allocation *framebuffer, 437 | struct agx_allocator *shaders) 438 | { 439 | demo_vsbuf((uint64_t *) vsbuf->map, allocator, shaders); 440 | demo_fsbuf((uint64_t *) fsbuf->map, allocator, framebuffer, shaders); 441 | 442 | struct cmdbuf _cmdbuf = { 443 | .map = (uint32_t *) buf, 444 | .offset = 0 445 | }; 446 | 447 | struct cmdbuf *cmdbuf = &_cmdbuf; 448 | 449 | /* Vertex stuff */ 450 | EMIT32(cmdbuf, 0x10000); 451 | EMIT32(cmdbuf, 0x780); // Compute: 0x188 452 | EMIT32(cmdbuf, 0x7); 453 | EMIT_ZERO_WORDS(cmdbuf, 5); 454 | EMIT32(cmdbuf, 0x758); // Compute: 0x180 455 | EMIT32(cmdbuf, 0x18); // Compute: 0x0 456 | EMIT32(cmdbuf, 0x758); // Compute: 0x0 457 | EMIT32(cmdbuf, 0x728); // Compute: 0x150 458 | 459 | EMIT32(cmdbuf, 0x30); /* 0x30 */ 460 | EMIT32(cmdbuf, 0x01); /* 0x34. Compute: 0x03 */ 461 | 462 | /* Pointer to data about the vertex and fragment shaders */ 463 | EMIT64(cmdbuf, demo_unk2(allocator, vsbuf, fsbuf)); 464 | 465 | EMIT_ZERO_WORDS(cmdbuf, 20); 466 | 467 | EMIT64(cmdbuf, 0); /* 0x90, compute blob - some zero */ 468 | EMIT64(cmdbuf, 0); // blob - 0x540 bytes of zero, compute blob - null 469 | EMIT64(cmdbuf, 0); // blob - 0x280 bytes of zero 470 | EMIT64(cmdbuf, 0); // a8, compute blob - zero pointer 471 | 472 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 473 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 474 | EMIT64(cmdbuf, 0); // compute blob - zero pointer 475 | 476 | // while zero for vertex, used to include the odd unk6 pattern for compute 477 | EMIT64(cmdbuf, 0); // compute blob - 0x1 478 | EMIT64(cmdbuf, 0); // d0, ompute blob - pointer to odd pattern, compare how it's done later for frag 479 | 480 | // compute 8 bytes of zero, then reconverge at * 481 | 482 | EMIT32(cmdbuf, 0x6b0003); // d8 483 | EMIT32(cmdbuf, 0x3a0012); // dc 484 | 485 | /* Possibly the funny pattern but not actually pointed to for vertex */ 486 | EMIT64(cmdbuf, 1); // e0 487 | EMIT64(cmdbuf, 0); // e8 488 | 489 | EMIT_ZERO_WORDS(cmdbuf, 44); 490 | 491 | EMIT64(cmdbuf, 0); // blob - 0x20 bytes of zero 492 | EMIT64(cmdbuf, 1); // 1a8 493 | 494 | // * compute reconverges here at 0xe0 in my trace 495 | EMIT32(cmdbuf, 0x1c); // 1b0 496 | 497 | // compute 0xe4: [encoder ID -- from selector6 + 2 with blob], 0, 0, 0xffffffff, done for a while 498 | // compute 0x120: 0x9 | 0x128: 0x40 499 | 500 | EMIT32(cmdbuf, 0); // 1b0 - compute: 0x10000 501 | EMIT64(cmdbuf, 0x0); // 1b8 -- compute 0x10000 502 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! 503 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! compute 0 504 | EMIT32(cmdbuf, 0xffffffff); // note we can zero! 
compute 0 505 | EMIT32(cmdbuf, 0); 506 | 507 | EMIT_ZERO_WORDS(cmdbuf, 40); 508 | 509 | EMIT32(cmdbuf, 0xffff8002); // 0x270 510 | EMIT32(cmdbuf, 0); 511 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x44);// clear -- XXX: dynalloc 512 | EMIT32(cmdbuf, 0); 513 | EMIT32(cmdbuf, 0); 514 | EMIT32(cmdbuf, 0); 515 | EMIT32(cmdbuf, 0x12); 516 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x84); // AUX3 -- 0x290 -- XXX: dynalloc 517 | EMIT64(cmdbuf, demo_zero(allocator, 0x1000)); 518 | EMIT64(cmdbuf, demo_zero(allocator, 0x1000)); 519 | EMIT64(cmdbuf, 0); 520 | 521 | EMIT_ZERO_WORDS(cmdbuf, 48); 522 | 523 | EMIT64(cmdbuf, 4); 524 | EMIT64(cmdbuf, 0xc000); 525 | 526 | /* Note: making these smallers scissors polygons but not clear colour */ 527 | EMIT32(cmdbuf, WIDTH); 528 | EMIT32(cmdbuf, HEIGHT); 529 | EMIT64(cmdbuf, demo_zero(allocator, 0x8000)); 530 | 531 | EMIT_ZERO_WORDS(cmdbuf, 48); 532 | 533 | EMIT64(cmdbuf, 0); // 0x450 534 | EMIT32(cmdbuf, fui(1.0)); // fui(1.0f) 535 | EMIT32(cmdbuf, 0x300); 536 | EMIT64(cmdbuf, 0); 537 | EMIT64(cmdbuf, 0x1000000); 538 | EMIT32(cmdbuf, 0xffffffff); 539 | EMIT32(cmdbuf, 0xffffffff); 540 | EMIT32(cmdbuf, 0xffffffff); 541 | EMIT32(cmdbuf, 0); 542 | 543 | EMIT_ZERO_WORDS(cmdbuf, 8); 544 | 545 | EMIT64(cmdbuf, 0); // 0x4a0 546 | EMIT32(cmdbuf, 0xffff8212); 547 | EMIT32(cmdbuf, 0); 548 | 549 | EMIT64(cmdbuf, fsbuf->gpu_va + 0x4);// XXX: dynalloc -- not referenced 550 | EMIT64(cmdbuf, 0); 551 | 552 | EMIT32(cmdbuf, 0); 553 | EMIT32(cmdbuf, 0x12); 554 | EMIT32(cmdbuf, fsbuf->gpu_va + 0x84); // AUX3 555 | EMIT32(cmdbuf, 0); 556 | 557 | EMIT_ZERO_WORDS(cmdbuf, 44); 558 | 559 | EMIT64(cmdbuf, 1); // 0x580 560 | EMIT64(cmdbuf, 0); 561 | EMIT_ZERO_WORDS(cmdbuf, 4); 562 | 563 | /* Compare compute case ,which has a bit of reordering, but we can swap */ 564 | EMIT32(cmdbuf, 0x1c); // 0x5a0 565 | EMIT32(cmdbuf, 0); 566 | EMIT64(cmdbuf, 0xCAFECAFE); // encoder ID XXX: don't fix 567 | EMIT32(cmdbuf, 0); 568 | EMIT32(cmdbuf, 0xffffffff); 569 | 570 | // remark: opposite order for compute, but we can swap the orders 571 | EMIT32(cmdbuf, 1); 572 | EMIT32(cmdbuf, 0); 573 | EMIT64(cmdbuf, 0); 574 | EMIT64(cmdbuf, 0 /* demo_unk6(allocator) */); 575 | 576 | /* note: width/height act like scissor, but changing the 0s doesn't 577 | * seem to affect (maybe scissor enable bit missing), _and this affects 578 | * the clear_ .. 
bbox maybe */ 579 | EMIT32(cmdbuf, 0); 580 | EMIT32(cmdbuf, 0); 581 | EMIT32(cmdbuf, WIDTH); // can increase up to 16384 582 | EMIT32(cmdbuf, HEIGHT); 583 | 584 | EMIT32(cmdbuf, 1); 585 | EMIT32(cmdbuf, 8); 586 | EMIT32(cmdbuf, 8); 587 | EMIT32(cmdbuf, 0); 588 | 589 | EMIT_ZERO_WORDS(cmdbuf, 12); 590 | 591 | EMIT32(cmdbuf, 0); // 0x620 592 | EMIT32(cmdbuf, 8); 593 | EMIT32(cmdbuf, 0x20); 594 | EMIT32(cmdbuf, 0x20); 595 | EMIT32(cmdbuf, 0x1); 596 | EMIT32(cmdbuf, 0); 597 | EMIT64(cmdbuf, 0); 598 | 599 | EMIT_ZERO_WORDS(cmdbuf, 72); 600 | 601 | EMIT32(cmdbuf, 0); // 0x760 602 | EMIT32(cmdbuf, 0x1); 603 | EMIT64(cmdbuf, 0x100 | (framebuffer->gpu_va << 16)); 604 | 605 | EMIT32(cmdbuf, 0xa0000); 606 | EMIT32(cmdbuf, 0x4c000000); 607 | EMIT32(cmdbuf, 0x0c001d); 608 | 609 | EMIT32(cmdbuf, 0x640000); 610 | } 611 | 612 | static struct agx_map_entry 613 | demo_map_entry(struct agx_allocation *alloc) 614 | { 615 | return (struct agx_map_entry) { 616 | .unkAAA = 0x20, 617 | .unkBBB = 0x1, 618 | .unka = 0x1ffff, 619 | .index = alloc->index, 620 | }; 621 | } 622 | 623 | static struct agx_map_header 624 | demo_map_header(uint64_t cmdbuf_id, uint64_t encoder_id, unsigned count) 625 | { 626 | return (struct agx_map_header) { 627 | .cmdbuf_id = cmdbuf_id, 628 | .unk2 = 0x1, 629 | .unk3 = 0x528, // 1320 630 | .encoder_id = encoder_id, 631 | .unk6 = 0x0, 632 | .unk7 = 0x780, // 1920 633 | 634 | /* +1 for the sentinel ending */ 635 | .nr_entries_1 = count + 1, 636 | .nr_entries_2 = count + 1, 637 | .unka = 0x0b, 638 | }; 639 | } 640 | 641 | static void 642 | demo_mem_map(void *map, struct agx_allocation *allocs, unsigned count, 643 | uint64_t cmdbuf_id, uint64_t encoder_id) 644 | { 645 | struct agx_map_header *header = map; 646 | struct agx_map_entry *entries = (struct agx_map_entry *) (map + 0x40); 647 | 648 | /* Header precedes the entry */ 649 | *header = demo_map_header(cmdbuf_id, encoder_id, count); 650 | 651 | /* Add an entry for each BO mapped */ 652 | for (unsigned i = 0; i < count; ++i) { 653 | if (allocs[i].type != AGX_ALLOC_REGULAR) 654 | continue; 655 | 656 | entries[i] = (struct agx_map_entry) { 657 | .unkAAA = 0x20, 658 | .unkBBB = 0x1, 659 | .unka = 0x1ffff, 660 | .index = allocs[i].index 661 | }; 662 | } 663 | 664 | /* Final entry is a sentinel */ 665 | entries[count] = (struct agx_map_entry) { 666 | .unkAAA = 0x40, 667 | .unkBBB = 0x1, 668 | .unka = 0x1ffff, 669 | .index = 0 670 | }; 671 | } 672 | 673 | void demo(mach_port_t connection, bool offscreen) 674 | { 675 | struct agx_command_queue command_queue = agx_create_command_queue(connection); 676 | 677 | // XXX: why do BO ids below 6 mess things up..? 
678 | for (unsigned i = 0; i < 6; ++i) { 679 | struct agx_allocation dummy = agx_alloc_mem(connection, 4096, AGX_MEMORY_TYPE_FRAMEBUFFER, false); 680 | } 681 | 682 | struct agx_allocation shader = agx_alloc_mem(connection, 0x10000, AGX_MEMORY_TYPE_SHADER, false); 683 | 684 | struct agx_allocator shader_pool = { .backing = shader, }; 685 | 686 | struct agx_allocation bo = agx_alloc_mem(connection, 1920*1080*4*2, AGX_MEMORY_TYPE_FRAMEBUFFER, false); 687 | struct agx_allocator allocator = { .backing = bo }; 688 | 689 | struct agx_allocation vsbuf = agx_alloc_mem(connection, 0x8000, AGX_MEMORY_TYPE_CMDBUF_32, false); 690 | struct agx_allocation fsbuf = agx_alloc_mem(connection, 0x8000, AGX_MEMORY_TYPE_CMDBUF_32, false); 691 | struct agx_allocation framebuffer = agx_alloc_mem(connection, 692 | ALIGN_POT(WIDTH, 64) * ALIGN_POT(HEIGHT, 64) * 4, 693 | AGX_MEMORY_TYPE_FRAMEBUFFER, false); 694 | 695 | struct agx_allocation cmdbuf = agx_alloc_cmdbuf(connection, 0x4000, true); 696 | 697 | struct agx_allocation memmap = agx_alloc_cmdbuf(connection, 0x4000, false); 698 | 699 | uint64_t global_ids = agx_cmdbuf_global_ids(connection); 700 | 701 | struct agx_allocation allocs[] = { 702 | shader, 703 | bo, 704 | vsbuf, 705 | fsbuf, 706 | framebuffer 707 | }; 708 | 709 | demo_mem_map(memmap.map, allocs, sizeof(allocs) / sizeof(allocs[0]), 710 | 0xDEADBEEF, 0xCAFECAFE); // (unk6 + 1, unk6 + 2) but it doesn't really matter 711 | 712 | uint32_t *linear = malloc(WIDTH * HEIGHT * 4); 713 | 714 | if (!offscreen) 715 | slowfb_init((uint8_t *) linear, WIDTH, HEIGHT); 716 | 717 | for (;;) { 718 | demo_cmdbuf(cmdbuf.map, &allocator, &vsbuf, &fsbuf, &framebuffer, &shader_pool); 719 | agx_submit_cmdbuf(connection, &cmdbuf, &memmap, command_queue.id); 720 | 721 | /* Block until it's done */ 722 | IOReturn ret = IODataQueueWaitForAvailableData(command_queue.notif.queue, command_queue.notif.port); 723 | while (IODataQueueDataAvailable(command_queue.notif.queue)) 724 | ret = IODataQueueDequeue(command_queue.notif.queue, NULL, 0); 725 | 726 | /* Dump the framebuffer */ 727 | ash_detile(framebuffer.map, linear, 728 | WIDTH, 32, WIDTH, 729 | 0, 0, WIDTH, HEIGHT); 730 | 731 | shader_pool.offset = 0; 732 | allocator.offset = 0; 733 | 734 | if (offscreen) { 735 | FILE *fp = fopen("fb.bin", "wb"); 736 | fwrite(linear, 1, WIDTH * HEIGHT * 4, fp); 737 | fclose(fp); 738 | 739 | break; 740 | } else { 741 | slowfb_update(WIDTH, HEIGHT); 742 | } 743 | } 744 | } 745 | -------------------------------------------------------------------------------- /demo/demo.h: -------------------------------------------------------------------------------- 1 | #ifndef __DEMO_H 2 | #define __DEMO_H 3 | 4 | #include 5 | #include "io.h" 6 | #include "cmdstream.h" 7 | 8 | /* Dumb watermark allocator for demo purposes */ 9 | 10 | struct agx_allocator { 11 | struct agx_allocation backing; 12 | unsigned offset; 13 | }; 14 | 15 | struct agx_ptr { 16 | void *map; 17 | uint64_t gpu_va; 18 | }; 19 | 20 | static struct agx_ptr 21 | agx_allocate(struct agx_allocator *allocator, size_t size) 22 | { 23 | allocator->offset = (allocator->offset & ~127) + 128; 24 | assert(size < (allocator->backing.size - allocator->offset)); 25 | 26 | struct agx_ptr ptr = { 27 | .map = allocator->backing.map + allocator->offset, 28 | .gpu_va = allocator->backing.gpu_va + allocator->offset, 29 | }; 30 | 31 | allocator->offset += size; 32 | return ptr; 33 | } 34 | 35 | static uint64_t 36 | agx_upload(struct agx_allocator *allocator, void *data, size_t size) 37 | { 38 | struct agx_ptr ptr 
= agx_allocate(allocator, size); 39 | memcpy(ptr.map, data, size); 40 | return ptr.gpu_va; 41 | } 42 | 43 | void demo(mach_port_t connection, bool offscreen); 44 | uint32_t demo_vertex_shader(struct agx_allocator *allocator); 45 | uint32_t demo_fragment_shader(struct agx_allocator *allocator); 46 | uint32_t demo_vertex_pre(struct agx_allocator *allocator); 47 | uint32_t demo_clear(struct agx_allocator *allocator); 48 | uint32_t demo_frag_aux3(struct agx_allocator *allocator); 49 | 50 | void slowfb_init(uint8_t *framebuffer, int width, int height); 51 | void slowfb_update(int width, int height); 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /demo/iokit.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "selectors.h" 29 | #include "demo.h" 30 | 31 | /* Sample code for opening a connection to the AGX kernel module via IOKit */ 32 | 33 | #define AGX_SERVICE_TYPE 0x100005 34 | 35 | int main(int argc, char **argv) 36 | { 37 | (void) argc; 38 | (void) argv; 39 | 40 | kern_return_t ret; 41 | 42 | /* TODO: Support other models */ 43 | CFDictionaryRef matching = IOServiceNameMatching("AGXAcceleratorG13G_B0"); 44 | 45 | io_service_t service = 46 | IOServiceGetMatchingService(kIOMasterPortDefault, matching); 47 | 48 | if (!service) { 49 | fprintf(stderr, "G13 (B0) accelerator not found\n"); 50 | return 1; 51 | } 52 | 53 | io_connect_t connection = 0; 54 | ret = IOServiceOpen(service, mach_task_self(), AGX_SERVICE_TYPE, &connection); 55 | 56 | if (ret) { 57 | fprintf(stderr, "Error from IOServiceOpen: %u\n", ret); 58 | return 1; 59 | } 60 | 61 | const char *api = "Equestria"; 62 | char in[16] = { 0 }; 63 | assert(strlen(api) < sizeof(in)); 64 | memcpy(in, api, strlen(api)); 65 | 66 | ret = IOConnectCallStructMethod(connection, AGX_SELECTOR_SET_API, in, 67 | sizeof(in), NULL, NULL); 68 | 69 | /* Oddly, the return codes are flipped for SET_API */ 70 | if (ret != 1) { 71 | fprintf(stderr, "Error setting API: %u\n", ret); 72 | return 1; 73 | } 74 | 75 | char version[456] = { 0 }; 76 | size_t version_len = sizeof(version); 77 | 78 | ret = IOConnectCallStructMethod(connection, AGX_SELECTOR_GET_VERSION, NULL, 0, 79 | version, &version_len); 80 | 81 | if (ret) { 82 | fprintf(stderr, "Error getting version: %u\n", ret); 83 | /* TODO: why? */ 84 | } 85 | 86 | assert(version_len == sizeof(version)); 87 | printf("Kext build date: %s\n", version + (25 * 8)); 88 | 89 | demo(connection, getenv("DISPLAY") == NULL); 90 | 91 | ret = IOServiceClose(connection); 92 | 93 | if (ret) { 94 | fprintf(stderr, "Error from IOServiceClose: %u\n", ret); 95 | return 1; 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /demo/shaders.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "demo.h" 3 | 4 | void agx_disassemble(void *_code, size_t maxlen, FILE *fp); 5 | 6 | #define AGX_STOP \ 7 | 0x88, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, \ 8 | 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00, 0x08, 0x00 \ 9 | 10 | #define AGX_BLEND \ 11 | 0x09, 0x00, 0x00, 0x04, 0xf0, 0xfc, 0x80, 0x03 12 | 13 | /* Minimal vertex shader, where u4/u5 are preloaded by the paired compute 14 | * shader's uniform_store 15 | 16 | 0: 9e034a0202800100 imadd $r0_r1, r5, 32, u0 17 | 8: 0e05c22218000000 iadd r1, r1.discard, u1 18 | 10: 0501000500c43200 device_load 1, 0, 0, 4, 0, i32, pair, r0_r1, r0_r1, 0, signed, lsl 1 19 | 18: 3800 wait 0 20 | 1a: 1a89c0821800 fmul r2, r0.discard, u4 21 | 20: 1a81c2a21800 fmul r0, r1.discard, u5 22 | 26: 621100000000 mov r4, 0 23 | 2c: 62050000803f mov r1, 1065353216 24 | 32: 11108280 TODO.st_var 1, r4, 2 25 | 36: 11048380 TODO.st_var 1, r1, 3 26 | 3a: 11088080 TODO.st_var 1, r2, 0 27 | 3e: 91008180 TODO.st_var_final 1, r0, 1 28 | */ 29 | 30 | uint8_t vertex_shader[] = { 31 | 0x9e, 0x03, 0x4a, 0x02, 0x02, 0x80, 0x01, 0x00, 32 | 0x0e, 0x05, 0xc2, 0x22, 0x18, 0x00, 0x00, 0x00, 33 | 0x05, 0x01, 0x00, 0x05, 0x00, 0xc4, 0x32, 0x00, 34 | 0x38, 0x00, 35 | 0x1a, 0x89, 0xc0, 0x82, 0x18, 0x00, 36 | 0x1a, 0x81, 0xc2, 0xa2, 0x18, 0x00, 37 | 0x62, 0x11, 0x00, 0x00, 0x00, 0x00, 38 | 0x62, 0x05, 0x00, 0x00, 0x80, 0x3f, 39 | 0x11, 0x10, 0x82, 0x80, 40 | 
0x11, 0x04, 0x83, 0x80, 41 | 0x11, 0x08, 0x80, 0x80, 42 | 0x91, 0x00, 0x81, 0x80, 43 | AGX_STOP 44 | }; 45 | 46 | /* Custom solid colour frag shader 47 | 0: 6200873a mov r0l, 14983 48 | 4: 62020531 mov r0h, 12549 49 | 8: 62040531 mov r1l, 12549 50 | c: 6206003c mov r1h, 15360 51 | 10: 4800c200 TODO.writeout 512, 3 52 | 14: 480c0000 TODO.writeout 12, 0 53 | 18: 09000004f0fc8003 TODO.blend 54 | */ 55 | 56 | uint8_t fragment_shader[] = { 57 | 0x62, 0x00, 0x87, 0x3A, 58 | 0x62, 0x02, 0x05, 0x31, 59 | 0x62, 0x04, 0x05, 0x31, 60 | 0x62, 0x06, 0x00, 0x3c, 61 | 0x48, 0x00, 0xc2, 0x00, 62 | 0x48, 0x0c, 0x00, 0x00, 63 | AGX_BLEND, 64 | AGX_STOP 65 | }; 66 | 67 | 68 | /* 69 | Compute shader implementing (float2) (1.0 / (dims * 0.5)), where dimensions 70 | is the ivec2 of width, height of the framebuffer (the address of which is 71 | preloadeded as u2_u3), since this shows up in our minimal vertex shader... 72 | I've seen Mali do this optimization before, but never so aggressively. 73 | 74 | 0: 0501040d00c43200 device_load 1, 0, 0, 4, 0, i32, pair, r0_r1, u2_u3, 0, signed, lsl 1 75 | 8: 3800 wait 0 76 | a: be890a042c00 convert u32_to_f, $r2, r0.discard, 1 77 | 10: be810a242c00 convert u32_to_f, $r0, r1.discard, 1 78 | 16: 9a85c4020200 fmul $r1, r2.discard, 0.5 79 | 1c: 0a05c282 rcp r1, r1.discard 80 | 20: 9a81c0020200 fmul $r0, r0.discard, 0.5 81 | 26: 0a01c082 rcp r0, r0.discard 82 | 2a: c508803d00803000 uniform_store 2, i16, pair, 0, r1l_r1h, 8 83 | 32: c500a03d00803000 uniform_store 2, i16, pair, 0, r0l_r0h, 10 84 | */ 85 | 86 | uint8_t vertex_pre[] = { 87 | 0x05, 0x01, 0x04, 0x0d, 0x00, 0xc4, 0x32, 0x00, 88 | 0x38, 0x00, 89 | 0xbe, 0x89, 0x0a, 0x04, 0x2c, 0x00, 90 | 0xbe, 0x81, 0x0a, 0x24, 0x2c, 0x00, 91 | 0x9a, 0x85, 0xc4, 0x02, 0x02, 0x00, 92 | 0x0a, 0x05, 0xc2, 0x82, 0x9a, 0x81, 0xc0, 0x02, 0x02, 0x00, 0x0a, 0x01, 93 | 0xc0, 0x82, 0xc5, 0x08, 0x80, 0x3d, 0x00, 0x80, 0x30, 0x00, 0xc5, 0x00, 94 | 0xa0, 0x3d, 0x00, 0x80, 0x30, 0x00, 95 | AGX_STOP 96 | }; 97 | 98 | /* Clears the tilebuffer, where u6-u7 are preloaded with the FP16 clear colour 99 | * by the paired compute shader AUX2 100 | 101 | 0: 7e018c098040 bitop_mov r0, u6 102 | 6: 7e058e098000 bitop_mov r1, u7 103 | c: 09000004f0fc8003 TODO.blend 104 | */ 105 | 106 | uint8_t clear[] = { 107 | 0x7e, 0x01, 0x8c, 0x09, 0x80, 0x40, 108 | 0x7e, 0x05, 0x8e, 0x09, 0x80, 0x00, 109 | AGX_BLEND, 110 | AGX_STOP 111 | }; 112 | 113 | uint8_t frag_aux3[] = { 114 | 0x7e, 0x00, 0x04, 0x09, 0x80, 0x00, 115 | 0xb1, 0x80, 0x00, 0x80, 0x00, 0x4a, 0x00, 0x00, 0x0a, 0x00, 116 | AGX_STOP 117 | }; 118 | 119 | uint32_t 120 | demo_upload_shader(const char *label, struct agx_allocator *allocator, uint8_t *code, size_t sz) 121 | { 122 | #if 0 123 | printf("%s:\n", label); 124 | agx_disassemble(code, sz, stdout); 125 | printf("\n"); 126 | #endif 127 | (void) label; 128 | 129 | return agx_upload(allocator, code, sz); 130 | } 131 | 132 | uint32_t 133 | demo_vertex_shader(struct agx_allocator *allocator) 134 | { 135 | return demo_upload_shader("vs", allocator, vertex_shader, sizeof(vertex_shader)); 136 | } 137 | 138 | uint32_t 139 | demo_fragment_shader(struct agx_allocator *allocator) 140 | { 141 | return demo_upload_shader("fs", allocator, fragment_shader, sizeof(fragment_shader)); 142 | } 143 | 144 | uint32_t 145 | demo_vertex_pre(struct agx_allocator *allocator) 146 | { 147 | return demo_upload_shader("vertex_pre", allocator, vertex_pre, sizeof(vertex_pre)); 148 | } 149 | 150 | uint32_t 151 | demo_clear(struct agx_allocator *allocator) 152 | { 153 | return 
demo_upload_shader("clear", allocator, clear, sizeof(clear)); 154 | } 155 | 156 | uint32_t 157 | demo_frag_aux3(struct agx_allocator *allocator) 158 | { 159 | return demo_upload_shader("frag_aux3", allocator, frag_aux3, sizeof(frag_aux3)); 160 | } 161 | -------------------------------------------------------------------------------- /demo/slowfb.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2018 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | Display *d; 30 | Window w; 31 | XImage *image; 32 | GC gc; 33 | 34 | void slowfb_init(uint8_t *framebuffer, int width, int height) { 35 | d = XOpenDisplay(NULL); 36 | assert(d != NULL); 37 | int black = BlackPixel(d, DefaultScreen(d)); 38 | w = XCreateSimpleWindow(d, DefaultRootWindow(d), 0, 0, width, height, 0, black, black); 39 | XSelectInput(d, w, StructureNotifyMask); 40 | XMapWindow(d, w); 41 | gc = XCreateGC(d, w, 0, NULL); 42 | for (;;) { 43 | XEvent e; 44 | XNextEvent(d, &e); 45 | if (e.type == MapNotify) break; 46 | } 47 | image = XCreateImage(d, DefaultVisual(d, 0), 24, ZPixmap, 0, (void *) framebuffer, width, height, 32, 0); 48 | } 49 | 50 | void slowfb_update(int width, int height) { 51 | XPutImage(d, w, gc, image, 0, 0, 0, 0, width, height); 52 | } 53 | -------------------------------------------------------------------------------- /disasm-driver.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | extern void 7 | agx_disassemble(void *_code, size_t maxlen, FILE *fp); 8 | 9 | int main(int argc, char **argv) 10 | { 11 | --argc; 12 | ++argv; 13 | if (argc != 2) 14 | errx(1, "usage: disasm-bin FILE hex-offset"); 15 | 16 | FILE *f = fopen(argv[0], "rb"); 17 | if (!f) 18 | err(2, "input file"); 19 | 20 | off_t offset = strtol(argv[1], NULL, 16); 21 | fseek(f, offset, SEEK_SET); 22 | 23 | char buf[4096]; 24 | int n = fread(buf, 1, sizeof(buf), f); 25 | fclose(f); 26 | 27 | agx_disassemble(buf, n, stdout); 28 | return 0; 29 | } 30 | -------------------------------------------------------------------------------- /disasm/disasm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby 
granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | /* Opcode table? Speculative since I don't know the opcode size yet, but this 32 | * should help bootstrap... These opcodes correspond to the bottom 7-bits of 33 | * the first byte, with the 8th bit from the 8th bit of the *second* byte. This 34 | * is still a guess. */ 35 | 36 | enum agx_opcodes { 37 | OPC_FFMA_CMPCT_16 = 0x36, 38 | OPC_FFMA_CMPCT_SAT_16 = 0x76, 39 | OPC_FMUL_16 = 0x96, 40 | OPC_FADD_16 = 0xA6, 41 | OPC_FFMA_16 = 0xB6, 42 | OPC_FMUL_SAT_16 = 0xD6, 43 | OPC_FADD_SAT_16 = 0xE6, 44 | OPC_FFMA_SAT_16 = 0xF6, 45 | 46 | OPC_FROUND_32 = 0x0A, 47 | OPC_FFMA_CMPCT_32 = 0x3A, 48 | OPC_FFMA_CMPCT_SAT_32 = 0x7A, 49 | OPC_FMUL_32 = 0x9A, 50 | OPC_FADD_32 = 0xAA, 51 | OPC_FFMA_32 = 0xBA, 52 | OPC_FMUL_SAT_32 = 0xDA, 53 | OPC_FADD_SAT_32 = 0xEA, 54 | OPC_FFMA_SAT_32 = 0xFA, 55 | 56 | OPC_IADD = 0x0E, 57 | OPC_IMAD = 0x1E, 58 | OPC_ISHL = 0x2E, 59 | /* 0x3e seen with reverse_bits, and used in clz */ 60 | OPC_IADDSAT = 0x4E, 61 | OPC_ISHR = 0xAE, 62 | OPC_I2F = 0xBE, 63 | 64 | OPC_LOAD = 0x05, // todo 65 | OPC_STORE = 0x45, // todo 66 | OPC_FCSEL = 0x02, 67 | OPC_ICSEL = 0x12, 68 | OPC_MOVI = 0x62, 69 | OPC_LD_COMPUTE = 0x72, 70 | OPC_BITOP = 0x7E, 71 | OPC_WAIT = 0x38, // seen after loads? 
72 | OPC_STOP = 0x08, 73 | 74 | OPC_LD_VAR_NO_PERSPECTIVE = 0xA1, 75 | OPC_LD_VAR = 0xE1, // perspective 76 | OPC_ST_VAR = 0x11, 77 | OPC_UNKB1 = 0xB1, // seen in aux frag shader 78 | OPC_UNK48 = 0x48, // seen before blending 79 | OPC_BLEND = 0x09, 80 | 81 | // branching instructions, not understood 82 | OPC_UNKD2 = 0xD2, 83 | OPC_UNK42 = 0x42, 84 | OPC_UNK52 = 0x52, 85 | 86 | // not sure what this does, but appears to be 4 bytes 87 | OPC_UNK80 = 0x80, 88 | }; 89 | 90 | #define I 0 91 | #define C 1 92 | 93 | static struct { 94 | const char *name; 95 | unsigned size; 96 | bool complete; 97 | } agx_opcode_table[256] = { 98 | [OPC_FADD_16] = { "fadd.16", 6, I }, 99 | [OPC_FADD_SAT_16] = { "fadd.sat.16", 6, I }, 100 | [OPC_FMUL_16] = { "fmul.16", 6, I }, 101 | [OPC_FMUL_SAT_16] = { "fmul.sat.16", 6, I }, 102 | [OPC_FFMA_CMPCT_16] = { "ffma.cmpct.16", 6, I }, 103 | [OPC_FFMA_CMPCT_SAT_16] = { "ffma.cmpct.sat.16", 6, I }, 104 | [OPC_FFMA_16] = { "ffma.16", 8, I }, 105 | [OPC_FFMA_SAT_16] = { "ffma.sat.16", 8, I }, 106 | 107 | [OPC_FROUND_32] = { "fround.32", 6, I }, 108 | [OPC_FADD_32] = { "fadd.32", 6, C }, 109 | [OPC_FADD_SAT_32] = { "fadd.sat.32", 6, C }, 110 | [OPC_FMUL_32] = { "fmul.32", 6, C }, 111 | [OPC_FMUL_SAT_32] = { "fmul.sat.32", 6, C }, 112 | [OPC_FFMA_32] = { "ffma.32", 8, I }, 113 | [OPC_FFMA_SAT_32] = { "ffma.sat.32", 8, I }, 114 | [OPC_FFMA_CMPCT_32] = { "ffma.cmpct.32", 6, I }, 115 | [OPC_FFMA_CMPCT_SAT_32] = { "ffma.cmpct.sat.32", 6, I }, 116 | 117 | [OPC_I2F] = { "i2f", 6, I }, 118 | [OPC_IADD] = { "iadd", 8, I }, 119 | [OPC_IMAD] = { "imad", 8, I }, 120 | [OPC_ISHL] = { "ishl", 8, I }, 121 | [OPC_IADDSAT] = { "iaddsat", 8, I }, 122 | [OPC_ISHR] = { "ishr", 8, I }, 123 | 124 | [OPC_LOAD] = { "load", 8, I }, 125 | [OPC_LD_VAR_NO_PERSPECTIVE] = { "ld_var.no_perspective", 8, I }, 126 | [OPC_LD_VAR] = { "ld_var", 8, I }, 127 | [OPC_UNKB1] = { "unkb1", 10, I }, 128 | [OPC_STORE] = { "store", 8, I }, 129 | [OPC_ST_VAR] = { "st_var", 4, C }, 130 | [OPC_FCSEL] = { "fcsel", 8, I }, 131 | [OPC_ICSEL] = { "icsel", 8, I }, 132 | [OPC_MOVI] = { "movi", 4, C }, 133 | [OPC_LD_COMPUTE] = { "ld_compute", 4, C }, 134 | [OPC_BITOP] = { "bitop", 6, I }, 135 | [OPC_BLEND] = { "blend", 8, I }, 136 | [OPC_STOP] = { "stop", 4, I }, 137 | 138 | [OPC_WAIT] = { "wait", 2, I }, 139 | [OPC_UNK48] = { "unk48", 4, I }, 140 | [OPC_UNK42] = { "unk42", 6, I }, 141 | [OPC_UNK52] = { "unk52", 6, I }, 142 | [OPC_UNK80] = { "unk80", 4, I }, 143 | [OPC_UNKD2] = { "unkD2", 12, I }, 144 | }; 145 | 146 | #undef I 147 | #undef C 148 | 149 | static unsigned 150 | agx_instr_bytes(uint8_t opc, uint8_t reg) 151 | { 152 | /* For immediate moves, 32-bit immediates are larger */ 153 | if (opc == OPC_MOVI && (reg & 0x1)) 154 | return 6; 155 | else 156 | return agx_opcode_table[opc].size ?: 2; 157 | } 158 | 159 | /* Print float src, including modifiers */ 160 | 161 | struct agx_src { 162 | unsigned type : 2; 163 | unsigned reg; 164 | bool size32; 165 | bool abs; 166 | bool neg; 167 | unsigned unk; 168 | }; 169 | 170 | static void 171 | agx_print_src(FILE *fp, struct agx_src s) 172 | { 173 | /* Known source types: immediates (8-bit only?), constant memory 174 | * (indexing 64-bits at a time from preloaded memory), and general 175 | * purpose registers */ 176 | const char *types[] = { "#", "unk1:", "u", "" }; 177 | 178 | fprintf(fp, ", %s%u%s%s%s%s", 179 | types[s.type], s.reg, 180 | (s.size32 || s.type == 0) ? "" : ((s.reg & 1) ? "h" : "l"), 181 | s.abs ? ".abs" : "", s.neg ? ".neg" : "", 182 | s.unk ? 
".unk" : ""); 183 | } 184 | 185 | static void 186 | agx_print_float_src(FILE *fp, unsigned type, unsigned reg, bool size32, bool abs, bool neg) 187 | { 188 | assert(type <= 3); 189 | agx_print_src(fp, (struct agx_src) { 190 | .type = type, .reg = reg, .size32 = size32, 191 | .abs = abs, .neg = neg 192 | }); 193 | } 194 | 195 | /* Decode 12-bit packed float source */ 196 | static struct agx_src 197 | agx_decode_float_src(uint16_t packed) 198 | { 199 | return (struct agx_src) { 200 | .reg = (packed & 0x3F), 201 | .type = (packed & 0xC0) >> 6, 202 | .unk = (packed & 0x100), 203 | .size32 = (packed & 0x200), 204 | .abs = (packed & 0x400), 205 | .neg = (packed & 0x800), 206 | }; 207 | } 208 | 209 | /* When we know more how the encodings relate to each other, these 210 | * per-instruction prints will hopefully disappear, assuming things are 211 | * sufficiently regular. 212 | * 213 | * fadd.f32 is 6 bytes. First two bytes are used for opcode/destination, so we 214 | * have 32-bits to decode here, or 16-bits per source. Since a register is at 215 | * least 6-bits, 2-bit type, 3-bits widen, that leaves only 10-bits unaccounted 216 | * for in the instruction. 217 | * 218 | * Byte 0: [2 - src0 type][6 - src0 value] 219 | * Byte 1: [4 - src1 value lo][1 - neg][1 - abs][1 - size][1 - unk] 220 | * Byte 2: [1 - neg][1 - abs][1 - size][1 - unk][2 - src1 type][2 - src1 value hi] 221 | * Byte 3: [8 - zero] 222 | * 223 | */ 224 | 225 | static void 226 | agx_print_fadd_f32(FILE *fp, uint8_t *code) 227 | { 228 | agx_print_src(fp, agx_decode_float_src(code[2] | ((code[3] & 0xF) << 8))); 229 | agx_print_src(fp, agx_decode_float_src((code[3] >> 4) | (code[4] << 4))); 230 | 231 | if (code[5]) 232 | fprintf(fp, " /* unk5 = %02X */", code[5]); 233 | } 234 | 235 | static void 236 | agx_print_ld_compute(uint8_t *code, FILE *fp) 237 | { 238 | /* 4 bytes, first 2 used for opcode and dest reg, next few bits for the 239 | * component, the rest is a selector for what to load */ 240 | uint16_t arg = code[2] | (code[3] << 8); 241 | 242 | unsigned component = arg & 0x3; 243 | uint16_t selector = arg >> 2; 244 | 245 | fprintf(fp, ", "); 246 | 247 | switch (selector) { 248 | case 0x00: 249 | fprintf(fp, "[threadgroup_position_in_grid]"); 250 | break; 251 | case 0x0c: 252 | fprintf(fp, "[thread_position_in_threadgroup]"); 253 | break; 254 | case 0x0d: 255 | fprintf(fp, "[thread_position_in_simdgroup]"); 256 | break; 257 | case 0x104: 258 | fprintf(fp, "[thread_position_in_grid]"); 259 | break; 260 | default: 261 | fprintf(fp, "[unk_%X]", selector); 262 | break; 263 | } 264 | 265 | fprintf(fp, ".%c", "xyzw"[component]); 266 | } 267 | 268 | static void 269 | agx_print_bitop_src(uint16_t value, FILE *fp) 270 | { 271 | /* different encoding from float srcs -- slightly smaller */ 272 | uint16_t mode = (value >> 6) & 0x0f; 273 | uint16_t v = (value & 0x3f) | ((value >> 4) & 0xc0); 274 | 275 | switch (mode) { 276 | case 0x0: 277 | // 8-bit immediate 278 | fprintf(fp, "#0x%x", v); 279 | break; 280 | case 0x3: 281 | // 16b register 282 | fprintf(fp, "h%d", v); 283 | break; 284 | case 0xb: 285 | // 32b register 286 | assert((v&1) == 0); 287 | fprintf(fp, "w%d", v >> 1); 288 | break; 289 | default: 290 | fprintf(fp, "unk_%x", value); 291 | break; 292 | } 293 | } 294 | 295 | static void 296 | agx_print_bitop(uint8_t *code, FILE *fp) 297 | { 298 | /* 6 bytes */ 299 | /* Universal bitop instruction. 
Control bits express operation as 300 | * sum-of-products: a&b, ~a&b, a&~b, ~a&~b */ 301 | 302 | /* XXX: dst encoding may not be quite correct either, but is done 303 | * in common code before this point */ 304 | /* XXX: disassemble to "friendly" pseudoop ? */ 305 | 306 | uint8_t control = (code[3] >> 2) & 0x3; 307 | control |= (code[4] >> 4) & 0xc; 308 | fprintf(fp, ", #0x%x, ", control); 309 | 310 | uint16_t src1_bits = code[2] | ((uint16_t)(code[3]&3) << 8) | 311 | ((uint16_t)code[5]&0xc)<<8; 312 | uint16_t src2_bits = (code[3] >> 4) | (((uint16_t)code[4]&0x3f)<<4) | 313 | (((uint16_t)code[5]&0x3)<<10); 314 | 315 | agx_print_bitop_src(src1_bits, fp); 316 | fprintf(fp, ", "); 317 | agx_print_bitop_src(src2_bits, fp); 318 | } 319 | 320 | static float 321 | agx_decode_float_imm8(uint16_t src) 322 | { 323 | float sign = (src & 0x80) ? -1.0f : 1.0f; 324 | int e = ((src & 0x70) >> 4); 325 | 326 | if (e == 0) { 327 | /* denorm */ 328 | return sign * (src & 0x0f) / 64.0f; 329 | } 330 | else { 331 | return sign * ldexpf((src & 0x0f) | 0x10, e - 7); 332 | } 333 | } 334 | 335 | static void 336 | agx_print_fp16_src(uint16_t src, uint16_t type, FILE *fp) 337 | { 338 | /* XXX: type&2 bit may be something odd like code[0]&0x80 */ 339 | 340 | switch (type & 5) { 341 | case 0x0: 342 | /* packed float8 immediate */ 343 | fprintf(fp, "#%ff", agx_decode_float_imm8(src)); 344 | break; 345 | case 0x1: 346 | /* half register */ 347 | fprintf(fp, "h%d", src); 348 | break; 349 | case 0x4: 350 | case 0x5: 351 | /* constant space; extra bit packed in 352 | * bottom bit of type */ 353 | fprintf(fp, "const_%d", ((type&1)<<8) | src); 354 | break; 355 | default: 356 | fprintf(fp, "unk_%x:%x", type, src); 357 | break; 358 | } 359 | 360 | if (type & 0x8) 361 | fprintf(fp, ".abs"); 362 | if (type & 0x10) 363 | fprintf(fp, ".neg"); 364 | 365 | } 366 | 367 | static void 368 | agx_print_fadd16(uint8_t *code, FILE *fp) 369 | { 370 | /* 6 bytes */ 371 | uint16_t src1 = (code[2] & 0x3f) | ((code[5] & 0x0c)<<4); 372 | uint16_t type1 = (code[2] >> 6) | ((code[3] & 0x0f)<<2); 373 | 374 | uint16_t src2 = (code[3] >> 4) | ((code[4] & 0x3)<<4) | ((code[5] & 0x3)<<6); 375 | uint16_t type2 = (code[4] >> 2); 376 | 377 | fprintf(fp, ", "); 378 | agx_print_fp16_src(src1, type1, fp); 379 | fprintf(fp, ", "); 380 | agx_print_fp16_src(src2, type2, fp); 381 | } 382 | 383 | static void 384 | agx_print_st_var(uint8_t *code, FILE *fp) 385 | { 386 | /* 4 bytes, first for opcode. Second for source register third 387 | * indicates the destination, fourth unknown */ 388 | if (code[1] & 0x1) 389 | fprintf(fp, ".unk"); 390 | 391 | fprintf(fp, ", index:%u", code[2] & 0xF); 392 | 393 | if ((code[2] & 0xF0) != 0x80) 394 | fprintf(fp, ", unk2=%X", code[2] >> 4); 395 | 396 | if (code[3] != 0x80) 397 | fprintf(fp, ", unk3=%X", code[3]); 398 | } 399 | 400 | /* Disassembles a single instruction */ 401 | 402 | unsigned 403 | agx_disassemble_instr(uint8_t *code, bool *stop, bool verbose, FILE *fp) 404 | { 405 | /* Decode the opcode first, requires 2 bytes */ 406 | uint8_t opc = (code[0] & 0x7F) | (code[1] & 0x80); 407 | 408 | /* Guess the size */ 409 | unsigned bytes = agx_instr_bytes(opc, code[1]); 410 | 411 | /* Hexdump the instruction */ 412 | 413 | if (verbose || !agx_opcode_table[opc].complete) { 414 | fprintf(fp, "#"); 415 | for (unsigned i = 0; i < bytes; ++i) 416 | fprintf(fp, " %02X", code[i]); 417 | fprintf(fp, "\n"); 418 | } 419 | 420 | unsigned op_unk80 = code[0] & 0x80; /* XXX: what is this? */ 421 | fprintf(fp, "%c", op_unk80 ? 
'+' : '-'); /* Stay concise.. */ 422 | 423 | if (agx_opcode_table[opc].name) 424 | fputs(agx_opcode_table[opc].name, fp); 425 | else 426 | fprintf(fp, "op_%02X", opc); 427 | 428 | if (opc == OPC_ICSEL) { 429 | unsigned mode = (code[7] & 0xF0) >> 4; 430 | if (mode == 0x1) 431 | fprintf(fp, ".eq"); // output 16-bit bool 432 | else if (mode == 0x2) 433 | fprintf(fp, ".imin"); 434 | else if (mode == 0x3) 435 | fprintf(fp, ".ult"); // output 16-bit bool 436 | else if (mode == 0x4) 437 | fprintf(fp, ".imax"); 438 | else if (mode == 0x5) 439 | fprintf(fp, ".ugt"); // output 16-bit bool 440 | else 441 | fprintf(fp, ".unk%X", mode); 442 | } else if (opc == OPC_FCSEL) { 443 | unsigned mode = (code[7] & 0xF0) >> 4; 444 | 445 | if (mode == 0x6) 446 | fprintf(fp, ".fmin"); 447 | else if (mode == 0xE) 448 | fprintf(fp, ".fmax"); 449 | else 450 | fprintf(fp, ".unk%X", mode); 451 | } 452 | 453 | /* Decode destination register, common to all ALUs (and maybe more?) */ 454 | uint8_t dest = code[1]; 455 | bool dest_32 = dest & 0x1; /* clear for 16-bit */ 456 | unsigned dest_reg = (dest >> 1) & 0x3F; 457 | 458 | /* Maybe it's a 32-bit opcode */ 459 | if (opc == OPC_ST_VAR) 460 | dest_32 = !dest_32; 461 | 462 | fprintf(fp, " %s%u", 463 | dest_32 ? "w" : "h", 464 | dest_reg); 465 | 466 | /* Decode other stuff, TODO */ 467 | switch (opc) { 468 | case OPC_ST_VAR: 469 | agx_print_st_var(code, fp); 470 | break; 471 | case OPC_LD_COMPUTE: 472 | agx_print_ld_compute(code, fp); 473 | break; 474 | case OPC_BITOP: 475 | agx_print_bitop(code, fp); 476 | break; 477 | case OPC_FADD_16: 478 | case OPC_FADD_SAT_16: 479 | case OPC_FMUL_16: 480 | case OPC_FMUL_SAT_16: 481 | agx_print_fadd16(code, fp); 482 | break; 483 | case OPC_MOVI: { 484 | uint32_t imm = code[2] | (code[3] << 8); 485 | 486 | if (dest_32) 487 | imm |= (code[4] << 16) | (code[5] << 24); 488 | 489 | fprintf(fp, ", #0x%X", imm); 490 | break; 491 | } 492 | case OPC_FADD_32: 493 | case OPC_FADD_SAT_32: 494 | case OPC_FMUL_32: 495 | case OPC_FMUL_SAT_32: 496 | agx_print_fadd_f32(fp, code); 497 | break; 498 | default: { 499 | /* Make some guesses */ 500 | bool iadd = opc == OPC_IADD; 501 | 502 | if (bytes > 2) { 503 | agx_print_float_src(fp, 504 | (code[2] & 0xC0) >> 6, 505 | (code[2] & 0x3F) | 506 | (iadd ? ((code[5] & 0x0C) << 4) : 0), 507 | // TODO: why overlap? 
508 | code[3] & 0x20, 509 | code[3] & 0x04, 510 | code[3] & 0x08); 511 | 512 | agx_print_float_src(fp, 513 | (code[4] & 0x0C) >> 2, 514 | ((code[3] >> 4) & 0xF) | ((code[4] & 0x3) << 4) | ((code[7] & 0x3) << 6), 515 | code[4] & 0x20, 516 | code[4] & 0x40, 517 | code[4] & 0x80); 518 | } 519 | 520 | if (bytes > 6 && !iadd) { 521 | agx_print_float_src(fp, 522 | (code[5] & 0xC0) >> 6, 523 | (code[5] & 0x3F) | (code[6] & 0xC0), 524 | code[6] & 0x20, 525 | code[6] & 0x04, 526 | code[6] & 0x08); 527 | } 528 | 529 | break; 530 | } 531 | } 532 | 533 | fprintf(fp, "\n"); 534 | 535 | if (code[0] == (OPC_STOP | 0x80)) 536 | *stop = true; 537 | 538 | return bytes; 539 | } 540 | 541 | /* Disassembles a shader */ 542 | 543 | void 544 | agx_disassemble(void *_code, size_t maxlen, FILE *fp) 545 | { 546 | if (maxlen > 256) 547 | maxlen = 256; 548 | 549 | uint8_t *code = _code; 550 | 551 | bool stop = false; 552 | unsigned bytes = 0; 553 | bool verbose = getenv("ASAHI_VERBOSE") != NULL; 554 | 555 | while((bytes + 8) < maxlen && !stop) 556 | bytes += agx_disassemble_instr(code + bytes, &stop, verbose, fp); 557 | 558 | if (!stop) 559 | fprintf(fp, "// error: stop instruction not found\n"); 560 | } 561 | -------------------------------------------------------------------------------- /docs/Codenames.md: -------------------------------------------------------------------------------- 1 | On an Mac Mini M1 (2020): 2 | 3 | * ioreg gives AGXAcceleratorG13G_B0 (with clients of type AGXDeviceUserClient), parent type sgx@4000000 4 | * Also has gfx-asc@6400000 -> AppleASCWrapV4 -> ... -> AGXFirmwareKextG13RTBuddy 5 | * Metal dispatches to AGXMetal13_3 6 | 7 | All in all, looks like this is a **G13** chip. 8 | -------------------------------------------------------------------------------- /docs/table.py: -------------------------------------------------------------------------------- 1 | # Correspondance of bytes spilled to bitfield in question 2 | 3 | table = [ 4 | (340, 6), 5 | (132, 5), 6 | (420, 7), 7 | (548, 8), 8 | (740, 8), 9 | (1140, 9), 10 | (500, 7), 11 | (516, 8), 12 | (356, 6), 13 | (192, 5), 14 | (112, 4), 15 | (52, 2), 16 | (36, 2), 17 | (20, 1), 18 | (4, 1), 19 | (68, 3), 20 | (212, 5), 21 | (372, 6), 22 | (404, 7), 23 | (388, 7) 24 | ] 25 | -------------------------------------------------------------------------------- /lib/cmdbuf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /lib/cmdstream.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and 
associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __CMDSTREAM_H 25 | #define __CMDSTREAM_H 26 | 27 | #include 28 | 29 | struct agx_map_header { 30 | uint64_t cmdbuf_id; // GUID 31 | uint32_t unk2; // 01 00 00 00 32 | uint32_t unk3; // 28 05 00 80 33 | uint64_t encoder_id; // GUID 34 | uint32_t unk6; // 00 00 00 00 35 | uint32_t unk7; // 80 07 00 00 36 | uint32_t nr_entries_1; 37 | uint32_t nr_entries_2; 38 | uint32_t unka; // 0b 00 00 00 39 | uint32_t padding[4]; 40 | } __attribute__((packed)); 41 | 42 | struct agx_map_entry { 43 | uint32_t unkAAA; // 20 00 00 00 44 | uint32_t unk2; // 00 00 00 00 45 | uint32_t unk3; // 00 00 00 00 46 | uint32_t unk4; // 00 00 00 00 47 | uint32_t unk5; // 00 00 00 00 48 | uint32_t unk6; // 00 00 00 00 49 | uint32_t unkBBB; // 01 00 00 00 50 | uint32_t unk8; // 00 00 00 00 51 | uint32_t unk9; // 00 00 00 00 52 | uint32_t unka; // ff ff 01 00 53 | uint32_t index; 54 | uint32_t padding[5]; 55 | } __attribute__((packed)); 56 | 57 | #endif 58 | -------------------------------------------------------------------------------- /lib/decode.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017-2019 Alyssa Rosenzweig 3 | * Copyright (C) 2017-2019 Connor Abbott 4 | * Copyright (C) 2019 Collabora, Ltd. 5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the "Software"), 8 | * to deal in the Software without restriction, including without limitation 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | * and/or sell copies of the Software, and to permit persons to whom the 11 | * Software is furnished to do so, subject to the following conditions: 12 | * 13 | * The above copyright notice and this permission notice (including the next 14 | * paragraph) shall be included in all copies or substantial portions of the 15 | * Software. 16 | * 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | * SOFTWARE. 
24 | */ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | #include "decode.h" 36 | #include "io.h" 37 | 38 | extern void agx_disassemble(void *_code, size_t maxlen, FILE *fp); 39 | 40 | FILE *pandecode_dump_stream; 41 | 42 | /* Memory handling, this can't pull in proper data structures so hardcode some 43 | * things, it should be "good enough" for most use cases */ 44 | 45 | #define MAX_MAPPINGS 4096 46 | 47 | struct agx_allocation mmap_array[MAX_MAPPINGS]; 48 | unsigned mmap_count = 0; 49 | 50 | struct agx_allocation *ro_mappings[MAX_MAPPINGS]; 51 | unsigned ro_mapping_count = 0; 52 | 53 | static struct agx_allocation * 54 | pandecode_find_mapped_gpu_mem_containing_rw(uint64_t addr) 55 | { 56 | for (unsigned i = 0; i < mmap_count; ++i) { 57 | if (addr >= mmap_array[i].gpu_va && (addr - mmap_array[i].gpu_va) < mmap_array[i].size) 58 | return mmap_array + i; 59 | } 60 | 61 | return NULL; 62 | } 63 | 64 | struct agx_allocation * 65 | pandecode_find_mapped_gpu_mem_containing(uint64_t addr) 66 | { 67 | struct agx_allocation *mem = pandecode_find_mapped_gpu_mem_containing_rw(addr); 68 | 69 | if (mem && mem->map && !mem->ro) { 70 | mprotect(mem->map, mem->size, PROT_READ); 71 | mem->ro = true; 72 | ro_mappings[ro_mapping_count++] = mem; 73 | assert(ro_mapping_count < MAX_MAPPINGS); 74 | } 75 | 76 | return mem; 77 | } 78 | 79 | static inline void * 80 | __pandecode_fetch_gpu_mem(const struct agx_allocation *mem, 81 | uint64_t gpu_va, size_t size, 82 | int line, const char *filename) 83 | { 84 | if (!mem) 85 | mem = pandecode_find_mapped_gpu_mem_containing(gpu_va); 86 | 87 | if (!mem) { 88 | fprintf(stderr, "Access to unknown memory %" PRIx64 " in %s:%d\n", 89 | gpu_va, filename, line); 90 | fflush(pandecode_dump_stream); 91 | assert(0); 92 | } 93 | 94 | assert(mem); 95 | assert(size + (gpu_va - mem->gpu_va) <= mem->size); 96 | 97 | return mem->map + gpu_va - mem->gpu_va; 98 | } 99 | 100 | #define pandecode_fetch_gpu_mem(gpu_va, size) \ 101 | __pandecode_fetch_gpu_mem(NULL, gpu_va, size, __LINE__, __FILE__) 102 | 103 | static void 104 | pandecode_map_read_write(void) 105 | { 106 | for (unsigned i = 0; i < ro_mapping_count; ++i) { 107 | ro_mappings[i]->ro = false; 108 | mprotect(ro_mappings[i]->map, ro_mappings[i]->size, 109 | PROT_READ | PROT_WRITE); 110 | } 111 | 112 | ro_mapping_count = 0; 113 | } 114 | 115 | /* Helpers for parsing the cmdstream */ 116 | 117 | #define DUMP_UNPACKED(T, var, str) { \ 118 | pandecode_log(str); \ 119 | bl_print(pandecode_dump_stream, T, var, (pandecode_indent + 1) * 2); \ 120 | } 121 | 122 | #define DUMP_CL(T, cl, str) {\ 123 | bl_unpack(cl, T, temp); \ 124 | DUMP_UNPACKED(T, temp, str "\n"); \ 125 | } 126 | 127 | #define pandecode_log(str) fputs(str, pandecode_dump_stream) 128 | #define pandecode_msg(str) fprintf(pandecode_dump_stream, "// %s", str) 129 | 130 | unsigned pandecode_indent = 0; 131 | 132 | /* To check for memory safety issues, validates that the given pointer in GPU 133 | * memory is valid, containing at least sz bytes. The goal is to detect 134 | * GPU-side memory bugs (NULL pointer dereferences, buffer overflows, or buffer 135 | * overruns) by statically validating pointers. 
136 | */ 137 | 138 | static void 139 | pandecode_validate_buffer(uint64_t addr, size_t sz) 140 | { 141 | if (!addr) { 142 | pandecode_msg("XXX: null pointer deref"); 143 | return; 144 | } 145 | 146 | /* Find a BO */ 147 | 148 | struct agx_allocation *bo = 149 | pandecode_find_mapped_gpu_mem_containing(addr); 150 | 151 | if (!bo) { 152 | pandecode_msg("XXX: invalid memory dereference\n"); 153 | return; 154 | } 155 | 156 | /* Bounds check */ 157 | 158 | unsigned offset = addr - bo->gpu_va; 159 | unsigned total = offset + sz; 160 | 161 | if (total > bo->size) { 162 | fprintf(pandecode_dump_stream, "// XXX: buffer overrun. " 163 | "Chunk of size %zu at offset %d in buffer of size %zu. " 164 | "Overrun by %zu bytes. \n", 165 | sz, offset, bo->size, total - bo->size); 166 | return; 167 | } 168 | } 169 | 170 | static struct agx_allocation * 171 | pandecode_find_cmdbuf(unsigned cmdbuf_index) 172 | { 173 | for (unsigned i = 0; i < mmap_count; ++i) { 174 | if (mmap_array[i].type != AGX_ALLOC_CMDBUF) 175 | continue; 176 | 177 | if (mmap_array[i].index != cmdbuf_index) 178 | continue; 179 | 180 | return &mmap_array[i]; 181 | } 182 | 183 | return NULL; 184 | } 185 | 186 | static void 187 | pandecode_dump_bo(struct agx_allocation *bo, const char *name) 188 | { 189 | fprintf(pandecode_dump_stream, "%s %s (%u)\n", name, bo->name ?: "", bo->index); 190 | hexdump(pandecode_dump_stream, bo->map, bo->size, false); 191 | } 192 | 193 | /* Abstraction for command stream parsing */ 194 | typedef unsigned (*decode_cmd)(const uint8_t *map, bool verbose); 195 | 196 | #define STATE_DONE (0xFFFFFFFFu) 197 | 198 | static void 199 | pandecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verbose) 200 | { 201 | struct agx_allocation *alloc = pandecode_find_mapped_gpu_mem_containing(va); 202 | assert(alloc != NULL && "nonexistant object"); 203 | fprintf(pandecode_dump_stream, "%s\n", label); 204 | 205 | uint8_t *map = pandecode_fetch_gpu_mem(va, 64); 206 | uint8_t *end = map + alloc->size; 207 | 208 | if (verbose) 209 | pandecode_dump_bo(alloc, label); 210 | 211 | while (map < end) { 212 | unsigned count = decoder(map, verbose); 213 | 214 | /* If we fail to decode, default to a hexdump (don't hang) */ 215 | if (count == 0) { 216 | hexdump(pandecode_dump_stream, map, 8, false); 217 | count = 8; 218 | } 219 | 220 | map += count; 221 | 222 | if (count == STATE_DONE) 223 | break; 224 | } 225 | } 226 | 227 | static unsigned 228 | pandecode_pipeline(const uint8_t *map, UNUSED bool verbose) 229 | { 230 | uint8_t zeroes[16] = { 0 }; 231 | 232 | if (map[0] == 0x4D && map[1] == 0xbd) { 233 | /* TODO: Disambiguation for extended is a guess */ 234 | bl_unpack(map, SET_SHADER_EXTENDED, cmd); 235 | DUMP_UNPACKED(SET_SHADER_EXTENDED, cmd, "Set shader\n"); 236 | 237 | if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { 238 | pandecode_log("Preshader\n"); 239 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.preshader_code, 8192), 240 | 8192, pandecode_dump_stream); 241 | pandecode_log("\n---\n"); 242 | } 243 | 244 | pandecode_log("\n"); 245 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.code, 8192), 246 | 8192, pandecode_dump_stream); 247 | pandecode_log("\n"); 248 | 249 | return AGX_SET_SHADER_EXTENDED_LENGTH; 250 | } else if (map[0] == 0x4D) { 251 | bl_unpack(map, SET_SHADER, cmd); 252 | DUMP_UNPACKED(SET_SHADER, cmd, "Set shader\n"); 253 | 254 | if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { 255 | pandecode_log("Preshader\n"); 256 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.preshader_code, 8192), 257 
| 8192, pandecode_dump_stream); 258 | pandecode_log("\n---\n"); 259 | } 260 | 261 | pandecode_log("\n"); 262 | agx_disassemble(pandecode_fetch_gpu_mem(cmd.code, 8192), 263 | 8192, pandecode_dump_stream); 264 | FILE *fp = fopen("vertex.bin", "wb"); 265 | fwrite(pandecode_fetch_gpu_mem(cmd.code, 8192), 1, 8192, fp); 266 | fclose(fp); 267 | pandecode_log("\n"); 268 | 269 | return AGX_SET_SHADER_LENGTH; 270 | } else if (map[0] == 0x1D) { 271 | DUMP_CL(BIND_UNIFORM, map, "Bind uniform"); 272 | return AGX_BIND_UNIFORM_LENGTH; 273 | } else if (memcmp(map, zeroes, 16) == 0) { 274 | /* TODO: Termination */ 275 | return STATE_DONE; 276 | } else { 277 | return 0; 278 | } 279 | } 280 | 281 | static void 282 | pandecode_record(uint64_t va, size_t size, bool verbose) 283 | { 284 | uint8_t *map = pandecode_fetch_gpu_mem(va, size); 285 | uint32_t tag = 0; 286 | memcpy(&tag, map, 4); 287 | 288 | if (tag == 0x00000C00) { 289 | assert(size == AGX_VIEWPORT_LENGTH); 290 | DUMP_CL(VIEWPORT, map, "Viewport"); 291 | } else if (tag == 0x0C020000) { 292 | assert(size == AGX_LINKAGE_LENGTH); 293 | DUMP_CL(LINKAGE, map, "Linkage"); 294 | } else if (tag == 0x800000) { 295 | assert(size == (AGX_BIND_PIPELINE_LENGTH + 4)); 296 | // XXX: why does this raise a bus error? 297 | // uint32_t unk = 0; 298 | // memcpy(map + AGX_BIND_PIPELINE_LENGTH, &unk, 4); 299 | 300 | bl_unpack(map, BIND_PIPELINE, cmd); 301 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 302 | DUMP_UNPACKED(BIND_PIPELINE, cmd, "Bind fragment pipeline\n"); 303 | // fprintf(pandecode_dump_stream, "Unk: %X\n", unk); 304 | } else { 305 | fprintf(pandecode_dump_stream, "Record %" PRIx64 "\n", va); 306 | hexdump(pandecode_dump_stream, map, size, false); 307 | } 308 | } 309 | 310 | static unsigned 311 | pandecode_cmd(const uint8_t *map, bool verbose) 312 | { 313 | if (map[0] == 0x02 && map[1] == 0x10 && map[2] == 0x00 && map[3] == 0x00) { 314 | bl_unpack(map, LAUNCH, cmd); 315 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 316 | DUMP_UNPACKED(LAUNCH, cmd, "Launch\n"); 317 | return AGX_LAUNCH_LENGTH; 318 | } else if (map[0] == 0x2E && map[1] == 0x00 && map[2] == 0x00 && map[3] == 0x40) { 319 | bl_unpack(map, BIND_PIPELINE, cmd); 320 | pandecode_stateful(cmd.pipeline, "Pipeline", pandecode_pipeline, verbose); 321 | DUMP_UNPACKED(BIND_PIPELINE, cmd, "Bind vertex pipeline\n"); 322 | 323 | /* Random unaligned null byte, it's pretty awful.. 
*/ 324 | assert(map[AGX_BIND_PIPELINE_LENGTH] == 0); 325 | return AGX_BIND_PIPELINE_LENGTH + 1; 326 | } else if (map[1] == 0xc0 && map[2] == 0x61) { 327 | DUMP_CL(DRAW, map, "Draw"); 328 | return AGX_DRAW_LENGTH; 329 | } else if (map[0] == 0x00 && map[1] == 0x00 && map[2] == 0x00 && map[3] == 0xc0) { 330 | return STATE_DONE; 331 | } else if (map[1] == 0x00 && map[2] == 0x00) { 332 | /* No need to explicitly dump the record */ 333 | bl_unpack(map, RECORD, cmd); 334 | struct agx_allocation *mem = pandecode_find_mapped_gpu_mem_containing(cmd.data); 335 | 336 | if (mem) 337 | pandecode_record(cmd.data, cmd.size_words * 4, verbose); 338 | else 339 | DUMP_UNPACKED(RECORD, cmd, "Non-existant record (XXX)\n"); 340 | 341 | return AGX_RECORD_LENGTH; 342 | } else if (map[0] == 0 && map[1] == 0 && map[2] == 0xC0 && map[3] == 0x00) { 343 | unsigned zero[16] = { 0 }; 344 | assert(memcmp(map + 4, zero, sizeof(zero)) == 0); 345 | return STATE_DONE; 346 | } else { 347 | return 0; 348 | } 349 | } 350 | 351 | void 352 | pandecode_cmdstream(unsigned cmdbuf_index, bool verbose) 353 | { 354 | pandecode_dump_file_open(); 355 | 356 | struct agx_allocation *cmdbuf = pandecode_find_cmdbuf(cmdbuf_index); 357 | assert(cmdbuf != NULL && "nonexistant command buffer"); 358 | 359 | if (verbose) 360 | pandecode_dump_bo(cmdbuf, "Command buffer"); 361 | 362 | /* TODO: What else is in here? */ 363 | uint64_t *encoder = ((uint64_t *) cmdbuf->map) + 7; 364 | pandecode_stateful(*encoder, "Encoder", pandecode_cmd, verbose); 365 | 366 | pandecode_map_read_write(); 367 | } 368 | 369 | void 370 | pandecode_dump_mappings(void) 371 | { 372 | pandecode_dump_file_open(); 373 | 374 | for (unsigned i = 0; i < mmap_count; ++i) { 375 | if (!mmap_array[i].map || !mmap_array[i].size) 376 | continue; 377 | 378 | assert(mmap_array[i].type < AGX_NUM_ALLOC); 379 | 380 | fprintf(pandecode_dump_stream, "Buffer: type %s, gpu %llx, index %u.bin:\n\n", 381 | agx_alloc_types[mmap_array[i].type], 382 | mmap_array[i].gpu_va, mmap_array[i].index); 383 | 384 | hexdump(pandecode_dump_stream, mmap_array[i].map, mmap_array[i].size, false); 385 | fprintf(pandecode_dump_stream, "\n"); 386 | } 387 | } 388 | 389 | 390 | 391 | static void 392 | pandecode_add_name(struct agx_allocation *mem, uint64_t gpu_va, const char *name) 393 | { 394 | if (!name) { 395 | /* If we don't have a name, assign one */ 396 | 397 | snprintf(mem->name, sizeof(mem->name) - 1, 398 | "memory_%" PRIx64, gpu_va); 399 | } else { 400 | assert((strlen(name) + 1) < sizeof(mem->name)); 401 | memcpy(mem->name, name, strlen(name) + 1); 402 | } 403 | } 404 | 405 | void 406 | pandecode_track_alloc(struct agx_allocation alloc) 407 | { 408 | assert((mmap_count + 1) < MAX_MAPPINGS); 409 | mmap_array[mmap_count++] = alloc; 410 | } 411 | 412 | static char * 413 | pointer_as_memory_reference(uint64_t ptr) 414 | { 415 | struct agx_allocation *mapped; 416 | char *out = malloc(128); 417 | 418 | /* Try to find the corresponding mapped zone */ 419 | 420 | mapped = pandecode_find_mapped_gpu_mem_containing_rw(ptr); 421 | 422 | if (mapped) { 423 | snprintf(out, 128, "%s + %d", mapped->name, (int) (ptr - mapped->gpu_va)); 424 | return out; 425 | } 426 | 427 | /* Just use the raw address if other options are exhausted */ 428 | 429 | snprintf(out, 128, "0x%" PRIx64, ptr); 430 | return out; 431 | 432 | } 433 | 434 | static int pandecode_dump_frame_count = 0; 435 | 436 | void 437 | pandecode_dump_file_open(void) 438 | { 439 | if (pandecode_dump_stream) 440 | return; 441 | 442 | /* This does a getenv every frame, so it 
is possible to use 443 | * setenv to change the base at runtime. 444 | */ 445 | const char *dump_file_base = getenv("PANDECODE_DUMP_FILE") ?: "pandecode.dump"; 446 | if (!strcmp(dump_file_base, "stderr")) 447 | pandecode_dump_stream = stderr; 448 | else { 449 | char buffer[1024]; 450 | snprintf(buffer, sizeof(buffer), "%s.%04d", dump_file_base, pandecode_dump_frame_count); 451 | printf("pandecode: dump command stream to file %s\n", buffer); 452 | pandecode_dump_stream = fopen(buffer, "w"); 453 | if (!pandecode_dump_stream) 454 | fprintf(stderr, 455 | "pandecode: failed to open command stream log file %s\n", 456 | buffer); 457 | } 458 | } 459 | 460 | static void 461 | pandecode_dump_file_close(void) 462 | { 463 | if (pandecode_dump_stream && pandecode_dump_stream != stderr) { 464 | fclose(pandecode_dump_stream); 465 | pandecode_dump_stream = NULL; 466 | } 467 | } 468 | 469 | void 470 | pandecode_next_frame(void) 471 | { 472 | pandecode_dump_file_close(); 473 | pandecode_dump_frame_count++; 474 | } 475 | 476 | void 477 | pandecode_close(void) 478 | { 479 | pandecode_dump_file_close(); 480 | } 481 | -------------------------------------------------------------------------------- /lib/decode.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017-2019 Lyude Paul 3 | * Copyright (C) 2017-2019 Alyssa Rosenzweig 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice (including the next 13 | * paragraph) shall be included in all copies or substantial portions of the 14 | * Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | * SOFTWARE. 23 | * 24 | */ 25 | 26 | #ifndef __PAN_DECODE_H__ 27 | #define __PAN_DECODE_H__ 28 | 29 | #include "io.h" 30 | 31 | void pandecode_next_frame(void); 32 | 33 | void pandecode_close(void); 34 | 35 | void pandecode_cmdstream(unsigned cmdbuf_index, bool verbose); 36 | 37 | void pandecode_dump_file_open(void); 38 | 39 | void pandecode_track_alloc(struct agx_allocation alloc); 40 | 41 | void pandecode_dump_mappings(void); 42 | 43 | #endif /* __MMAP_TRACE_H__ */ 44 | -------------------------------------------------------------------------------- /lib/gen_pack.py: -------------------------------------------------------------------------------- 1 | #encoding=utf-8 2 | 3 | # Copyright (C) 2016 Intel Corporation 4 | # Copyright (C) 2016 Broadcom 5 | # Copyright (C) 2020 Collabora, Ltd. 
6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a 8 | # copy of this software and associated documentation files (the "Software"), 9 | # to deal in the Software without restriction, including without limitation 10 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 | # and/or sell copies of the Software, and to permit persons to whom the 12 | # Software is furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice (including the next 15 | # paragraph) shall be included in all copies or substantial portions of the 16 | # Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 24 | # IN THE SOFTWARE. 25 | 26 | import xml.parsers.expat 27 | import sys 28 | import operator 29 | from functools import reduce 30 | 31 | global_prefix = "agx" 32 | 33 | pack_header = """ 34 | /* Generated code, see midgard.xml and gen_pack_header.py 35 | * 36 | * Packets, enums and structures for Panfrost. 37 | * 38 | * This file has been generated, do not hand edit. 39 | */ 40 | 41 | #ifndef BL_PACK_H 42 | #define BL_PACK_H 43 | 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include "lib/util.h" 51 | 52 | #define __gen_unpack_float(x, y, z) uif(__gen_unpack_uint(x, y, z)) 53 | 54 | static inline uint64_t 55 | __gen_uint(uint64_t v, uint32_t start, uint32_t end) 56 | { 57 | #ifndef NDEBUG 58 | const int width = end - start + 1; 59 | if (width < 64) { 60 | const uint64_t max = (1ull << width) - 1; 61 | assert(v <= max); 62 | } 63 | #endif 64 | 65 | return v << start; 66 | } 67 | 68 | static inline uint32_t 69 | __gen_sint(int32_t v, uint32_t start, uint32_t end) 70 | { 71 | #ifndef NDEBUG 72 | const int width = end - start + 1; 73 | if (width < 64) { 74 | const int64_t max = (1ll << (width - 1)) - 1; 75 | const int64_t min = -(1ll << (width - 1)); 76 | assert(min <= v && v <= max); 77 | } 78 | #endif 79 | 80 | return (((uint32_t) v) << start) & ((2ll << end) - 1); 81 | } 82 | 83 | static inline uint64_t 84 | __gen_unpack_uint(const uint8_t *restrict cl, uint32_t start, uint32_t end) 85 | { 86 | uint64_t val = 0; 87 | const int width = end - start + 1; 88 | const uint64_t mask = (width == 64 ? ~0 : (1ull << width) - 1 ); 89 | 90 | for (unsigned byte = start / 8; byte <= end / 8; byte++) { 91 | val |= ((uint64_t) cl[byte]) << ((byte - start / 8) * 8); 92 | } 93 | 94 | return (val >> (start % 8)) & mask; 95 | } 96 | 97 | static inline uint64_t 98 | __gen_unpack_sint(const uint8_t *restrict cl, uint32_t start, uint32_t end) 99 | { 100 | int size = end - start + 1; 101 | int64_t val = __gen_unpack_uint(cl, start, end); 102 | 103 | /* Get the sign bit extended. 
*/ 104 | return (val << (64 - size)) >> (64 - size); 105 | } 106 | 107 | #define bl_prepare(dst, T) \\ 108 | *(dst) = (struct AGX_ ## T){ AGX_ ## T ## _header } 109 | 110 | #define bl_pack(dst, T, name) \\ 111 | for (struct AGX_ ## T name = { AGX_ ## T ## _header }, \\ 112 | *_loop_terminate = (void *) (dst); \\ 113 | __builtin_expect(_loop_terminate != NULL, 1); \\ 114 | ({ AGX_ ## T ## _pack((uint32_t *) (dst), &name); \\ 115 | _loop_terminate = NULL; })) 116 | 117 | #define bl_unpack(src, T, name) \\ 118 | struct AGX_ ## T name; \\ 119 | AGX_ ## T ## _unpack((uint8_t *)(src), &name) 120 | 121 | #define bl_print(fp, T, var, indent) \\ 122 | AGX_ ## T ## _print(fp, &(var), indent) 123 | 124 | """ 125 | 126 | def to_alphanum(name): 127 | substitutions = { 128 | ' ': '_', 129 | '/': '_', 130 | '[': '', 131 | ']': '', 132 | '(': '', 133 | ')': '', 134 | '-': '_', 135 | ':': '', 136 | '.': '', 137 | ',': '', 138 | '=': '', 139 | '>': '', 140 | '#': '', 141 | '&': '', 142 | '*': '', 143 | '"': '', 144 | '+': '', 145 | '\'': '', 146 | } 147 | 148 | for i, j in substitutions.items(): 149 | name = name.replace(i, j) 150 | 151 | return name 152 | 153 | def safe_name(name): 154 | name = to_alphanum(name) 155 | if not name[0].isalpha(): 156 | name = '_' + name 157 | 158 | return name 159 | 160 | def prefixed_upper_name(prefix, name): 161 | if prefix: 162 | name = prefix + "_" + name 163 | return safe_name(name).upper() 164 | 165 | def enum_name(name): 166 | return "{}_{}".format(global_prefix, safe_name(name)).lower() 167 | 168 | def num_from_str(num_str): 169 | if num_str.lower().startswith('0x'): 170 | return int(num_str, base=16) 171 | else: 172 | assert(not num_str.startswith('0') and 'octals numbers not allowed') 173 | return int(num_str) 174 | 175 | MODIFIERS = ["shr", "minus", "align", "log2"] 176 | 177 | def parse_modifier(modifier): 178 | if modifier is None: 179 | return None 180 | 181 | for mod in MODIFIERS: 182 | if modifier[0:len(mod)] == mod: 183 | if mod == "log2": 184 | assert(len(mod) == len(modifier)) 185 | return [mod] 186 | 187 | if modifier[len(mod)] == '(' and modifier[-1] == ')': 188 | ret = [mod, int(modifier[(len(mod) + 1):-1])] 189 | if ret[0] == 'align': 190 | align = ret[1] 191 | # Make sure the alignment is a power of 2 192 | assert(align > 0 and not(align & (align - 1))); 193 | 194 | return ret 195 | 196 | print("Invalid modifier") 197 | assert(False) 198 | 199 | class Field(object): 200 | def __init__(self, parser, attrs): 201 | self.parser = parser 202 | if "name" in attrs: 203 | self.name = safe_name(attrs["name"]).lower() 204 | self.human_name = attrs["name"] 205 | 206 | if ":" in str(attrs["start"]): 207 | (word, bit) = attrs["start"].split(":") 208 | self.start = (int(word) * 32) + int(bit) 209 | else: 210 | self.start = int(attrs["start"]) 211 | 212 | self.end = self.start + int(attrs["size"]) - 1 213 | self.type = attrs["type"] 214 | 215 | if self.type == 'bool' and self.start != self.end: 216 | print("#error Field {} has bool type but more than one bit of size".format(self.name)); 217 | 218 | if "prefix" in attrs: 219 | self.prefix = safe_name(attrs["prefix"]).upper() 220 | else: 221 | self.prefix = None 222 | 223 | if "exact" in attrs: 224 | self.exact = int(attrs["exact"]) 225 | else: 226 | self.exact = None 227 | 228 | self.default = attrs.get("default") 229 | 230 | # Map enum values 231 | if self.type in self.parser.enums and self.default is not None: 232 | self.default = safe_name('{}_{}_{}'.format(global_prefix, self.type, self.default)).upper() 233 | 234 
| self.modifier = parse_modifier(attrs.get("modifier")) 235 | 236 | def emit_template_struct(self, dim): 237 | if self.type == 'address': 238 | type = 'uint64_t' 239 | elif self.type == 'bool': 240 | type = 'bool' 241 | elif self.type == 'float': 242 | type = 'float' 243 | elif self.type in ['uint', 'hex'] and self.end - self.start > 32: 244 | type = 'uint64_t' 245 | elif self.type == 'int': 246 | type = 'int32_t' 247 | elif self.type in ['uint', 'uint/float', 'hex']: 248 | type = 'uint32_t' 249 | elif self.type in self.parser.structs: 250 | type = 'struct ' + self.parser.gen_prefix(safe_name(self.type.upper())) 251 | elif self.type in self.parser.enums: 252 | type = 'enum ' + enum_name(self.type) 253 | else: 254 | print("#error unhandled type: %s" % self.type) 255 | type = "uint32_t" 256 | 257 | print(" %-36s %s%s;" % (type, self.name, dim)) 258 | 259 | for value in self.values: 260 | name = prefixed_upper_name(self.prefix, value.name) 261 | print("#define %-40s %d" % (name, value.value)) 262 | 263 | def overlaps(self, field): 264 | return self != field and max(self.start, field.start) <= min(self.end, field.end) 265 | 266 | class Group(object): 267 | def __init__(self, parser, parent, start, count, label): 268 | self.parser = parser 269 | self.parent = parent 270 | self.start = start 271 | self.count = count 272 | self.label = label 273 | self.size = 0 274 | self.length = 0 275 | self.fields = [] 276 | 277 | def get_length(self): 278 | # Determine number of bytes in this group. 279 | calculated = max(field.end // 8 for field in self.fields) + 1 if len(self.fields) > 0 else 0 280 | if self.length > 0: 281 | assert(self.length >= calculated) 282 | else: 283 | self.length = calculated 284 | return self.length 285 | 286 | 287 | def emit_template_struct(self, dim): 288 | if self.count == 0: 289 | print(" /* variable length fields follow */") 290 | else: 291 | if self.count > 1: 292 | dim = "%s[%d]" % (dim, self.count) 293 | 294 | if len(self.fields) == 0: 295 | print(" int dummy;") 296 | 297 | for field in self.fields: 298 | if field.exact is not None: 299 | continue 300 | 301 | field.emit_template_struct(dim) 302 | 303 | class Word: 304 | def __init__(self): 305 | self.size = 32 306 | self.contributors = [] 307 | 308 | class FieldRef: 309 | def __init__(self, field, path, start, end): 310 | self.field = field 311 | self.path = path 312 | self.start = start 313 | self.end = end 314 | 315 | def collect_fields(self, fields, offset, path, all_fields): 316 | for field in fields: 317 | field_path = '{}{}'.format(path, field.name) 318 | field_offset = offset + field.start 319 | 320 | if field.type in self.parser.structs: 321 | sub_struct = self.parser.structs[field.type] 322 | self.collect_fields(sub_struct.fields, field_offset, field_path + '.', all_fields) 323 | continue 324 | 325 | start = field_offset 326 | end = offset + field.end 327 | all_fields.append(self.FieldRef(field, field_path, start, end)) 328 | 329 | def collect_words(self, fields, offset, path, words): 330 | for field in fields: 331 | field_path = '{}{}'.format(path, field.name) 332 | start = offset + field.start 333 | 334 | if field.type in self.parser.structs: 335 | sub_fields = self.parser.structs[field.type].fields 336 | self.collect_words(sub_fields, start, field_path + '.', words) 337 | continue 338 | 339 | end = offset + field.end 340 | contributor = self.FieldRef(field, field_path, start, end) 341 | first_word = contributor.start // 32 342 | last_word = contributor.end // 32 343 | for b in range(first_word, last_word + 1): 
344 | if not b in words: 345 | words[b] = self.Word() 346 | words[b].contributors.append(contributor) 347 | 348 | def emit_pack_function(self): 349 | self.get_length() 350 | 351 | words = {} 352 | self.collect_words(self.fields, 0, '', words) 353 | 354 | # Validate the modifier is lossless 355 | for field in self.fields: 356 | if field.modifier is None: 357 | continue 358 | 359 | assert(field.exact is None) 360 | 361 | if field.modifier[0] == "shr": 362 | shift = field.modifier[1] 363 | mask = hex((1 << shift) - 1) 364 | print(" assert((values->{} & {}) == 0);".format(field.name, mask)) 365 | elif field.modifier[0] == "minus": 366 | print(" assert(values->{} >= {});".format(field.name, field.modifier[1])) 367 | elif field.modifier[0] == "log2": 368 | print(" assert(util_is_power_of_two_nonzero(values->{}));".format(field.name)) 369 | 370 | for index in range(self.length // 4): 371 | # Handle MBZ words 372 | if not index in words: 373 | print(" cl[%2d] = 0;" % index) 374 | continue 375 | 376 | word = words[index] 377 | 378 | word_start = index * 32 379 | 380 | v = None 381 | prefix = " cl[%2d] =" % index 382 | 383 | for contributor in word.contributors: 384 | field = contributor.field 385 | name = field.name 386 | start = contributor.start 387 | end = contributor.end 388 | contrib_word_start = (start // 32) * 32 389 | start -= contrib_word_start 390 | end -= contrib_word_start 391 | 392 | value = str(field.exact) if field.exact is not None else "values->{}".format(contributor.path) 393 | if field.modifier is not None: 394 | if field.modifier[0] == "shr": 395 | value = "{} >> {}".format(value, field.modifier[1]) 396 | elif field.modifier[0] == "minus": 397 | value = "{} - {}".format(value, field.modifier[1]) 398 | elif field.modifier[0] == "align": 399 | value = "ALIGN_POT({}, {})".format(value, field.modifier[1]) 400 | elif field.modifier[0] == "log2": 401 | value = "util_logbase2({})".format(value) 402 | 403 | if field.type in ["uint", "hex", "address"]: 404 | s = "__gen_uint(%s, %d, %d)" % \ 405 | (value, start, end) 406 | elif field.type in self.parser.enums: 407 | s = "__gen_uint(%s, %d, %d)" % \ 408 | (value, start, end) 409 | elif field.type == "int": 410 | s = "__gen_sint(%s, %d, %d)" % \ 411 | (value, start, end) 412 | elif field.type == "bool": 413 | s = "__gen_uint(%s, %d, %d)" % \ 414 | (value, start, end) 415 | elif field.type == "float": 416 | assert(start == 0 and end == 31) 417 | s = "__gen_uint(fui({}), 0, 32)".format(value) 418 | else: 419 | s = "#error unhandled field {}, type {}".format(contributor.path, field.type) 420 | 421 | if not s == None: 422 | shift = word_start - contrib_word_start 423 | if shift: 424 | s = "%s >> %d" % (s, shift) 425 | 426 | if contributor == word.contributors[-1]: 427 | print("%s %s;" % (prefix, s)) 428 | else: 429 | print("%s %s |" % (prefix, s)) 430 | prefix = " " 431 | 432 | continue 433 | 434 | # Given a field (start, end) contained in word `index`, generate the 32-bit 435 | # mask of present bits relative to the word 436 | def mask_for_word(self, index, start, end): 437 | field_word_start = index * 32 438 | start -= field_word_start 439 | end -= field_word_start 440 | # Cap multiword at one word 441 | start = max(start, 0) 442 | end = min(end, 32 - 1) 443 | count = (end - start + 1) 444 | return (((1 << count) - 1) << start) 445 | 446 | def emit_unpack_function(self): 447 | # First, verify there is no garbage in unused bits 448 | words = {} 449 | self.collect_words(self.fields, 0, '', words) 450 | 451 | for index in range(self.length // 
4): 452 | base = index * 32 453 | word = words.get(index, self.Word()) 454 | masks = [self.mask_for_word(index, c.start, c.end) for c in word.contributors] 455 | mask = reduce(lambda x,y: x | y, masks, 0) 456 | 457 | ALL_ONES = 0xffffffff 458 | 459 | if mask != ALL_ONES: 460 | TMPL = ' if (((const uint32_t *) cl)[{}] & {}) fprintf(stderr, "XXX: Invalid field of {} unpacked at word {}\\n");' 461 | print(TMPL.format(index, hex(mask ^ ALL_ONES), self.label, index)) 462 | 463 | fieldrefs = [] 464 | self.collect_fields(self.fields, 0, '', fieldrefs) 465 | for fieldref in fieldrefs: 466 | field = fieldref.field 467 | convert = None 468 | 469 | args = [] 470 | args.append('cl') 471 | args.append(str(fieldref.start)) 472 | args.append(str(fieldref.end)) 473 | 474 | if field.type in set(["uint", "uint/float", "address", "hex"]) | self.parser.enums: 475 | convert = "__gen_unpack_uint" 476 | elif field.type == "int": 477 | convert = "__gen_unpack_sint" 478 | elif field.type == "bool": 479 | convert = "__gen_unpack_uint" 480 | elif field.type == "float": 481 | convert = "__gen_unpack_float" 482 | else: 483 | s = "/* unhandled field %s, type %s */\n" % (field.name, field.type) 484 | 485 | suffix = "" 486 | prefix = "" 487 | if field.modifier: 488 | if field.modifier[0] == "minus": 489 | suffix = " + {}".format(field.modifier[1]) 490 | elif field.modifier[0] == "shr": 491 | suffix = " << {}".format(field.modifier[1]) 492 | if field.modifier[0] == "log2": 493 | prefix = "1 << " 494 | 495 | decoded = '{}{}({}){}'.format(prefix, convert, ', '.join(args), suffix) 496 | 497 | print(' values->{} = {};'.format(fieldref.path, decoded)) 498 | if field.modifier and field.modifier[0] == "align": 499 | mask = hex(field.modifier[1] - 1) 500 | print(' assert(!(values->{} & {}));'.format(fieldref.path, mask)) 501 | 502 | def emit_print_function(self): 503 | for field in self.fields: 504 | convert = None 505 | name, val = field.human_name, 'values->{}'.format(field.name) 506 | 507 | if field.type in self.parser.structs: 508 | pack_name = self.parser.gen_prefix(safe_name(field.type)).upper() 509 | print(' fprintf(fp, "%*s{}:\\n", indent, "");'.format(field.human_name)) 510 | print(" {}_print(fp, &values->{}, indent + 2);".format(pack_name, field.name)) 511 | elif field.type == "address": 512 | # TODO resolve to name 513 | print(' fprintf(fp, "%*s{}: 0x%" PRIx64 "\\n", indent, "", {});'.format(name, val)) 514 | elif field.type in self.parser.enums: 515 | print(' fprintf(fp, "%*s{}: %s\\n", indent, "", {}_as_str({}));'.format(name, enum_name(field.type), val)) 516 | elif field.type == "int": 517 | print(' fprintf(fp, "%*s{}: %d\\n", indent, "", {});'.format(name, val)) 518 | elif field.type == "bool": 519 | print(' fprintf(fp, "%*s{}: %s\\n", indent, "", {} ? 
"true" : "false");'.format(name, val)) 520 | elif field.type == "float": 521 | print(' fprintf(fp, "%*s{}: %f\\n", indent, "", {});'.format(name, val)) 522 | elif field.type in ["uint", "hex"] and (field.end - field.start) >= 32: 523 | print(' fprintf(fp, "%*s{}: 0x%" PRIx64 "\\n", indent, "", {});'.format(name, val)) 524 | elif field.type == "hex": 525 | print(' fprintf(fp, "%*s{}: 0x%" PRIx32 "\\n", indent, "", {});'.format(name, val)) 526 | elif field.type == "uint/float": 527 | print(' fprintf(fp, "%*s{}: 0x%X (%f)\\n", indent, "", {}, uif({}));'.format(name, val, val)) 528 | else: 529 | print(' fprintf(fp, "%*s{}: %u\\n", indent, "", {});'.format(name, val)) 530 | 531 | class Value(object): 532 | def __init__(self, attrs): 533 | self.name = attrs["name"] 534 | self.value = int(attrs["value"], 0) 535 | 536 | class Parser(object): 537 | def __init__(self): 538 | self.parser = xml.parsers.expat.ParserCreate() 539 | self.parser.StartElementHandler = self.start_element 540 | self.parser.EndElementHandler = self.end_element 541 | 542 | self.struct = None 543 | self.structs = {} 544 | # Set of enum names we've seen. 545 | self.enums = set() 546 | 547 | def gen_prefix(self, name): 548 | return '{}_{}'.format(global_prefix.upper(), name) 549 | 550 | def start_element(self, name, attrs): 551 | if name == "blxml": 552 | print(pack_header) 553 | elif name == "struct": 554 | name = attrs["name"] 555 | self.no_direct_packing = attrs.get("no-direct-packing", False) 556 | object_name = self.gen_prefix(safe_name(name.upper())) 557 | self.struct = object_name 558 | 559 | self.group = Group(self, None, 0, 1, name) 560 | if "size" in attrs: 561 | self.group.length = int(attrs["size"]) 562 | self.group.align = int(attrs["align"]) if "align" in attrs else None 563 | self.structs[attrs["name"]] = self.group 564 | elif name == "field": 565 | self.group.fields.append(Field(self, attrs)) 566 | self.values = [] 567 | elif name == "enum": 568 | self.values = [] 569 | self.enum = safe_name(attrs["name"]) 570 | self.enums.add(attrs["name"]) 571 | if "prefix" in attrs: 572 | self.prefix = attrs["prefix"] 573 | else: 574 | self.prefix= None 575 | elif name == "value": 576 | self.values.append(Value(attrs)) 577 | 578 | def end_element(self, name): 579 | if name == "struct": 580 | self.emit_struct() 581 | self.struct = None 582 | self.group = None 583 | elif name == "field": 584 | self.group.fields[-1].values = self.values 585 | elif name == "enum": 586 | self.emit_enum() 587 | self.enum = None 588 | elif name == "blxml": 589 | print('#endif') 590 | 591 | def emit_header(self, name): 592 | default_fields = [] 593 | for field in self.group.fields: 594 | if not type(field) is Field: 595 | continue 596 | if field.default is not None: 597 | default_fields.append(" .{} = {}".format(field.name, field.default)) 598 | elif field.type in self.structs: 599 | default_fields.append(" .{} = {{ {}_header }}".format(field.name, self.gen_prefix(safe_name(field.type.upper())))) 600 | 601 | print('#define %-40s\\' % (name + '_header')) 602 | if default_fields: 603 | print(", \\\n".join(default_fields)) 604 | else: 605 | print(' 0') 606 | print('') 607 | 608 | def emit_template_struct(self, name, group): 609 | print("struct %s {" % name) 610 | group.emit_template_struct("") 611 | print("};\n") 612 | 613 | def emit_pack_function(self, name, group): 614 | print("static inline void\n%s_pack(uint32_t * restrict cl,\n%sconst struct %s * restrict values)\n{" % 615 | (name, ' ' * (len(name) + 6), name)) 616 | 617 | group.emit_pack_function() 
618 | 619 | print("}\n\n") 620 | 621 | print('#define {} {}'.format (name + "_LENGTH", self.group.length)) 622 | if self.group.align != None: 623 | print('#define {} {}'.format (name + "_ALIGN", self.group.align)) 624 | print('struct {}_packed {{ uint32_t opaque[{}]; }};'.format(name.lower(), self.group.length // 4)) 625 | 626 | def emit_unpack_function(self, name, group): 627 | print("static inline void") 628 | print("%s_unpack(const uint8_t * restrict cl,\n%sstruct %s * restrict values)\n{" % 629 | (name.upper(), ' ' * (len(name) + 8), name)) 630 | 631 | group.emit_unpack_function() 632 | 633 | print("}\n") 634 | 635 | def emit_print_function(self, name, group): 636 | print("static inline void") 637 | print("{}_print(FILE *fp, const struct {} * values, unsigned indent)\n{{".format(name.upper(), name)) 638 | 639 | group.emit_print_function() 640 | 641 | print("}\n") 642 | 643 | def emit_struct(self): 644 | name = self.struct 645 | 646 | self.emit_template_struct(self.struct, self.group) 647 | self.emit_header(name) 648 | if self.no_direct_packing == False: 649 | self.emit_pack_function(self.struct, self.group) 650 | self.emit_unpack_function(self.struct, self.group) 651 | self.emit_print_function(self.struct, self.group) 652 | 653 | def enum_prefix(self, name): 654 | return 655 | 656 | def emit_enum(self): 657 | e_name = enum_name(self.enum) 658 | prefix = e_name if self.enum != 'Format' else global_prefix 659 | print('enum {} {{'.format(e_name)) 660 | 661 | for value in self.values: 662 | name = '{}_{}'.format(prefix, value.name) 663 | name = safe_name(name).upper() 664 | print(' % -36s = %6d,' % (name, value.value)) 665 | print('};\n') 666 | 667 | print("static inline const char *") 668 | print("{}_as_str(enum {} imm)\n{{".format(e_name.lower(), e_name)) 669 | print(" switch (imm) {") 670 | for value in self.values: 671 | name = '{}_{}'.format(prefix, value.name) 672 | name = safe_name(name).upper() 673 | print(' case {}: return "{}";'.format(name, value.name)) 674 | print(' default: return "XXX: INVALID";') 675 | print(" }") 676 | print("}\n") 677 | 678 | def parse(self, filename): 679 | file = open(filename, "rb") 680 | self.parser.ParseFile(file) 681 | file.close() 682 | 683 | if len(sys.argv) < 2: 684 | print("No input xml file specified") 685 | sys.exit(1) 686 | 687 | input_file = sys.argv[1] 688 | 689 | p = Parser() 690 | p.parse(input_file) 691 | -------------------------------------------------------------------------------- /lib/io.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include "io.h" 28 | #include "selectors.h" 29 | #include "util.h" 30 | 31 | struct agx_allocation 32 | agx_alloc_mem(mach_port_t connection, size_t size, enum agx_memory_type type, bool write_combine) 33 | { 34 | uint32_t mode = 0x430; // shared, ? 35 | uint32_t cache = write_combine ? 0x400 : 0x0; 36 | 37 | uint32_t args_in[24] = { 0 }; 38 | args_in[1] = write_combine ? 0x400 : 0x0; 39 | args_in[2] = 0x2580320; //0x18000; // unk 40 | args_in[3] = 0x1; // unk; 41 | args_in[4] = 0x4000101; //0x1000101; // unk 42 | args_in[5] = mode; 43 | args_in[16] = size; 44 | args_in[20] = type; 45 | args_in[21] = 0x3; 46 | 47 | uint64_t out[10] = { 0 }; 48 | size_t out_sz = sizeof(out); 49 | 50 | kern_return_t ret = IOConnectCallMethod(connection, 51 | AGX_SELECTOR_ALLOCATE_MEM, NULL, 0, args_in, 52 | sizeof(args_in), NULL, 0, out, &out_sz); 53 | 54 | assert(ret == 0); 55 | assert(out_sz == sizeof(out)); 56 | 57 | return (struct agx_allocation) { 58 | .type = AGX_ALLOC_REGULAR, 59 | .guid = out[5], 60 | .index = (out[3] >> 32ull), 61 | .gpu_va = out[0], 62 | .map = (void *) out[1], 63 | .size = size 64 | }; 65 | } 66 | 67 | struct agx_allocation 68 | agx_alloc_cmdbuf(mach_port_t connection, size_t size, bool cmdbuf) 69 | { 70 | struct agx_create_cmdbuf_resp out = {}; 71 | size_t out_sz = sizeof(out); 72 | 73 | uint64_t inputs[2] = { 74 | size, 75 | cmdbuf ? 1 : 0 76 | }; 77 | 78 | kern_return_t ret = IOConnectCallMethod(connection, 79 | AGX_SELECTOR_CREATE_CMDBUF, inputs, 2, NULL, 0, NULL, 80 | NULL, &out, &out_sz); 81 | 82 | assert(ret == 0); 83 | assert(out_sz == sizeof(out)); 84 | assert(out.size == size); 85 | 86 | return (struct agx_allocation) { 87 | .type = cmdbuf ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP, 88 | .index = out.id, 89 | .map = out.map, 90 | .size = out.size, 91 | .guid = 0, /* TODO? 
*/ 92 | }; 93 | } 94 | 95 | uint64_t 96 | agx_cmdbuf_global_ids(mach_port_t connection) 97 | { 98 | uint32_t out[4] = {}; 99 | size_t out_sz = sizeof(out); 100 | 101 | kern_return_t ret = IOConnectCallStructMethod(connection, 102 | 0x6, 103 | NULL, 0, &out, &out_sz); 104 | 105 | assert(ret == 0); 106 | assert(out_sz == sizeof(out)); 107 | assert(out[2] == (out[0] + 0x1000000)); 108 | 109 | /* Returns a 32-bit but is 64-bit in Instruments, extend with the 110 | * missing high bit */ 111 | return (out[0]) | (1ull << 32ull); 112 | } 113 | 114 | void 115 | agx_submit_cmdbuf(mach_port_t connection, struct agx_allocation *cmdbuf, struct agx_allocation *mappings, uint64_t scalar) 116 | { 117 | struct agx_submit_cmdbuf_req req = { 118 | .unk0 = 0x10, 119 | .unk1 = 0x1, 120 | .cmdbuf = cmdbuf->index, 121 | .mappings = mappings->index, 122 | .unk2 = 0x0, 123 | .unk3 = 0x1, 124 | }; 125 | 126 | assert(sizeof(req) == 40); 127 | 128 | kern_return_t ret = IOConnectCallMethod(connection, 129 | AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS, 130 | &scalar, 1, 131 | &req, sizeof(req), 132 | NULL, 0, NULL, 0); 133 | 134 | assert(ret == 0); 135 | return; 136 | } 137 | 138 | struct agx_notification_queue 139 | agx_create_notification_queue(mach_port_t connection) 140 | { 141 | struct agx_create_notification_queue_resp resp; 142 | size_t resp_size = sizeof(resp); 143 | assert(resp_size == 0x10); 144 | 145 | kern_return_t ret = IOConnectCallStructMethod(connection, 146 | AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE, 147 | NULL, 0, &resp, &resp_size); 148 | 149 | assert(resp_size == sizeof(resp)); 150 | assert(ret == 0); 151 | 152 | mach_port_t notif_port = IODataQueueAllocateNotificationPort(); 153 | IOConnectSetNotificationPort(connection, 0, notif_port, resp.unk2); 154 | 155 | return (struct agx_notification_queue) { 156 | .port = notif_port, 157 | .queue = resp.queue, 158 | .id = resp.unk2 159 | }; 160 | } 161 | 162 | struct agx_command_queue 163 | agx_create_command_queue(mach_port_t connection) 164 | { 165 | struct agx_command_queue queue = {}; 166 | 167 | { 168 | uint8_t buffer[1024 + 8] = { 0 }; 169 | const char *path = "/tmp/a.out"; 170 | assert(strlen(path) < 1022); 171 | memcpy(buffer + 0, path, strlen(path)); 172 | 173 | /* Copy to the end */ 174 | unsigned END_LEN = MIN2(strlen(path), 1024 - strlen(path)); 175 | unsigned SKIP = strlen(path) - END_LEN; 176 | unsigned OFFS = 1024 - END_LEN; 177 | memcpy(buffer + OFFS, path + SKIP, END_LEN); 178 | 179 | buffer[1024] = 0x2; 180 | 181 | struct agx_create_command_queue_resp out = {}; 182 | size_t out_sz = sizeof(out); 183 | 184 | kern_return_t ret = IOConnectCallStructMethod(connection, 185 | AGX_SELECTOR_CREATE_COMMAND_QUEUE, 186 | buffer, sizeof(buffer), 187 | &out, &out_sz); 188 | 189 | assert(ret == 0); 190 | assert(out_sz == sizeof(out)); 191 | 192 | queue.id = out.id; 193 | assert(queue.id); 194 | } 195 | 196 | queue.notif = agx_create_notification_queue(connection); 197 | 198 | { 199 | uint64_t scalars[2] = { 200 | queue.id, 201 | queue.notif.id 202 | }; 203 | 204 | kern_return_t ret = IOConnectCallScalarMethod(connection, 205 | 0x1D, 206 | scalars, 2, NULL, NULL); 207 | 208 | assert(ret == 0); 209 | } 210 | 211 | { 212 | uint64_t scalars[2] = { 213 | queue.id, 214 | 0x1ffffffffull 215 | }; 216 | 217 | kern_return_t ret = IOConnectCallScalarMethod(connection, 218 | 0x29, 219 | scalars, 2, NULL, NULL); 220 | 221 | assert(ret == 0); 222 | } 223 | 224 | return queue; 225 | } 226 | -------------------------------------------------------------------------------- 
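The helpers above wrap the accelerator user client's external methods. The following is a minimal, hypothetical sketch (not part of the repository) of how they might be strung together: open a connection to the GPU service through IOKit, allocate GPU memory, create the kernel-side command buffer and memory map, and create a command queue. The IOServiceMatching name and the user-client type passed to IOServiceOpen are assumptions here, not values taken from this code, and the sketch assumes it is compiled inside this source tree so that io.h and selectors.h resolve.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <mach/mach.h>
#include <IOKit/IOKitLib.h>
#include "io.h"

int main(void)
{
   /* Assumption: matching on the AGXAccelerator class finds the GPU service
    * (ioreg reports AGXAcceleratorG13G_B0 on an M1 Mac mini). */
   io_service_t service = IOServiceGetMatchingService(kIOMasterPortDefault,
         IOServiceMatching("AGXAccelerator"));
   assert(service != IO_OBJECT_NULL);

   /* Assumption: user-client type 0; the real type is whatever a traced Metal
    * process passes to IOServiceOpen (observable via wrap/wrap.c). */
   io_connect_t connection;
   kern_return_t ret = IOServiceOpen(service, mach_task_self(), 0, &connection);
   assert(ret == KERN_SUCCESS);

   /* Regular GPU memory for shaders, plus the kernel-managed command buffer
    * and memory-map allocations, then a command queue. */
   struct agx_allocation shader = agx_alloc_mem(connection, 0x10000,
         AGX_MEMORY_TYPE_SHADER, false);
   struct agx_allocation cmdbuf = agx_alloc_cmdbuf(connection, 0x10000, true);
   struct agx_allocation memmap = agx_alloc_cmdbuf(connection, 0x4000, false);
   struct agx_command_queue queue = agx_create_command_queue(connection);

   printf("shader VA 0x%llx, cmdbuf index %u, queue id %u\n",
          (unsigned long long) shader.gpu_va, cmdbuf.index, queue.id);

   /* Once the command buffer and memory map are filled in, submission would
    * go through agx_submit_cmdbuf(connection, &cmdbuf, &memmap, scalar). */
   return 0;
}

--------------------------------------------------------------------------------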
/lib/io.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __AGX_IO_H 25 | #define __AGX_IO_H 26 | 27 | #include 28 | #include 29 | #include "selectors.h" 30 | 31 | enum agx_alloc_type { 32 | AGX_ALLOC_REGULAR = 0, 33 | AGX_ALLOC_MEMMAP = 1, 34 | AGX_ALLOC_CMDBUF = 2, 35 | AGX_NUM_ALLOC, 36 | }; 37 | 38 | static const char *agx_alloc_types[AGX_NUM_ALLOC] = { "mem", "map", "cmd" }; 39 | 40 | struct agx_allocation { 41 | enum agx_alloc_type type; 42 | size_t size; 43 | 44 | /* Index unique only up to type, process-local */ 45 | unsigned index; 46 | 47 | /* Globally unique value (system wide) for tracing. Exists for 48 | * resources, command buffers, GPU submissions, segments, segment lists, 49 | * encoders, accelerators, and channels. Corresponds to Instruments' 50 | * magic table metal-gpu-submission-to-command-buffer-id */ 51 | uint64_t guid; 52 | 53 | /* If CPU mapped, CPU address.
NULL if not mapped */ 54 | void *map; 55 | 56 | /* If type REGULAR, mapped GPU address */ 57 | uint64_t gpu_va; 58 | 59 | /* Human-readable label, or NULL if none */ 60 | char *name; 61 | 62 | /* Used while decoding, marked read-only */ 63 | bool ro; 64 | }; 65 | 66 | struct agx_notification_queue { 67 | mach_port_t port; 68 | IODataQueueMemory *queue; 69 | unsigned id; 70 | }; 71 | 72 | struct agx_command_queue { 73 | unsigned id; 74 | struct agx_notification_queue notif; 75 | }; 76 | 77 | struct agx_allocation agx_alloc_mem(mach_port_t connection, size_t size, enum agx_memory_type type, bool write_combine); 78 | struct agx_allocation agx_alloc_cmdbuf(mach_port_t connection, size_t size, bool cmdbuf); 79 | void agx_submit_cmdbuf(mach_port_t connection, struct agx_allocation *cmdbuf, struct agx_allocation *mappings, uint64_t scalar); 80 | struct agx_command_queue agx_create_command_queue(mach_port_t connection); 81 | uint64_t agx_cmdbuf_global_ids(mach_port_t connection); 82 | 83 | #endif 84 | -------------------------------------------------------------------------------- /lib/selectors.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 
22 | */ 23 | 24 | #ifndef __AGX_SELECTOR_H 25 | #define __AGX_SELECTOR_H 26 | 27 | #include 28 | 29 | enum agx_selector { 30 | AGX_SELECTOR_GET_GLOBAL_IDS = 0x6, 31 | AGX_SELECTOR_SET_API = 0x7, 32 | AGX_SELECTOR_CREATE_COMMAND_QUEUE = 0x8, 33 | AGX_SELECTOR_FREE_COMMAND_QUEUE = 0x9, 34 | AGX_SELECTOR_ALLOCATE_MEM = 0xA, 35 | AGX_SELECTOR_FREE_MEM = 0xB, 36 | AGX_SELECTOR_CREATE_CMDBUF = 0xF, 37 | AGX_SELECTOR_FREE_CMDBUF = 0x10, 38 | AGX_SELECTOR_CREATE_NOTIFICATION_QUEUE = 0x11, 39 | AGX_SELECTOR_FREE_NOTIFICATION_QUEUE = 0x12, 40 | AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS = 0x1E, 41 | AGX_SELECTOR_GET_VERSION = 0x23, 42 | AGX_NUM_SELECTORS = 0x30 43 | }; 44 | 45 | static const char *selector_table[AGX_NUM_SELECTORS] = { 46 | "unk0", 47 | "unk1", 48 | "unk2", 49 | "unk3", 50 | "unk4", 51 | "unk5", 52 | "GET_GLOBAL_IDS", 53 | "SET_API", 54 | "CREATE_COMMAND_QUEUE", 55 | "FREE_COMMAND_QUEUE", 56 | "ALLOCATE_MEM", 57 | "FREE_MEM", 58 | "unkC", 59 | "unkD", 60 | "unkE", 61 | "CREATE_CMDBUF", 62 | "FREE_CMDBUF", 63 | "CREATE_NOTIFICATION_QUEUE", 64 | "FREE_NOTIFICATION_QUEUE", 65 | "unk13", 66 | "unk14", 67 | "unk15", 68 | "unk16", 69 | "unk17", 70 | "unk18", 71 | "unk19", 72 | "unk1A", 73 | "unk1B", 74 | "unk1C", 75 | "unk1D", 76 | "SUBMIT_COMMAND_BUFFERS", 77 | "unk1F", 78 | "unk20", 79 | "unk21", 80 | "unk22", 81 | "GET_VERSION", 82 | "unk24", 83 | "unk25", 84 | "unk26", 85 | "unk27", 86 | "unk28", 87 | "unk29", 88 | "unk2A", 89 | "unk2B", 90 | "unk2C", 91 | "unk2D", 92 | "unk2E", 93 | "unk2F" 94 | }; 95 | 96 | static inline const char * 97 | wrap_selector_name(uint32_t selector) 98 | { 99 | return (selector < AGX_NUM_SELECTORS) ? selector_table[selector] : "unk??"; 100 | } 101 | 102 | struct agx_create_command_queue_resp { 103 | uint64_t id; 104 | uint32_t unk2; // 90 0A 08 27 105 | uint32_t unk3; // 0 106 | } __attribute__((packed)); 107 | 108 | struct agx_create_cmdbuf_resp { 109 | void *map; 110 | uint32_t size; 111 | uint32_t id; 112 | } __attribute__((packed)); 113 | 114 | struct agx_create_notification_queue_resp { 115 | IODataQueueMemory *queue; 116 | uint32_t unk2; // 1 117 | uint32_t unk3; // 0 118 | } __attribute__((packed)); 119 | 120 | struct agx_submit_cmdbuf_req { 121 | uint32_t unk0; 122 | uint32_t unk1; 123 | uint32_t cmdbuf; 124 | uint32_t mappings; 125 | void *user_0; 126 | void *user_1; 127 | uint32_t unk2; 128 | uint32_t unk3; 129 | } __attribute__((packed)); 130 | 131 | /* Memory allocation isn't really understood yet. 
By comparing SHADER/CMDBUF_32 132 | * vs everything else, it appears the 0x40000000 bit indicates the GPU VA must 133 | * be be in the first 4GiB */ 134 | 135 | enum agx_memory_type { 136 | AGX_MEMORY_TYPE_NORMAL = 0x00000000, /* used for user allocations */ 137 | AGX_MEMORY_TYPE_UNK = 0x08000000, /* unknown */ 138 | AGX_MEMORY_TYPE_CMDBUF_64 = 0x18000000, /* used for command buffer storage */ 139 | AGX_MEMORY_TYPE_SHADER = 0x48000000, /* used for shader memory, with VA = 0 */ 140 | AGX_MEMORY_TYPE_CMDBUF_32 = 0x58000000, /* used for command buffers, with VA < 32-bit */ 141 | AGX_MEMORY_TYPE_FRAMEBUFFER = 0x00888F00, /* used for framebuffer backing */ 142 | }; 143 | 144 | static inline const char * 145 | agx_memory_type_name(uint32_t type) 146 | { 147 | switch (type) { 148 | case AGX_MEMORY_TYPE_NORMAL: return "normal"; 149 | case AGX_MEMORY_TYPE_UNK: return "unk"; 150 | case AGX_MEMORY_TYPE_CMDBUF_64: return "cmdbuf_64"; 151 | case AGX_MEMORY_TYPE_SHADER: return "shader"; 152 | case AGX_MEMORY_TYPE_CMDBUF_32: return "cmdbuf_32"; 153 | case AGX_MEMORY_TYPE_FRAMEBUFFER: return "framebuffer"; 154 | default: return NULL; 155 | } 156 | } 157 | 158 | #endif 159 | -------------------------------------------------------------------------------- /lib/tiling.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | /* Z-order with 64x64 tiles: 30 | * 31 | * [y5][x5][y4][x4][y3][x3][y2][x2][y1][x1][y0][x0] 32 | * 33 | * Efficient tiling algorithm described in 34 | * https://fgiesen.wordpress.com/2011/01/17/texture-tiling-and-swizzling/ but 35 | * for posterity, we split into X and Y parts, and are faced with the problem 36 | * of incrementing: 37 | * 38 | * 0 [x5] 0 [x4] 0 [x3] 0 [x2] 0 [x1] 0 [x0] 39 | * 40 | * To do so, we fill in the "holes" with 1's by adding the bitwise inverse of 41 | * the mask of bits we care about 42 | * 43 | * 0 [x5] 0 [x4] 0 [x3] 0 [x2] 0 [x1] 0 [x0] 44 | * + 1 0 1 0 1 0 1 0 1 0 1 0 45 | * ------------------------------------------ 46 | * 1 [x5] 1 [x4] 1 [x3] 1 [x2] 1 [x1] 1 [x0] 47 | * 48 | * Then when we add one, the holes are passed over by forcing carry bits high. 
49 | * Finally, we need to zero out the holes, by ANDing with the mask of bits we 50 | * care about. In total, we get the expression (X + ~mask + 1) & mask, and 51 | * applying the two's complement identity, we are left with (X - mask) & mask 52 | */ 53 | 54 | #define TILE_WIDTH 64 55 | #define TILE_HEIGHT 64 56 | #define TILE_SHIFT 6 57 | #define TILE_MASK ((1 << TILE_SHIFT) - 1) 58 | 59 | /* mask of bits used for X coordinate in a tile */ 60 | #define SPACE_MASK 0x555 // 0b010101010101 61 | 62 | #define MAX2(x, y) (((x) > (y)) ? (x) : (y)) 63 | #define MIN2(x, y) (((x) < (y)) ? (x) : (y)) 64 | 65 | static uint32_t 66 | ash_space_bits(unsigned x) 67 | { 68 | assert(x < TILE_WIDTH); 69 | return ((x & 1) << 0) | ((x & 2) << 1) | ((x & 4) << 2) | 70 | ((x & 8) << 3) | ((x & 16) << 4) | ((x & 32) << 5); 71 | } 72 | 73 | static void 74 | ash_detile_unaligned_32(uint32_t *tiled, uint32_t *linear, 75 | unsigned width, unsigned linear_pitch, 76 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 77 | { 78 | unsigned tiles_per_row = (width + TILE_WIDTH - 1) >> TILE_SHIFT; 79 | unsigned y_offs = ash_space_bits(sy & TILE_MASK); 80 | unsigned x_offs_start = ash_space_bits(sx & TILE_MASK); 81 | 82 | for (unsigned y = sy; y < smaxy; ++y) { 83 | unsigned tile_y = (y >> TILE_SHIFT); 84 | unsigned tile_row = tile_y * tiles_per_row; 85 | unsigned x_offs = x_offs_start; 86 | 87 | uint32_t *linear_row = linear; 88 | 89 | for (unsigned x = sx; x < smaxx; ++x) { 90 | unsigned tile_x = (x >> TILE_SHIFT); 91 | unsigned tile_idx = (tile_row + tile_x); 92 | unsigned tile_base = tile_idx * (TILE_WIDTH * TILE_HEIGHT); 93 | 94 | *(linear_row++) = tiled[tile_base + y_offs + x_offs]; 95 | x_offs = (x_offs - SPACE_MASK) & SPACE_MASK; 96 | } 97 | 98 | y_offs = (((y_offs >> 1) - SPACE_MASK) & SPACE_MASK) << 1; 99 | linear += linear_pitch; 100 | } 101 | } 102 | 103 | /* Assumes sx, smaxx are both aligned to TILE_WIDTH */ 104 | static void 105 | ash_detile_aligned_32(uint32_t *tiled, uint32_t *linear, 106 | unsigned width, unsigned linear_pitch, 107 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 108 | { 109 | unsigned tiles_per_row = (width + TILE_WIDTH - 1) >> TILE_SHIFT; 110 | unsigned y_offs = 0; 111 | 112 | for (unsigned y = sy; y < smaxy; ++y) { 113 | unsigned tile_y = (y >> TILE_SHIFT); 114 | unsigned tile_row = tile_y * tiles_per_row; 115 | unsigned x_offs = 0; 116 | 117 | uint32_t *linear_row = linear; 118 | 119 | for (unsigned x = sx; x < smaxx; x += TILE_WIDTH) { 120 | unsigned tile_x = (x >> TILE_SHIFT); 121 | unsigned tile_idx = (tile_row + tile_x); 122 | unsigned tile_base = tile_idx * (TILE_WIDTH * TILE_HEIGHT); 123 | uint32_t *tile = tiled + tile_base + y_offs; 124 | 125 | for (unsigned j = 0; j < TILE_WIDTH; ++j) { 126 | /* Written in a funny way to avoid inner shift, 127 | * do it free as part of x_offs instead */ 128 | uint32_t *in = (uint32_t *) (((uint8_t *) tile) + x_offs); 129 | *(linear_row++) = *in; 130 | x_offs = (x_offs - (SPACE_MASK << 2)) & (SPACE_MASK << 2); 131 | } 132 | } 133 | 134 | y_offs = (((y_offs >> 1) - SPACE_MASK) & SPACE_MASK) << 1; 135 | linear += linear_pitch; 136 | } 137 | } 138 | 139 | static void 140 | ash_detile_32(uint32_t *tiled, uint32_t *linear, 141 | unsigned width, unsigned linear_pitch, 142 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 143 | { 144 | if (sx & TILE_MASK) { 145 | ash_detile_unaligned_32(tiled, linear, width, linear_pitch, sx, sy, 146 | MIN2(TILE_WIDTH - (sx & TILE_MASK), smaxx - sx), smaxy); 147 | sx = (sx & ~TILE_MASK) + 
1; 148 | } 149 | 150 | if ((smaxx & TILE_MASK) && (smaxx > sx)) { 151 | ash_detile_unaligned_32(tiled, linear, width, linear_pitch, 152 | MAX2(sx, smaxx & ~TILE_MASK), sy, 153 | smaxx, smaxy); 154 | smaxx = (smaxx & ~TILE_MASK); 155 | } 156 | 157 | if (smaxx > sx) { 158 | ash_detile_aligned_32(tiled, linear, width, linear_pitch, 159 | sx, sy, smaxx, smaxy); 160 | } 161 | } 162 | 163 | void 164 | ash_detile(uint32_t *tiled, uint32_t *linear, 165 | unsigned width, unsigned bpp, unsigned linear_pitch, 166 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy) 167 | { 168 | /* TODO: parametrize with macro magic */ 169 | assert(bpp == 32); 170 | 171 | ash_detile_32(tiled, linear, width, linear_pitch, sx, sy, smaxx, smaxy); 172 | } 173 | -------------------------------------------------------------------------------- /lib/tiling.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __ASH_DETILE_H 25 | #define __ASH_DETILE_H 26 | 27 | void ash_detile(uint32_t *tiled, uint32_t *linear, 28 | unsigned width, unsigned bpp, unsigned linear_pitch, 29 | unsigned sx, unsigned sy, unsigned smaxx, unsigned smaxy); 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /lib/util.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2021 Alyssa Rosenzweig 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | * SOFTWARE. 22 | */ 23 | 24 | #ifndef __UTIL_H 25 | #define __UTIL_H 26 | 27 | #include 28 | 29 | #define UNUSED __attribute__((unused)) 30 | #define MAX2(x, y) (((x) > (y)) ? (x) : (y)) 31 | #define MIN2(x, y) (((x) < (y)) ? (x) : (y)) 32 | #define ALIGN_POT(v, pot) (((v) + ((pot) - 1)) & ~((pot) - 1)) 33 | 34 | static uint32_t 35 | fui(float f) 36 | { 37 | uint32_t u = 0; 38 | memcpy(&u, &f, 4); 39 | return u; 40 | } 41 | 42 | static float 43 | uif(uint32_t u) 44 | { 45 | float f = 0; 46 | memcpy(&f, &u, 4); 47 | return f; 48 | } 49 | 50 | /* Pretty-printer */ 51 | static void 52 | hexdump(FILE *fp, const uint8_t *hex, size_t cnt, bool with_strings) 53 | { 54 | unsigned zero_count = 0; 55 | 56 | for (unsigned i = 0; i < cnt; ++i) { 57 | if ((i & 0xF) == 0) 58 | fprintf(fp, "%06X ", i); 59 | 60 | uint8_t v = hex[i]; 61 | 62 | if (v == 0 && (i & 0xF) == 0) { 63 | /* Check if we're starting an aligned run of zeroes */ 64 | unsigned zero_count = 0; 65 | 66 | for (unsigned j = i; j < cnt; ++j) { 67 | if (hex[j] == 0) 68 | zero_count++; 69 | else 70 | break; 71 | } 72 | 73 | if (zero_count >= 32) { 74 | fprintf(fp, "*\n"); 75 | i += (zero_count & ~0xF) - 1; 76 | continue; 77 | } 78 | } 79 | 80 | fprintf(fp, "%02X ", hex[i]); 81 | if ((i & 0xF) == 0xF && with_strings) { 82 | fprintf(fp, " | "); 83 | for (unsigned j = i & ~0xF; j <= i; ++j) { 84 | uint8_t c = hex[j]; 85 | fputc((c < 32 || c > 128) ? '.' : c, fp); 86 | } 87 | } 88 | 89 | if ((i & 0xF) == 0xF) 90 | fprintf(fp, "\n"); 91 | } 92 | 93 | fprintf(fp, "\n"); 94 | } 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /wrap/APPLE_LICENSE: -------------------------------------------------------------------------------- 1 | APPLE PUBLIC SOURCE LICENSE 2 | Version 2.0 - August 6, 2003 3 | 4 | Please read this License carefully before downloading this software. By 5 | downloading or using this software, you are agreeing to be bound by the terms 6 | of this License. If you do not or cannot agree to the terms of this License, 7 | please do not download or use the software. 8 | 9 | Apple Note: In January 2007, Apple changed its corporate name from "Apple 10 | Computer, Inc." to "Apple Inc." This change has been reflected below and 11 | copyright years updated, but no other changes have been made to the APSL 2.0. 12 | 13 | 1. General; Definitions. This License applies to any program or other 14 | work which Apple Inc. ("Apple") makes publicly available and which contains a 15 | notice placed by Apple identifying such program or work as "Original Code" and 16 | stating that it is subject to the terms of this Apple Public Source License 17 | version 2.0 ("License"). 
As used in this License: 18 | 19 | 1.1 "Applicable Patent Rights" mean: (a) in the case where Apple is the 20 | grantor of rights, (i) claims of patents that are now or hereafter acquired, 21 | owned by or assigned to Apple and (ii) that cover subject matter contained in 22 | the Original Code, but only to the extent necessary to use, reproduce and/or 23 | distribute the Original Code without infringement; and (b) in the case where 24 | You are the grantor of rights, (i) claims of patents that are now or hereafter 25 | acquired, owned by or assigned to You and (ii) that cover subject matter in 26 | Your Modifications, taken alone or in combination with Original Code. 27 | 28 | 1.2 "Contributor" means any person or entity that creates or contributes to 29 | the creation of Modifications. 30 | 31 | 1.3 "Covered Code" means the Original Code, Modifications, the combination 32 | of Original Code and any Modifications, and/or any respective portions thereof. 33 | 34 | 1.4 "Externally Deploy" means: (a) to sublicense, distribute or otherwise 35 | make Covered Code available, directly or indirectly, to anyone other than You; 36 | and/or (b) to use Covered Code, alone or as part of a Larger Work, in any way 37 | to provide a service, including but not limited to delivery of content, through 38 | electronic communication with a client other than You. 39 | 40 | 1.5 "Larger Work" means a work which combines Covered Code or portions 41 | thereof with code not governed by the terms of this License. 42 | 43 | 1.6 "Modifications" mean any addition to, deletion from, and/or change to, 44 | the substance and/or structure of the Original Code, any previous 45 | Modifications, the combination of Original Code and any previous Modifications, 46 | and/or any respective portions thereof. When code is released as a series of 47 | files, a Modification is: (a) any addition to or deletion from the contents of 48 | a file containing Covered Code; and/or (b) any new file or other representation 49 | of computer program statements that contains any part of Covered Code. 50 | 51 | 1.7 "Original Code" means (a) the Source Code of a program or other work as 52 | originally made available by Apple under this License, including the Source 53 | Code of any updates or upgrades to such programs or works made available by 54 | Apple under this License, and that has been expressly identified by Apple as 55 | such in the header file(s) of such work; and (b) the object code compiled from 56 | such Source Code and originally made available by Apple under this License 57 | 58 | 1.8 "Source Code" means the human readable form of a program or other work 59 | that is suitable for making modifications to it, including all modules it 60 | contains, plus any associated interface definition files, scripts used to 61 | control compilation and installation of an executable (object code). 62 | 63 | 1.9 "You" or "Your" means an individual or a legal entity exercising rights 64 | under this License. For legal entities, "You" or "Your" includes any entity 65 | which controls, is controlled by, or is under common control with, You, where 66 | "control" means (a) the power, direct or indirect, to cause the direction or 67 | management of such entity, whether by contract or otherwise, or (b) ownership 68 | of fifty percent (50%) or more of the outstanding shares or beneficial 69 | ownership of such entity. 70 | 71 | 2. Permitted Uses; Conditions & Restrictions. 
Subject to the terms and 72 | conditions of this License, Apple hereby grants You, effective on the date You 73 | accept this License and download the Original Code, a world-wide, royalty-free, 74 | non-exclusive license, to the extent of Apple's Applicable Patent Rights and 75 | copyrights covering the Original Code, to do the following: 76 | 77 | 2.1 Unmodified Code. You may use, reproduce, display, perform, internally 78 | distribute within Your organization, and Externally Deploy verbatim, unmodified 79 | copies of the Original Code, for commercial or non-commercial purposes, 80 | provided that in each instance: 81 | 82 | (a) You must retain and reproduce in all copies of Original Code the 83 | copyright and other proprietary notices and disclaimers of Apple as they appear 84 | in the Original Code, and keep intact all notices in the Original Code that 85 | refer to this License; and 86 | 87 | (b) You must include a copy of this License with every copy of Source Code 88 | of Covered Code and documentation You distribute or Externally Deploy, and You 89 | may not offer or impose any terms on such Source Code that alter or restrict 90 | this License or the recipients' rights hereunder, except as permitted under 91 | Section 6. 92 | 93 | 2.2 Modified Code. You may modify Covered Code and use, reproduce, 94 | display, perform, internally distribute within Your organization, and 95 | Externally Deploy Your Modifications and Covered Code, for commercial or 96 | non-commercial purposes, provided that in each instance You also meet all of 97 | these conditions: 98 | 99 | (a) You must satisfy all the conditions of Section 2.1 with respect to the 100 | Source Code of the Covered Code; 101 | 102 | (b) You must duplicate, to the extent it does not already exist, the notice 103 | in Exhibit A in each file of the Source Code of all Your Modifications, and 104 | cause the modified files to carry prominent notices stating that You changed 105 | the files and the date of any change; and 106 | 107 | (c) If You Externally Deploy Your Modifications, You must make Source Code 108 | of all Your Externally Deployed Modifications either available to those to whom 109 | You have Externally Deployed Your Modifications, or publicly available. Source 110 | Code of Your Externally Deployed Modifications must be released under the terms 111 | set forth in this License, including the license grants set forth in Section 3 112 | below, for as long as you Externally Deploy the Covered Code or twelve (12) 113 | months from the date of initial External Deployment, whichever is longer. You 114 | should preferably distribute the Source Code of Your Externally Deployed 115 | Modifications electronically (e.g. download from a web site). 116 | 117 | 2.3 Distribution of Executable Versions. In addition, if You Externally 118 | Deploy Covered Code (Original Code and/or Modifications) in object code, 119 | executable form only, You must include a prominent notice, in the code itself 120 | as well as in related documentation, stating that Source Code of the Covered 121 | Code is available under the terms of this License with information on how and 122 | where to obtain such Source Code. 123 | 124 | 2.4 Third Party Rights. 
You expressly acknowledge and agree that although 125 | Apple and each Contributor grants the licenses to their respective portions of 126 | the Covered Code set forth herein, no assurances are provided by Apple or any 127 | Contributor that the Covered Code does not infringe the patent or other 128 | intellectual property rights of any other entity. Apple and each Contributor 129 | disclaim any liability to You for claims brought by any other entity based on 130 | infringement of intellectual property rights or otherwise. As a condition to 131 | exercising the rights and licenses granted hereunder, You hereby assume sole 132 | responsibility to secure any other intellectual property rights needed, if any. 133 | For example, if a third party patent license is required to allow You to 134 | distribute the Covered Code, it is Your responsibility to acquire that license 135 | before distributing the Covered Code. 136 | 137 | 3. Your Grants. In consideration of, and as a condition to, the licenses 138 | granted to You under this License, You hereby grant to any person or entity 139 | receiving or distributing Covered Code under this License a non-exclusive, 140 | royalty-free, perpetual, irrevocable license, under Your Applicable Patent 141 | Rights and other intellectual property rights (other than patent) owned or 142 | controlled by You, to use, reproduce, display, perform, modify, sublicense, 143 | distribute and Externally Deploy Your Modifications of the same scope and 144 | extent as Apple's licenses under Sections 2.1 and 2.2 above. 145 | 146 | 4. Larger Works. You may create a Larger Work by combining Covered Code 147 | with other code not governed by the terms of this License and distribute the 148 | Larger Work as a single product. In each such instance, You must make sure the 149 | requirements of this License are fulfilled for the Covered Code or any portion 150 | thereof. 151 | 152 | 5. Limitations on Patent License. Except as expressly stated in Section 153 | 2, no other patent rights, express or implied, are granted by Apple herein. 154 | Modifications and/or Larger Works may require additional patent licenses from 155 | Apple which Apple may grant in its sole discretion. 156 | 157 | 6. Additional Terms. You may choose to offer, and to charge a fee for, 158 | warranty, support, indemnity or liability obligations and/or other rights 159 | consistent with the scope of the license granted herein ("Additional Terms") to 160 | one or more recipients of Covered Code. However, You may do so only on Your own 161 | behalf and as Your sole responsibility, and not on behalf of Apple or any 162 | Contributor. You must obtain the recipient's agreement that any such Additional 163 | Terms are offered by You alone, and You hereby agree to indemnify, defend and 164 | hold Apple and every Contributor harmless for any liability incurred by or 165 | claims asserted against Apple or such Contributor by reason of any such 166 | Additional Terms. 167 | 168 | 7. Versions of the License. Apple may publish revised and/or new versions 169 | of this License from time to time. Each version will be given a distinguishing 170 | version number. Once Original Code has been published under a particular 171 | version of this License, You may continue to use it under the terms of that 172 | version. You may also choose to use such Original Code under the terms of any 173 | subsequent version of this License published by Apple. 
No one other than Apple 174 | has the right to modify the terms applicable to Covered Code created under this 175 | License. 176 | 177 | 8. NO WARRANTY OR SUPPORT. The Covered Code may contain in whole or in 178 | part pre-release, untested, or not fully tested works. The Covered Code may 179 | contain errors that could cause failures or loss of data, and may be incomplete 180 | or contain inaccuracies. You expressly acknowledge and agree that use of the 181 | Covered Code, or any portion thereof, is at Your sole and entire risk. THE 182 | COVERED CODE IS PROVIDED "AS IS" AND WITHOUT WARRANTY, UPGRADES OR SUPPORT OF 183 | ANY KIND AND APPLE AND APPLE'S LICENSOR(S) (COLLECTIVELY REFERRED TO AS "APPLE" 184 | FOR THE PURPOSES OF SECTIONS 8 AND 9) AND ALL CONTRIBUTORS EXPRESSLY DISCLAIM 185 | ALL WARRANTIES AND/OR CONDITIONS, EXPRESS OR IMPLIED, INCLUDING, BUT NOT 186 | LIMITED TO, THE IMPLIED WARRANTIES AND/OR CONDITIONS OF MERCHANTABILITY, OF 187 | SATISFACTORY QUALITY, OF FITNESS FOR A PARTICULAR PURPOSE, OF ACCURACY, OF 188 | QUIET ENJOYMENT, AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. APPLE AND EACH 189 | CONTRIBUTOR DOES NOT WARRANT AGAINST INTERFERENCE WITH YOUR ENJOYMENT OF THE 190 | COVERED CODE, THAT THE FUNCTIONS CONTAINED IN THE COVERED CODE WILL MEET YOUR 191 | REQUIREMENTS, THAT THE OPERATION OF THE COVERED CODE WILL BE UNINTERRUPTED OR 192 | ERROR-FREE, OR THAT DEFECTS IN THE COVERED CODE WILL BE CORRECTED. NO ORAL OR 193 | WRITTEN INFORMATION OR ADVICE GIVEN BY APPLE, AN APPLE AUTHORIZED 194 | REPRESENTATIVE OR ANY CONTRIBUTOR SHALL CREATE A WARRANTY. You acknowledge 195 | that the Covered Code is not intended for use in the operation of nuclear 196 | facilities, aircraft navigation, communication systems, or air traffic control 197 | machines in which case the failure of the Covered Code could lead to death, 198 | personal injury, or severe physical or environmental damage. 199 | 200 | 9. LIMITATION OF LIABILITY. TO THE EXTENT NOT PROHIBITED BY LAW, IN NO 201 | EVENT SHALL APPLE OR ANY CONTRIBUTOR BE LIABLE FOR ANY INCIDENTAL, SPECIAL, 202 | INDIRECT OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATING TO THIS LICENSE OR 203 | YOUR USE OR INABILITY TO USE THE COVERED CODE, OR ANY PORTION THEREOF, WHETHER 204 | UNDER A THEORY OF CONTRACT, WARRANTY, TORT (INCLUDING NEGLIGENCE), PRODUCTS 205 | LIABILITY OR OTHERWISE, EVEN IF APPLE OR SUCH CONTRIBUTOR HAS BEEN ADVISED OF 206 | THE POSSIBILITY OF SUCH DAMAGES AND NOTWITHSTANDING THE FAILURE OF ESSENTIAL 207 | PURPOSE OF ANY REMEDY. SOME JURISDICTIONS DO NOT ALLOW THE LIMITATION OF 208 | LIABILITY OF INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THIS LIMITATION MAY NOT 209 | APPLY TO YOU. In no event shall Apple's total liability to You for all damages 210 | (other than as may be required by applicable law) under this License exceed the 211 | amount of fifty dollars ($50.00). 212 | 213 | 10. Trademarks. This License does not grant any rights to use the 214 | trademarks or trade names "Apple", "Mac", "Mac OS", "QuickTime", "QuickTime 215 | Streaming Server" or any other trademarks, service marks, logos or trade names 216 | belonging to Apple (collectively "Apple Marks") or to any trademark, service 217 | mark, logo or trade name belonging to any Contributor. 
You agree not to use 218 | any Apple Marks in or as part of the name of products derived from the Original 219 | Code or to endorse or promote products derived from the Original Code other 220 | than as expressly permitted by and in strict compliance at all times with 221 | Apple's third party trademark usage guidelines which are posted at 222 | http://www.apple.com/legal/guidelinesfor3rdparties.html. 223 | 224 | 11. Ownership. Subject to the licenses granted under this License, each 225 | Contributor retains all rights, title and interest in and to any Modifications 226 | made by such Contributor. Apple retains all rights, title and interest in and 227 | to the Original Code and any Modifications made by or on behalf of Apple 228 | ("Apple Modifications"), and such Apple Modifications will not be automatically 229 | subject to this License. Apple may, at its sole discretion, choose to license 230 | such Apple Modifications under this License, or on different terms from those 231 | contained in this License or may choose not to license them at all. 232 | 233 | 12. Termination. 234 | 235 | 12.1 Termination. This License and the rights granted hereunder will 236 | terminate: 237 | 238 | (a) automatically without notice from Apple if You fail to comply with any 239 | term(s) of this License and fail to cure such breach within 30 days of becoming 240 | aware of such breach; 241 | (b) immediately in the event of the circumstances described in Section 242 | 13.5(b); or 243 | (c) automatically without notice from Apple if You, at any time during the 244 | term of this License, commence an action for patent infringement against Apple; 245 | provided that Apple did not first commence an action for patent infringement 246 | against You in that instance. 247 | 248 | 12.2 Effect of Termination. Upon termination, You agree to immediately stop 249 | any further use, reproduction, modification, sublicensing and distribution of 250 | the Covered Code. All sublicenses to the Covered Code which have been properly 251 | granted prior to termination shall survive any termination of this License. 252 | Provisions which, by their nature, should remain in effect beyond the 253 | termination of this License shall survive, including but not limited to 254 | Sections 3, 5, 8, 9, 10, 11, 12.2 and 13. No party will be liable to any other 255 | for compensation, indemnity or damages of any sort solely as a result of 256 | terminating this License in accordance with its terms, and termination of this 257 | License will be without prejudice to any other right or remedy of any party. 258 | 259 | 13. Miscellaneous. 260 | 261 | 13.1 Government End Users. The Covered Code is a "commercial item" as 262 | defined in FAR 2.101. Government software and technical data rights in the 263 | Covered Code include only those rights customarily provided to the public as 264 | defined in this License. This customary commercial license in technical data 265 | and software is provided in accordance with FAR 12.211 (Technical Data) and 266 | 12.212 (Computer Software) and, for Department of Defense purchases, DFAR 267 | 252.227-7015 (Technical Data -- Commercial Items) and 227.7202-3 (Rights in 268 | Commercial Computer Software or Computer Software Documentation). Accordingly, 269 | all U.S. Government End Users acquire Covered Code with only those rights set 270 | forth herein. 271 | 272 | 13.2 Relationship of Parties. 
This License will not be construed as 273 | creating an agency, partnership, joint venture or any other form of legal 274 | association between or among You, Apple or any Contributor, and You will not 275 | represent to the contrary, whether expressly, by implication, appearance or 276 | otherwise. 277 | 278 | 13.3 Independent Development. Nothing in this License will impair Apple's 279 | right to acquire, license, develop, have others develop for it, market and/or 280 | distribute technology or products that perform the same or similar functions 281 | as, or otherwise compete with, Modifications, Larger Works, technology or 282 | products that You may develop, produce, market or distribute. 283 | 284 | 13.4 Waiver; Construction. Failure by Apple or any Contributor to enforce 285 | any provision of this License will not be deemed a waiver of future enforcement 286 | of that or any other provision. Any law or regulation which provides that the 287 | language of a contract shall be construed against the drafter will not apply to 288 | this License. 289 | 290 | 13.5 Severability. (a) If for any reason a court of competent jurisdiction 291 | finds any provision of this License, or portion thereof, to be unenforceable, 292 | that provision of the License will be enforced to the maximum extent 293 | permissible so as to effect the economic benefits and intent of the parties, 294 | and the remainder of this License will continue in full force and effect. (b) 295 | Notwithstanding the foregoing, if applicable law prohibits or restricts You 296 | from fully and/or specifically complying with Sections 2 and/or 3 or prevents 297 | the enforceability of either of those Sections, this License will immediately 298 | terminate and You must immediately discontinue any use of the Covered Code and 299 | destroy all copies of it that are in your possession or control. 300 | 301 | 13.6 Dispute Resolution. Any litigation or other dispute resolution between 302 | You and Apple relating to this License shall take place in the Northern 303 | District of California, and You and Apple hereby consent to the personal 304 | jurisdiction of, and venue in, the state and federal courts within that 305 | District with respect to this License. The application of the United Nations 306 | Convention on Contracts for the International Sale of Goods is expressly 307 | excluded. 308 | 309 | 13.7 Entire Agreement; Governing Law. This License constitutes the entire 310 | agreement between the parties with respect to the subject matter hereof. This 311 | License shall be governed by the laws of the United States and the State of 312 | California, except that body of California law concerning conflicts of law. 313 | 314 | Where You are located in the province of Quebec, Canada, the following clause 315 | applies: The parties hereby confirm that they have requested that this License 316 | and all related documents be drafted in English. Les parties ont exigé que le 317 | présent contrat et tous les documents connexes soient rédigés en anglais. 318 | 319 | EXHIBIT A. 320 | 321 | "Portions Copyright (c) 1999-2007 Apple Inc. All Rights Reserved. 322 | 323 | This file contains Original Code and/or Modifications of Original Code as 324 | defined in and that are subject to the Apple Public Source License Version 2.0 325 | (the 'License'). You may not use this file except in compliance with the 326 | License. Please obtain a copy of the License at 327 | http://www.opensource.apple.com/apsl/ and read it before using this file. 
328 | 329 | The Original Code and all software distributed under the License are 330 | distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS 331 | OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT 332 | LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 333 | PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. Please see the License for the 334 | specific language governing rights and limitations under the License." 335 | 336 | -------------------------------------------------------------------------------- /wrap/wrap.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2020 Asahi Linux contributors 3 | * Copyright (c) 1998-2014 Apple Computer, Inc. All rights reserved. 4 | * Copyright (c) 2005 Apple Computer, Inc. All rights reserved. 5 | * 6 | * IOKit prototypes and stub implementations from upstream IOKitLib sources. 7 | * DYLD_INTERPOSE macro from dyld source code. All other code in the file is 8 | * by Asahi Linux contributors. 9 | * 10 | * @APPLE_LICENSE_HEADER_START@ 11 | * 12 | * This file contains Original Code and/or Modifications of Original Code 13 | * as defined in and that are subject to the Apple Public Source License 14 | * Version 2.0 (the 'License'). You may not use this file except in 15 | * compliance with the License. Please obtain a copy of the License at 16 | * http://www.opensource.apple.com/apsl/ and read it before using this 17 | * file. 18 | * 19 | * The Original Code and all software distributed under the License are 20 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 21 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 22 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 23 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 24 | * Please see the License for the specific language governing rights and 25 | * limitations under the License. 
26 | * 27 | * @APPLE_LICENSE_HEADER_END@ 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | 40 | #include "selectors.h" 41 | #include "cmdstream.h" 42 | #include "io.h" 43 | #include "decode.h" 44 | #include "util.h" 45 | 46 | /* Apple macro */ 47 | 48 | #define DYLD_INTERPOSE(_replacment,_replacee) \ 49 | __attribute__((used)) static struct{ const void* replacment; const void* replacee; } _interpose_##_replacee \ 50 | __attribute__ ((section ("__DATA,__interpose"))) = { (const void*)(unsigned long)&_replacment, (const void*)(unsigned long)&_replacee }; 51 | 52 | mach_port_t metal_connection = 0; 53 | 54 | kern_return_t 55 | wrap_IOConnectCallMethod( 56 | mach_port_t connection, // In 57 | uint32_t selector, // In 58 | const uint64_t *input, // In 59 | uint32_t inputCnt, // In 60 | const void *inputStruct, // In 61 | size_t inputStructCnt, // In 62 | uint64_t *output, // Out 63 | uint32_t *outputCnt, // In/Out 64 | void *outputStruct, // Out 65 | size_t *outputStructCntP) // In/Out 66 | { 67 | /* Heuristic guess which connection is Metal, skip over I/O from everything else */ 68 | bool bail = false; 69 | 70 | if (metal_connection == 0) { 71 | if (selector == AGX_SELECTOR_SET_API) 72 | metal_connection = connection; 73 | else 74 | bail = true; 75 | } else if (metal_connection != connection) 76 | bail = true; 77 | 78 | if (bail) 79 | return IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 80 | 81 | /* Check the arguments make sense */ 82 | assert((input != NULL) == (inputCnt != 0)); 83 | assert((inputStruct != NULL) == (inputStructCnt != 0)); 84 | assert((output != NULL) == (outputCnt != 0)); 85 | assert((outputStruct != NULL) == (outputStructCntP != 0)); 86 | 87 | /* Dump inputs */ 88 | switch (selector) { 89 | case AGX_SELECTOR_SET_API: 90 | assert(input == NULL && output == NULL && outputStruct == NULL); 91 | assert(inputStruct != NULL && inputStructCnt == 16); 92 | assert(((uint8_t *) inputStruct)[15] == 0x0); 93 | 94 | printf("%X: SET_API(%s)\n", connection, (const char *) inputStruct); 95 | break; 96 | 97 | case AGX_SELECTOR_SUBMIT_COMMAND_BUFFERS: 98 | assert(output == NULL && outputStruct == NULL); 99 | assert(inputStructCnt == 40); 100 | assert(inputCnt == 1); 101 | 102 | printf("%X: SUBMIT_COMMAND_BUFFERS command queue id:%llx %p\n", connection, input[0], inputStruct); 103 | 104 | const struct agx_submit_cmdbuf_req *req = inputStruct; 105 | 106 | pandecode_cmdstream(req->cmdbuf, false); 107 | 108 | if (getenv("ASAHI_DUMP")) 109 | pandecode_dump_mappings(); 110 | 111 | /* fallthrough */ 112 | default: 113 | printf("%X: call %s (out %p, %zu)", connection, wrap_selector_name(selector), outputStructCntP, outputStructCntP ? 
*outputStructCntP : 0); 114 | 115 | for (uint64_t u = 0; u < inputCnt; ++u) 116 | printf(" %llx", input[u]); 117 | 118 | if(inputStructCnt) { 119 | printf(", struct:\n"); 120 | hexdump(stdout, inputStruct, inputStructCnt, true); 121 | } else { 122 | printf("\n"); 123 | } 124 | 125 | break; 126 | } 127 | 128 | /* Invoke the real method */ 129 | kern_return_t ret = IOConnectCallMethod(connection, selector, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 130 | 131 | printf("return %u", ret); 132 | 133 | /* Dump the outputs */ 134 | if(outputCnt) { 135 | printf("%u scalars: ", *outputCnt); 136 | 137 | for (uint64_t u = 0; u < *outputCnt; ++u) 138 | printf("%llx ", output[u]); 139 | 140 | printf("\n"); 141 | } 142 | 143 | if(outputStructCntP) { 144 | printf(" struct\n"); 145 | hexdump(stdout, outputStruct, *outputStructCntP, true); 146 | 147 | if (selector == 2) { 148 | /* Dump linked buffer as well */ 149 | void **o = outputStruct; 150 | hexdump(stdout, *o, 64, true); 151 | } 152 | } 153 | 154 | printf("\n"); 155 | 156 | /* Track allocations for later analysis (dumping, disassembly, etc) */ 157 | switch (selector) { 158 | case AGX_SELECTOR_CREATE_CMDBUF: { 159 | assert(inputCnt == 2); 160 | assert((*outputStructCntP) == 0x10); 161 | uint64_t *inp = (uint64_t *) input; 162 | assert(inp[1] == 1 || inp[1] == 0); 163 | uint64_t *ptr = (uint64_t *) outputStruct; 164 | uint32_t *words = (uint32_t *) (ptr + 1); 165 | 166 | pandecode_track_alloc((struct agx_allocation) { 167 | .index = words[1], 168 | .map = (void *) *ptr, 169 | .size = words[0], 170 | .type = inp[1] ? AGX_ALLOC_CMDBUF : AGX_ALLOC_MEMMAP 171 | }); 172 | break; 173 | } 174 | 175 | case AGX_SELECTOR_ALLOCATE_MEM: { 176 | assert((*outputStructCntP) == 0x50); 177 | uint64_t *iptrs = (uint64_t *) inputStruct; 178 | uint64_t *ptrs = (uint64_t *) outputStruct; 179 | uint64_t gpu_va = ptrs[0]; 180 | uint64_t cpu = ptrs[1]; 181 | uint64_t cpu_fixed_1 = iptrs[6]; 182 | uint64_t cpu_fixed_2 = iptrs[7]; /* xxx what's the diff? */ 183 | if (cpu && cpu_fixed_1) 184 | assert(cpu == cpu_fixed_1); 185 | #if 0 186 | /* TODO: what about this case? 
*/ 187 | else if (cpu == 0) 188 | cpu = cpu_fixed_1; 189 | #endif 190 | uint64_t size = ptrs[4]; 191 | uint32_t *iwords = (uint32_t *) inputStruct; 192 | const char *type = agx_memory_type_name(iwords[20]); 193 | printf("allocate gpu va %llx, cpu %llx, 0x%llx bytes ", gpu_va, cpu, size); 194 | if (type) 195 | printf(" %s\n", type); 196 | else 197 | printf(" unknown type %08X\n", iwords[20]); 198 | 199 | pandecode_track_alloc((struct agx_allocation) { 200 | .type = AGX_ALLOC_REGULAR, 201 | .size = size, 202 | .index = ptrs[3] >> 32ull, 203 | .gpu_va = gpu_va, 204 | .map = (void *) cpu, 205 | }); 206 | } 207 | 208 | default: 209 | break; 210 | } 211 | 212 | return ret; 213 | } 214 | 215 | kern_return_t 216 | wrap_IOConnectCallAsyncMethod( 217 | mach_port_t connection, // In 218 | uint32_t selector, // In 219 | mach_port_t wakePort, // In 220 | uint64_t *reference, // In 221 | uint32_t referenceCnt, // In 222 | const uint64_t *input, // In 223 | uint32_t inputCnt, // In 224 | const void *inputStruct, // In 225 | size_t inputStructCnt, // In 226 | uint64_t *output, // Out 227 | uint32_t *outputCnt, // In/Out 228 | void *outputStruct, // Out 229 | size_t *outputStructCntP) // In/Out 230 | { 231 | /* Check the arguments make sense */ 232 | assert((input != NULL) == (inputCnt != 0)); 233 | assert((inputStruct != NULL) == (inputStructCnt != 0)); 234 | assert((output != NULL) == (outputCnt != 0)); 235 | assert((outputStruct != NULL) == (outputStructCntP != 0)); 236 | 237 | printf("%X: call %X, wake port %X (out %p, %zu)", connection, selector, wakePort, outputStructCntP, outputStructCntP ? *outputStructCntP : 0); 238 | 239 | for (uint64_t u = 0; u < inputCnt; ++u) 240 | printf(" %llx", input[u]); 241 | 242 | if(inputStructCnt) { 243 | printf(", struct:\n"); 244 | hexdump(stdout, inputStruct, inputStructCnt, true); 245 | } else { 246 | printf("\n"); 247 | } 248 | 249 | printf(", references: "); 250 | for (unsigned i = 0; i < referenceCnt; ++i) 251 | printf(" %llx", reference[i]); 252 | printf("\n"); 253 | 254 | kern_return_t ret = IOConnectCallAsyncMethod(connection, selector, wakePort, reference, referenceCnt, input, inputCnt, inputStruct, inputStructCnt, output, outputCnt, outputStruct, outputStructCntP); 255 | 256 | printf("return %u", ret); 257 | 258 | if(outputCnt) { 259 | printf("%u scalars: ", *outputCnt); 260 | 261 | for (uint64_t u = 0; u < *outputCnt; ++u) 262 | printf("%llx ", output[u]); 263 | 264 | printf("\n"); 265 | } 266 | 267 | if(outputStructCntP) { 268 | printf(" struct\n"); 269 | hexdump(stdout, outputStruct, *outputStructCntP, true); 270 | 271 | if (selector == 2) { 272 | /* Dump linked buffer as well */ 273 | void **o = outputStruct; 274 | hexdump(stdout, *o, 64, true); 275 | } 276 | } 277 | 278 | printf("\n"); 279 | return ret; 280 | } 281 | 282 | kern_return_t 283 | wrap_IOConnectCallStructMethod( 284 | mach_port_t connection, // In 285 | uint32_t selector, // In 286 | const void *inputStruct, // In 287 | size_t inputStructCnt, // In 288 | void *outputStruct, // Out 289 | size_t *outputStructCntP) // In/Out 290 | { 291 | return wrap_IOConnectCallMethod(connection, selector, NULL, 0, inputStruct, inputStructCnt, NULL, NULL, outputStruct, outputStructCntP); 292 | } 293 | 294 | kern_return_t 295 | wrap_IOConnectCallAsyncStructMethod( 296 | mach_port_t connection, // In 297 | uint32_t selector, // In 298 | mach_port_t wakePort, // In 299 | uint64_t *reference, // In 300 | uint32_t referenceCnt, // In 301 | const void *inputStruct, // In 302 | size_t inputStructCnt, // In 303 | 
void *outputStruct, // Out 304 | size_t *outputStructCnt) // In/Out 305 | { 306 | return wrap_IOConnectCallAsyncMethod(connection, selector, wakePort, 307 | reference, referenceCnt, 308 | NULL, 0, 309 | inputStruct, inputStructCnt, 310 | NULL, NULL, 311 | outputStruct, outputStructCnt); 312 | } 313 | 314 | kern_return_t 315 | wrap_IOConnectCallScalarMethod( 316 | mach_port_t connection, // In 317 | uint32_t selector, // In 318 | const uint64_t *input, // In 319 | uint32_t inputCnt, // In 320 | uint64_t *output, // Out 321 | uint32_t *outputCnt) // In/Out 322 | { 323 | return wrap_IOConnectCallMethod(connection, selector, 324 | input, inputCnt, 325 | NULL, 0, 326 | output, outputCnt, 327 | NULL, NULL); 328 | } 329 | 330 | kern_return_t 331 | wrap_IOConnectCallAsyncScalarMethod( 332 | mach_port_t connection, // In 333 | uint32_t selector, // In 334 | mach_port_t wakePort, // In 335 | uint64_t *reference, // In 336 | uint32_t referenceCnt, // In 337 | const uint64_t *input, // In 338 | uint32_t inputCnt, // In 339 | uint64_t *output, // Out 340 | uint32_t *outputCnt) // In/Out 341 | { 342 | return wrap_IOConnectCallAsyncMethod(connection, selector, wakePort, 343 | reference, referenceCnt, 344 | input, inputCnt, 345 | NULL, 0, 346 | output, outputCnt, 347 | NULL, NULL); 348 | } 349 | 350 | kern_return_t 351 | wrap_IOConnectSetNotificationPort( 352 | io_connect_t connect, 353 | uint32_t type, 354 | mach_port_t port, 355 | uintptr_t reference ) 356 | { 357 | printf("connect %X, type %X, to notification port %X, with reference %lx\n", connect, type, port, reference); 358 | kern_return_t ret = IOConnectSetNotificationPort(connect, type, port, reference); 359 | printf("return %u\n", ret); 360 | return ret; 361 | } 362 | 363 | kern_return_t 364 | wrap_IOSetNotificationPort( 365 | mach_port_t connect, 366 | uint32_t type, 367 | mach_port_t port ) 368 | { 369 | return wrap_IOConnectSetNotificationPort(connect, type, port, 0); 370 | } 371 | 372 | IONotificationPortRef 373 | wrap_IONotificationPortCreate( 374 | mach_port_t masterPort ) 375 | { 376 | IONotificationPortRef ref = IONotificationPortCreate(masterPort); 377 | printf("creating notification port from master %X --> %p\n", masterPort, ref); 378 | return ref; 379 | } 380 | 381 | void 382 | wrap_IONotificationPortSetDispatchQueue(IONotificationPortRef notify, dispatch_queue_t queue) 383 | { 384 | printf("set dispatch queue %p to queue %p\n", notify, queue); 385 | IONotificationPortSetDispatchQueue(notify, queue); 386 | } 387 | 388 | mach_port_t 389 | wrap_IODataQueueAllocateNotificationPort() 390 | { 391 | mach_port_t ret = IODataQueueAllocateNotificationPort(); 392 | printf("data queue notif port %X\n", ret); 393 | return ret; 394 | } 395 | 396 | IOReturn 397 | wrap_IODataQueueSetNotificationPort(IODataQueueMemory *dataQueue, mach_port_t notifyPort) 398 | { 399 | IOReturn ret = IODataQueueSetNotificationPort(dataQueue, notifyPort); 400 | printf("data queue %p set notif port %X -> %X\n", dataQueue, notifyPort, ret); 401 | return ret; 402 | } 403 | 404 | DYLD_INTERPOSE(wrap_IOConnectCallMethod, IOConnectCallMethod); 405 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncMethod, IOConnectCallAsyncMethod); 406 | DYLD_INTERPOSE(wrap_IOConnectCallStructMethod, IOConnectCallStructMethod); 407 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncStructMethod, IOConnectCallAsyncStructMethod); 408 | DYLD_INTERPOSE(wrap_IOConnectCallScalarMethod, IOConnectCallScalarMethod); 409 | DYLD_INTERPOSE(wrap_IOConnectCallAsyncScalarMethod, IOConnectCallAsyncScalarMethod); 410 | 
DYLD_INTERPOSE(wrap_IOConnectSetNotificationPort, IOConnectSetNotificationPort); 411 | //DYLD_INTERPOSE(wrap_IOSetNotificationPort, IOSetNotificationPort); 412 | DYLD_INTERPOSE(wrap_IONotificationPortCreate, IONotificationPortCreate); 413 | DYLD_INTERPOSE(wrap_IONotificationPortSetDispatchQueue, IONotificationPortSetDispatchQueue); 414 | DYLD_INTERPOSE(wrap_IODataQueueAllocateNotificationPort, IODataQueueAllocateNotificationPort); 415 | DYLD_INTERPOSE(wrap_IODataQueueSetNotificationPort, IODataQueueSetNotificationPort); 416 | --------------------------------------------------------------------------------