├── Makefile
├── README
├── bsp_test.c
├── decode_frame.c
├── extract_firmware.py
├── frame_nal
└── h264_player.c


/Makefile:
--------------------------------------------------------------------------------
LDFLAGS=-lX11 -lvdpau
CFLAGS=-g -Wall
MESA_DIR=../mesa
GALLIUM_DIR=$(MESA_DIR)/src/gallium

all: h264_player bsp_test decode_frame

# Linked with an explicit recipe so the libraries come after the object file
# on the link line, exactly like the other two tools below.
h264_player: h264_player.o
	$(CC) -o $@ $^ $(LDFLAGS)

bsp_test: bsp_test.o
	$(CC) -o $@ $^ $(LDFLAGS) -ldrm -ldrm_nouveau -lxcb -lxcb-dri2

decode_frame: decode_frame.o
	$(CC) -o $@ $^ $(LDFLAGS) -ldrm -ldrm_nouveau -lxcb -lxcb-dri2

# bsp_test and decode_frame need the mesa/gallium and libdrm headers.
bsp_test.o: bsp_test.c
	$(CC) -c $^ $(CFLAGS) -I$(GALLIUM_DIR)/drivers -I$(GALLIUM_DIR)/include -I$(MESA_DIR)/include -I$(GALLIUM_DIR)/auxiliary -I/usr/include/libdrm

decode_frame.o: decode_frame.c
	$(CC) -c $^ $(CFLAGS) -I$(GALLIUM_DIR)/drivers -I$(GALLIUM_DIR)/include -I$(MESA_DIR)/include -I$(GALLIUM_DIR)/auxiliary -I/usr/include/libdrm

# .PHONY is a special target (declared with ':'), not a variable.
.PHONY: all clean

clean:
	-rm -rf *.o h264_player bsp_test decode_frame
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
These are tools designed to help with reverse-engineering the VP2
engine available on NVIDIA NV84-NV98 cards.

h264_player:

  A player that is designed to play back exactly one video, for now:
  http://www.h264info.com/clips.html, download The Simpsons Movie
  trailer, and run mplayer -dumpvideo foo.mkv. The resulting
  stream.dump file is the input file for h264_player.

  Note that it does not do any frame reordering, so the output is very
  jittery. Also since it doesn't have access to the picinfo, it's
  hardcoded to the right thing (so other videos are unlikely to play).

  (The current version is commented so that it outputs just the first
  frame's YUV data on stdout.)

bsp_test:

  Tries to make sure that the BSP engine is accessible and functioning
  properly. Invokes the 0x304 "write semaphore" method, and checks
  whether the value is indeed written.

decode_frame:

  Standalone program that decodes a single NAL (that it loads from a
  separate file). This has all the bits necessary to do the actual
  decoding, but uses hardcoded picinfo, as h264_player above. Output
  is a YUV file on stdout.
--------------------------------------------------------------------------------
/bsp_test.c:
--------------------------------------------------------------------------------
/*
 * Copyright (C) 2013 Ilia Mirkin
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | 33 | #include "nv50/nv50_context.h" 34 | 35 | #undef NDEBUG 36 | #include 37 | 38 | /* From pipe_loader_drm.c in mesa */ 39 | static void 40 | pipe_loader_drm_x_auth(int fd) 41 | { 42 | /* Try authenticate with the X server to give us access to devices that X 43 | * is running on. */ 44 | xcb_connection_t *xcb_conn; 45 | const xcb_setup_t *xcb_setup; 46 | xcb_screen_iterator_t s; 47 | xcb_dri2_connect_cookie_t connect_cookie; 48 | xcb_dri2_connect_reply_t *connect; 49 | drm_magic_t magic; 50 | xcb_dri2_authenticate_cookie_t authenticate_cookie; 51 | xcb_dri2_authenticate_reply_t *authenticate; 52 | 53 | xcb_conn = xcb_connect(NULL, NULL); 54 | 55 | if(!xcb_conn) 56 | return; 57 | 58 | xcb_setup = xcb_get_setup(xcb_conn); 59 | 60 | if (!xcb_setup) 61 | goto disconnect; 62 | 63 | s = xcb_setup_roots_iterator(xcb_setup); 64 | connect_cookie = xcb_dri2_connect_unchecked(xcb_conn, s.data->root, 65 | XCB_DRI2_DRIVER_TYPE_DRI); 66 | connect = xcb_dri2_connect_reply(xcb_conn, connect_cookie, NULL); 67 | 68 | if (!connect || connect->driver_name_length 69 | + connect->device_name_length == 0) { 70 | 71 | goto disconnect; 72 | } 73 | 74 | if (drmGetMagic(fd, &magic)) 75 | goto disconnect; 76 | 77 | authenticate_cookie = xcb_dri2_authenticate_unchecked(xcb_conn, 78 | s.data->root, 79 | magic); 80 | authenticate = xcb_dri2_authenticate_reply(xcb_conn, 81 | authenticate_cookie, 82 | NULL); 83 | FREE(authenticate); 84 | 85 | disconnect: 86 | xcb_disconnect(xcb_conn); 87 | } 88 | 89 | 90 | int main() { 91 | struct nouveau_device *dev; 92 | struct nouveau_client *client; 93 | struct nouveau_object *channel; 94 | struct nouveau_object *bsp; 95 | struct nouveau_pushbuf *push; 96 | struct nouveau_bo *sem = NULL; 97 | struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; 98 | 99 | int fd, i; 100 | 101 | fd = open("/dev/dri/card0", O_RDWR); 102 | assert(fd); 103 
| pipe_loader_drm_x_auth(fd); 104 | 105 | assert(!nouveau_device_wrap(fd, 0, &dev)); 106 | assert(!nouveau_client_new(dev, &client)); 107 | assert(!nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, 108 | &nv04_data, sizeof(nv04_data), &channel)); 109 | assert(!nouveau_pushbuf_new(client, channel, 2, 4096, 1, &push)); 110 | assert(!nouveau_object_new(channel, 0xbeef74b0, 0x74b0, NULL, 0, &bsp)); 111 | assert(!nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 0x1000, NULL, &sem)); 112 | 113 | printf("bo offset: %lx\n", sem->offset); 114 | printf("bo handle: %x\n", sem->handle); 115 | 116 | assert(!nouveau_bo_map(sem, NOUVEAU_BO_RD | NOUVEAU_BO_WR, client)); 117 | 118 | /* 119 | struct nouveau_pushbuf_refn refs[] = { 120 | { sem, NOUVEAU_BO_RDWR } 121 | }; 122 | assert(!nouveau_pushbuf_refn(push, refs, 1)); 123 | */ 124 | 125 | printf("bo map: %p\n", sem->map); 126 | 127 | uint32_t *map = sem->map; 128 | *map = 0xdeadbeef; 129 | 130 | BEGIN_NV04(push, 0, 0x60, 1); 131 | PUSH_DATA (push, nv04_data.vram); 132 | 133 | /* Bind the BSP to the fifo */ 134 | BEGIN_NV04(push, 1, 0, 1); 135 | PUSH_DATA (push, bsp->handle); 136 | 137 | /* Set the DMA channels */ 138 | BEGIN_NV04(push, 1, 0x180, 11); 139 | for (i = 0; i < 11; i++) 140 | PUSH_DATA(push, nv04_data.vram); 141 | 142 | BEGIN_NV04(push, 1, 0x1b8, 1); 143 | PUSH_DATA (push, nv04_data.vram); 144 | 145 | /* Set the semaphore */ 146 | BEGIN_NV04(push, 1, 0x610, 3); 147 | PUSH_DATAh(push, sem->offset); 148 | PUSH_DATA (push, sem->offset); 149 | PUSH_DATA (push, 0xabce); 150 | 151 | /* Write abce to the semaphore location */ 152 | BEGIN_NV04(push, 1, 0x304, 1); 153 | PUSH_DATA (push, 0x101); 154 | 155 | BEGIN_NV04(push, 1, 0x80, 1); 156 | PUSH_DATA (push, 0); 157 | PUSH_KICK (push); 158 | 159 | /* Wait for abce to come out */ 160 | BEGIN_NV04(push, 4, 0x10, 4); 161 | PUSH_DATAh(push, sem->offset); 162 | PUSH_DATA (push, sem->offset); 163 | PUSH_DATA (push, 0xabce); 164 | PUSH_DATA (push, 1); /* Wait for equal */ 165 | 
PUSH_KICK (push); 166 | 167 | nouveau_bo_wait(sem, NOUVEAU_BO_RDWR, client); 168 | 169 | printf("%x\n", *map); 170 | usleep(10000); 171 | printf("%x\n", *map); 172 | return 0; 173 | } 174 | -------------------------------------------------------------------------------- /decode_frame.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2013 Ilia Mirkin 3 | * All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice (including the next 13 | * paragraph) shall be included in all copies or substantial portions of the 14 | * Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | * OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | 34 | 35 | #include "nv50/nv50_context.h" 36 | 37 | #undef NDEBUG 38 | #include 39 | 40 | /* From pipe_loader_drm.c in mesa */ 41 | static void 42 | pipe_loader_drm_x_auth(int fd) 43 | { 44 | /* Try authenticate with the X server to give us access to devices that X 45 | * is running on. */ 46 | xcb_connection_t *xcb_conn; 47 | const xcb_setup_t *xcb_setup; 48 | xcb_screen_iterator_t s; 49 | xcb_dri2_connect_cookie_t connect_cookie; 50 | xcb_dri2_connect_reply_t *connect; 51 | drm_magic_t magic; 52 | xcb_dri2_authenticate_cookie_t authenticate_cookie; 53 | xcb_dri2_authenticate_reply_t *authenticate; 54 | 55 | xcb_conn = xcb_connect(NULL, NULL); 56 | 57 | if(!xcb_conn) 58 | return; 59 | 60 | xcb_setup = xcb_get_setup(xcb_conn); 61 | 62 | if (!xcb_setup) 63 | goto disconnect; 64 | 65 | s = xcb_setup_roots_iterator(xcb_setup); 66 | connect_cookie = xcb_dri2_connect_unchecked(xcb_conn, s.data->root, 67 | XCB_DRI2_DRIVER_TYPE_DRI); 68 | connect = xcb_dri2_connect_reply(xcb_conn, connect_cookie, NULL); 69 | 70 | if (!connect || connect->driver_name_length 71 | + connect->device_name_length == 0) { 72 | 73 | goto disconnect; 74 | } 75 | 76 | if (drmGetMagic(fd, &magic)) 77 | goto disconnect; 78 | 79 | authenticate_cookie = xcb_dri2_authenticate_unchecked(xcb_conn, 80 | s.data->root, 81 | magic); 82 | authenticate = xcb_dri2_authenticate_reply(xcb_conn, 83 | authenticate_cookie, 84 | NULL); 85 | FREE(authenticate); 86 | 87 | disconnect: 88 | xcb_disconnect(xcb_conn); 89 | } 90 | 91 | static struct nouveau_bufctx *bufctx; 92 | 93 | static struct nouveau_bo * 94 | new_bo_and_map(struct nouveau_device *dev, 95 | struct nouveau_client *client, long size) { 96 | struct nouveau_bo *ret; 97 | assert(!nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0x1000, size, NULL, &ret)); 98 | if (client) 99 | assert(!nouveau_bo_map(ret, NOUVEAU_BO_RDWR, client)); 100 | 
fprintf(stderr, "returning map: %llx\n", ret->offset); 101 | nouveau_bufctx_refn(bufctx, 0, ret, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); 102 | return ret; 103 | } 104 | 105 | static struct nouveau_bo * 106 | new_bo_and_map_tile(struct nouveau_device *dev, 107 | struct nouveau_client *client, long size) { 108 | struct nouveau_bo *ret; 109 | union nouveau_bo_config cfg; 110 | 111 | cfg.nv50.tile_mode = 0x20; 112 | cfg.nv50.memtype = 0x70; 113 | assert(!nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0x1000, size, &cfg, &ret)); 114 | if (client) 115 | assert(!nouveau_bo_map(ret, NOUVEAU_BO_RDWR, client)); 116 | fprintf(stderr, "returning map: %llx\n", ret->offset); 117 | nouveau_bufctx_refn(bufctx, 0, ret, NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR); 118 | return ret; 119 | } 120 | 121 | static struct nouveau_bo * 122 | new_bo_and_map_gart(struct nouveau_device *dev, 123 | struct nouveau_client *client, long size) { 124 | struct nouveau_bo *ret; 125 | assert(!nouveau_bo_new(dev, NOUVEAU_BO_GART, 0x1000, size, NULL, &ret)); 126 | if (client) 127 | assert(!nouveau_bo_map(ret, NOUVEAU_BO_RDWR, client)); 128 | fprintf(stderr, "returning gart map: %llx\n", ret->offset); 129 | nouveau_bufctx_refn(bufctx, 0, ret, NOUVEAU_BO_GART | NOUVEAU_BO_RDWR); 130 | return ret; 131 | } 132 | 133 | static void 134 | load_bsp_fw(struct nouveau_bo *fw) { 135 | int fd = open("/lib/firmware/nouveau/nv84_bsp-h264", O_RDONLY); 136 | struct stat statbuf; 137 | void *addr; 138 | assert(fd); 139 | assert(fstat(fd, &statbuf) == 0); 140 | addr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); 141 | assert(addr); 142 | 143 | memcpy(fw->map, addr, statbuf.st_size); 144 | memset(fw->map + statbuf.st_size, 0, fw->size - statbuf.st_size); 145 | 146 | munmap(addr, statbuf.st_size); 147 | close(fd); 148 | } 149 | 150 | static void 151 | clear_3d(struct nouveau_pushbuf *push, uint64_t offset, 152 | uint16_t w, uint16_t h, int scale, int tile_mode, uint32_t color) { 153 | int i; 154 | 155 | BEGIN_NV04(push, 3, 0x200, 
4); 156 | PUSH_DATAh(push, offset); 157 | PUSH_DATA (push, offset); 158 | PUSH_DATA (push, 0xd5); /* RGBA8_UNORM - some of the 0's use BGRA8, but whatever, it's all 0's... */ 159 | PUSH_DATA (push, tile_mode); /* tile mode */ 160 | BEGIN_NV04(push, 3, 0xff4, 2); 161 | PUSH_DATA (push, (uint32_t)w << 16); 162 | PUSH_DATA (push, (uint32_t)h << 16); 163 | BEGIN_NV04(push, 3, 0x1240, 2); 164 | PUSH_DATA (push, (scale == 1 ? 0 : 0x80000000) | scale * w); 165 | PUSH_DATA (push, h); 166 | BEGIN_NV04(push, 3, 0x143c, 1); 167 | PUSH_DATA (push, 0); 168 | BEGIN_NV04(push, 3, 0xd80, 4); 169 | for (i = 0; i < 4; i++) 170 | PUSH_DATA(push, color); 171 | BEGIN_NV04(push, 3, 0x19d0, 1); 172 | PUSH_DATA (push, 0x3c); 173 | PUSH_KICK (push); 174 | } 175 | 176 | static void 177 | copy_to_linear(struct nouveau_pushbuf *push, uint64_t from, uint64_t to, 178 | int width, int height, int lines, int dest_pitch) { 179 | BEGIN_NV04(push, 4, 0x200, 4); 180 | PUSH_DATA (push, 0); 181 | PUSH_DATA (push, 0x20 /* tiling mode */); 182 | PUSH_DATA (push, width); 183 | PUSH_DATA (push, height); 184 | 185 | BEGIN_NV04(push, 4, 0x218, 2); 186 | PUSH_DATA (push, 0 << 16); /* y offset */ 187 | PUSH_DATA (push, 1); 188 | 189 | BEGIN_NV04(push, 4, 0x238, 2); 190 | PUSH_DATAh(push, from); 191 | PUSH_DATAh(push, to); 192 | 193 | BEGIN_NV04(push, 4, 0x30c, 8); 194 | PUSH_DATA (push, from); 195 | PUSH_DATA (push, to); 196 | PUSH_DATA (push, 0); 197 | PUSH_DATA (push, dest_pitch); 198 | PUSH_DATA (push, width); 199 | PUSH_DATA (push, lines); 200 | PUSH_DATA (push, 0x101); 201 | PUSH_DATA (push, 0); 202 | } 203 | 204 | static void 205 | copy_buffer(struct nouveau_pushbuf *push, struct nouveau_bo *from, struct nouveau_bo *to) { 206 | /* 207 | copy_to_linear(push, from->offset, to->offset + 0 * 0x7d00, 1280, 272, 25, 0); 208 | copy_to_linear(push, from->offset, to->offset + 1 * 0x7d00, 1280, 272, 25, 25); 209 | copy_to_linear(push, from->offset, to->offset + 2 * 0x7d00, 1280, 272, 25, 50); 210 | 
copy_to_linear(push, from->offset, to->offset + 3 * 0x7d00, 1280, 272, 25, 75); 211 | copy_to_linear(push, from->offset, to->offset + 4 * 0x7d00, 1280, 272, 25, 100); 212 | copy_to_linear(push, from->offset, to->offset + 5 * 0x7d00, 1280, 272, 25, 125); 213 | copy_to_linear(push, from->offset, to->offset + 6 * 0x7d00, 1280, 272, 25, 150); 214 | copy_to_linear(push, from->offset, to->offset + 7 * 0x7d00, 1280, 272, 25, 175); 215 | copy_to_linear(push, from->offset, to->offset + 8 * 0x7d00, 1280, 272, 25, 200); 216 | copy_to_linear(push, from->offset, to->offset + 9 * 0x7d00, 1280, 272, 25, 225); 217 | copy_to_linear(push, from->offset, to->offset + 10 * 0x7d00, 1280, 272, 22, 250); 218 | */ 219 | copy_to_linear(push, from->offset, to->offset, 1280, 272, 272, 1280 * 2); 220 | 221 | /* 222 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 0 * 0x7d00, 1280, 272, 25, 0); 223 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 1 * 0x7d00, 1280, 272, 25, 25); 224 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 2 * 0x7d00, 1280, 272, 25, 50); 225 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 3 * 0x7d00, 1280, 272, 25, 75); 226 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 4 * 0x7d00, 1280, 272, 25, 100); 227 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 5 * 0x7d00, 1280, 272, 25, 125); 228 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 6 * 0x7d00, 1280, 272, 25, 150); 229 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 7 * 0x7d00, 1280, 272, 25, 175); 230 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 8 * 0x7d00, 1280, 272, 25, 200); 231 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 9 * 0x7d00, 1280, 272, 25, 225); 232 | copy_to_linear(push, from->offset + 0x55000, to->offset + 0x55000 + 10 * 0x7d00, 1280, 272, 22, 250); 233 | */ 234 | 
copy_to_linear(push, from->offset + 0x55000, to->offset + /*0x55000*/ 1280, 1280, 272, 272, 1280 * 2); 235 | 236 | /* 237 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 0 * 0x7d00, 238 | 1280, 136, 25, 0); 239 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 1 * 0x7d00, 240 | 1280, 136, 25, 25); 241 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 2 * 0x7d00, 242 | 1280, 136, 25, 50); 243 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 3 * 0x7d00, 244 | 1280, 136, 25, 75); 245 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 4 * 0x7d00, 246 | 1280, 136, 25, 100); 247 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000 + 5 * 0x7d00, 248 | 1280, 136, 11, 125); 249 | */ 250 | copy_to_linear(push, from->offset + 0xaa000, to->offset + 0xaa000, 1280, 136, 136, 1280 * 2); 251 | 252 | /* Round up number of lines to 16, so 2d000 offset on source. */ 253 | /* 254 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 0 * 0x7d00, 255 | 1280, 136, 25, 0); 256 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 1 * 0x7d00, 257 | 1280, 136, 25, 25); 258 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 2 * 0x7d00, 259 | 1280, 136, 25, 50); 260 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 3 * 0x7d00, 261 | 1280, 136, 25, 75); 262 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 4 * 0x7d00, 263 | 1280, 136, 25, 100); 264 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + 0x2a800 + 5 * 0x7d00, 265 | 1280, 136, 11, 125); 266 | */ 267 | copy_to_linear(push, from->offset + 0xaa000 + 0x2d000, to->offset + 0xaa000 + /*0x2a800*/ + 1280, 1280, 136, 136, 1280 * 2); 268 | 269 | PUSH_KICK(push); 270 | } 271 | 272 | static void 273 | 
load_vp_fw(struct nouveau_bo *fw) { 274 | int fd; 275 | struct stat statbuf; 276 | void *addr; 277 | 278 | assert((fd = open("/lib/firmware/nouveau/nv84_vp-h264-1", O_RDONLY))); 279 | assert(fstat(fd, &statbuf) == 0); 280 | assert(statbuf.st_size < 0x1f400); 281 | assert((addr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0))); 282 | 283 | memcpy(fw->map, addr, statbuf.st_size); 284 | 285 | munmap(addr, statbuf.st_size); 286 | close(fd); 287 | 288 | assert((fd = open("/lib/firmware/nouveau/nv84_vp-h264-2", O_RDONLY))); 289 | assert(fstat(fd, &statbuf) == 0); 290 | assert((addr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0))); 291 | 292 | memcpy(fw->map + 0x1f400, addr, statbuf.st_size); 293 | 294 | munmap(addr, statbuf.st_size); 295 | close(fd); 296 | } 297 | 298 | static void 299 | load_bitstream(struct nouveau_bo *data) { 300 | int fd; 301 | struct stat statbuf; 302 | void *addr; 303 | uint32_t arr[0x530 / 4] = {0}; 304 | uint32_t arr2[0x44 / 4] = {0}; 305 | 306 | assert((fd = open("frame_nal", O_RDONLY))); 307 | assert(fstat(fd, &statbuf) == 0); 308 | assert((addr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0))); 309 | 310 | arr[0x0 / 4 + 0] = 0x1; 311 | arr[0x120 / 4 + 2] = 0x5; 312 | arr[0x130 / 4 + 0] = 0x6; 313 | arr[0x130 / 4 + 2] = 0x6; 314 | arr[0x130 / 4 + 3] = 0x4f; 315 | arr[0x140 / 4 + 0] = 0x21; 316 | arr[0x140 / 4 + 1] = 0x1; 317 | arr[0x140 / 4 + 3] = 0x1; 318 | arr[0x150 / 4 + 0] = 0x1; 319 | arr[0x1e0 / 4 + 1] = 0x1; 320 | arr[0x320 / 4 + 0] = 0x10000; 321 | arr[0x320 / 4 + 1] = 0x10000; 322 | arr[0x320 / 4 + 2] = 0x10000; 323 | 324 | arr2[1] = statbuf.st_size + 3 + 16; 325 | 326 | uint32_t end[2] = {0x0b010000, 0}; 327 | 328 | memcpy(data->map, arr, sizeof(arr)); 329 | memcpy(data->map + 0x600, arr2, sizeof(arr2)); 330 | uint8_t *map = data->map; 331 | map[0x700] = 0; 332 | map[0x701] = 0; 333 | map[0x702] = 1; 334 | memcpy(data->map + 0x703, addr, statbuf.st_size); 335 | memcpy(data->map + 0x703 + 
statbuf.st_size, end, sizeof(end)); 336 | memcpy(data->map + 0x703 + statbuf.st_size + sizeof(end), end, sizeof(end)); 337 | } 338 | 339 | static void 340 | init_vp_params(struct nouveau_bo *data, struct nouveau_bo *frames[]) { 341 | uint32_t *map = data->map; 342 | int i; 343 | 344 | for (i = 0; i < 0xe0 / 4; i++) 345 | map[i] = 0x10101010; 346 | map[0xe0 / 4] = 0x500; /* width */ 347 | map[0xe4 / 4] = 0x220; /* height */ 348 | for (i = 0; i < 16; i++) 349 | *((uint64_t *)data->map + 0xe8 / 8 + i) = frames[0]->offset; 350 | 351 | for (i = 0; i < 16; i++) 352 | *((uint64_t *)data->map + 0x168 / 8 + i) = frames[1]->offset; 353 | 354 | map[0x1e8 / 4] = 0; 355 | map[0x1ec / 4] = 0; 356 | map[0x1f0 / 4] = 0x500; 357 | map[0x1f4 / 4] = 0x500; 358 | map[0x1f8 / 4] = 0x500; 359 | map[0x1fc / 4] = 0x220; 360 | map[0x200 / 4] = 0x220; 361 | map[0x204 / 4] = 0x220; 362 | map[0x208 / 4] = 0; 363 | map[0x20c / 4] = 0; 364 | map[0x210 / 4] = 0x3231564e; /* ??? */ 365 | map[0x214 / 4] = 0; 366 | map[0x400 / 4] = 0x500; 367 | map[0x404 / 4] = 0x220; 368 | map[0x408 / 4] = 0xaa0; /* width * height / 8 ? 
*/ 369 | map[0x40c / 4] = 0x500; 370 | map[0x410 / 4] = 0x500; 371 | map[0x414 / 4] = 0x500; 372 | map[0x418 / 4] = 0x220; 373 | map[0x41c / 4] = 0x220; 374 | map[0x420 / 4] = 0x220; 375 | map[0x424 / 4] = 0; 376 | map[0x428 / 4] = 0; 377 | map[0x42c / 4] = 0; 378 | map[0x430 / 4] = 0; 379 | map[0x434 / 4] = 1; 380 | } 381 | 382 | int main() { 383 | struct nouveau_device *dev; 384 | struct nouveau_client *client; 385 | struct nouveau_object *channel; 386 | struct nouveau_object *bsp, *vp, *threed, *m2mf, *sync; 387 | struct nouveau_pushbuf *push; 388 | struct nouveau_bo *bsp_sem, *bsp_fw, *bsp_scratch, *bitstream, *mbring, *vpring; 389 | struct nouveau_bo *vp_sem, *vp_fw, *vp_scratch, *vp_params, *frames[2]; 390 | struct nouveau_bo *d3_fpvp, *d3_cb_def, *d3_tsc_tic; 391 | struct nouveau_bo *output; 392 | 393 | struct nv04_fifo nv04_data = { .vram = 0xbeef0201, .gart = 0xbeef0202 }; 394 | 395 | int fd, i; 396 | 397 | fd = open("/dev/dri/card0", O_RDWR); 398 | assert(fd); 399 | pipe_loader_drm_x_auth(fd); 400 | 401 | assert(!nouveau_device_wrap(fd, 0, &dev)); 402 | assert(!nouveau_client_new(dev, &client)); 403 | assert(!nouveau_object_new(&dev->object, 0, NOUVEAU_FIFO_CHANNEL_CLASS, 404 | &nv04_data, sizeof(nv04_data), &channel)); 405 | assert(!nouveau_pushbuf_new(client, channel, 2, 0x2000, 1, &push)); 406 | 407 | assert(!nouveau_object_new(channel, 0xbeef74b0, 0x74b0, NULL, 0, &bsp)); 408 | assert(!nouveau_object_new(channel, 0xbeef7476, 0x7476, NULL, 0, &vp)); 409 | assert(!nouveau_object_new(channel, 0xbeef8297, 0x8297, NULL, 0, &threed)); 410 | assert(!nouveau_object_new(channel, 0xbeef5039, 0x5039, NULL, 0, &m2mf)); 411 | 412 | assert(!nouveau_object_new(channel, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS, 413 | &(struct nv04_notify){ .length = 32 }, 414 | sizeof(struct nv04_notify), &sync)); 415 | 416 | assert(!nouveau_bufctx_new(client, 1, &bufctx)); 417 | nouveau_pushbuf_bufctx(push, bufctx); 418 | 419 | 420 | bsp_sem = new_bo_and_map(dev, client, 0x1000); 421 | 
bsp_fw = new_bo_and_map(dev, client, 0xd9d0); 422 | bsp_scratch = new_bo_and_map(dev, client, 0x40000); 423 | bitstream = new_bo_and_map(dev, client, 0x1ffe00); 424 | mbring = new_bo_and_map(dev, NULL, 0x1d5800); 425 | vpring = new_bo_and_map(dev, NULL, 0x9ee200); 426 | 427 | vp_sem = new_bo_and_map(dev, client, 0x1000); 428 | vp_fw = new_bo_and_map(dev, client, 0x3b3fc); 429 | vp_scratch = new_bo_and_map(dev, client, 0x40000); 430 | vp_params = new_bo_and_map(dev, client, 0x2000); 431 | 432 | d3_fpvp = new_bo_and_map(dev, NULL, 0x8f00); 433 | d3_cb_def = new_bo_and_map(dev, NULL, 0x1000); 434 | d3_tsc_tic = new_bo_and_map(dev, NULL, 0x2000); 435 | 436 | for (i = 0; i < 2; i++) { 437 | frames[i] = new_bo_and_map_tile(dev, client, 0x104000); 438 | } 439 | 440 | output = new_bo_and_map_gart(dev, client, 0x104000); 441 | 442 | *(uint64_t *)bsp_sem->map = ~0; 443 | *(uint64_t *)vp_sem->map = ~0; 444 | 445 | /* Setup DMA for the SEMAPHORE logic */ 446 | BEGIN_NV04(push, 0, 0x60, 1); 447 | PUSH_DATA (push, nv04_data.vram); 448 | 449 | /* Bind the BSP to the fifo */ 450 | BEGIN_NV04(push, 1, 0, 1); 451 | PUSH_DATA (push, bsp->handle); 452 | 453 | /* Bind the VP to the fifo */ 454 | BEGIN_NV04(push, 2, 0, 1); 455 | PUSH_DATA (push, vp->handle); 456 | 457 | /* Bind the 3D to the fifo */ 458 | BEGIN_NV04(push, 3, 0, 1); 459 | PUSH_DATA (push, threed->handle); 460 | 461 | /* Bind the M2MF to the fifo */ 462 | BEGIN_NV04(push, 4, 0, 1); 463 | PUSH_DATA (push, m2mf->handle); 464 | 465 | /* Set the DMA channels */ 466 | BEGIN_NV04(push, 1, 0x180, 11); 467 | for (i = 0; i < 11; i++) 468 | PUSH_DATA(push, nv04_data.vram); 469 | 470 | BEGIN_NV04(push, 1, 0x1b8, 1); 471 | PUSH_DATA (push, nv04_data.vram); 472 | 473 | BEGIN_NV04(push, 2, 0x180, 11); 474 | for (i = 0; i < 11; i++) 475 | PUSH_DATA(push, nv04_data.vram); 476 | 477 | BEGIN_NV04(push, 2, 0x1b8, 1); 478 | PUSH_DATA (push, nv04_data.vram); 479 | 480 | BEGIN_NV04(push, 3, 0x180, 1); 481 | PUSH_DATA (push, sync->handle); 482 
| BEGIN_NV04(push, 3, 0x188, 2); 483 | for (i = 0; i < 2; i++) 484 | PUSH_DATA (push, nv04_data.vram); 485 | BEGIN_NV04(push, 3, 0x198, 6); 486 | for (i = 0; i < 6; i++) 487 | PUSH_DATA (push, nv04_data.vram); 488 | 489 | BEGIN_NV04(push, 3, 0x1c0, 8); 490 | for (i = 0; i < 8; i++) 491 | PUSH_DATA (push, nv04_data.vram); 492 | 493 | BEGIN_NV04(push, 4, 0x180, 3); 494 | PUSH_DATA (push, sync->handle); 495 | for (i = 0; i < 2; i++) 496 | PUSH_DATA (push, nv04_data.gart); 497 | 498 | /* Initialize 3D FP/VP/whatever */ 499 | BEGIN_NV04(push, 3, 0xfa4, 2); 500 | PUSH_DATAh(push, d3_fpvp->offset); 501 | PUSH_DATA (push, d3_fpvp->offset); 502 | 503 | BEGIN_NV04(push, 3, 0xf7c, 2); 504 | PUSH_DATAh(push, d3_fpvp->offset); 505 | PUSH_DATA (push, d3_fpvp->offset); 506 | 507 | BEGIN_NV04(push, 3, 0x1290, 1); 508 | PUSH_DATA (push, 0xfff); 509 | BEGIN_NV04(push, 3, 0x1988, 1); 510 | PUSH_DATA (push, 0x240424); 511 | BEGIN_NV04(push, 3, 0x1298, 1); 512 | PUSH_DATA (push, 0x4); 513 | BEGIN_NV04(push, 3, 0x140c, 1); 514 | PUSH_DATA (push, 0x0); 515 | BEGIN_NV04(push, 3, 0x16ac, 2); 516 | PUSH_DATA (push, 0x24); 517 | PUSH_DATA (push, 0x0); 518 | BEGIN_NV04(push, 3, 0x129c, 1); 519 | PUSH_DATA (push, 0x20); 520 | BEGIN_NV04(push, 3, 0x1650, 2); 521 | PUSH_DATA (push, ~0); 522 | PUSH_DATA (push, ~0); 523 | BEGIN_NV04(push, 3, 0x16b0, 1); 524 | PUSH_DATA (push, 0x24); 525 | BEGIN_NV04(push, 3, 0x16bc, 1); 526 | PUSH_DATA (push, 0x03020100); 527 | BEGIN_NV04(push, 3, 0x1540, 2); 528 | PUSH_DATA (push, ~0); 529 | PUSH_DATA (push, ~0); 530 | BEGIN_NV04(push, 3, 0x1280, 3); 531 | PUSH_DATAh(push, d3_cb_def->offset); 532 | PUSH_DATA (push, d3_cb_def->offset); 533 | PUSH_DATA (push, 0x100); 534 | BEGIN_NV04(push, 3, 0x1694, 1); 535 | PUSH_DATA (push, 0x131); 536 | BEGIN_NV04(push, 3, 0x1280, 3); 537 | PUSH_DATAh(push, d3_cb_def->offset + 0x400); 538 | PUSH_DATA (push, d3_cb_def->offset + 0x400); 539 | PUSH_DATA (push, 0x100); 540 | BEGIN_NV04(push, 3, 0x1694, 1); 541 | PUSH_DATA (push, 
0x1031); 542 | BEGIN_NV04(push, 3, 0xa00, 6); 543 | for (i = 0; i < 3; i++) 544 | PUSH_DATA(push, 0x3f800000); 545 | for (i = 0; i < 3; i++) 546 | PUSH_DATA(push, 0); 547 | BEGIN_NV04(push, 3, 0xc00, 4); 548 | PUSH_DATA (push, 0x20000000); 549 | PUSH_DATA (push, 0x20000000); 550 | PUSH_DATA (push, 0); 551 | PUSH_DATA (push, 0x3f800000); 552 | BEGIN_NV04(push, 3, 0xdac, 3); 553 | PUSH_DATA(push, 0x1b02); 554 | PUSH_DATA(push, 0x1b02); 555 | PUSH_DATA(push, 0); 556 | BEGIN_NV04(push, 3, 0xdc0, 3); 557 | PUSH_DATA(push, 0); 558 | PUSH_DATA(push, 0); 559 | PUSH_DATA(push, 0); 560 | BEGIN_NV04(push, 3, 0xdf8, 2); 561 | PUSH_DATA(push, 0); 562 | PUSH_DATA(push, 0); 563 | BEGIN_NV04(push, 3, 0xe00, 1); 564 | PUSH_DATA(push, 0); 565 | BEGIN_NV04(push, 3, 0x1234, 1); 566 | PUSH_DATA(push, 1); 567 | BEGIN_NV04(push, 3, 0x12cc, 3); 568 | PUSH_DATA(push, 0); 569 | PUSH_DATA(push, 3); 570 | PUSH_DATA(push, 2); 571 | BEGIN_NV04(push, 3, 0x12e8, 2); 572 | PUSH_DATA(push, 0); 573 | PUSH_DATA(push, 0); 574 | BEGIN_NV04(push, 3, 0x1308, 1); 575 | PUSH_DATA(push, 1); 576 | BEGIN_NV04(push, 3, 0x133c, 1); 577 | PUSH_DATA(push, 1); 578 | BEGIN_NV04(push, 3, 0x13bc, 1); 579 | PUSH_DATA(push, 0x44); 580 | /* 581 | BEGIN_NV04(push, 3, 0x1528, 1); 582 | PUSH_DATA(push, 0); 583 | */ 584 | BEGIN_NV04(push, 3, 0x1534, 1); 585 | PUSH_DATA(push, 0); 586 | BEGIN_NV04(push, 3, 0x155c, 3); 587 | PUSH_DATAh(push, d3_tsc_tic->offset + 0x1000); 588 | PUSH_DATA (push, d3_tsc_tic->offset + 0x1000); 589 | PUSH_DATA (push, 0x80); 590 | BEGIN_NV04(push, 3, 0x1574, 3); 591 | PUSH_DATAh(push, d3_tsc_tic->offset); 592 | PUSH_DATA (push, d3_tsc_tic->offset); 593 | PUSH_DATA (push, 0x80); 594 | BEGIN_NV04(push, 3, 0x15b4, 2); 595 | PUSH_DATA(push, 0); 596 | PUSH_DATA(push, 0); 597 | BEGIN_NV04(push, 3, 0x168c, 1); 598 | PUSH_DATA(push, 0); 599 | BEGIN_NV04(push, 3, 0x1924, 1); 600 | PUSH_DATA(push, 0); 601 | BEGIN_NV04(push, 3, 0x192c, 1); 602 | PUSH_DATA(push, 0); 603 | BEGIN_NV04(push, 3, 0x194c, 1); 604 | 
PUSH_DATA(push, 0); 605 | BEGIN_NV04(push, 3, 0x1a00, 1); 606 | PUSH_DATA(push, 0x1111); 607 | BEGIN_NV04(push, 3, 0x121c, 1); 608 | PUSH_DATA(push, 1); 609 | BEGIN_NV04(push, 3, 0x1538, 1); 610 | PUSH_DATA(push, 0); 611 | 612 | /* Clear stuff on mbring/vpring */ 613 | clear_3d(push, mbring->offset + 0xaa000, 614 | 64, 4760, 4, 0, 0); 615 | clear_3d(push, vpring->offset + 0x4f6100, 616 | 1024, 1, 4, 0, 0); 617 | clear_3d(push, vpring->offset + 0x9ed200, 618 | 1024, 1, 4, 0, 0); 619 | 620 | /* Write semaphore */ 621 | BEGIN_NV04(push, 3, 0x1b00, 4); 622 | PUSH_DATAh(push, bsp_sem->offset); 623 | PUSH_DATA (push, bsp_sem->offset); 624 | PUSH_DATA (push, 0); 625 | PUSH_DATA (push, 0xf010); /* write + ? */ 626 | 627 | /* Load BSP firmware/scratch buf */ 628 | load_bsp_fw(bsp_fw); 629 | BEGIN_NV04(push, 1, 0x600, 3); 630 | PUSH_DATAh(push, bsp_fw->offset); 631 | PUSH_DATA (push, bsp_fw->offset); 632 | PUSH_DATA (push, bsp_fw->size); 633 | 634 | BEGIN_NV04(push, 1, 0x628, 2); 635 | PUSH_DATA (push, bsp_scratch->offset >> 8); 636 | PUSH_DATA (push, bsp_scratch->size); 637 | PUSH_KICK (push); 638 | 639 | /* Load VP firmware/scratch buf */ 640 | 641 | load_vp_fw(vp_fw); 642 | BEGIN_NV04(push, 2, 0x600, 3); 643 | PUSH_DATAh(push, vp_fw->offset); 644 | PUSH_DATA (push, vp_fw->offset); 645 | PUSH_DATA (push, vp_fw->size); 646 | 647 | BEGIN_NV04(push, 2, 0x628, 2); 648 | PUSH_DATA (push, vp_scratch->offset >> 8); 649 | PUSH_DATA (push, vp_scratch->size); 650 | PUSH_KICK (push); 651 | 652 | load_bitstream(bitstream); 653 | init_vp_params(vp_params, frames); 654 | 655 | /* Clear frames */ 656 | /* 657 | clear_3d(push, frames[1]->offset, 658 | 320, 544, 1, 0x20, 0); 659 | clear_3d(push, frames[1]->offset + 0xaa000, 660 | 320, 272, 1, 0x20, 0x3f000000); 661 | clear_3d(push, frames[0]->offset, 662 | 320, 272, 1, 0x20, 0); 663 | clear_3d(push, frames[0]->offset + 0x55000, 664 | 320, 272, 1, 0x20, 0); 665 | clear_3d(push, frames[0]->offset + 0xaa000, 666 | 320, 136, 1, 0x20, 
0x3f000000); 667 | clear_3d(push, frames[0]->offset + 0xaa000 + 0x2d000, // 1280 * 0x90 668 | 320, 136, 1, 0x20, 0x3f000000); 669 | */ 670 | 671 | memset(frames[0]->map, 0xff, frames[0]->size); 672 | memset(frames[1]->map, 0xff, frames[1]->size); 673 | 674 | /* Wait for the mbring/vpring clearing */ 675 | BEGIN_NV04(push, 1, 0x10, 4); 676 | PUSH_DATAh(push, bsp_sem->offset); 677 | PUSH_DATA (push, bsp_sem->offset); 678 | PUSH_DATA (push, 0); 679 | PUSH_DATA (push, 1); /* wait for sem == 0 */ 680 | PUSH_KICK (push); 681 | 682 | /* Kick off the BSP */ 683 | BEGIN_NV04(push, 1, 0x400, 20); 684 | PUSH_DATA (push, bitstream->offset >> 8); 685 | PUSH_DATA (push, (bitstream->offset >> 8) + 7); 686 | PUSH_DATA (push, 0xFF800); /* length? seems high. perhaps max buffer? */ 687 | PUSH_DATA (push, (bitstream->offset >> 8) + 6); 688 | PUSH_DATA (push, 1); 689 | PUSH_DATA (push, mbring->offset >> 8); 690 | PUSH_DATA (push, 0xaa000); /* width * height? */ 691 | PUSH_DATA (push, (mbring->offset >> 8) + 0xaa0); 692 | PUSH_DATA (push, vpring->offset >> 8); 693 | PUSH_DATA (push, 0x4f7100); /* half the vpring size? 
*/ 694 | PUSH_DATA (push, 0x3fe000); 695 | PUSH_DATA (push, 0xd8300); 696 | PUSH_DATA (push, 0x0); 697 | PUSH_DATA (push, 0x3fe000); 698 | PUSH_DATA (push, 0x4d6300); 699 | PUSH_DATA (push, 0x1fe00); 700 | PUSH_DATA (push, (vpring->offset >> 8) + 0x4f61); /* 0x4d63 + 0x1fe */ 701 | PUSH_DATA (push, 0x654321); 702 | PUSH_DATA (push, 0); 703 | PUSH_DATA (push, 0x100008); 704 | 705 | BEGIN_NV04(push, 1, 0x620, 2); 706 | PUSH_DATA (push, 0); 707 | PUSH_DATA (push, 0); 708 | 709 | BEGIN_NV04(push, 1, 0x300, 1); 710 | PUSH_DATA (push, 0); 711 | 712 | /* Set the semaphore */ 713 | BEGIN_NV04(push, 1, 0x610, 3); 714 | PUSH_DATAh(push, bsp_sem->offset); 715 | PUSH_DATA (push, bsp_sem->offset); 716 | PUSH_DATA (push, 1); 717 | 718 | /* Write 1 to the semaphore location */ 719 | BEGIN_NV04(push, 1, 0x304, 1); 720 | PUSH_DATA (push, 0x101); 721 | PUSH_KICK (push); 722 | 723 | /* Wait for the semaphore to get written */ 724 | BEGIN_NV04(push, 2, 0x10, 4); 725 | PUSH_DATAh(push, bsp_sem->offset); 726 | PUSH_DATA (push, bsp_sem->offset); 727 | PUSH_DATA (push, 1); 728 | PUSH_DATA (push, 1); /* wait for sem == 1 */ 729 | PUSH_KICK (push); 730 | 731 | /* VP step 1 */ 732 | BEGIN_NV04(push, 2, 0x400, 15); 733 | PUSH_DATA (push, 1); 734 | PUSH_DATA (push, 0xaa0); /* related to aa000 above? */ 735 | PUSH_DATA (push, 0x3987654); 736 | PUSH_DATA (push, 0x55001); 737 | PUSH_DATA (push, vp_params->offset >> 8); 738 | PUSH_DATA (push, (vpring->offset >> 8) + 0x3fe0); 739 | PUSH_DATA (push, 0xd8300); 740 | PUSH_DATA (push, vpring->offset >> 8); 741 | PUSH_DATA (push, 0xff800); /* related to ff800 above? 
*/ 742 | PUSH_DATA (push, (mbring->offset >> 8) + 0x1d38); 743 | PUSH_DATA (push, (vpring->offset >> 8) + 0x4f61); 744 | PUSH_DATA (push, 0); 745 | PUSH_DATA (push, 0x100008); 746 | PUSH_DATA (push, frames[0]->offset >> 8); 747 | PUSH_DATA (push, 0); 748 | 749 | BEGIN_NV04(push, 2, 0x620, 2); 750 | PUSH_DATA (push, 0); 751 | PUSH_DATA (push, 0); 752 | 753 | BEGIN_NV04(push, 2, 0x300, 1); 754 | PUSH_DATA (push, 0); 755 | PUSH_KICK (push); 756 | 757 | /* VP step 2 */ 758 | BEGIN_NV04(push, 2, 0x400, 5); 759 | PUSH_DATA (push, 0x54530201); 760 | PUSH_DATA (push, (vp_params->offset >> 8) + 0x4); 761 | PUSH_DATA (push, (vpring->offset >> 8) + 0x4d63); 762 | PUSH_DATA (push, frames[0]->offset >> 8); 763 | PUSH_DATA (push, frames[0]->offset >> 8); 764 | BEGIN_NV04(push, 2, 0x414, 1); 765 | PUSH_DATA (push, frames[1]->offset >> 8); 766 | 767 | BEGIN_NV04(push, 2, 0x620, 2); 768 | PUSH_DATA (push, 0); 769 | PUSH_DATA (push, 0x1f400); /* offset for second firmware */ 770 | 771 | BEGIN_NV04(push, 2, 0x300, 1); 772 | PUSH_DATA (push, 0); 773 | PUSH_KICK (push); 774 | 775 | /* Set the semaphore */ 776 | BEGIN_NV04(push, 2, 0x610, 3); 777 | PUSH_DATAh(push, vp_sem->offset); 778 | PUSH_DATA (push, vp_sem->offset); 779 | PUSH_DATA (push, 3); 780 | 781 | /* Write to the semaphore location, intr */ 782 | BEGIN_NV04(push, 2, 0x304, 1); 783 | PUSH_DATA (push, 0x101); 784 | PUSH_KICK (push); 785 | 786 | /* Set the semaphore */ 787 | BEGIN_NV04(push, 2, 0x610, 3); 788 | PUSH_DATAh(push, vp_sem->offset); 789 | PUSH_DATA (push, vp_sem->offset); 790 | PUSH_DATA (push, 3); 791 | 792 | /* Write to the semaphore location */ 793 | BEGIN_NV04(push, 2, 0x304, 1); 794 | PUSH_DATA (push, 1); 795 | PUSH_KICK (push); 796 | 797 | /* Wait for the semaphore to get written */ 798 | BEGIN_NV04(push, 4, 0x10, 4); 799 | PUSH_DATAh(push, vp_sem->offset); 800 | PUSH_DATA (push, vp_sem->offset); 801 | PUSH_DATA (push, 3); 802 | PUSH_DATA (push, 1); /* wait for sem == 3 */ 803 | PUSH_KICK (push); 804 | 805 | 
copy_buffer(push, frames[0], output); 806 | 807 | fprintf(stderr, "%x\n", *(uint32_t *)vp_sem->map); 808 | 809 | sleep(1); 810 | 811 | fprintf(stderr, "%x\n", *(uint32_t *)vp_sem->map); 812 | 813 | write(1, output->map, 0xaa000); 814 | for (i = 0; i < 0x55000; i += 2) { 815 | write(1, output->map + 0xaa000 + i, 1); 816 | } 817 | for (i = 0; i < 0x55000; i += 2) { 818 | write(1, output->map + 0xaa000 + i + 1, 1); 819 | } 820 | 821 | return 0; 822 | } 823 | -------------------------------------------------------------------------------- /extract_firmware.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Copyright 2013 Ilia Mirkin. 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the "Software"), 7 | # to deal in the Software without restriction, including without limitation 8 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | # and/or sell copies of the Software, and to permit persons to whom the 10 | # Software is furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 | # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 | # OTHER DEALINGS IN THE SOFTWARE. 
22 | 23 | import itertools 24 | import mmap 25 | import os 26 | import re 27 | import struct 28 | import sys 29 | import tempfile 30 | import urllib 31 | import zlib 32 | 33 | # The firmware changes fairly rarely. From a limited sample, when the 34 | # firmware does change, the starts of the firmware remain the 35 | # same. When changing the version though, one should double-check the 36 | # sizes, which can be different. 37 | # 38 | # This is the list of tested versions that produce the same binaries 39 | VERSIONS = ( 40 | "319.17", 41 | "319.23", 42 | "319.32", 43 | "325.08", 44 | "325.15", 45 | "340.32", 46 | ) 47 | 48 | ARCHES = ("x86_64", "x86") 49 | 50 | def product(a, b): 51 | for x in a: 52 | for y in b: 53 | yield (x, y) 54 | 55 | cwd = os.getcwd() 56 | for (VERSION, ARCH) in product(VERSIONS, ARCHES): 57 | if os.path.exists("NVIDIA-Linux-%s-%s" % (ARCH, VERSION)): 58 | break 59 | else: 60 | print """Please run this in a directory where NVIDIA-Linux-x86-%(version)s is a subdir. 61 | 62 | You can make this happen by running 63 | wget http://us.download.nvidia.com/XFree86/Linux-x86/%(version)s/NVIDIA-Linux-x86-%(version)s.run 64 | sh NVIDIA-Linux-x86-%(version)s.run --extract-only 65 | 66 | Note: You can use other versions/arches, see the source for what is acceptable. 
67 | """ % {"version": VERSIONS[-1]} 68 | sys.exit(1) 69 | 70 | kernel_f = open("NVIDIA-Linux-%s-%s/kernel/nv-kernel.o" % (ARCH, VERSION), "r") 71 | kernel = mmap.mmap(kernel_f.fileno(), 0, access=mmap.ACCESS_READ) 72 | 73 | user_f = open("NVIDIA-Linux-%s-%s/libnvcuvid.so.%s" % (ARCH, VERSION, VERSION), "r") 74 | user = mmap.mmap(user_f.fileno(), 0, access=mmap.ACCESS_READ) 75 | 76 | vp2_kernel_prefix = "\xcd\xab\x55\xee\x44" 77 | vp2_user_prefix = "\xce\xab\x55\xee\x20\x00\x00\xd0\x00\x00\x00\xd0" 78 | vp4_kernel_prefix = "\xf1\x97\x00\x42\xcf\x99" 79 | vp3_user_prefix = "\x64\x00\xf0\x20\x64\x00\xf1\x20\x64\x00\xf2\x20" 80 | vp3_vc1_prefix = "\x43\x00\x00\x34" * 2 81 | 82 | # List of chip revisions since the fuc loader expects nvXX_fucXXX files 83 | VP2_CHIPS = ["nv84"] # there are more, but no need for more symlinks 84 | VP3_CHIPS = ["nv98", "nvaa", "nvac"] 85 | VP4_0_CHIPS = ["nva3", "nva5", "nva8", "nvaf"] # nvaf is 4.1, but same fw 86 | VP4_2_CHIPS = ["nvc0", "nvc1", "nvc3", "nvc4", "nvc8", "nvce", "nvcf"] 87 | VP5_CHIPS = ["nvd7", "nvd9", "nve4", "nve6", "nve7", "nvf0", "nvf1", "nv106", "nv108"] 88 | 89 | def links(chips, tail): 90 | return list("%s_%s" % (chip, tail) for chip in chips) 91 | 92 | def vp3_offset(): 93 | # Note: 340 uses higher offset, 325 uses lower. Guessing 330 as the cutoff. 94 | if float(VERSION) < 330: 95 | return 2287 96 | return 2286 97 | 98 | def vp5_offset(): 99 | # Note: 340 uses higher offset, 325 uses lower. Guessing 330 as the cutoff. 
100 | if float(VERSION) < 330: 101 | return 0xb3 102 | return 0xb7 103 | 104 | BLOBS = { 105 | # VP2 kernel xuc 106 | "nv84_bsp": { 107 | "data": kernel, 108 | "start": vp2_kernel_prefix + "\x46", 109 | "length": 0x16f3c, 110 | "links": links(VP2_CHIPS, "xuc103"), 111 | }, 112 | "nv84_vp": { 113 | "data": kernel, 114 | "start": vp2_kernel_prefix + "\x7c", 115 | "length": 0x1ae6c, 116 | "links": links(VP2_CHIPS, "xuc00f"), 117 | }, 118 | 119 | # VP3 kernel fuc 120 | "nv98_bsp": { 121 | "data": kernel, 122 | "start": "\xf1\x07\x00\x10\xf1\x03\x00\x00", 123 | "length": 0xac00, 124 | "pred": lambda data, i: data[i+vp3_offset()] == '\x8e', 125 | "links": links(VP3_CHIPS, "fuc084"), 126 | }, 127 | "nv98_vp": { 128 | "data": kernel, 129 | "start": "\xf1\x07\x00\x10\xf1\x03\x00\x00", 130 | "length": 0xa500, 131 | "pred": lambda data, i: data[i+vp3_offset()] == '\x95', 132 | "links": links(VP3_CHIPS, "fuc085"), 133 | }, 134 | "nv98_ppp": { 135 | "data": kernel, 136 | "start": "\xf1\x07\x00\x08\xf1\x03\x00\x00", 137 | "length": 0x3800, 138 | "pred": lambda data, i: data[i+vp3_offset()] == '\x30', 139 | "links": links(VP3_CHIPS, "fuc086"), 140 | }, 141 | 142 | # VP4.0 kernel fuc 143 | "nva3_bsp": { 144 | "data": kernel, 145 | "start": vp4_kernel_prefix, 146 | "length": 0x10200, 147 | "pred": lambda data, i: data[i+8*11+1] == '\xcf', 148 | "links": links(VP4_0_CHIPS, "fuc084"), 149 | }, 150 | "nva3_vp": { 151 | "data": kernel, 152 | "start": vp4_kernel_prefix, 153 | "length": 0xc600, 154 | "pred": lambda data, i: data[i+8*11+1] == '\x9e', 155 | "links": links(VP4_0_CHIPS, "fuc085"), 156 | }, 157 | "nva3_ppp": { 158 | "data": kernel, 159 | "start": vp4_kernel_prefix, 160 | "length": 0x3f00, 161 | "pred": lambda data, i: data[i+8*11+1] == '\x36', 162 | "links": links(VP4_0_CHIPS, "fuc086"), 163 | }, 164 | 165 | # VP4.2 kernel fuc 166 | "nvc0_bsp": { 167 | "data": kernel, 168 | "start": vp4_kernel_prefix, 169 | "length": 0x10d00, 170 | "pred": lambda data, i: data[i+0x59] == 
'\xd8', 171 | "links": links(VP4_2_CHIPS, "fuc084"), 172 | }, 173 | "nvc0_vp": { 174 | "data": kernel, 175 | "start": vp4_kernel_prefix, 176 | "length": 0xd300, 177 | "pred": lambda data, i: data[i+0x59] == '\xa5', 178 | "links": links(VP4_2_CHIPS, "fuc085"), 179 | }, 180 | "nvc0_ppp": { 181 | "data": kernel, 182 | "start": vp4_kernel_prefix, 183 | "length": 0x4100, 184 | "pred": lambda data, i: data[i+0x59] == '\x38', 185 | "links": links(VP4_2_CHIPS, "fuc086") + links(VP5_CHIPS, "fuc086"), 186 | }, 187 | 188 | # VP5 kernel fuc 189 | "nve0_bsp": { 190 | "data": kernel, 191 | "start": vp4_kernel_prefix, 192 | "length": 0x11c00, 193 | "pred": lambda data, i: data[i+vp5_offset()] == '\x27', 194 | "links": links(VP5_CHIPS, "fuc084"), 195 | }, 196 | "nve0_vp": { 197 | "data": kernel, 198 | "start": vp4_kernel_prefix, 199 | "length": 0xdd00, 200 | "pred": lambda data, i: data[i+vp5_offset()] == '\x0a', 201 | "links": links(VP5_CHIPS, "fuc085"), 202 | }, 203 | 204 | # VP2 user xuc 205 | "nv84_bsp-h264": { 206 | "data": user, 207 | "start": vp2_user_prefix + "\x88", 208 | "length": 0xd9d0, 209 | }, 210 | "nv84_vp-h264-1": { 211 | "data": user, 212 | "start": vp2_user_prefix + "\x3c", 213 | "length": 0x1f334, 214 | }, 215 | "nv84_vp-h264-2": { 216 | "data": user, 217 | "start": vp2_user_prefix + "\x04", 218 | "length": 0x1bffc, 219 | }, 220 | "nv84_vp-mpeg12": { 221 | "data": user, 222 | "start": vp2_user_prefix + "\x4c", 223 | "length": 0x22084, 224 | }, 225 | "nv84_vp-vc1-1": { 226 | "data": user, 227 | "start": vp2_user_prefix + "\x7c", 228 | "length": 0x2cd24, 229 | }, 230 | "nv84_vp-vc1-2": { 231 | "data": user, 232 | "start": vp2_user_prefix + "\xa4", 233 | "length": 0x1535c, 234 | }, 235 | "nv84_vp-vc1-3": { 236 | "data": user, 237 | "start": vp2_user_prefix + "\x34", 238 | "length": 0x133bc, 239 | }, 240 | 241 | # VP3 user vuc 242 | "vuc-vp3-mpeg12-0": { 243 | "data": user, 244 | "start": vp3_user_prefix, 245 | "length": 0xb00, 246 | "pred": lambda data, i: data[i 
+ 11 * 8] == '\x4a' and data[i + 228] == '\x43', 247 | }, 248 | "vuc-vp3-h264-0": { 249 | "data": user, 250 | "start": vp3_user_prefix, 251 | "length": 0x1600, 252 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\xff' and data[i + 225] == '\x81', 253 | }, 254 | "vuc-vp3-vc1-0": { 255 | "data": user, 256 | "start": vp3_vc1_prefix + vp3_user_prefix, 257 | "length": 0x1d00, 258 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\xf4', 259 | }, 260 | "vuc-vp3-vc1-1": { 261 | "data": user, 262 | "start": vp3_vc1_prefix + vp3_user_prefix, 263 | "length": 0x2100, 264 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\x34', 265 | }, 266 | "vuc-vp3-vc1-2": { 267 | "data": user, 268 | "start": vp3_vc1_prefix + vp3_user_prefix, 269 | "length": 0x2300, 270 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\x98', 271 | }, 272 | 273 | # VP4.x user vuc 274 | "vuc-vp4-mpeg12-0": { 275 | "data": user, 276 | "start": vp3_user_prefix, 277 | "length": 0xc00, 278 | "pred": lambda data, i: data[i + 11 * 8] == '\x4a' and data[i + 228] == '\x44', 279 | "links": ["vuc-mpeg12-0"], 280 | }, 281 | "vuc-vp4-h264-0": { 282 | "data": user, 283 | "start": vp3_user_prefix, 284 | "length": 0x1900, 285 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\xff' and data[i + 225] == '\x8c', 286 | "links": ["vuc-h264-0"], 287 | }, 288 | "vuc-vp4-mpeg4-0": { 289 | "data": user, 290 | "start": vp3_user_prefix, 291 | "length": 0x1d00, 292 | "pred": lambda data, i: data[i + 61] == '\x30' and data[i + 6923] == '\x00', 293 | "links": ["vuc-mpeg4-0"], 294 | }, 295 | "vuc-vp4-mpeg4-1": { 296 | "data": user, 297 | "start": vp3_user_prefix, 298 | "length": 0x1d00, 299 | "pred": lambda data, i: data[i + 61] == '\x30' and data[i + 6923] == '\x20', 300 | "links": ["vuc-mpeg4-1"], 301 | }, 302 | "vuc-vp4-vc1-0": { 303 | "data": user, 304 | "start": vp3_vc1_prefix + vp3_user_prefix, 305 | "length": 0x1d00, 306 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\xb4', 307 | "links": ["vuc-vc1-0"], 308 | }, 309 | 
"vuc-vp4-vc1-1": { 310 | "data": user, 311 | "start": vp3_vc1_prefix + vp3_user_prefix, 312 | "length": 0x2100, 313 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\x08', 314 | "links": ["vuc-vc1-1"], 315 | }, 316 | "vuc-vp4-vc1-2": { 317 | "data": user, 318 | "start": vp3_vc1_prefix + vp3_user_prefix, 319 | "length": 0x2100, 320 | "pred": lambda data, i: data[i + 11 * 8 + 1] == '\x6c', 321 | "links": ["vuc-vc1-2"], 322 | }, 323 | } 324 | 325 | # Build a regex on the start data to speed things along. 326 | start_re = "|".join(set(re.escape(v["start"]) for v in BLOBS.itervalues())) 327 | files = set(v["data"] for v in BLOBS.itervalues()) 328 | 329 | done = set() 330 | 331 | for data in files: 332 | for match in re.finditer(start_re, data): 333 | for name, v in BLOBS.iteritems(): 334 | if name in done or data != v["data"] or match.group(0) != v["start"]: 335 | continue 336 | 337 | i = match.start(0) 338 | pred = v.get("pred") 339 | if pred and not pred(data, i): 340 | continue 341 | 342 | length = v["length"] 343 | links = v.get("links", []) 344 | 345 | with open(os.path.join(cwd, name), "w") as f: 346 | f.write(data[i:i+length]) 347 | 348 | done.add(name) 349 | for link in links: 350 | try: 351 | os.unlink(link) 352 | except: 353 | pass 354 | os.symlink(name, link) 355 | 356 | for name in set(BLOBS) - done: 357 | print "Firmware %s not found, ignoring." 
% name 358 | 359 | ARCHIVE_FILES = { 360 | 0: "fuc409d", 361 | 1: "fuc409c", 362 | 2: "fuc41ad", 363 | 3: "fuc41ac", 364 | } 365 | 366 | ARCHIVE_ORDERS = { 367 | "325.15": ["nvc0", "nvc8", "nvc3", "nvc4", "nvce", "nvcf", "nvc1", 368 | "nvd7", "nvd9", "nve4", "nve7", "nve6", "nvf0", "nvf1", 369 | "nv108"], 370 | } 371 | 372 | # Extract the gzipped archives found inside the kernel driver 373 | def decompress(prefix, start, s): 374 | try: 375 | decomp = zlib.decompressobj(-zlib.MAX_WBITS) 376 | data = decomp.decompress(s[10:]) 377 | except Exception, e: 378 | print prefix, repr(s[:16]), len(s) 379 | print e 380 | return False 381 | magic, count = struct.unpack(" 3 | * All Rights Reserved. 4 | * 5 | * Permission is hereby granted, free of charge, to any person obtaining a 6 | * copy of this software and associated documentation files (the "Software"), 7 | * to deal in the Software without restriction, including without limitation 8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | * and/or sell copies of the Software, and to permit persons to whom the 10 | * Software is furnished to do so, subject to the following conditions: 11 | * 12 | * The above copyright notice and this permission notice (including the next 13 | * paragraph) shall be included in all copies or substantial portions of the 14 | * Software. 15 | * 16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | * OTHER DEALINGS IN THE SOFTWARE. 
23 | */ 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | #include 38 | #include 39 | #include 40 | 41 | VdpGetProcAddress *vdp_get_proc_address; 42 | 43 | VdpDecoderCreate *vdp_decoder_create; 44 | VdpDecoderDestroy *vdp_decoder_destroy; 45 | VdpDecoderRender *vdp_decoder_render; 46 | 47 | VdpVideoSurfaceCreate *vdp_video_surface_create; 48 | VdpVideoSurfaceDestroy *vdp_video_surface_destroy; 49 | VdpVideoSurfaceGetBitsYCbCr *vdp_video_surface_get_bits_ycbcr; 50 | 51 | VdpOutputSurfaceCreate *vdp_output_surface_create; 52 | VdpOutputSurfaceDestroy *vdp_output_surface_destroy; 53 | VdpOutputSurfaceGetBitsNative *vdp_output_surface_get_bits_native; 54 | 55 | VdpVideoMixerCreate *vdp_video_mixer_create; 56 | VdpVideoMixerDestroy *vdp_video_mixer_destroy; 57 | VdpVideoMixerRender *vdp_video_mixer_render; 58 | 59 | VdpPresentationQueueCreate *vdp_presentation_queue_create; 60 | VdpPresentationQueueDestroy *vdp_presentation_queue_destroy; 61 | VdpPresentationQueueDisplay *vdp_presentation_queue_display; 62 | VdpPresentationQueueBlockUntilSurfaceIdle *vdp_presentation_queue_block_until_surface_idle; 63 | VdpPresentationQueueGetTime *vdp_presentation_queue_get_time; 64 | VdpPresentationQueueTargetCreateX11 *vdp_presentation_queue_target_create_x11; 65 | 66 | int read_bit(void *addr, int *bit_offset) { 67 | int offt = *bit_offset; 68 | addr += offt / 8; 69 | offt %= 8; 70 | *bit_offset = *bit_offset + 1; 71 | return ((*(char *)addr) >> (7 - offt)) & 1; 72 | } 73 | 74 | uint64_t read_bits(void *addr, int *bit_offset, int n) { 75 | int i; 76 | uint64_t ret = 0; 77 | for (i = 0; i < n; i++) { 78 | ret <<= 1; 79 | ret |= read_bit(addr, bit_offset); 80 | } 81 | return ret; 82 | } 83 | 84 | uint64_t ue(void *addr, int *bit_offset) { 85 | int leadingZeroBits = -1; 86 | int b; 87 | for (b = 0; !b; leadingZeroBits++) { 88 | b = read_bit(addr, bit_offset); 89 | } 90 
| int ret = (1 << leadingZeroBits) - 1 + read_bits(addr, bit_offset, leadingZeroBits); 91 | return ret; 92 | } 93 | 94 | int64_t se(void *addr, int *bit_offset) { 95 | int codeNum = ue(addr, bit_offset); 96 | return ((codeNum % 2 == 1) ? 1 : -1) * (codeNum / 2 + codeNum % 2); 97 | } 98 | 99 | void mark(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); 100 | void mark(const char *fmt, ...) { 101 | va_list ap; 102 | char buf[100] = {0}; 103 | va_start(ap, fmt); 104 | int len = vsnprintf(buf, 99, fmt, ap); 105 | va_end(ap); 106 | 107 | int fd = open("/sys/kernel/debug/tracing/trace_marker", O_WRONLY); 108 | if (fd == -1) return; 109 | 110 | assert(len == write(fd, buf, len)); 111 | close(fd); 112 | } 113 | 114 | int main(int argc, char **argv) { 115 | int width = 1280, height = 544; 116 | Display *display = XOpenDisplay(NULL); 117 | 118 | Window root = XDefaultRootWindow(display); 119 | Window window = XCreateSimpleWindow( 120 | display, root, 0, 0, 1280, 544, 0, 0, 0); 121 | XSelectInput(display, window, ExposureMask | KeyPressMask); 122 | XMapWindow(display, window); 123 | XSync(display, 0); 124 | 125 | VdpDevice dev; 126 | 127 | mark("vdp_device_create_x11\n"); 128 | int ret = vdp_device_create_x11(display, 0, &dev, &vdp_get_proc_address); 129 | assert(ret == VDP_STATUS_OK); 130 | 131 | #define get(id, func) \ 132 | ret = vdp_get_proc_address(dev, id, (void **)&func); \ 133 | assert(ret == VDP_STATUS_OK); 134 | 135 | get(VDP_FUNC_ID_DECODER_CREATE, vdp_decoder_create); 136 | get(VDP_FUNC_ID_DECODER_DESTROY, vdp_decoder_destroy); 137 | get(VDP_FUNC_ID_DECODER_RENDER, vdp_decoder_render); 138 | 139 | get(VDP_FUNC_ID_VIDEO_MIXER_CREATE, vdp_video_mixer_create); 140 | get(VDP_FUNC_ID_VIDEO_MIXER_DESTROY, vdp_video_mixer_destroy); 141 | get(VDP_FUNC_ID_VIDEO_MIXER_RENDER, vdp_video_mixer_render); 142 | 143 | get(VDP_FUNC_ID_VIDEO_SURFACE_CREATE, vdp_video_surface_create); 144 | get(VDP_FUNC_ID_VIDEO_SURFACE_DESTROY, vdp_video_surface_destroy); 145 | 
get(VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR, vdp_video_surface_get_bits_ycbcr); 146 | 147 | get(VDP_FUNC_ID_OUTPUT_SURFACE_CREATE, vdp_output_surface_create); 148 | get(VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY, vdp_output_surface_destroy); 149 | get(VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE, vdp_output_surface_get_bits_native); 150 | 151 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE, vdp_presentation_queue_create); 152 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY, vdp_presentation_queue_destroy); 153 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY, vdp_presentation_queue_display); 154 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11, vdp_presentation_queue_target_create_x11); 155 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE, vdp_presentation_queue_block_until_surface_idle); 156 | get(VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME, vdp_presentation_queue_get_time); 157 | 158 | #undef get 159 | 160 | VdpDecoder dec; 161 | VdpVideoSurface video[16]; 162 | VdpOutputSurface output; 163 | VdpPresentationQueue queue; 164 | VdpPresentationQueueTarget target; 165 | VdpVideoMixer mixer; 166 | 167 | VdpVideoMixerFeature mixer_features[] = { 168 | }; 169 | VdpVideoMixerParameter mixer_params[] = { 170 | VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH, 171 | VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT, 172 | VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE 173 | }; 174 | int zero = 0; 175 | const void *mixer_param_vals[] = { 176 | &width, 177 | &height, 178 | &zero 179 | }; 180 | 181 | mark("vdp_decoder_create\n"); 182 | ret = vdp_decoder_create(dev, VDP_DECODER_PROFILE_H264_MAIN, 1280, 544, 6, &dec); 183 | assert(ret == VDP_STATUS_OK); 184 | 185 | int i; 186 | for (i = 0; i < 16; i++) { 187 | mark("vdp_video_surface_create: %d\n", i); 188 | ret = vdp_video_surface_create(dev, VDP_CHROMA_TYPE_420, 1280, 544, &video[i]); 189 | assert(ret == VDP_STATUS_OK); 190 | mark(" <-- %d\n", video[i]); 191 | } 192 | 193 | 194 | mark("vdp_output_surface_create\n"); 195 | ret = 
vdp_output_surface_create(dev, VDP_RGBA_FORMAT_B8G8R8A8, 1280, 544, &output); 196 | assert(ret == VDP_STATUS_OK); 197 | 198 | mark("vdp_presentation_queue_target_create_x11\n"); 199 | ret = vdp_presentation_queue_target_create_x11(dev, window, &target); 200 | assert(ret == VDP_STATUS_OK); 201 | 202 | mark("vdp_presentation_queue_create\n"); 203 | ret = vdp_presentation_queue_create(dev, target, &queue); 204 | assert(ret == VDP_STATUS_OK); 205 | 206 | mark("vdp_video_mixer_create\n"); 207 | ret = vdp_video_mixer_create(dev, sizeof(mixer_features)/sizeof(mixer_features[0]), mixer_features, sizeof(mixer_params)/sizeof(mixer_params[0]), mixer_params, mixer_param_vals, &mixer); 208 | assert(ret == VDP_STATUS_OK); 209 | 210 | 211 | assert(argc > 1); 212 | int fd = open(argv[1], O_RDONLY); 213 | struct stat statbuf; 214 | assert(fstat(fd, &statbuf) == 0); 215 | void *addr = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fd, 0); 216 | void *orig_addr = addr; 217 | 218 | mark("mmap file addr: 0x%p size: 0x%lx\n", addr, statbuf.st_size); 219 | 220 | //printf("mmap'd file of size: %ld\n", statbuf.st_size); 221 | 222 | VdpPictureInfoH264 info = { 223 | .slice_count = 1, 224 | .field_order_cnt = { 65536, 65536 }, 225 | .is_reference = 1, 226 | .frame_num = -1, 227 | .field_pic_flag = 0, 228 | .bottom_field_flag = 0, 229 | .num_ref_frames = 6, 230 | .mb_adaptive_frame_field_flag = 0, 231 | .constrained_intra_pred_flag = 0, 232 | .weighted_pred_flag = 0, 233 | .weighted_bipred_idc = 0, 234 | .frame_mbs_only_flag = 1, 235 | .transform_8x8_mode_flag = 0, 236 | .chroma_qp_index_offset = 0, 237 | .second_chroma_qp_index_offset = 0, 238 | .pic_init_qp_minus26 = 0, 239 | .num_ref_idx_l0_active_minus1 = 0, 240 | .num_ref_idx_l1_active_minus1 = 0, 241 | .log2_max_frame_num_minus4 = 5, 242 | .pic_order_cnt_type = 0, 243 | .log2_max_pic_order_cnt_lsb_minus4 = 6, 244 | .delta_pic_order_always_zero_flag = 0, 245 | .direct_8x8_inference_flag = 1, 246 | .entropy_coding_mode_flag = 1, 247 | 
.pic_order_present_flag = 0, 248 | .deblocking_filter_control_present_flag = 1, 249 | .redundant_pic_cnt_present_flag = 0, 250 | }; 251 | int j; 252 | for (j = 0; j < 6; ++j) { 253 | int k; 254 | 255 | for (k = 0; k < 16; ++k) 256 | info.scaling_lists_4x4[j][k] = 16; 257 | } 258 | 259 | for (j = 0; j < 2; ++j) { 260 | int k; 261 | 262 | for (k = 0; k < 64; ++k) 263 | info.scaling_lists_8x8[j][k] = 16; 264 | } 265 | 266 | for (j = 0; j < 16; ++j) 267 | info.referenceFrames[j].surface = VDP_INVALID_HANDLE; 268 | 269 | 270 | mark("vdp_presentation_queue_get_time\n"); 271 | VdpTime t; 272 | ret = vdp_presentation_queue_get_time(queue, &t); 273 | assert(ret == VDP_STATUS_OK); 274 | 275 | fprintf(stderr, "Start time: %ld\n", t); 276 | 277 | int vframe = 0; 278 | 279 | while ((addr - orig_addr) < statbuf.st_size) { 280 | int size = ntohl(*(int *)addr); 281 | addr += 4; 282 | int nal_type = (*(char *)addr) & 0x1F; 283 | int nal_ref_idc = (*(char *)addr) >> 5; 284 | if (nal_type != 1 && nal_type != 5) { 285 | //fprintf(stderr, "Skipping NAL type %d, size: %d\n", nal_type, size); 286 | addr += size; 287 | continue; 288 | } 289 | //fprintf(stderr, "Processing NAL type %d, ref_idc: %d, size: %d\n", nal_type, nal_ref_idc, size); 290 | 291 | int bit_offset = 8; 292 | ue(addr, &bit_offset); 293 | int slice_type = ue(addr, &bit_offset); 294 | mark("nal_type: %d, ref_idc: %d, size: %d, slice_type: %d\n", nal_type, nal_ref_idc, size, slice_type); 295 | //fprintf(stderr, "Slice type: %d\n", slice_type); 296 | ue(addr, &bit_offset); 297 | info.frame_num = read_bits(addr, &bit_offset, info.log2_max_frame_num_minus4 + 4); 298 | if (nal_type == 5) { 299 | ue(addr, &bit_offset); 300 | info.frame_num = 0; 301 | for (j = 0; j < 16; ++j) 302 | info.referenceFrames[j].surface = VDP_INVALID_HANDLE; 303 | } 304 | 305 | uint32_t poc_lsb = read_bits(addr, &bit_offset, info.log2_max_pic_order_cnt_lsb_minus4 + 4); 306 | info.field_order_cnt[0] = (1 << 16) + poc_lsb; 307 | info.field_order_cnt[1] = 
(1 << 16) + poc_lsb; 308 | 309 | info.is_reference = nal_ref_idc != 0; 310 | 311 | VdpBitstreamBuffer buffer[2]; 312 | static const char header[3] = {0, 0, 1}; 313 | buffer[0].struct_version = VDP_BITSTREAM_BUFFER_VERSION; 314 | buffer[0].bitstream = header; 315 | buffer[0].bitstream_bytes = sizeof(header); 316 | buffer[1].struct_version = VDP_BITSTREAM_BUFFER_VERSION; 317 | buffer[1].bitstream = addr; 318 | buffer[1].bitstream_bytes = size; 319 | mark("vdp_decoder_render: %d\n", video[vframe]); 320 | ret = vdp_decoder_render(dec, video[vframe], (void*)&info, 2, buffer); 321 | assert(ret == VDP_STATUS_OK); 322 | 323 | mark("vdp_video_mixer_render\n"); 324 | ret = vdp_video_mixer_render( 325 | mixer, 326 | VDP_INVALID_HANDLE, NULL, 327 | VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME, 328 | 0, NULL, 329 | video[vframe], 330 | 0, NULL, 331 | NULL, 332 | output, 333 | NULL, 334 | NULL, 335 | 0, NULL); 336 | assert(ret == VDP_STATUS_OK); 337 | 338 | t += 1000000000ULL; 339 | mark("vdp_presentation_queue_display\n"); 340 | ret = vdp_presentation_queue_display(queue, output, 1280, 544, t); 341 | assert(ret == VDP_STATUS_OK); 342 | 343 | addr += size; 344 | 345 | /* 346 | uint32_t pitches[2] = {1280, 640 * 2}; 347 | uint8_t *data[2]; 348 | for (i = 0; i < 2; i++) { 349 | data[i] = malloc(1280 * 544 / (i ? 
2 : 1)); 350 | assert(data[i]); 351 | } 352 | ret = vdp_video_surface_get_bits_ycbcr(video[vframe], VDP_YCBCR_FORMAT_NV12, (void **)data, pitches); 353 | assert(ret == VDP_STATUS_OK); 354 | 355 | write(1, data[0], 1280 * 544); 356 | for (i = 0; i < 1280 * 544 / 2; i+=2) 357 | write(1, data[1] + i, 1); 358 | for (i = 0; i < 1280 * 544 / 2; i+=2) 359 | write(1, data[1] + i + 1, 1); 360 | */ 361 | 362 | if (info.is_reference) { 363 | for (j = 5; j > 0; --j) 364 | memcpy(&info.referenceFrames[j], &info.referenceFrames[j-1], sizeof(info.referenceFrames[0])); 365 | info.referenceFrames[0].surface = video[vframe]; 366 | memcpy(info.referenceFrames[0].field_order_cnt, info.field_order_cnt, 2 * sizeof(uint32_t)); 367 | info.referenceFrames[0].frame_idx = info.frame_num; 368 | info.referenceFrames[0].top_is_reference = 1; 369 | info.referenceFrames[0].bottom_is_reference = 1; 370 | } 371 | vframe = (vframe + 1) % 16; 372 | //if (vframe > 10) break; 373 | } 374 | 375 | return 0; 376 | } 377 | --------------------------------------------------------------------------------