├── .dir-locals.el ├── .gitignore ├── Makefile.am ├── autogen.sh ├── configure.ac ├── include ├── drm │ └── vc4_drm.h ├── list.h ├── vc4_packet.h ├── vc4_qpu_defines.h └── vc4_tools.h ├── tests ├── .gitignore ├── Makefile.am ├── lib │ ├── vc4_qpu.c │ ├── vc4_qpu.h │ ├── vc4_test.c │ └── vc4_test.h ├── shader_map.c ├── shader_missing_end.c └── shader_noop.c └── tools ├── Makefile.am ├── vc4_dump_hang_state.c ├── vc4_dump_parse.c ├── vc4_dump_parse.h ├── vc4_dump_parse_cl.c ├── vc4_dump_to_clif.c └── vc4_qpu_disasm.c /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((prog-mode 2 | (indent-tabs-mode . nil) 3 | (tab-width . 8) 4 | (c-basic-offset . 8) 5 | (c-file-style . "stroustrup") 6 | (fill-column . 78) 7 | (eval . (progn 8 | (c-set-offset 'innamespace '0) 9 | (c-set-offset 'inline-open '0))) 10 | ) 11 | (makefile-mode (indent-tabs-mode . t)) 12 | ) 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # X.Org module default exclusion patterns 3 | # The next section if for module specific patterns 4 | # 5 | # Do not edit the following section 6 | # GNU Build System (Autotools) 7 | aclocal.m4 8 | autom4te.cache/ 9 | autoscan.log 10 | ChangeLog 11 | compile 12 | config.guess 13 | config.h 14 | config.h.in 15 | config.log 16 | config-ml.in 17 | config.py 18 | config.status 19 | config.status.lineno 20 | config.sub 21 | configure 22 | configure.scan 23 | depcomp 24 | .deps/ 25 | INSTALL 26 | install-sh 27 | .libs/ 28 | libtool 29 | libtool.m4 30 | ltmain.sh 31 | lt~obsolete.m4 32 | ltoptions.m4 33 | ltsugar.m4 34 | ltversion.m4 35 | Makefile 36 | Makefile.in 37 | mdate-sh 38 | missing 39 | mkinstalldirs 40 | *.pc 41 | py-compile 42 | stamp-h? 
43 | symlink-tree 44 | texinfo.tex 45 | ylwrap 46 | *.log 47 | *.trs 48 | 49 | # Do not edit the following section 50 | # Edit Compile Debug Document Distribute 51 | *~ 52 | *.[0-9] 53 | *.[0-9]x 54 | *.bak 55 | *.bin 56 | core 57 | *.dll 58 | *.exe 59 | *-ISO*.bdf 60 | *-JIS*.bdf 61 | *-KOI8*.bdf 62 | *.kld 63 | *.ko 64 | *.ko.cmd 65 | *.lai 66 | *.l[oa] 67 | *.[oa] 68 | *.obj 69 | *.patch 70 | *.so 71 | *.pcf.gz 72 | *.pdb 73 | *.tar.bz2 74 | *.tar.gz 75 | # 76 | # Add & Override patterns for vc4-gpu-tools 77 | # 78 | # Edit the following section as needed 79 | # For example, !report.pc overrides *.pc. See 'man gitignore' 80 | # 81 | tools/vc4_dump_to_clif 82 | tools/vc4_dump_hang_state 83 | tools/vc4_dump_parse 84 | .dirstamp 85 | -------------------------------------------------------------------------------- /Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright © 2015 Broadcom 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice (including the next 11 | # paragraph) shall be included in all copies or substantial portions of the 12 | # Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | # IN THE SOFTWARE. 21 | 22 | SUBDIRS = tests tools 23 | 24 | noinst_HEADERS = \ 25 | include/list.h \ 26 | include/vc4_packet.h \ 27 | include/vc4_qpu_defines.h \ 28 | include/vc4_tools.h \ 29 | include/drm/vc4_drm.h \ 30 | $() 31 | -------------------------------------------------------------------------------- /autogen.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | srcdir=`dirname "$0"` 4 | test -z "$srcdir" && srcdir=. 5 | 6 | ORIGDIR=`pwd` 7 | cd "$srcdir" || exit $? 8 | 9 | autoreconf -v --install || exit 1 10 | cd "$ORIGDIR" || exit $? 11 | 12 | if test -z "$NOCONFIGURE"; then 13 | "$srcdir"/configure "$@" 14 | fi 15 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | # Copyright © 2015 Broadcom 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice (including the next 11 | # paragraph) shall be included in all copies or substantial portions of the 12 | # Software.
13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | # IN THE SOFTWARE. 21 | 22 | AC_PREREQ([2.60]) 23 | AC_INIT([vc4-gpu-tools], 24 | [1.0], 25 | [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], 26 | [vc4-gpu-tools]) 27 | 28 | AC_CONFIG_SRCDIR([Makefile.am]) 29 | AC_CONFIG_HEADERS([config.h]) 30 | AC_CONFIG_MACRO_DIR([m4]) 31 | AC_USE_SYSTEM_EXTENSIONS 32 | AC_SYS_LARGEFILE 33 | AC_GNU_SOURCE 34 | AC_CANONICAL_HOST 35 | 36 | AM_INIT_AUTOMAKE([foreign dist-bzip2 subdir-objects]) 37 | 38 | AC_PROG_CC 39 | 40 | # Checks for functions, headers, structures, etc. 
41 | AC_HEADER_STDC 42 | AC_CHECK_FUNCS([asprintf]) 43 | 44 | # Initialize libtool 45 | AC_DISABLE_STATIC 46 | AC_PROG_LIBTOOL 47 | 48 | # Require X.Org macros 1.16 or later for XORG_TESTSET_CFLAG 49 | m4_ifndef([XORG_MACROS_VERSION], 50 | [m4_fatal([must install xorg-macros 1.16 or later before running autoconf/autogen])]) 51 | XORG_MACROS_VERSION(1.16) 52 | XORG_DEFAULT_OPTIONS 53 | 54 | if test "x$GCC" = xyes ; then 55 | CWARNFLAGS="$CWARNFLAGS -fno-strict-aliasing -Wno-pointer-arith -Wno-declaration-after-statement" 56 | fi 57 | 58 | PKG_CHECK_MODULES(LIBDRM, [libdrm]) 59 | 60 | PKG_CHECK_MODULES([SIMPENROSE], [simpenrose], 61 | [HAVE_SIMPENROSE=yes], [HAVE_SIMPENROSE=no]) 62 | AM_CONDITIONAL(HAVE_SIMPENROSE, test x$HAVE_SIMPENROSE = xyes) 63 | 64 | AC_CONFIG_FILES([ 65 | Makefile 66 | tests/Makefile 67 | tools/Makefile 68 | ]) 69 | AC_OUTPUT 70 | -------------------------------------------------------------------------------- /include/drm/vc4_drm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014-2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef _UAPI_VC4_DRM_H_ 25 | #define _UAPI_VC4_DRM_H_ 26 | 27 | #include <drm.h> 28 | 29 | #define DRM_VC4_SUBMIT_CL 0x00 30 | #define DRM_VC4_WAIT_SEQNO 0x01 31 | #define DRM_VC4_WAIT_BO 0x02 32 | #define DRM_VC4_CREATE_BO 0x03 33 | #define DRM_VC4_MMAP_BO 0x04 34 | #define DRM_VC4_CREATE_SHADER_BO 0x05 35 | #define DRM_VC4_GET_HANG_STATE 0x06 36 | 37 | #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl) 38 | #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno) 39 | #define DRM_IOCTL_VC4_WAIT_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_BO, struct drm_vc4_wait_bo) 40 | #define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo) 41 | #define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo) 42 | #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo) 43 | #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state) 44 | 45 | struct drm_vc4_submit_rcl_surface { 46 | uint32_t hindex; /* Handle index, or ~0 if not present. */ 47 | uint32_t offset; /* Offset to start of buffer. */ 48 | /* 49 | * Bits for either render config (color_ms_write) or load/store packet. 50 | */ 51 | uint16_t bits; 52 | uint16_t pad; 53 | }; 54 | 55 | /** 56 | * struct drm_vc4_submit_cl - ioctl argument for submitting commands to the 3D 57 | * engine. 58 | * 59 | * Drivers typically use GPU BOs to store batchbuffers / command lists and 60 | * their associated state.
However, because the VC4 lacks an MMU, we have to 61 | * do validation of memory accesses by the GPU commands. If we were to store 62 | * our commands in BOs, we'd need to do uncached readback from them to do the 63 | * validation process, which is too expensive. Instead, userspace accumulates 64 | * commands and associated state in plain memory, then the kernel copies the 65 | * data to its own address space, and then validates and stores it in a GPU 66 | * BO. 67 | */ 68 | struct drm_vc4_submit_cl { 69 | /* Pointer to the binner command list. 70 | * 71 | * This is the first set of commands executed, which runs the 72 | * coordinate shader to determine where primitives land on the screen, 73 | * then writes out the state updates and draw calls necessary per tile 74 | * to the tile allocation BO. 75 | */ 76 | uint64_t bin_cl; 77 | 78 | /* Pointer to the shader records. 79 | * 80 | * Shader records are the structures read by the hardware that contain 81 | * pointers to uniforms, shaders, and vertex attributes. The 82 | * reference to the shader record has enough information to determine 83 | * how many pointers are necessary (fixed number for shaders/uniforms, 84 | * and an attribute count), so those BO indices into bo_handles are 85 | * just stored as uint32_ts before each shader record passed in. 86 | */ 87 | uint64_t shader_rec; 88 | 89 | /* Pointer to uniform data and texture handles for the textures 90 | * referenced by the shader. 91 | * 92 | * For each shader state record, there is a set of uniform data in the 93 | * order referenced by the record (FS, VS, then CS). Each set of 94 | * uniform data has a uint32_t index into bo_handles per texture 95 | * sample operation, in the order the QPU_W_TMUn_S writes appear in 96 | * the program. Following the texture BO handle indices is the actual 97 | * uniform data. 
98 | * 99 | * The individual uniform state blocks don't have sizes passed in, 100 | * because the kernel has to determine the sizes anyway during shader 101 | * code validation. 102 | */ 103 | uint64_t uniforms; 104 | uint64_t bo_handles; 105 | 106 | /* Size in bytes of the binner command list. */ 107 | uint32_t bin_cl_size; 108 | /* Size in bytes of the set of shader records. */ 109 | uint32_t shader_rec_size; 110 | /* Number of shader records. 111 | * 112 | * This could just be computed from the contents of shader_records and 113 | * the address bits of references to them from the bin CL, but it 114 | * keeps the kernel from having to resize some allocations it makes. 115 | */ 116 | uint32_t shader_rec_count; 117 | /* Size in bytes of the uniform state. */ 118 | uint32_t uniforms_size; 119 | 120 | /* Number of BO handles passed in (size is that times 4). */ 121 | uint32_t bo_handle_count; 122 | 123 | /* RCL setup: */ 124 | uint16_t width; 125 | uint16_t height; 126 | uint8_t min_x_tile; 127 | uint8_t min_y_tile; 128 | uint8_t max_x_tile; 129 | uint8_t max_y_tile; 130 | struct drm_vc4_submit_rcl_surface color_read; 131 | struct drm_vc4_submit_rcl_surface color_ms_write; 132 | struct drm_vc4_submit_rcl_surface zs_read; 133 | struct drm_vc4_submit_rcl_surface zs_write; 134 | uint32_t clear_color[2]; 135 | uint32_t clear_z; 136 | uint8_t clear_s; 137 | 138 | uint32_t pad:24; 139 | 140 | #define VC4_SUBMIT_CL_USE_CLEAR_COLOR (1 << 0) 141 | uint32_t flags; 142 | 143 | /* Returned value of the seqno of this render job (for the 144 | * wait ioctl). 145 | */ 146 | uint64_t seqno; 147 | }; 148 | 149 | /** 150 | * struct drm_vc4_wait_seqno - ioctl argument for waiting for 151 | * DRM_VC4_SUBMIT_CL completion using its returned seqno. 152 | * 153 | * timeout_ns is the timeout in nanoseconds, where "0" means "don't 154 | * block, just return the status." 
155 | */ 156 | struct drm_vc4_wait_seqno { 157 | uint64_t seqno; 158 | uint64_t timeout_ns; 159 | }; 160 | 161 | /** 162 | * struct drm_vc4_wait_bo - ioctl argument for waiting for 163 | * completion of the last DRM_VC4_SUBMIT_CL on a BO. 164 | * 165 | * This is useful for cases where multiple processes might be 166 | * rendering to a BO and you want to wait for all rendering to be 167 | * completed. 168 | */ 169 | struct drm_vc4_wait_bo { 170 | uint32_t handle; 171 | uint32_t pad; 172 | uint64_t timeout_ns; 173 | }; 174 | 175 | /** 176 | * struct drm_vc4_create_bo - ioctl argument for creating VC4 BOs. 177 | * 178 | * There are currently no values for the flags argument, but it may be 179 | * used in a future extension. 180 | */ 181 | struct drm_vc4_create_bo { 182 | uint32_t size; 183 | uint32_t flags; 184 | /** Returned GEM handle for the BO. */ 185 | uint32_t handle; 186 | uint32_t pad; 187 | }; 188 | 189 | /** 190 | * struct drm_vc4_create_shader_bo - ioctl argument for creating VC4 191 | * shader BOs. 192 | * 193 | * Since allowing a shader to be overwritten while it's also being 194 | * executed from would allow privilege escalation, shaders must be 195 | * created using this ioctl, and they can't be mmapped later. 196 | */ 197 | struct drm_vc4_create_shader_bo { 198 | /* Size of the data argument. */ 199 | uint32_t size; 200 | /* Flags, currently must be 0. */ 201 | uint32_t flags; 202 | 203 | /* Pointer to the data. */ 204 | uint64_t data; 205 | 206 | /** Returned GEM handle for the BO. */ 207 | uint32_t handle; 208 | /* Pad, must be 0. */ 209 | uint32_t pad; 210 | }; 211 | 212 | /** 213 | * struct drm_vc4_mmap_bo - ioctl argument for mapping VC4 BOs. 214 | * 215 | * This doesn't actually perform an mmap. Instead, it returns the 216 | * offset you need to use in an mmap on the DRM device node. This 217 | * means that tools like valgrind end up knowing about the mapped 218 | * memory.
219 | * 220 | * There are currently no values for the flags argument, but it may be 221 | * used in a future extension. 222 | */ 223 | struct drm_vc4_mmap_bo { 224 | /** Handle for the object being mapped. */ 225 | uint32_t handle; 226 | uint32_t flags; 227 | /** offset into the drm node to use for subsequent mmap call. */ 228 | uint64_t offset; 229 | }; 230 | 231 | struct drm_vc4_get_hang_state_bo { 232 | uint32_t handle; 233 | uint32_t paddr; 234 | uint32_t size; 235 | uint32_t pad; 236 | }; 237 | 238 | /** 239 | * struct drm_vc4_get_hang_state - ioctl argument for collecting state 240 | * from a GPU hang for analysis. 241 | */ 242 | struct drm_vc4_get_hang_state { 243 | /** Pointer to array of struct drm_vc4_get_hang_state_bo. */ 244 | uint64_t bo; 245 | /** 246 | * On input, the size of the bo array. Output is the number 247 | * of bos to be returned. 248 | */ 249 | uint32_t bo_count; 250 | 251 | uint32_t start_bin, start_render; 252 | 253 | uint32_t ct0ca, ct0ea; 254 | uint32_t ct1ca, ct1ea; 255 | uint32_t ct0cs, ct1cs; 256 | uint32_t ct0ra0, ct1ra0; 257 | 258 | uint32_t bpca, bpcs; 259 | uint32_t bpoa, bpos; 260 | 261 | uint32_t vpmbase; 262 | 263 | uint32_t dbge; 264 | uint32_t fdbgo; 265 | uint32_t fdbgb; 266 | uint32_t fdbgr; 267 | uint32_t fdbgs; 268 | uint32_t errstat; 269 | 270 | /* Pad that we may save more registers into in the future. */ 271 | uint32_t pad[16]; 272 | }; 273 | 274 | #endif /* _UAPI_VC4_DRM_H_ */ 275 | -------------------------------------------------------------------------------- /include/list.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * 3 | * Copyright 2006 VMware, Inc., Bismarck, ND. USA. 4 | * All Rights Reserved.
5 | * 6 | * Permission is hereby granted, free of charge, to any person obtaining a 7 | * copy of this software and associated documentation files (the 8 | * "Software"), to deal in the Software without restriction, including 9 | * without limitation the rights to use, copy, modify, merge, publish, 10 | * distribute, sub license, and/or sell copies of the Software, and to 11 | * permit persons to whom the Software is furnished to do so, subject to 12 | * the following conditions: 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 | * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 | * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | * 22 | * The above copyright notice and this permission notice (including the 23 | * next paragraph) shall be included in all copies or substantial portions 24 | * of the Software. 25 | * 26 | **************************************************************************/ 27 | 28 | /** 29 | * \file 30 | * List macros heavily inspired by the Linux kernel 31 | * list handling. No list looping yet. 32 | * 33 | * Is not threadsafe, so common operations need to 34 | * be protected using an external mutex. 
35 | */ 36 | 37 | #ifndef _UTIL_LIST_H_ 38 | #define _UTIL_LIST_H_ 39 | 40 | 41 | #include <stdbool.h> 42 | #include <stddef.h> 43 | #include <assert.h> 44 | 45 | 46 | struct list_head 47 | { 48 | struct list_head *prev; 49 | struct list_head *next; 50 | }; 51 | 52 | static inline void list_inithead(struct list_head *item) 53 | { 54 | item->prev = item; 55 | item->next = item; 56 | } 57 | 58 | static inline void list_add(struct list_head *item, struct list_head *list) 59 | { 60 | item->prev = list; 61 | item->next = list->next; 62 | list->next->prev = item; 63 | list->next = item; 64 | } 65 | 66 | static inline void list_addtail(struct list_head *item, struct list_head *list) 67 | { 68 | item->next = list; 69 | item->prev = list->prev; 70 | list->prev->next = item; 71 | list->prev = item; 72 | } 73 | 74 | static inline void list_replace(struct list_head *from, struct list_head *to) 75 | { 76 | to->prev = from->prev; 77 | to->next = from->next; 78 | from->next->prev = to; 79 | from->prev->next = to; 80 | } 81 | 82 | static inline void list_del(struct list_head *item) 83 | { 84 | item->prev->next = item->next; 85 | item->next->prev = item->prev; 86 | item->prev = item->next = NULL; 87 | } 88 | 89 | static inline void list_delinit(struct list_head *item) 90 | { 91 | item->prev->next = item->next; 92 | item->next->prev = item->prev; 93 | item->next = item; 94 | item->prev = item; 95 | } 96 | 97 | static inline bool list_empty(struct list_head *list) 98 | { 99 | return list->next == list; 100 | } 101 | 102 | /** 103 | * Returns whether the list has exactly one element.
104 | */ 105 | static inline bool list_is_singular(const struct list_head *list) 106 | { 107 | return list->next != NULL && list->next->next == list; 108 | } 109 | 110 | static inline unsigned list_length(struct list_head *list) 111 | { 112 | struct list_head *node; 113 | unsigned length = 0; 114 | for (node = list->next; node != list; node = node->next) 115 | length++; 116 | return length; 117 | } 118 | 119 | static inline void list_validate(struct list_head *list) 120 | { 121 | struct list_head *node; 122 | assert(list->next->prev == list && list->prev->next == list); 123 | for (node = list->next; node != list; node = node->next) 124 | assert(node->next->prev == node && node->prev->next == node); 125 | } 126 | 127 | #define LIST_INITHEAD(__item) list_inithead(__item) 128 | #define LIST_ADD(__item, __list) list_add(__item, __list) 129 | #define LIST_ADDTAIL(__item, __list) list_addtail(__item, __list) 130 | #define LIST_REPLACE(__from, __to) list_replace(__from, __to) 131 | #define LIST_DEL(__item) list_del(__item) 132 | #define LIST_DELINIT(__item) list_delinit(__item) 133 | 134 | #define LIST_ENTRY(__type, __item, __field) \ 135 | ((__type *)(((char *)(__item)) - offsetof(__type, __field))) 136 | 137 | #define LIST_IS_EMPTY(__list) \ 138 | ((__list)->next == (__list)) 139 | 140 | /** 141 | * Cast from a pointer to a member of a struct back to the containing struct. 142 | * 143 | * 'sample' MUST be initialized, or else the result is undefined! 
144 | */ 145 | #ifndef container_of 146 | #define container_of(ptr, sample, member) \ 147 | (void *)((char *)(ptr) \ 148 | - ((char *)&(sample)->member - (char *)(sample))) 149 | #endif 150 | 151 | #define list_first_entry(ptr, type, member) \ 152 | LIST_ENTRY(type, (ptr)->next, member) 153 | 154 | #define list_last_entry(ptr, type, member) \ 155 | LIST_ENTRY(type, (ptr)->prev, member) 156 | 157 | 158 | #define LIST_FOR_EACH_ENTRY(pos, head, member) \ 159 | for (pos = NULL, pos = container_of((head)->next, pos, member); \ 160 | &pos->member != (head); \ 161 | pos = container_of(pos->member.next, pos, member)) 162 | 163 | #define LIST_FOR_EACH_ENTRY_SAFE(pos, storage, head, member) \ 164 | for (pos = NULL, pos = container_of((head)->next, pos, member), \ 165 | storage = container_of(pos->member.next, pos, member); \ 166 | &pos->member != (head); \ 167 | pos = storage, storage = container_of(storage->member.next, storage, member)) 168 | 169 | #define LIST_FOR_EACH_ENTRY_SAFE_REV(pos, storage, head, member) \ 170 | for (pos = NULL, pos = container_of((head)->prev, pos, member), \ 171 | storage = container_of(pos->member.prev, pos, member); \ 172 | &pos->member != (head); \ 173 | pos = storage, storage = container_of(storage->member.prev, storage, member)) 174 | 175 | #define LIST_FOR_EACH_ENTRY_FROM(pos, start, head, member) \ 176 | for (pos = NULL, pos = container_of((start), pos, member); \ 177 | &pos->member != (head); \ 178 | pos = container_of(pos->member.next, pos, member)) 179 | 180 | #define LIST_FOR_EACH_ENTRY_FROM_REV(pos, start, head, member) \ 181 | for (pos = NULL, pos = container_of((start), pos, member); \ 182 | &pos->member != (head); \ 183 | pos = container_of(pos->member.prev, pos, member)) 184 | 185 | #define list_for_each_entry(type, pos, head, member) \ 186 | for (type *pos = LIST_ENTRY(type, (head)->next, member); \ 187 | &pos->member != (head); \ 188 | pos = LIST_ENTRY(type, pos->member.next, member)) 189 | 190 | #define 
list_for_each_entry_safe(type, pos, head, member) \ 191 | for (type *pos = LIST_ENTRY(type, (head)->next, member), \ 192 | *__next = LIST_ENTRY(type, pos->member.next, member); \ 193 | &pos->member != (head); \ 194 | pos = __next, \ 195 | __next = LIST_ENTRY(type, __next->member.next, member)) 196 | 197 | #define list_for_each_entry_rev(type, pos, head, member) \ 198 | for (type *pos = LIST_ENTRY(type, (head)->prev, member); \ 199 | &pos->member != (head); \ 200 | pos = LIST_ENTRY(type, pos->member.prev, member)) 201 | 202 | #define list_for_each_entry_safe_rev(type, pos, head, member) \ 203 | for (type *pos = LIST_ENTRY(type, (head)->prev, member), \ 204 | *__prev = LIST_ENTRY(type, pos->member.prev, member); \ 205 | &pos->member != (head); \ 206 | pos = __prev, \ 207 | __prev = LIST_ENTRY(type, __prev->member.prev, member)) 208 | 209 | #define list_for_each_entry_from(type, pos, start, head, member) \ 210 | for (type *pos = LIST_ENTRY(type, (start), member); \ 211 | &pos->member != (head); \ 212 | pos = LIST_ENTRY(type, pos->member.next, member)) 213 | 214 | #define list_for_each_entry_from_rev(type, pos, start, head, member) \ 215 | for (type *pos = LIST_ENTRY(type, (start), member); \ 216 | &pos->member != (head); \ 217 | pos = LIST_ENTRY(type, pos->member.prev, member)) 218 | 219 | #endif /*_UTIL_LIST_H_*/ 220 | -------------------------------------------------------------------------------- /include/vc4_packet.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do 
so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef VC4_PACKET_H 25 | #define VC4_PACKET_H 26 | 27 | enum vc4_packet { 28 | VC4_PACKET_HALT = 0, 29 | VC4_PACKET_NOP = 1, 30 | 31 | VC4_PACKET_FLUSH = 4, 32 | VC4_PACKET_FLUSH_ALL = 5, 33 | VC4_PACKET_START_TILE_BINNING = 6, 34 | VC4_PACKET_INCREMENT_SEMAPHORE = 7, 35 | VC4_PACKET_WAIT_ON_SEMAPHORE = 8, 36 | 37 | VC4_PACKET_BRANCH = 16, 38 | VC4_PACKET_BRANCH_TO_SUB_LIST = 17, 39 | VC4_PACKET_RETURN_FROM_SUB_LIST = 18, 40 | 41 | VC4_PACKET_STORE_MS_TILE_BUFFER = 24, 42 | VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF = 25, 43 | VC4_PACKET_STORE_FULL_RES_TILE_BUFFER = 26, 44 | VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER = 27, 45 | VC4_PACKET_STORE_TILE_BUFFER_GENERAL = 28, 46 | VC4_PACKET_LOAD_TILE_BUFFER_GENERAL = 29, 47 | 48 | VC4_PACKET_GL_INDEXED_PRIMITIVE = 32, 49 | VC4_PACKET_GL_ARRAY_PRIMITIVE = 33, 50 | 51 | VC4_PACKET_COMPRESSED_PRIMITIVE = 48, 52 | VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE = 49, 53 | 54 | VC4_PACKET_PRIMITIVE_LIST_FORMAT = 56, 55 | 56 | VC4_PACKET_GL_SHADER_STATE = 64, 57 | VC4_PACKET_NV_SHADER_STATE = 65, 58 | VC4_PACKET_VG_SHADER_STATE = 66, 59 | 60 | VC4_PACKET_CONFIGURATION_BITS = 96, 61 | VC4_PACKET_FLAT_SHADE_FLAGS = 97, 62 | VC4_PACKET_POINT_SIZE = 98, 63 | VC4_PACKET_LINE_WIDTH = 99, 64 | 
VC4_PACKET_RHT_X_BOUNDARY = 100, 65 | VC4_PACKET_DEPTH_OFFSET = 101, 66 | VC4_PACKET_CLIP_WINDOW = 102, 67 | VC4_PACKET_VIEWPORT_OFFSET = 103, 68 | VC4_PACKET_Z_CLIPPING = 104, 69 | VC4_PACKET_CLIPPER_XY_SCALING = 105, 70 | VC4_PACKET_CLIPPER_Z_SCALING = 106, 71 | 72 | VC4_PACKET_TILE_BINNING_MODE_CONFIG = 112, 73 | VC4_PACKET_TILE_RENDERING_MODE_CONFIG = 113, 74 | VC4_PACKET_CLEAR_COLORS = 114, 75 | VC4_PACKET_TILE_COORDINATES = 115, 76 | 77 | /* Not an actual hardware packet -- this is what we use to put 78 | * references to GEM bos in the command stream, since we need the u32 79 | * in the actual address packet in order to store the offset from the 80 | * start of the BO. 81 | */ 82 | VC4_PACKET_GEM_HANDLES = 254, 83 | } __attribute__ ((__packed__)); 84 | 85 | #define VC4_PACKET_HALT_SIZE 1 86 | #define VC4_PACKET_NOP_SIZE 1 87 | #define VC4_PACKET_FLUSH_SIZE 1 88 | #define VC4_PACKET_FLUSH_ALL_SIZE 1 89 | #define VC4_PACKET_START_TILE_BINNING_SIZE 1 90 | #define VC4_PACKET_INCREMENT_SEMAPHORE_SIZE 1 91 | #define VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE 1 92 | #define VC4_PACKET_BRANCH_SIZE 5 93 | #define VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE 5 94 | #define VC4_PACKET_RETURN_FROM_SUB_LIST_SIZE 1 95 | #define VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE 1 96 | #define VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF_SIZE 1 97 | #define VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE 5 98 | #define VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE 5 99 | #define VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE 7 100 | #define VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE 7 101 | #define VC4_PACKET_GL_INDEXED_PRIMITIVE_SIZE 14 102 | #define VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE 10 103 | #define VC4_PACKET_COMPRESSED_PRIMITIVE_SIZE 1 104 | #define VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE_SIZE 1 105 | #define VC4_PACKET_PRIMITIVE_LIST_FORMAT_SIZE 2 106 | #define VC4_PACKET_GL_SHADER_STATE_SIZE 5 107 | #define VC4_PACKET_NV_SHADER_STATE_SIZE 5 108 | #define VC4_PACKET_VG_SHADER_STATE_SIZE 5 109 | #define
VC4_PACKET_CONFIGURATION_BITS_SIZE 4 110 | #define VC4_PACKET_FLAT_SHADE_FLAGS_SIZE 5 111 | #define VC4_PACKET_POINT_SIZE_SIZE 5 112 | #define VC4_PACKET_LINE_WIDTH_SIZE 5 113 | #define VC4_PACKET_RHT_X_BOUNDARY_SIZE 3 114 | #define VC4_PACKET_DEPTH_OFFSET_SIZE 5 115 | #define VC4_PACKET_CLIP_WINDOW_SIZE 9 116 | #define VC4_PACKET_VIEWPORT_OFFSET_SIZE 5 117 | #define VC4_PACKET_Z_CLIPPING_SIZE 9 118 | #define VC4_PACKET_CLIPPER_XY_SCALING_SIZE 9 119 | #define VC4_PACKET_CLIPPER_Z_SCALING_SIZE 9 120 | #define VC4_PACKET_TILE_BINNING_MODE_CONFIG_SIZE 16 121 | #define VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE 11 122 | #define VC4_PACKET_CLEAR_COLORS_SIZE 14 123 | #define VC4_PACKET_TILE_COORDINATES_SIZE 3 124 | #define VC4_PACKET_GEM_HANDLES_SIZE 9 125 | 126 | #define VC4_MASK(high, low) ((0xffffffffu << (low)) & (0xffffffffu >> (31 - (high)))) /* unsigned, so a field reaching bit 31 cannot overflow a signed int */ 127 | /* Using the GNU statement expression extension */ 128 | #define VC4_SET_FIELD(value, field) \ 129 | ({ \ 130 | uint32_t fieldval = (uint32_t)(value) << field ## _SHIFT; \ 131 | assert((fieldval & ~ field ## _MASK) == 0); \ 132 | fieldval & field ## _MASK; \ 133 | }) 134 | 135 | #define VC4_GET_FIELD(word, field) (((word) & field ## _MASK) >> field ## _SHIFT) 136 | 137 | /** @{ 138 | * Bits used by packets like VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 139 | * VC4_PACKET_TILE_RENDERING_MODE_CONFIG. 140 | */ 141 | #define VC4_TILING_FORMAT_LINEAR 0 142 | #define VC4_TILING_FORMAT_T 1 143 | #define VC4_TILING_FORMAT_LT 2 144 | /** @} */ 145 | 146 | /** @{ 147 | * 148 | * low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and 149 | * VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
150 | */ 151 | #define VC4_LOADSTORE_FULL_RES_EOF (1 << 3) 152 | #define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2) 153 | #define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1) 154 | #define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0) 155 | 156 | /** @{ 157 | * 158 | * byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 159 | * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL (low bits of the address) 160 | */ 161 | 162 | #define VC4_LOADSTORE_TILE_BUFFER_EOF (1 << 3) 163 | #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_VG_MASK (1 << 2) 164 | #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_ZS (1 << 1) 165 | #define VC4_LOADSTORE_TILE_BUFFER_DISABLE_FULL_COLOR (1 << 0) 166 | 167 | /** @} */ 168 | 169 | /** @{ 170 | * 171 | * byte 0-1 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 172 | * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL 173 | */ 174 | #define VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR (1 << 15) 175 | #define VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR (1 << 14) 176 | #define VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR (1 << 13) 177 | #define VC4_STORE_TILE_BUFFER_DISABLE_SWAP (1 << 12) 178 | 179 | #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK VC4_MASK(9, 8) 180 | #define VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT 8 181 | #define VC4_LOADSTORE_TILE_BUFFER_RGBA8888 0 182 | #define VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER 1 183 | #define VC4_LOADSTORE_TILE_BUFFER_BGR565 2 184 | /** @} */ 185 | 186 | /** @{ 187 | * 188 | * byte 0 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and 189 | * VC4_PACKET_LOAD_TILE_BUFFER_GENERAL 190 | */ 191 | #define VC4_STORE_TILE_BUFFER_MODE_MASK VC4_MASK(7, 6) 192 | #define VC4_STORE_TILE_BUFFER_MODE_SHIFT 6 193 | #define VC4_STORE_TILE_BUFFER_MODE_SAMPLE0 (0 << 6) 194 | #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X4 (1 << 6) 195 | #define VC4_STORE_TILE_BUFFER_MODE_DECIMATE_X16 (2 << 6) 196 | 197 | /** The values of the field are VC4_TILING_FORMAT_* */ 198 | #define VC4_LOADSTORE_TILE_BUFFER_TILING_MASK VC4_MASK(5, 4) 199 | #define 
VC4_LOADSTORE_TILE_BUFFER_TILING_SHIFT 4 200 | 201 | #define VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK VC4_MASK(2, 0) 202 | #define VC4_LOADSTORE_TILE_BUFFER_BUFFER_SHIFT 0 203 | #define VC4_LOADSTORE_TILE_BUFFER_NONE 0 204 | #define VC4_LOADSTORE_TILE_BUFFER_COLOR 1 205 | #define VC4_LOADSTORE_TILE_BUFFER_ZS 2 206 | #define VC4_LOADSTORE_TILE_BUFFER_Z 3 207 | #define VC4_LOADSTORE_TILE_BUFFER_VG_MASK 4 208 | #define VC4_LOADSTORE_TILE_BUFFER_FULL 5 209 | /** @} */ 210 | 211 | #define VC4_INDEX_BUFFER_U8 (0 << 4) 212 | #define VC4_INDEX_BUFFER_U16 (1 << 4) 213 | 214 | /* This flag is only present in NV shader state. */ 215 | #define VC4_SHADER_FLAG_SHADED_CLIP_COORDS (1 << 3) 216 | #define VC4_SHADER_FLAG_ENABLE_CLIPPING (1 << 2) 217 | #define VC4_SHADER_FLAG_VS_POINT_SIZE (1 << 1) 218 | #define VC4_SHADER_FLAG_FS_SINGLE_THREAD (1 << 0) 219 | 220 | /** @{ byte 2 of config bits. */ 221 | #define VC4_CONFIG_BITS_EARLY_Z_UPDATE (1 << 1) 222 | #define VC4_CONFIG_BITS_EARLY_Z (1 << 0) 223 | /** @} */ 224 | 225 | /** @{ byte 1 of config bits. */ 226 | #define VC4_CONFIG_BITS_Z_UPDATE (1 << 7) 227 | /** same values in this 3-bit field as PIPE_FUNC_* */ 228 | #define VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT 4 229 | #define VC4_CONFIG_BITS_COVERAGE_READ_LEAVE (1 << 3) 230 | 231 | #define VC4_CONFIG_BITS_COVERAGE_UPDATE_NONZERO (0 << 1) 232 | #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ODD (1 << 1) 233 | #define VC4_CONFIG_BITS_COVERAGE_UPDATE_OR (2 << 1) 234 | #define VC4_CONFIG_BITS_COVERAGE_UPDATE_ZERO (3 << 1) 235 | 236 | #define VC4_CONFIG_BITS_COVERAGE_PIPE_SELECT (1 << 0) 237 | /** @} */ 238 | 239 | /** @{ byte 0 of config bits. 
*/ 240 | #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE (0 << 6) 241 | #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X (1 << 6) 242 | #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X (2 << 6) 243 | #define VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_MASK (3 << 6) 244 | 245 | #define VC4_CONFIG_BITS_AA_POINTS_AND_LINES (1 << 4) 246 | #define VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET (1 << 3) 247 | #define VC4_CONFIG_BITS_CW_PRIMITIVES (1 << 2) 248 | #define VC4_CONFIG_BITS_ENABLE_PRIM_BACK (1 << 1) 249 | #define VC4_CONFIG_BITS_ENABLE_PRIM_FRONT (1 << 0) 250 | /** @} */ 251 | 252 | /** @{ bits in the last u8 of VC4_PACKET_TILE_BINNING_MODE_CONFIG */ 253 | #define VC4_BIN_CONFIG_DB_NON_MS (1 << 7) 254 | 255 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK VC4_MASK(6, 5) 256 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_SHIFT 5 257 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 0 258 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_64 1 259 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128 2 260 | #define VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_256 3 261 | 262 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK VC4_MASK(4, 3) 263 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_SHIFT 3 264 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32 0 265 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_64 1 266 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_128 2 267 | #define VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_256 3 268 | 269 | #define VC4_BIN_CONFIG_AUTO_INIT_TSDA (1 << 2) 270 | #define VC4_BIN_CONFIG_TILE_BUFFER_64BIT (1 << 1) 271 | #define VC4_BIN_CONFIG_MS_MODE_4X (1 << 0) 272 | /** @} */ 273 | 274 | /** @{ bits in the last u16 of VC4_PACKET_TILE_RENDERING_MODE_CONFIG */ 275 | #define VC4_RENDER_CONFIG_DB_NON_MS (1 << 12) 276 | #define VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE (1 << 11) 277 | #define VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G (1 << 10) 278 | #define VC4_RENDER_CONFIG_COVERAGE_MODE (1 << 9) 279 | #define VC4_RENDER_CONFIG_ENABLE_VG_MASK (1 << 8) 280 | 281 | /** The values of the field are 
VC4_TILING_FORMAT_* */ 282 | #define VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK VC4_MASK(7, 6) 283 | #define VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT 6 284 | 285 | #define VC4_RENDER_CONFIG_DECIMATE_MODE_1X (0 << 4) 286 | #define VC4_RENDER_CONFIG_DECIMATE_MODE_4X (1 << 4) 287 | #define VC4_RENDER_CONFIG_DECIMATE_MODE_16X (2 << 4) 288 | #define VC4_RENDER_CONFIG_DECIMATE_MODE_MASK (3 << 4) 289 | 290 | #define VC4_RENDER_CONFIG_FORMAT_MASK VC4_MASK(3, 2) 291 | #define VC4_RENDER_CONFIG_FORMAT_SHIFT 2 292 | #define VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED 0 293 | #define VC4_RENDER_CONFIG_FORMAT_RGBA8888 1 294 | #define VC4_RENDER_CONFIG_FORMAT_BGR565 2 295 | 296 | #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT (1 << 1) 297 | #define VC4_RENDER_CONFIG_MS_MODE_4X (1 << 0) 298 | 299 | #define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 << 4) 300 | #define VC4_PRIMITIVE_LIST_FORMAT_32_XY (3 << 4) 301 | #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 << 0) 302 | #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 << 0) 303 | #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 << 0) 304 | #define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 << 0) 305 | 306 | enum vc4_texture_data_type { 307 | VC4_TEXTURE_TYPE_RGBA8888 = 0, 308 | VC4_TEXTURE_TYPE_RGBX8888 = 1, 309 | VC4_TEXTURE_TYPE_RGBA4444 = 2, 310 | VC4_TEXTURE_TYPE_RGBA5551 = 3, 311 | VC4_TEXTURE_TYPE_RGB565 = 4, 312 | VC4_TEXTURE_TYPE_LUMINANCE = 5, 313 | VC4_TEXTURE_TYPE_ALPHA = 6, 314 | VC4_TEXTURE_TYPE_LUMALPHA = 7, 315 | VC4_TEXTURE_TYPE_ETC1 = 8, 316 | VC4_TEXTURE_TYPE_S16F = 9, 317 | VC4_TEXTURE_TYPE_S8 = 10, 318 | VC4_TEXTURE_TYPE_S16 = 11, 319 | VC4_TEXTURE_TYPE_BW1 = 12, 320 | VC4_TEXTURE_TYPE_A4 = 13, 321 | VC4_TEXTURE_TYPE_A1 = 14, 322 | VC4_TEXTURE_TYPE_RGBA64 = 15, 323 | VC4_TEXTURE_TYPE_RGBA32R = 16, 324 | VC4_TEXTURE_TYPE_YUV422R = 17, 325 | }; 326 | 327 | #define VC4_TEX_P0_OFFSET_MASK VC4_MASK(31, 12) 328 | #define VC4_TEX_P0_OFFSET_SHIFT 12 329 | #define VC4_TEX_P0_CSWIZ_MASK VC4_MASK(11, 10) 330 | #define 
VC4_TEX_P0_CSWIZ_SHIFT 10 331 | #define VC4_TEX_P0_CMMODE_MASK VC4_MASK(9, 9) 332 | #define VC4_TEX_P0_CMMODE_SHIFT 9 333 | #define VC4_TEX_P0_FLIPY_MASK VC4_MASK(8, 8) 334 | #define VC4_TEX_P0_FLIPY_SHIFT 8 335 | #define VC4_TEX_P0_TYPE_MASK VC4_MASK(7, 4) 336 | #define VC4_TEX_P0_TYPE_SHIFT 4 337 | #define VC4_TEX_P0_MIPLVLS_MASK VC4_MASK(3, 0) 338 | #define VC4_TEX_P0_MIPLVLS_SHIFT 0 339 | 340 | #define VC4_TEX_P1_TYPE4_MASK VC4_MASK(31, 31) 341 | #define VC4_TEX_P1_TYPE4_SHIFT 31 342 | #define VC4_TEX_P1_HEIGHT_MASK VC4_MASK(30, 20) 343 | #define VC4_TEX_P1_HEIGHT_SHIFT 20 344 | #define VC4_TEX_P1_ETCFLIP_MASK VC4_MASK(19, 19) 345 | #define VC4_TEX_P1_ETCFLIP_SHIFT 19 346 | #define VC4_TEX_P1_WIDTH_MASK VC4_MASK(18, 8) 347 | #define VC4_TEX_P1_WIDTH_SHIFT 8 348 | 349 | #define VC4_TEX_P1_MAGFILT_MASK VC4_MASK(7, 7) 350 | #define VC4_TEX_P1_MAGFILT_SHIFT 7 351 | # define VC4_TEX_P1_MAGFILT_LINEAR 0 352 | # define VC4_TEX_P1_MAGFILT_NEAREST 1 353 | 354 | #define VC4_TEX_P1_MINFILT_MASK VC4_MASK(6, 4) 355 | #define VC4_TEX_P1_MINFILT_SHIFT 4 356 | # define VC4_TEX_P1_MINFILT_LINEAR 0 357 | # define VC4_TEX_P1_MINFILT_NEAREST 1 358 | # define VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR 2 359 | # define VC4_TEX_P1_MINFILT_NEAR_MIP_LIN 3 360 | # define VC4_TEX_P1_MINFILT_LIN_MIP_NEAR 4 361 | # define VC4_TEX_P1_MINFILT_LIN_MIP_LIN 5 362 | 363 | #define VC4_TEX_P1_WRAP_T_MASK VC4_MASK(3, 2) 364 | #define VC4_TEX_P1_WRAP_T_SHIFT 2 365 | #define VC4_TEX_P1_WRAP_S_MASK VC4_MASK(1, 0) 366 | #define VC4_TEX_P1_WRAP_S_SHIFT 0 367 | # define VC4_TEX_P1_WRAP_REPEAT 0 368 | # define VC4_TEX_P1_WRAP_CLAMP 1 369 | # define VC4_TEX_P1_WRAP_MIRROR 2 370 | # define VC4_TEX_P1_WRAP_BORDER 3 371 | 372 | #define VC4_TEX_P2_PTYPE_MASK VC4_MASK(31, 30) 373 | #define VC4_TEX_P2_PTYPE_SHIFT 30 374 | # define VC4_TEX_P2_PTYPE_IGNORED 0 375 | # define VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE 1 376 | # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS 2 377 | # define VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS 3 378 | 
379 | /* VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE bits */ 380 | #define VC4_TEX_P2_CMST_MASK VC4_MASK(29, 12) 381 | #define VC4_TEX_P2_CMST_SHIFT 12 382 | #define VC4_TEX_P2_BSLOD_MASK VC4_MASK(0, 0) 383 | #define VC4_TEX_P2_BSLOD_SHIFT 0 384 | 385 | /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_DIMENSIONS */ 386 | #define VC4_TEX_P2_CHEIGHT_MASK VC4_MASK(22, 12) 387 | #define VC4_TEX_P2_CHEIGHT_SHIFT 12 388 | #define VC4_TEX_P2_CWIDTH_MASK VC4_MASK(10, 0) 389 | #define VC4_TEX_P2_CWIDTH_SHIFT 0 390 | 391 | /* VC4_TEX_P2_PTYPE_CHILD_IMAGE_OFFSETS */ 392 | #define VC4_TEX_P2_CYOFF_MASK VC4_MASK(22, 12) 393 | #define VC4_TEX_P2_CYOFF_SHIFT 12 394 | #define VC4_TEX_P2_CXOFF_MASK VC4_MASK(10, 0) 395 | #define VC4_TEX_P2_CXOFF_SHIFT 0 396 | 397 | #endif /* VC4_PACKET_H */ 398 | -------------------------------------------------------------------------------- /include/vc4_qpu_defines.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef VC4_QPU_DEFINES_H 25 | #define VC4_QPU_DEFINES_H 26 | 27 | enum qpu_op_add { 28 | QPU_A_NOP, 29 | QPU_A_FADD, 30 | QPU_A_FSUB, 31 | QPU_A_FMIN, 32 | QPU_A_FMAX, 33 | QPU_A_FMINABS, 34 | QPU_A_FMAXABS, 35 | QPU_A_FTOI, 36 | QPU_A_ITOF, 37 | QPU_A_ADD = 12, 38 | QPU_A_SUB, 39 | QPU_A_SHR, 40 | QPU_A_ASR, 41 | QPU_A_ROR, 42 | QPU_A_SHL, 43 | QPU_A_MIN, 44 | QPU_A_MAX, 45 | QPU_A_AND, 46 | QPU_A_OR, 47 | QPU_A_XOR, 48 | QPU_A_NOT, 49 | QPU_A_CLZ, 50 | QPU_A_V8ADDS = 30, 51 | QPU_A_V8SUBS = 31, 52 | }; 53 | 54 | enum qpu_op_mul { 55 | QPU_M_NOP, 56 | QPU_M_FMUL, 57 | QPU_M_MUL24, 58 | QPU_M_V8MULD, 59 | QPU_M_V8MIN, 60 | QPU_M_V8MAX, 61 | QPU_M_V8ADDS, 62 | QPU_M_V8SUBS, 63 | }; 64 | 65 | enum qpu_raddr { 66 | QPU_R_FRAG_PAYLOAD_ZW = 15, /* W for A file, Z for B file */ 67 | /* 0-31 are the plain regfile a or b fields */ 68 | QPU_R_UNIF = 32, 69 | QPU_R_VARY = 35, 70 | QPU_R_ELEM_QPU = 38, 71 | QPU_R_NOP, 72 | QPU_R_XY_PIXEL_COORD = 41, 73 | QPU_R_MS_REV_FLAGS = 42, 74 | QPU_R_VPM = 48, 75 | QPU_R_VPM_LD_BUSY, 76 | QPU_R_VPM_LD_WAIT, 77 | QPU_R_MUTEX_ACQUIRE, 78 | }; 79 | 80 | enum qpu_waddr { 81 | /* 0-31 are the plain regfile a or b fields */ 82 | QPU_W_ACC0 = 32, /* aka r0 */ 83 | QPU_W_ACC1, 84 | QPU_W_ACC2, 85 | QPU_W_ACC3, 86 | QPU_W_TMU_NOSWAP, 87 | QPU_W_ACC5, 88 | QPU_W_HOST_INT, 89 | QPU_W_NOP, 90 | QPU_W_UNIFORMS_ADDRESS, 91 | QPU_W_QUAD_XY, /* X for regfile a, Y for regfile b */ 92 | QPU_W_MS_FLAGS = 42, 93 | QPU_W_REV_FLAG = 42, 94 | QPU_W_TLB_STENCIL_SETUP = 43, 95 | QPU_W_TLB_Z, 96 | QPU_W_TLB_COLOR_MS, 97 | QPU_W_TLB_COLOR_ALL, 98 | QPU_W_TLB_ALPHA_MASK, 99 | QPU_W_VPM, 100 | QPU_W_VPMVCD_SETUP, /* LD for regfile a, ST for regfile b */ 101 | 
QPU_W_VPM_ADDR, /* LD for regfile a, ST for regfile b */ 102 | QPU_W_MUTEX_RELEASE, 103 | QPU_W_SFU_RECIP, 104 | QPU_W_SFU_RECIPSQRT, 105 | QPU_W_SFU_EXP, 106 | QPU_W_SFU_LOG, 107 | QPU_W_TMU0_S, 108 | QPU_W_TMU0_T, 109 | QPU_W_TMU0_R, 110 | QPU_W_TMU0_B, 111 | QPU_W_TMU1_S, 112 | QPU_W_TMU1_T, 113 | QPU_W_TMU1_R, 114 | QPU_W_TMU1_B, 115 | }; 116 | 117 | enum qpu_sig_bits { 118 | QPU_SIG_SW_BREAKPOINT, 119 | QPU_SIG_NONE, 120 | QPU_SIG_THREAD_SWITCH, 121 | QPU_SIG_PROG_END, 122 | QPU_SIG_WAIT_FOR_SCOREBOARD, 123 | QPU_SIG_SCOREBOARD_UNLOCK, 124 | QPU_SIG_LAST_THREAD_SWITCH, 125 | QPU_SIG_COVERAGE_LOAD, 126 | QPU_SIG_COLOR_LOAD, 127 | QPU_SIG_COLOR_LOAD_END, 128 | QPU_SIG_LOAD_TMU0, 129 | QPU_SIG_LOAD_TMU1, 130 | QPU_SIG_ALPHA_MASK_LOAD, 131 | QPU_SIG_SMALL_IMM, 132 | QPU_SIG_LOAD_IMM, 133 | QPU_SIG_BRANCH 134 | }; 135 | 136 | enum qpu_mux { 137 | /* hardware mux values */ 138 | QPU_MUX_R0, 139 | QPU_MUX_R1, 140 | QPU_MUX_R2, 141 | QPU_MUX_R3, 142 | QPU_MUX_R4, 143 | QPU_MUX_R5, 144 | QPU_MUX_A, 145 | QPU_MUX_B, 146 | 147 | /** 148 | * Non-hardware mux value, stores a small immediate field to be 149 | * programmed into raddr_b in the qpu_reg.index. 150 | */ 151 | QPU_MUX_SMALL_IMM, 152 | }; 153 | 154 | enum qpu_cond { 155 | QPU_COND_NEVER, 156 | QPU_COND_ALWAYS, 157 | QPU_COND_ZS, 158 | QPU_COND_ZC, 159 | QPU_COND_NS, 160 | QPU_COND_NC, 161 | QPU_COND_CS, 162 | QPU_COND_CC, 163 | }; 164 | 165 | enum qpu_pack_mul { 166 | QPU_PACK_MUL_NOP, 167 | QPU_PACK_MUL_8888 = 3, /* replicated to each 8 bits of the 32-bit dst. */ 168 | QPU_PACK_MUL_8A, 169 | QPU_PACK_MUL_8B, 170 | QPU_PACK_MUL_8C, 171 | QPU_PACK_MUL_8D, 172 | }; 173 | 174 | enum qpu_pack_a { 175 | QPU_PACK_A_NOP, 176 | /* convert to 16 bit float if float input, or to int16. */ 177 | QPU_PACK_A_16A, 178 | QPU_PACK_A_16B, 179 | /* replicated to each 8 bits of the 32-bit dst. */ 180 | QPU_PACK_A_8888, 181 | /* Convert to 8-bit unsigned int. 
*/ 182 | QPU_PACK_A_8A, 183 | QPU_PACK_A_8B, 184 | QPU_PACK_A_8C, 185 | QPU_PACK_A_8D, 186 | 187 | /* Saturating variants of the previous instructions. */ 188 | QPU_PACK_A_32_SAT, /* int-only */ 189 | QPU_PACK_A_16A_SAT, /* int or float */ 190 | QPU_PACK_A_16B_SAT, 191 | QPU_PACK_A_8888_SAT, 192 | QPU_PACK_A_8A_SAT, 193 | QPU_PACK_A_8B_SAT, 194 | QPU_PACK_A_8C_SAT, 195 | QPU_PACK_A_8D_SAT, 196 | }; 197 | 198 | enum qpu_unpack { 199 | QPU_UNPACK_NOP, 200 | QPU_UNPACK_16A, 201 | QPU_UNPACK_16B, 202 | QPU_UNPACK_8D_REP, 203 | QPU_UNPACK_8A, 204 | QPU_UNPACK_8B, 205 | QPU_UNPACK_8C, 206 | QPU_UNPACK_8D, 207 | }; 208 | 209 | #define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) 210 | /* Using the GNU statement expression extension */ 211 | #define QPU_SET_FIELD(value, field) \ 212 | ({ \ 213 | uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ 214 | assert((fieldval & ~ field ## _MASK) == 0); \ 215 | fieldval & field ## _MASK; \ 216 | }) 217 | 218 | #define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) 219 | 220 | #define QPU_UPDATE_FIELD(inst, value, field) \ 221 | (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) 222 | 223 | #define QPU_SIG_SHIFT 60 224 | #define QPU_SIG_MASK QPU_MASK(63, 60) 225 | 226 | #define QPU_UNPACK_SHIFT 57 227 | #define QPU_UNPACK_MASK QPU_MASK(59, 57) 228 | 229 | /** 230 | * If set, the pack field means PACK_MUL or R4 packing, instead of normal 231 | * regfile a packing. 
232 | */ 233 | #define QPU_PM ((uint64_t)1 << 56) 234 | 235 | #define QPU_PACK_SHIFT 52 236 | #define QPU_PACK_MASK QPU_MASK(55, 52) 237 | 238 | #define QPU_COND_ADD_SHIFT 49 239 | #define QPU_COND_ADD_MASK QPU_MASK(51, 49) 240 | #define QPU_COND_MUL_SHIFT 46 241 | #define QPU_COND_MUL_MASK QPU_MASK(48, 46) 242 | 243 | #define QPU_SF ((uint64_t)1 << 45) 244 | 245 | #define QPU_WADDR_ADD_SHIFT 38 246 | #define QPU_WADDR_ADD_MASK QPU_MASK(43, 38) 247 | #define QPU_WADDR_MUL_SHIFT 32 248 | #define QPU_WADDR_MUL_MASK QPU_MASK(37, 32) 249 | 250 | #define QPU_OP_MUL_SHIFT 29 251 | #define QPU_OP_MUL_MASK QPU_MASK(31, 29) 252 | 253 | #define QPU_RADDR_A_SHIFT 18 254 | #define QPU_RADDR_A_MASK QPU_MASK(23, 18) 255 | #define QPU_RADDR_B_SHIFT 12 256 | #define QPU_RADDR_B_MASK QPU_MASK(17, 12) 257 | #define QPU_SMALL_IMM_SHIFT 12 258 | #define QPU_SMALL_IMM_MASK QPU_MASK(17, 12) 259 | 260 | #define QPU_ADD_A_SHIFT 9 261 | #define QPU_ADD_A_MASK QPU_MASK(11, 9) 262 | #define QPU_ADD_B_SHIFT 6 263 | #define QPU_ADD_B_MASK QPU_MASK(8, 6) 264 | #define QPU_MUL_A_SHIFT 3 265 | #define QPU_MUL_A_MASK QPU_MASK(5, 3) 266 | #define QPU_MUL_B_SHIFT 0 267 | #define QPU_MUL_B_MASK QPU_MASK(2, 0) 268 | 269 | #define QPU_WS ((uint64_t)1 << 44) 270 | 271 | #define QPU_OP_ADD_SHIFT 24 272 | #define QPU_OP_ADD_MASK QPU_MASK(28, 24) 273 | 274 | #endif /* VC4_QPU_DEFINES_H */ 275 | -------------------------------------------------------------------------------- /include/vc4_tools.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 
9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | 26 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 27 | #define ATTRIBUTE_CONST __attribute__((__const__)) 28 | 29 | static inline float 30 | uif(uint32_t u) 31 | { 32 | union { 33 | uint32_t u; 34 | float f; 35 | } uf; 36 | 37 | uf.u = u; 38 | return uf.f; 39 | } 40 | 41 | static inline uint32_t 42 | fui(float f) 43 | { 44 | union { 45 | uint32_t u; 46 | float f; 47 | } uf; 48 | 49 | uf.f = f; 50 | return uf.u; 51 | } 52 | 53 | void 54 | vc4_qpu_disasm(FILE *out, const uint64_t *instructions, int num_instructions); 55 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | shader_map 2 | shader_missing_end 3 | shader_noop 4 | -------------------------------------------------------------------------------- /tests/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright © 2015 Broadcom 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without 
restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice (including the next 11 | # paragraph) shall be included in all copies or substantial portions of the 12 | # Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | # IN THE SOFTWARE. 21 | 22 | AM_CPPFLAGS = \ 23 | -I$(top_srcdir)/include/drm \ 24 | -I$(top_srcdir)/include \ 25 | -I$(srcdir)/lib \ 26 | $() 27 | 28 | AM_CFLAGS = $(LIBDRM_CFLAGS) $(CWARNFLAGS) 29 | 30 | noinst_LTLIBRARIES = libvc4_test.la 31 | 32 | libvc4_test_la_SOURCES = \ 33 | lib/vc4_test.c \ 34 | lib/vc4_test.h \ 35 | lib/vc4_qpu.c \ 36 | lib/vc4_qpu.h \ 37 | $() 38 | 39 | noinst_PROGRAMS = \ 40 | shader_map \ 41 | shader_missing_end \ 42 | shader_noop \ 43 | $() 44 | 45 | TEST_LIBS = $(LIBDRM_LIBS) libvc4_test.la 46 | 47 | shader_map_LDADD = $(TEST_LIBS) 48 | shader_missing_end_LDADD = $(TEST_LIBS) 49 | shader_noop_LDADD = $(TEST_LIBS) 50 | 51 | list-tests: 52 | @echo $(noinst_PROGRAMS) 53 | 54 | -------------------------------------------------------------------------------- /tests/lib/vc4_qpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this 
software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include "vc4_test.h" 25 | 26 | #define QPU_MUX(mux, muxfield) \ 27 | QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? 
mux : QPU_MUX_B, muxfield) 28 | 29 | static uint64_t 30 | set_src_raddr(uint64_t inst, struct qpu_reg src) 31 | { 32 | if (src.mux == QPU_MUX_A) { 33 | assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP || 34 | QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr); 35 | return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A); 36 | } 37 | 38 | if (src.mux == QPU_MUX_B) { 39 | assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP || 40 | QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) && 41 | QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM); 42 | return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B); 43 | } 44 | 45 | if (src.mux == QPU_MUX_SMALL_IMM) { 46 | if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { 47 | assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr); 48 | } else { 49 | inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM); 50 | assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP); 51 | } 52 | return ((inst & ~QPU_RADDR_B_MASK) | 53 | QPU_SET_FIELD(src.addr, QPU_RADDR_B)); 54 | } 55 | 56 | return inst; 57 | } 58 | 59 | uint64_t 60 | qpu_NOP(void) 61 | { 62 | uint64_t inst = 0; 63 | 64 | inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD); 65 | inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL); 66 | 67 | /* Note: These field values are actually non-zero */ 68 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 69 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 70 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 71 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 72 | inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 73 | 74 | return inst; 75 | } 76 | 77 | static uint64_t 78 | qpu_a_dst(struct qpu_reg dst) 79 | { 80 | uint64_t inst = 0; 81 | 82 | if (dst.mux <= QPU_MUX_R5) { 83 | /* Translate the mux to the ACCn values. 
*/ 84 | inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD); 85 | } else { 86 | inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD); 87 | if (dst.mux == QPU_MUX_B) 88 | inst |= QPU_WS; 89 | } 90 | 91 | return inst; 92 | } 93 | 94 | static uint64_t 95 | qpu_m_dst(struct qpu_reg dst) 96 | { 97 | uint64_t inst = 0; 98 | 99 | if (dst.mux <= QPU_MUX_R5) { 100 | /* Translate the mux to the ACCn values. */ 101 | inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL); 102 | } else { 103 | inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL); 104 | if (dst.mux == QPU_MUX_A) 105 | inst |= QPU_WS; 106 | } 107 | 108 | return inst; 109 | } 110 | 111 | uint64_t 112 | qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) 113 | { 114 | uint64_t inst = 0; 115 | 116 | inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 117 | inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD); 118 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 119 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 120 | inst |= qpu_a_dst(dst); 121 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 122 | inst |= QPU_MUX(src.mux, QPU_ADD_A); 123 | inst |= QPU_MUX(src.mux, QPU_ADD_B); 124 | inst = set_src_raddr(inst, src); 125 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 126 | 127 | return inst; 128 | } 129 | 130 | uint64_t 131 | qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) 132 | { 133 | uint64_t inst = 0; 134 | 135 | inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 136 | inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL); 137 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 138 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 139 | inst |= qpu_m_dst(dst); 140 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 141 | inst |= QPU_MUX(src.mux, QPU_MUL_A); 142 | inst |= QPU_MUX(src.mux, QPU_MUL_B); 143 | inst = set_src_raddr(inst, src); 144 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 145 | 146 | return inst; 147 | } 148 | 149 | uint64_t 150 | qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) 151 | { 152 | uint64_t inst = 0; 153 | 154 | 
inst |= qpu_a_dst(dst); 155 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 156 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 157 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 158 | inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG); 159 | inst |= val; 160 | 161 | return inst; 162 | } 163 | 164 | uint64_t 165 | qpu_a_alu2(enum qpu_op_add op, 166 | struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 167 | { 168 | uint64_t inst = 0; 169 | 170 | inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 171 | inst |= QPU_SET_FIELD(op, QPU_OP_ADD); 172 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 173 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 174 | inst |= qpu_a_dst(dst); 175 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD); 176 | inst |= QPU_MUX(src0.mux, QPU_ADD_A); 177 | inst = set_src_raddr(inst, src0); 178 | inst |= QPU_MUX(src1.mux, QPU_ADD_B); 179 | inst = set_src_raddr(inst, src1); 180 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL); 181 | 182 | return inst; 183 | } 184 | 185 | uint64_t 186 | qpu_m_alu2(enum qpu_op_mul op, 187 | struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) 188 | { 189 | uint64_t inst = 0; 190 | 191 | inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG); 192 | inst |= QPU_SET_FIELD(op, QPU_OP_MUL); 193 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 194 | inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B); 195 | inst |= qpu_m_dst(dst); 196 | inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL); 197 | inst |= QPU_MUX(src0.mux, QPU_MUL_A); 198 | inst = set_src_raddr(inst, src0); 199 | inst |= QPU_MUX(src1.mux, QPU_MUL_B); 200 | inst = set_src_raddr(inst, src1); 201 | inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD); 202 | 203 | return inst; 204 | } 205 | 206 | static bool 207 | merge_fields(uint64_t *merge, 208 | uint64_t a, uint64_t b, 209 | uint64_t mask, uint64_t ignore) 210 | { 211 | if ((a & mask) == ignore) { 212 | *merge = (*merge & ~mask) | (b & mask); 213 | } else if ((b & mask) == ignore) { 214 | 
*merge = (*merge & ~mask) | (a & mask); 215 | } else { 216 | if ((a & mask) != (b & mask)) 217 | return false; 218 | } 219 | 220 | return true; 221 | } 222 | 223 | int 224 | qpu_num_sf_accesses(uint64_t inst) 225 | { 226 | int accesses = 0; 227 | static const uint32_t specials[] = { 228 | QPU_W_TLB_COLOR_MS, 229 | QPU_W_TLB_COLOR_ALL, 230 | QPU_W_TLB_Z, 231 | QPU_W_TMU0_S, 232 | QPU_W_TMU0_T, 233 | QPU_W_TMU0_R, 234 | QPU_W_TMU0_B, 235 | QPU_W_TMU1_S, 236 | QPU_W_TMU1_T, 237 | QPU_W_TMU1_R, 238 | QPU_W_TMU1_B, 239 | QPU_W_SFU_RECIP, 240 | QPU_W_SFU_RECIPSQRT, 241 | QPU_W_SFU_EXP, 242 | QPU_W_SFU_LOG, 243 | }; 244 | uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 245 | uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 246 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 247 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 248 | 249 | for (int j = 0; j < ARRAY_SIZE(specials); j++) { 250 | if (waddr_add == specials[j]) 251 | accesses++; 252 | if (waddr_mul == specials[j]) 253 | accesses++; 254 | } 255 | 256 | if (raddr_a == QPU_R_MUTEX_ACQUIRE) 257 | accesses++; 258 | if (raddr_b == QPU_R_MUTEX_ACQUIRE && 259 | QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM) 260 | accesses++; 261 | 262 | /* XXX: semaphore, combined color read/write? 
*/ 263 | switch (QPU_GET_FIELD(inst, QPU_SIG)) { 264 | case QPU_SIG_COLOR_LOAD: 265 | case QPU_SIG_COLOR_LOAD_END: 266 | case QPU_SIG_LOAD_TMU0: 267 | case QPU_SIG_LOAD_TMU1: 268 | accesses++; 269 | } 270 | 271 | return accesses; 272 | } 273 | 274 | static bool 275 | qpu_waddr_ignores_ws(uint32_t waddr) 276 | { 277 | switch(waddr) { 278 | case QPU_W_ACC0: 279 | case QPU_W_ACC1: 280 | case QPU_W_ACC2: 281 | case QPU_W_ACC3: 282 | case QPU_W_TLB_Z: 283 | case QPU_W_TLB_COLOR_MS: 284 | case QPU_W_TLB_COLOR_ALL: 285 | case QPU_W_TLB_ALPHA_MASK: 286 | case QPU_W_VPM: 287 | case QPU_W_SFU_RECIP: 288 | case QPU_W_SFU_RECIPSQRT: 289 | case QPU_W_SFU_EXP: 290 | case QPU_W_SFU_LOG: 291 | case QPU_W_TMU0_S: 292 | case QPU_W_TMU0_T: 293 | case QPU_W_TMU0_R: 294 | case QPU_W_TMU0_B: 295 | case QPU_W_TMU1_S: 296 | case QPU_W_TMU1_T: 297 | case QPU_W_TMU1_R: 298 | case QPU_W_TMU1_B: 299 | return true; 300 | } 301 | 302 | return false; 303 | } 304 | 305 | static void 306 | swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift) 307 | { 308 | uint64_t mux_mask = (uint64_t)0x7 << mux_shift; 309 | uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift; 310 | uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift; 311 | 312 | if ((*a & mux_mask) == mux_a_val) { 313 | *a = (*a & ~mux_mask) | mux_b_val; 314 | *merge = (*merge & ~mux_mask) | mux_b_val; 315 | } 316 | } 317 | 318 | static bool 319 | try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) 320 | { 321 | uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A); 322 | uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B); 323 | uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A); 324 | uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B); 325 | 326 | if (raddr_a_b != QPU_R_NOP) 327 | return false; 328 | 329 | switch (raddr_a_a) { 330 | case QPU_R_UNIF: 331 | case QPU_R_VARY: 332 | break; 333 | default: 334 | return false; 335 | } 336 | 337 | if (!(*merge & QPU_PM) && 338 | QPU_GET_FIELD(*merge, QPU_UNPACK) != 
QPU_UNPACK_NOP) { 339 | return false; 340 | } 341 | 342 | if (raddr_b_b != QPU_R_NOP && 343 | raddr_b_b != raddr_a_a) 344 | return false; 345 | 346 | /* Move raddr A to B in instruction a. */ 347 | *a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); 348 | *a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B); 349 | *merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A); 350 | *merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B); 351 | swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT); 352 | swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT); 353 | swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT); 354 | swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT); 355 | 356 | return true; 357 | } 358 | 359 | static bool 360 | convert_mov(uint64_t *inst) 361 | { 362 | uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A); 363 | uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD); 364 | uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD); 365 | 366 | /* Is it a MOV? */ 367 | if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR || 368 | (add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) { 369 | return false; 370 | } 371 | 372 | if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE) 373 | return false; 374 | 375 | /* We could maybe support this in the .8888 and .8a-.8d cases. 
*/ 376 | if (*inst & QPU_PM) 377 | return false; 378 | 379 | *inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD); 380 | *inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL); 381 | 382 | *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A); 383 | *inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B); 384 | *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A); 385 | *inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B); 386 | 387 | *inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL); 388 | *inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD); 389 | 390 | *inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL); 391 | *inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD); 392 | 393 | if (!qpu_waddr_ignores_ws(waddr_add)) 394 | *inst ^= QPU_WS; 395 | 396 | return true; 397 | } 398 | 399 | static bool 400 | writes_a_file(uint64_t inst) 401 | { 402 | if (!(inst & QPU_WS)) 403 | return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32; 404 | else 405 | return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32; 406 | } 407 | 408 | static bool 409 | reads_r4(uint64_t inst) 410 | { 411 | return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 || 412 | QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 || 413 | QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 || 414 | QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4); 415 | } 416 | 417 | uint64_t 418 | qpu_merge_inst(uint64_t a, uint64_t b) 419 | { 420 | uint64_t merge = a | b; 421 | bool ok = true; 422 | uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG); 423 | uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG); 424 | 425 | if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP && 426 | QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) { 427 | if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP || 428 | QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP || 429 | !(convert_mov(&a) || convert_mov(&b))) { 430 | return 0; 431 | } else { 432 | merge = a | b; 433 | } 434 | } 435 | 436 | if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP && 437 | QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 438 | 
return 0; 439 | 440 | if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) 441 | return 0; 442 | 443 | if (a_sig == QPU_SIG_LOAD_IMM || 444 | b_sig == QPU_SIG_LOAD_IMM || 445 | a_sig == QPU_SIG_SMALL_IMM || 446 | b_sig == QPU_SIG_SMALL_IMM) { 447 | return 0; 448 | } 449 | 450 | ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, 451 | QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); 452 | 453 | /* Misc fields that have to match exactly. */ 454 | ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0); 455 | 456 | if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK, 457 | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { 458 | /* Since we tend to use regfile A by default both for register 459 | * allocation and for our special values (uniforms and 460 | * varyings), try swapping uniforms and varyings to regfile B 461 | * to resolve raddr A conflicts. 462 | */ 463 | if (!try_swap_ra_file(&merge, &a, &b) && 464 | !try_swap_ra_file(&merge, &b, &a)) { 465 | return 0; 466 | } 467 | } 468 | 469 | ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK, 470 | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); 471 | 472 | ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK, 473 | QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD)); 474 | ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK, 475 | QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); 476 | 477 | /* Allow disagreement on WS (swapping A vs B physical reg file as the 478 | * destination for ADD/MUL) if one of the original instructions 479 | * ignores it (probably because it's just writing to accumulators). 
480 | */ 481 | if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) && 482 | qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) { 483 | merge = (merge & ~QPU_WS) | (b & QPU_WS); 484 | } else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) && 485 | qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) { 486 | merge = (merge & ~QPU_WS) | (a & QPU_WS); 487 | } else { 488 | if ((a & QPU_WS) != (b & QPU_WS)) 489 | return 0; 490 | } 491 | 492 | if (!merge_fields(&merge, a, b, QPU_PM, ~0)) { 493 | /* If one instruction has PM bit set and the other not, the 494 | * one without PM shouldn't do packing/unpacking, and we 495 | * have to make sure non-NOP packing/unpacking from PM 496 | * instruction aren't added to it. 497 | */ 498 | uint64_t temp; 499 | 500 | /* Let a be the one with PM bit */ 501 | if (!(a & QPU_PM)) { 502 | temp = a; 503 | a = b; 504 | b = temp; 505 | } 506 | 507 | if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0) 508 | return 0; 509 | 510 | if ((a & QPU_PACK_MASK) != 0 && 511 | QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 512 | return 0; 513 | 514 | if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b)) 515 | return 0; 516 | } else { 517 | /* packing: Make sure that non-NOP packs agree, then deal with 518 | * special-case failing of adding a non-NOP pack to something 519 | * with a NOP pack. 520 | */ 521 | if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0)) 522 | return 0; 523 | bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) != 524 | QPU_GET_FIELD(merge, QPU_PACK)); 525 | bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) != 526 | QPU_GET_FIELD(merge, QPU_PACK)); 527 | if (!(merge & QPU_PM)) { 528 | /* Make sure we're not going to be putting a new 529 | * a-file packing on either half. 530 | */ 531 | if (new_a_pack && writes_a_file(a)) 532 | return 0; 533 | 534 | if (new_b_pack && writes_a_file(b)) 535 | return 0; 536 | } else { 537 | /* Make sure we're not going to be putting new MUL 538 | * packing oneither half. 
539 | */ 540 | if (new_a_pack && 541 | QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP) 542 | return 0; 543 | 544 | if (new_b_pack && 545 | QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) 546 | return 0; 547 | } 548 | 549 | /* unpacking: Make sure that non-NOP unpacks agree, then deal 550 | * with special-case failing of adding a non-NOP unpack to 551 | * something with a NOP unpack. 552 | */ 553 | if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0)) 554 | return 0; 555 | bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) != 556 | QPU_GET_FIELD(merge, QPU_UNPACK)); 557 | bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) != 558 | QPU_GET_FIELD(merge, QPU_UNPACK)); 559 | if (!(merge & QPU_PM)) { 560 | /* Make sure we're not going to be putting a new 561 | * a-file packing on either half. 562 | */ 563 | if (new_a_unpack && 564 | QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP) 565 | return 0; 566 | 567 | if (new_b_unpack && 568 | QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP) 569 | return 0; 570 | } else { 571 | /* Make sure we're not going to be putting new r4 572 | * unpack on either half. 
573 | */ 574 | if (new_a_unpack && reads_r4(a)) 575 | return 0; 576 | 577 | if (new_b_unpack && reads_r4(b)) 578 | return 0; 579 | } 580 | } 581 | 582 | if (ok) 583 | return merge; 584 | else 585 | return 0; 586 | } 587 | 588 | uint64_t 589 | qpu_set_sig(uint64_t inst, uint32_t sig) 590 | { 591 | assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE); 592 | return QPU_UPDATE_FIELD(inst, sig, QPU_SIG); 593 | } 594 | 595 | uint64_t 596 | qpu_set_cond_add(uint64_t inst, uint32_t cond) 597 | { 598 | assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS); 599 | return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD); 600 | } 601 | 602 | uint64_t 603 | qpu_set_cond_mul(uint64_t inst, uint32_t cond) 604 | { 605 | assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS); 606 | return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL); 607 | } 608 | 609 | bool 610 | qpu_waddr_is_tlb(uint32_t waddr) 611 | { 612 | switch (waddr) { 613 | case QPU_W_TLB_COLOR_ALL: 614 | case QPU_W_TLB_COLOR_MS: 615 | case QPU_W_TLB_Z: 616 | return true; 617 | default: 618 | return false; 619 | } 620 | } 621 | 622 | bool 623 | qpu_inst_is_tlb(uint64_t inst) 624 | { 625 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 626 | 627 | return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || 628 | qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) || 629 | sig == QPU_SIG_COLOR_LOAD || 630 | sig == QPU_SIG_WAIT_FOR_SCOREBOARD); 631 | } 632 | 633 | /** 634 | * Returns the small immediate value to be encoded in to the raddr b field if 635 | * the argument can be represented as one, or ~0 otherwise. 
636 | */ 637 | uint32_t 638 | qpu_encode_small_immediate(uint32_t i) 639 | { 640 | if (i <= 15) 641 | return i; 642 | if ((int)i < 0 && (int)i >= -16) 643 | return i + 32; 644 | 645 | switch (i) { 646 | case 0x3f800000: 647 | return 32; 648 | case 0x40000000: 649 | return 33; 650 | case 0x40800000: 651 | return 34; 652 | case 0x41000000: 653 | return 35; 654 | case 0x41800000: 655 | return 36; 656 | case 0x42000000: 657 | return 37; 658 | case 0x42800000: 659 | return 38; 660 | case 0x43000000: 661 | return 39; 662 | case 0x3b800000: 663 | return 40; 664 | case 0x3c000000: 665 | return 41; 666 | case 0x3c800000: 667 | return 42; 668 | case 0x3d000000: 669 | return 43; 670 | case 0x3d800000: 671 | return 44; 672 | case 0x3e000000: 673 | return 45; 674 | case 0x3e800000: 675 | return 46; 676 | case 0x3f000000: 677 | return 47; 678 | } 679 | 680 | return ~0; 681 | } 682 | -------------------------------------------------------------------------------- /tests/lib/vc4_qpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #ifndef VC4_QPU_H 25 | #define VC4_QPU_H 26 | 27 | #include 28 | #include 29 | 30 | #include "vc4_qpu_defines.h" 31 | 32 | struct vc4_compile; 33 | 34 | struct qpu_reg { 35 | enum qpu_mux mux; 36 | uint8_t addr; 37 | }; 38 | 39 | static inline struct qpu_reg 40 | qpu_rn(int n) 41 | { 42 | struct qpu_reg r = { 43 | QPU_MUX_R0 + n, 44 | 0, 45 | }; 46 | 47 | return r; 48 | } 49 | 50 | static inline struct qpu_reg 51 | qpu_ra(int addr) 52 | { 53 | struct qpu_reg r = { 54 | QPU_MUX_A, 55 | addr, 56 | }; 57 | 58 | return r; 59 | } 60 | 61 | static inline struct qpu_reg 62 | qpu_rb(int addr) 63 | { 64 | struct qpu_reg r = { 65 | QPU_MUX_B, 66 | addr, 67 | }; 68 | 69 | return r; 70 | } 71 | 72 | static inline struct qpu_reg 73 | qpu_vary(void) 74 | { 75 | struct qpu_reg r = { 76 | QPU_MUX_A, 77 | QPU_R_VARY, 78 | }; 79 | 80 | return r; 81 | } 82 | 83 | static inline struct qpu_reg 84 | qpu_unif(void) 85 | { 86 | struct qpu_reg r = { 87 | QPU_MUX_A, 88 | QPU_R_UNIF, 89 | }; 90 | 91 | return r; 92 | } 93 | 94 | static inline struct qpu_reg 95 | qpu_vrsetup(void) 96 | { 97 | return qpu_ra(QPU_W_VPMVCD_SETUP); 98 | } 99 | 100 | static inline struct qpu_reg 101 | qpu_vwsetup(void) 102 | { 103 | return qpu_rb(QPU_W_VPMVCD_SETUP); 104 | } 105 | 106 | static inline struct qpu_reg 107 | qpu_tlbc(void) 108 | { 109 | struct qpu_reg r = { 110 | QPU_MUX_A, 111 | QPU_W_TLB_COLOR_ALL, 112 | }; 113 | 114 | return r; 115 | } 116 | 117 | static inline struct qpu_reg qpu_r0(void) { return qpu_rn(0); } 118 | static inline struct qpu_reg qpu_r1(void) { return qpu_rn(1); } 119 | static inline struct qpu_reg qpu_r2(void) { return qpu_rn(2); } 120 | static inline struct qpu_reg qpu_r3(void) { 
return qpu_rn(3); } 121 | static inline struct qpu_reg qpu_r4(void) { return qpu_rn(4); } 122 | static inline struct qpu_reg qpu_r5(void) { return qpu_rn(5); } 123 | 124 | uint64_t qpu_NOP(void) ATTRIBUTE_CONST; 125 | uint64_t qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST; 126 | uint64_t qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src) ATTRIBUTE_CONST; 127 | uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst, 128 | struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST; 129 | uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst, 130 | struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST; 131 | uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST; 132 | uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST; 133 | uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST; 134 | uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST; 135 | uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST; 136 | uint32_t qpu_encode_small_immediate(uint32_t i) ATTRIBUTE_CONST; 137 | 138 | bool qpu_waddr_is_tlb(uint32_t waddr) ATTRIBUTE_CONST; 139 | bool qpu_inst_is_tlb(uint64_t inst) ATTRIBUTE_CONST; 140 | int qpu_num_sf_accesses(uint64_t inst) ATTRIBUTE_CONST; 141 | 142 | static inline uint64_t 143 | qpu_load_imm_f(struct qpu_reg dst, float val) 144 | { 145 | return qpu_load_imm_ui(dst, fui(val)); 146 | } 147 | 148 | #define A_ALU2(op) \ 149 | static inline uint64_t \ 150 | qpu_a_##op(struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) \ 151 | { \ 152 | return qpu_a_alu2(QPU_A_##op, dst, src0, src1); \ 153 | } 154 | 155 | #define M_ALU2(op) \ 156 | static inline uint64_t \ 157 | qpu_m_##op(struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1) \ 158 | { \ 159 | return qpu_m_alu2(QPU_M_##op, dst, src0, src1); \ 160 | } 161 | 162 | #define A_ALU1(op) \ 163 | static inline uint64_t \ 164 | qpu_a_##op(struct qpu_reg dst, struct qpu_reg src0) \ 165 | { 
\ 166 | return qpu_a_alu2(QPU_A_##op, dst, src0, src0); \ 167 | } 168 | 169 | /*A_ALU2(NOP) */ 170 | A_ALU2(FADD) 171 | A_ALU2(FSUB) 172 | A_ALU2(FMIN) 173 | A_ALU2(FMAX) 174 | A_ALU2(FMINABS) 175 | A_ALU2(FMAXABS) 176 | A_ALU1(FTOI) 177 | A_ALU1(ITOF) 178 | A_ALU2(ADD) 179 | A_ALU2(SUB) 180 | A_ALU2(SHR) 181 | A_ALU2(ASR) 182 | A_ALU2(ROR) 183 | A_ALU2(SHL) 184 | A_ALU2(MIN) 185 | A_ALU2(MAX) 186 | A_ALU2(AND) 187 | A_ALU2(OR) 188 | A_ALU2(XOR) 189 | A_ALU1(NOT) 190 | A_ALU1(CLZ) 191 | A_ALU2(V8ADDS) 192 | A_ALU2(V8SUBS) 193 | 194 | /* M_ALU2(NOP) */ 195 | M_ALU2(FMUL) 196 | M_ALU2(MUL24) 197 | M_ALU2(V8MULD) 198 | M_ALU2(V8MIN) 199 | M_ALU2(V8MAX) 200 | M_ALU2(V8ADDS) 201 | M_ALU2(V8SUBS) 202 | 203 | void 204 | vc4_qpu_validate(uint64_t *insts, uint32_t num_inst); 205 | 206 | #endif /* VC4_QPU_H */ 207 | -------------------------------------------------------------------------------- /tests/lib/vc4_test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include "vc4_test.h" 25 | 26 | int 27 | main_func_for_single_test(int argc, char **argv, void (*func)(int fd)) 28 | { 29 | if (argc != 1) { 30 | fprintf(stderr, "usage: %s\n", argv[0]); 31 | vc4_report_result(VC4_RESULT_FAIL); 32 | } 33 | 34 | int fd = drmOpen("vc4", NULL); 35 | if (fd == -1) { 36 | fprintf(stderr, "Failed to open drm.\n"); 37 | vc4_report_result(VC4_RESULT_SKIP); 38 | } 39 | 40 | func(fd); 41 | 42 | /* If func() didn't report anything, then this is an error. */ 43 | fprintf(stderr, "%s: Exited without reporting another result\n", 44 | argv[0]); 45 | vc4_report_result(VC4_RESULT_FAIL); 46 | 47 | return 1; 48 | } 49 | 50 | void 51 | vc4_report_result(enum vc4_result result) 52 | { 53 | switch (result) { 54 | case VC4_RESULT_PASS: 55 | printf("Passed.\n"); 56 | exit(0); 57 | case VC4_RESULT_FAIL: 58 | printf("Failed.\n"); 59 | exit(1); 60 | case VC4_RESULT_SKIP: 61 | printf("Skipped.\n"); 62 | exit(77); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tests/lib/vc4_test.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice 
and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include "xf86drm.h" 33 | #include "vc4_tools.h" 34 | #include "vc4_drm.h" 35 | #include "vc4_qpu.h" 36 | 37 | enum vc4_result { 38 | VC4_RESULT_PASS, 39 | VC4_RESULT_FAIL, 40 | VC4_RESULT_SKIP, 41 | }; 42 | 43 | int main_func_for_single_test(int argc, char **argv, void (*func)(int fd)); 44 | void vc4_report_result(enum vc4_result result); 45 | 46 | #define SINGLE_TEST_WITH_DRM() \ 47 | static void func(int fd); \ 48 | int main(int argc, char **argv) \ 49 | { \ 50 | return main_func_for_single_test(argc, argv, func); \ 51 | } \ 52 | static void func(int fd) 53 | -------------------------------------------------------------------------------- /tests/shader_map.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software 
is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include "vc4_test.h" 26 | 27 | static int 28 | get_shader_bo(int fd) 29 | { 30 | uint64_t prog[3]; 31 | prog[0] = qpu_set_sig(qpu_NOP(), QPU_SIG_PROG_END); 32 | prog[1] = qpu_NOP(); 33 | prog[2] = qpu_NOP(); 34 | 35 | struct drm_vc4_create_shader_bo create = { 36 | .size = sizeof(prog), 37 | .data = (uintptr_t)(void *)prog, 38 | }; 39 | 40 | int ret = ioctl(fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create); 41 | if (ret != 0) { 42 | fprintf(stderr, "Create unexpectedly returned %d\n", errno); 43 | vc4_report_result(VC4_RESULT_FAIL); 44 | } 45 | 46 | return create.handle; 47 | } 48 | 49 | static int 50 | get_mmap_offset(int fd, int handle) 51 | { 52 | struct drm_vc4_mmap_bo map = { 53 | .handle = handle, 54 | }; 55 | 56 | int ret = ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map); 57 | if (ret != 0) { 58 | fprintf(stderr, "Map unexpectedly returned %d\n", errno); 59 | vc4_report_result(VC4_RESULT_FAIL); 60 | } 61 | 62 | return map.offset; 63 | } 64 | 65 | static void * 66 | do_map(int fd, uint64_t offset, int prot) 67 | { 68 | return mmap(NULL, 3 * sizeof(uint64_t), prot, MAP_SHARED, fd, offset); 69 | } 70 | 71 | SINGLE_TEST_WITH_DRM() 72 | { 73 | int handle = get_shader_bo(fd); 74 | uint64_t offset 
= get_mmap_offset(fd, handle); 75 | void *map; 76 | bool pass = true; 77 | 78 | printf("Testing a mapping with PROT_READ | PROT_WRITE\n"); 79 | map = do_map(fd, offset, PROT_READ | PROT_WRITE); 80 | if (map != MAP_FAILED) { 81 | fprintf(stderr, "mmap returned %p, expected MAP_FAILELD\n", 82 | map); 83 | pass = false; 84 | } 85 | 86 | printf("Testing a mapping with PROT_READ\n"); 87 | map = do_map(fd, offset, PROT_READ); 88 | if (map == MAP_FAILED) { 89 | fprintf(stderr, "mmap returned MAP_FAILELD\n"); 90 | pass = false; 91 | } 92 | 93 | vc4_report_result(pass ? VC4_RESULT_PASS : VC4_RESULT_FAIL); 94 | } 95 | -------------------------------------------------------------------------------- /tests/shader_missing_end.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include "vc4_test.h" 25 | 26 | static bool test_create(int fd, uint64_t *prog, uint32_t size) 27 | { 28 | struct drm_vc4_create_shader_bo create = { 29 | .size = size, 30 | .data = (uintptr_t)(void *)prog, 31 | }; 32 | 33 | int ret = ioctl(fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create); 34 | if (ret != -1 || errno != EINVAL) { 35 | fprintf(stderr, "Unexpected non-EINVAL (%d/%d)\n", ret, errno); 36 | return false; 37 | } else { 38 | printf("Got EINVAL\n"); 39 | return true; 40 | } 41 | } 42 | 43 | SINGLE_TEST_WITH_DRM() 44 | { 45 | /* Use a page-sized program to try to trigger overflow. */ 46 | uint64_t prog[4096 / sizeof(uint64_t)]; 47 | uint32_t last = ARRAY_SIZE(prog) - 1; 48 | bool pass = true; 49 | 50 | for (int i = 0; i < ARRAY_SIZE(prog); i++) 51 | prog[i] = qpu_NOP(); 52 | 53 | printf("Testing with no PROG_END at all\n"); 54 | pass = test_create(fd, prog, sizeof(prog)) && pass; 55 | 56 | printf("Testing with PROG_END at last instruction\n"); 57 | prog[last] = qpu_set_sig(qpu_NOP(), QPU_SIG_PROG_END); 58 | pass = test_create(fd, prog, sizeof(prog)) && pass; 59 | prog[last] = qpu_NOP(); 60 | 61 | printf("Testing with PROG_END at second-to-last instruction\n"); 62 | prog[last - 1] = qpu_set_sig(qpu_NOP(), QPU_SIG_PROG_END); 63 | pass = test_create(fd, prog, sizeof(prog)) && pass; 64 | prog[last - 1] = qpu_NOP(); 65 | 66 | vc4_report_result(pass ? 
VC4_RESULT_PASS : VC4_RESULT_FAIL); 67 | } 68 | -------------------------------------------------------------------------------- /tests/shader_noop.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 
22 | */ 23 | 24 | #include "vc4_test.h" 25 | 26 | SINGLE_TEST_WITH_DRM() 27 | { 28 | uint64_t prog[3]; 29 | 30 | prog[0] = qpu_set_sig(qpu_NOP(), QPU_SIG_PROG_END); 31 | prog[1] = qpu_NOP(); 32 | prog[2] = qpu_NOP(); 33 | 34 | struct drm_vc4_create_shader_bo create = { 35 | .size = sizeof(prog), 36 | .data = (uintptr_t)(void *)prog, 37 | }; 38 | 39 | int ret = ioctl(fd, DRM_IOCTL_VC4_CREATE_SHADER_BO, &create); 40 | if (ret != 0) { 41 | fprintf(stderr, "Create unexpectedly returned %d\n", errno); 42 | vc4_report_result(VC4_RESULT_FAIL); 43 | } else { 44 | printf("Successfully created shader %d\n", create.handle); 45 | vc4_report_result(VC4_RESULT_PASS); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tools/Makefile.am: -------------------------------------------------------------------------------- 1 | # Copyright © 2015 Broadcom 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice (including the next 11 | # paragraph) shall be included in all copies or substantial portions of the 12 | # Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 20 | # IN THE SOFTWARE. 21 | 22 | AM_CPPFLAGS = -I$(top_srcdir)/include/drm -I$(top_srcdir)/include 23 | AM_CFLAGS = $(LIBDRM_CFLAGS) $(CWARNFLAGS) 24 | 25 | if HAVE_SIMPENROSE 26 | SIMPENROSE_PROGS = \ 27 | vc4_dump_to_clif \ 28 | $() 29 | endif 30 | 31 | bin_PROGRAMS = \ 32 | $(SIMPENROSE_PROGS) \ 33 | vc4_dump_hang_state \ 34 | vc4_dump_parse \ 35 | $() 36 | 37 | vc4_dump_hang_state_LDADD = $(LIBDRM_LIBS) 38 | vc4_dump_to_clif_LDFLAGS = $(SIMPENROSE_LIBS) 39 | 40 | vc4_dump_parse_SOURCES = \ 41 | vc4_dump_parse.c \ 42 | vc4_dump_parse.h \ 43 | vc4_dump_parse_cl.c \ 44 | vc4_qpu_disasm.c \ 45 | $() 46 | -------------------------------------------------------------------------------- /tools/vc4_dump_hang_state.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include "xf86drm.h" 35 | #include "vc4_drm.h" 36 | 37 | struct hang { 38 | struct drm_vc4_get_hang_state *get_state; 39 | struct drm_vc4_get_hang_state_bo *bo_state; 40 | void **maps; 41 | uint32_t bo_count; 42 | }; 43 | 44 | static void 45 | get_hang_state(int fd, struct hang *hang) 46 | { 47 | int ret; 48 | 49 | hang->get_state = calloc(1, sizeof(*hang->get_state)); 50 | if (!hang->get_state) 51 | err(1, "malloc failure"); 52 | 53 | ret = ioctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, hang->get_state); 54 | if (ret) { 55 | if (errno == ENOENT) { 56 | fprintf(stdout, "No hang state recorded\n"); 57 | exit(0); 58 | } 59 | 60 | if (errno == EACCES && geteuid() != 0) { 61 | fprintf(stderr, "Root permission is required to " 62 | "get hang state\n"); 63 | exit(1); 64 | } 65 | 66 | err(1, "Initial get hang state failed"); 67 | } 68 | 69 | hang->bo_count = hang->get_state->bo_count; 70 | hang->bo_state = calloc(hang->bo_count, 71 | sizeof(struct drm_vc4_get_hang_state_bo)); 72 | if (!hang->bo_state) 73 | err(1, "malloc failure"); 74 | hang->get_state->bo = (uintptr_t)hang->bo_state; 75 | 76 | 77 | ret = ioctl(fd, DRM_IOCTL_VC4_GET_HANG_STATE, hang->get_state); 78 | if (ret) 79 | err(1, "Full get hang state failed"); 80 | } 81 | 82 | static void 83 | map_bos(int fd, struct hang *hang) 84 | { 85 | int ret; 86 | 87 | hang->maps = calloc(hang->bo_count, sizeof(*hang->maps)); 88 | if (!hang->maps) 89 | err(1, "malloc failure"); 90 | 91 | for (int i = 0; i < hang->bo_count; i++) { 92 | struct drm_vc4_mmap_bo map; 93 | 94 | memset(&map, 0, 
sizeof(map)); 95 | map.handle = hang->bo_state[i].handle; 96 | ret = ioctl(fd, DRM_IOCTL_VC4_MMAP_BO, &map); 97 | if (ret) { 98 | err(1, "Couldn't get map offset for " 99 | "bo %d (handle %d)", i, hang->bo_state[i].handle); 100 | } 101 | 102 | hang->maps[i] = mmap(NULL, hang->bo_state[i].size, 103 | PROT_READ, MAP_SHARED, 104 | fd, map.offset); 105 | 106 | if (hang->maps[i] == MAP_FAILED) { 107 | err(1, "Failed to map BO %d (handle %d)", 108 | i, hang->bo_state[i].handle); 109 | } 110 | } 111 | } 112 | 113 | static void 114 | write_hang_state(const char *filename, struct hang *hang) 115 | { 116 | uint32_t data; 117 | FILE *f; 118 | 119 | if (strcmp(filename, "-") == 0) 120 | f = stdout; 121 | else 122 | f = fopen(filename, "w+"); 123 | if (!f) 124 | err(1, "Couldn't open %s for writing", filename); 125 | 126 | /* Version */ 127 | data = 0; 128 | fwrite(&data, sizeof(data), 1, f); 129 | 130 | fwrite(hang->get_state, sizeof(*hang->get_state), 1, f); 131 | 132 | fwrite(hang->bo_state, sizeof(*hang->bo_state), hang->bo_count, f); 133 | 134 | for (int i = 0; i < hang->bo_count; i++) 135 | fwrite(hang->maps[i], hang->bo_state[i].size, 1, f); 136 | 137 | if (ferror(f)) 138 | errx(1, "Error writing hang state file\n"); 139 | 140 | fclose(f); 141 | } 142 | 143 | static void 144 | usage(const char *name) 145 | { 146 | fprintf(stderr, "Usage: %s hang_file\n", name); 147 | exit(1); 148 | } 149 | 150 | int 151 | main(int argc, char **argv) 152 | { 153 | int fd; 154 | struct hang hang; 155 | 156 | if (argc != 2) 157 | usage(argv[0]); 158 | 159 | memset(&hang, 0, sizeof(hang)); 160 | 161 | fd = drmOpen("vc4", NULL); 162 | if (fd == -1) 163 | err(1, "couldn't open DRM node"); 164 | 165 | get_hang_state(fd, &hang); 166 | map_bos(fd, &hang); 167 | write_hang_state(argv[1], &hang); 168 | 169 | return 0; 170 | } 171 | -------------------------------------------------------------------------------- /tools/vc4_dump_parse.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 
22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "vc4_drm.h" 37 | 38 | #include "list.h" 39 | #include "vc4_tools.h" 40 | #include "vc4_dump_parse.h" 41 | #include "vc4_packet.h" 42 | #include "vc4_qpu_defines.h" 43 | 44 | static void * 45 | map_input(const char *filename) 46 | { 47 | int fd; 48 | void *map; 49 | struct stat stat; 50 | int ret; 51 | 52 | fd = open(filename, 0); 53 | if (fd == -1) 54 | err(1, "Couldn't open input file %s", filename); 55 | 56 | ret = fstat(fd, &stat); 57 | if (ret) 58 | err(1, "Couldn't get size of input file %s", filename); 59 | 60 | map = mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0); 61 | if (map == MAP_FAILED) 62 | err(1, "Couldn't map input file %s", filename); 63 | 64 | return map; 65 | } 66 | 67 | struct vc4_mem_area_rec { 68 | struct list_head link; 69 | 70 | enum vc4_mem_area_type type; 71 | void *addr; 72 | uint32_t paddr; 73 | uint32_t size; 74 | uint8_t prim_mode; 75 | 76 | /* GL shader rec bits. 
*/ 77 | uint8_t attributes; 78 | bool extended; 79 | }; 80 | 81 | static struct { 82 | struct drm_vc4_get_hang_state *state; 83 | struct drm_vc4_get_hang_state_bo *bo_state; 84 | void **map; 85 | 86 | struct list_head mem_areas; 87 | } dump; 88 | 89 | static void 90 | dump_bo_list(void) 91 | { 92 | fprintf(stderr, "BOs:\n"); 93 | 94 | for (int i = 0; i < dump.state->bo_count; i++) { 95 | uint32_t paddr = dump.bo_state[i].paddr; 96 | fprintf(stderr, "0x%08x..0x%08x (%p)\n", 97 | paddr, 98 | paddr + dump.bo_state[i].size - 1, 99 | dump.map[i]); 100 | } 101 | } 102 | 103 | void * 104 | vc4_paddr_to_pointer(uint32_t addr) 105 | { 106 | for (int i = 0; i < dump.state->bo_count; i++) { 107 | uint32_t paddr = dump.bo_state[i].paddr; 108 | if (addr >= paddr && addr < paddr + dump.bo_state[i].size) 109 | return dump.map[i] + (addr - paddr); 110 | } 111 | 112 | fprintf(stderr, "Couldn't translate address 0x%08x\n", addr); 113 | dump_bo_list(); 114 | 115 | return NULL; 116 | } 117 | 118 | uint32_t 119 | vc4_pointer_to_paddr(void *p) 120 | { 121 | for (int i = 0; i < dump.state->bo_count; i++) { 122 | void *map = dump.map[i]; 123 | if (p >= map && p < map + dump.bo_state[i].size) 124 | return dump.bo_state[i].paddr + (p - map); 125 | } 126 | 127 | fprintf(stderr, "Couldn't translate pointer %p\n", p); 128 | dump_bo_list(); 129 | 130 | return 0; 131 | } 132 | 133 | static uint32_t 134 | vc4_get_end_paddr(uint32_t paddr) 135 | { 136 | for (int i = 0; i < dump.state->bo_count; i++) { 137 | uint32_t start = dump.bo_state[i].paddr; 138 | uint32_t end = start + dump.bo_state[i].size; 139 | if (paddr >= start && paddr < end) 140 | return end; 141 | } 142 | 143 | fprintf(stderr, "Couldn't translate paddr 0x%08x\n", paddr); 144 | dump_bo_list(); 145 | 146 | return 0; 147 | } 148 | 149 | static struct vc4_mem_area_rec * 150 | vc4_add_mem_area_to_list(struct vc4_mem_area_rec *rec) 151 | { 152 | /* Don't add exact duplicates of memory areas to the list. 
We have to 153 | * be careful to not compare the list pointers, since the new rec 154 | * won't be in the list. 155 | */ 156 | struct vc4_mem_area_rec compare_a = *rec; 157 | memset(&compare_a.link, 0, sizeof(compare_a.link)); 158 | list_for_each_entry(struct vc4_mem_area_rec, list_rec, &dump.mem_areas, 159 | link) { 160 | struct vc4_mem_area_rec compare_b = *list_rec; 161 | memset(&compare_b.link, 0, sizeof(compare_b.link)); 162 | if (memcmp(&compare_a, &compare_b, sizeof(compare_a)) == 0) 163 | return list_rec; 164 | } 165 | 166 | struct vc4_mem_area_rec *list_rec = malloc(sizeof(*list_rec)); 167 | *list_rec = *rec; 168 | list_addtail(&list_rec->link, &dump.mem_areas); 169 | return list_rec; 170 | } 171 | 172 | static void 173 | vc4_init_mem_area(struct vc4_mem_area_rec *rec, enum vc4_mem_area_type type, 174 | uint32_t paddr, uint32_t size) 175 | { 176 | memset(rec, 0, sizeof(*rec)); 177 | rec->type = type; 178 | rec->paddr = paddr; 179 | rec->addr = vc4_paddr_to_pointer(paddr); 180 | rec->size = size; 181 | rec->prim_mode = ~0; 182 | } 183 | 184 | static void 185 | vc4_init_mem_area_unsized(struct vc4_mem_area_rec *rec, 186 | enum vc4_mem_area_type type, uint32_t paddr) 187 | { 188 | vc4_init_mem_area(rec, type, paddr, vc4_get_end_paddr(paddr) - paddr); 189 | } 190 | 191 | struct vc4_mem_area_rec * 192 | vc4_parse_add_mem_area_sized(enum vc4_mem_area_type type, uint32_t paddr, 193 | uint32_t size) 194 | { 195 | struct vc4_mem_area_rec rec; 196 | vc4_init_mem_area(&rec, type, paddr, size); 197 | return vc4_add_mem_area_to_list(&rec); 198 | } 199 | 200 | struct vc4_mem_area_rec * 201 | vc4_parse_add_mem_area(enum vc4_mem_area_type type, uint32_t paddr) 202 | { 203 | struct vc4_mem_area_rec rec; 204 | vc4_init_mem_area_unsized(&rec, type, paddr); 205 | return vc4_add_mem_area_to_list(&rec); 206 | } 207 | 208 | void 209 | vc4_parse_add_sublist(uint32_t paddr, uint8_t prim_mode) 210 | { 211 | struct vc4_mem_area_rec rec; 212 | vc4_init_mem_area_unsized(&rec, 
VC4_MEM_AREA_SUB_LIST, paddr); 213 | rec.prim_mode = prim_mode; 214 | vc4_add_mem_area_to_list(&rec); 215 | } 216 | 217 | void 218 | vc4_parse_add_compressed_list(uint32_t paddr, uint8_t prim_mode) 219 | { 220 | struct vc4_mem_area_rec rec; 221 | vc4_init_mem_area_unsized(&rec, 222 | VC4_MEM_AREA_COMPRESSED_PRIM_LIST, paddr); 223 | rec.prim_mode = prim_mode; 224 | vc4_add_mem_area_to_list(&rec); 225 | } 226 | 227 | void 228 | vc4_parse_add_gl_shader_rec(uint32_t paddr, uint8_t attributes, bool extended) 229 | { 230 | uint32_t size = 36 + attributes * 8; 231 | 232 | assert(!extended); 233 | 234 | struct vc4_mem_area_rec rec; 235 | vc4_init_mem_area(&rec, VC4_MEM_AREA_GL_SHADER_REC, paddr, size); 236 | rec.attributes = attributes; 237 | rec.extended = extended; 238 | vc4_add_mem_area_to_list(&rec); 239 | } 240 | 241 | void 242 | vc4_parse_add_nv_shader_rec(uint32_t paddr) 243 | { 244 | struct vc4_mem_area_rec rec; 245 | vc4_init_mem_area(&rec, VC4_MEM_AREA_NV_SHADER_REC, paddr, 16); 246 | vc4_add_mem_area_to_list(&rec); 247 | } 248 | 249 | static void 250 | set_bo_maps(void *input) 251 | { 252 | uint32_t *version = input; 253 | if (*version != 0) { 254 | fprintf(stderr, "Input had wrong version %d\n", *version); 255 | exit(1); 256 | } 257 | 258 | dump.state = (void *)&version[1]; 259 | dump.bo_state = (void *)&dump.state[1]; 260 | 261 | dump.map = calloc(dump.state->bo_count, sizeof(*dump.map)); 262 | if (!dump.map) 263 | err(1, "malloc failure"); 264 | 265 | void *next_map = &dump.bo_state[dump.state->bo_count]; 266 | for (int i = 0; i < dump.state->bo_count; i++) { 267 | dump.map[i] = next_map; 268 | next_map += dump.bo_state[i].size; 269 | } 270 | } 271 | 272 | static void 273 | parse_cls(void) 274 | { 275 | if (dump.state->start_bin != dump.state->ct0ea) { 276 | printf("Bin CL at 0x%08x\n", dump.state->start_bin); 277 | vc4_dump_cl(dump.state->start_bin, dump.state->ct0ea, 278 | false, false, ~0); 279 | } 280 | 281 | printf("Render CL at 0x%08x\n", 
dump.state->start_render); 282 | vc4_dump_cl(dump.state->start_render, dump.state->ct1ea, 283 | true, false, ~0); 284 | } 285 | 286 | static void 287 | parse_sublists(void) 288 | { 289 | list_for_each_entry(struct vc4_mem_area_rec, rec, &dump.mem_areas, 290 | link) { 291 | switch (rec->type) { 292 | case VC4_MEM_AREA_SUB_LIST: 293 | printf("Sublist at 0x%08x:\n", rec->paddr); 294 | if (!rec->addr) { 295 | printf(" No mapping found\n"); 296 | continue; 297 | } 298 | vc4_dump_cl(rec->paddr, rec->paddr + rec->size, true, 299 | false, rec->prim_mode); 300 | printf("\n"); 301 | break; 302 | case VC4_MEM_AREA_COMPRESSED_PRIM_LIST: 303 | printf("Compressed list at 0x%08x:\n", rec->paddr); 304 | if (!rec->addr) { 305 | printf(" No mapping found\n"); 306 | continue; 307 | } 308 | vc4_dump_cl(rec->paddr, rec->paddr + rec->size, true, 309 | true, rec->prim_mode); 310 | printf("\n"); 311 | break; 312 | default: 313 | break; 314 | } 315 | } 316 | } 317 | 318 | static void 319 | parse_gl_shader_rec(struct vc4_mem_area_rec *rec) 320 | { 321 | uint32_t paddr = rec->paddr; 322 | void *addr = rec->addr; 323 | uint8_t *b = addr; 324 | uint16_t *s = addr; 325 | 326 | printf("GL Shader rec at 0x%08x " 327 | "(%d attributes, %sextended):\n", rec->paddr, 328 | rec->attributes, 329 | rec->extended ? "" : "not "); 330 | 331 | if (!rec->addr) { 332 | printf(" No mapping found\n"); 333 | return; 334 | } 335 | 336 | printf("0x%08x: 0x%04x: %s, %s, %s\n", 337 | paddr, s[0], 338 | (s[0] & VC4_SHADER_FLAG_ENABLE_CLIPPING) ? 339 | "clipped" : "unclipped", 340 | (s[0] & VC4_SHADER_FLAG_FS_SINGLE_THREAD) ? 341 | "single thread" : "dual thread", 342 | (s[0] & VC4_SHADER_FLAG_VS_POINT_SIZE) ? 
343 | "point size" : "no point size"); 344 | 345 | printf("0x%08x: 0x%02x: fs num uniforms\n", paddr + 2, b[2]); 346 | printf("0x%08x: 0x%02x: fs inputs\n", paddr + 3, b[3]); 347 | printf("0x%08x: 0x%04x: fs code\n", paddr + 4, 348 | *(uint32_t *)(addr + 4)); 349 | printf("0x%08x: 0x%04x: fs uniforms\n", paddr + 8, 350 | *(uint32_t *)(addr + 8)); 351 | vc4_parse_add_mem_area(VC4_MEM_AREA_FS, 352 | *(uint32_t *)(addr + 4)); 353 | 354 | printf("0x%08x: 0x%04x: vs num uniforms\n", paddr + 12, 355 | *(uint16_t *)(addr + 12)); 356 | printf("0x%08x: 0x%02x: vs inputs\n", paddr + 14, b[14]); 357 | printf("0x%08x: 0x%02x: vs attr size\n", paddr + 15, b[15]); 358 | printf("0x%08x: 0x%04x: vs code\n", paddr + 16, 359 | *(uint32_t *)(addr + 16)); 360 | printf("0x%08x: 0x%04x: vs uniforms\n", paddr + 20, 361 | *(uint32_t *)(addr + 20)); 362 | vc4_parse_add_mem_area(VC4_MEM_AREA_VS, 363 | *(uint32_t *)(addr + 16)); 364 | 365 | printf("0x%08x: 0x%04x: cs num uniforms\n", paddr + 24, 366 | *(uint16_t *)(addr + 24)); 367 | printf("0x%08x: 0x%02x: cs inputs\n", paddr + 26, b[26]); 368 | printf("0x%08x: 0x%02x: cs attr size\n", paddr + 27, b[27]); 369 | printf("0x%08x: 0x%04x: cs code\n", paddr + 28, 370 | *(uint32_t *)(addr + 28)); 371 | printf("0x%08x: 0x%04x: cs uniforms\n", paddr + 32, 372 | *(uint32_t *)(addr + 32)); 373 | vc4_parse_add_mem_area(VC4_MEM_AREA_CS, 374 | *(uint32_t *)(addr + 28)); 375 | 376 | for (int i = 0; i < rec->attributes; i++) { 377 | uint32_t ext_stride = 0; 378 | if (rec->extended) 379 | ext_stride = *(uint32_t *)(addr + 100 + i * 4); 380 | 381 | printf("0x%08x: 0x%08x: attr %d addr\n", 382 | paddr + 36 + i * 8, 383 | *(uint32_t *)(addr + 36 + i * 8), i); 384 | printf("0x%08x: 0x%04x: attr %d %db, %db stride\n", 385 | paddr + 40 + i * 8, 386 | *(uint16_t *)(addr + 40 + i * 8), 387 | i, 388 | *(uint8_t *)(addr + 40 + i * 8) + 1, 389 | *(uint8_t *)(addr + 41 + i * 8) + ext_stride); 390 | printf("0x%08x: 0x%04x: attr %d %2d VS VPM, %2d CS VPM\n", 391 | paddr 
+ 42 + i * 8, 392 | *(uint16_t *)(addr + 42 + i * 8), 393 | i, 394 | *(uint8_t *)(addr + 42 + i * 8), 395 | *(uint8_t *)(addr + 43 + i * 8)); 396 | } 397 | 398 | printf("\n"); 399 | } 400 | 401 | static void 402 | parse_nv_shader_rec(struct vc4_mem_area_rec *rec) 403 | { 404 | uint32_t paddr = rec->paddr; 405 | void *addr = rec->addr; 406 | uint8_t *b = addr; 407 | 408 | printf("NV Shader rec at 0x%08x:\n", rec->paddr); 409 | 410 | if (!rec->addr) { 411 | printf(" No mapping found\n"); 412 | return; 413 | } 414 | 415 | printf("0x%08x: 0x%02x: %sclip coords, %s, %s, %s\n", 416 | paddr, b[0], 417 | (b[0] & VC4_SHADER_FLAG_SHADED_CLIP_COORDS) ? 418 | "" : "no ", 419 | (b[0] & VC4_SHADER_FLAG_ENABLE_CLIPPING) ? 420 | "clipped" : "unclipped", 421 | (b[0] & VC4_SHADER_FLAG_FS_SINGLE_THREAD) ? 422 | "single thread" : "dual thread", 423 | (b[0] & VC4_SHADER_FLAG_VS_POINT_SIZE) ? 424 | "point size" : "no point size"); 425 | 426 | printf("0x%08x: 0x%02x: vertex stride\n", paddr + 1, b[1]); 427 | printf("0x%08x: 0x%02x: fs num uniforms\n", paddr + 2, b[2]); 428 | printf("0x%08x: 0x%02x: fs inputs\n", paddr + 3, b[3]); 429 | printf("0x%08x: 0x%04x: fs code\n", paddr + 4, 430 | *(uint32_t *)(addr + 4)); 431 | vc4_parse_add_mem_area(VC4_MEM_AREA_FS, 432 | *(uint32_t *)(addr + 4)); 433 | printf("0x%08x: 0x%04x: fs uniforms\n", paddr + 8, 434 | *(uint32_t *)(addr + 8)); 435 | printf("0x%08x: 0x%04x: vertex data\n", paddr + 12, 436 | *(uint32_t *)(addr + 12)); 437 | 438 | printf("\n"); 439 | } 440 | 441 | static void 442 | parse_shader_recs(void) 443 | { 444 | list_for_each_entry(struct vc4_mem_area_rec, rec, &dump.mem_areas, 445 | link) { 446 | switch (rec->type) { 447 | case VC4_MEM_AREA_GL_SHADER_REC: 448 | parse_gl_shader_rec(rec); 449 | break; 450 | case VC4_MEM_AREA_NV_SHADER_REC: 451 | parse_nv_shader_rec(rec); 452 | break; 453 | default: 454 | break; 455 | } 456 | } 457 | } 458 | 459 | static void 460 | parse_shaders(void) 461 | { 462 | list_for_each_entry(struct 
vc4_mem_area_rec, rec, &dump.mem_areas, 463 | link) { 464 | const char *type = NULL; 465 | 466 | switch (rec->type) { 467 | case VC4_MEM_AREA_CS: 468 | type = "CS"; 469 | break; 470 | case VC4_MEM_AREA_VS: 471 | type = "VS"; 472 | break; 473 | case VC4_MEM_AREA_FS: 474 | type = "FS"; 475 | break; 476 | default: 477 | continue; 478 | } 479 | 480 | printf("%s at 0x%08x:\n", type, rec->paddr); 481 | 482 | if (!rec->addr) { 483 | printf(" No mapping found\n"); 484 | continue; 485 | } 486 | 487 | uint32_t end_offset = ~0; 488 | for (uint32_t offset = 0; 489 | offset < end_offset; 490 | offset += sizeof(uint64_t)) { 491 | uint64_t inst = *(uint64_t *)(rec->addr + offset); 492 | 493 | printf("0x%08x: ", rec->paddr + offset); 494 | vc4_qpu_disasm(stdout, &inst, 1); 495 | printf("\n"); 496 | 497 | if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_PROG_END) { 498 | /* Parse two more instructions (the delay 499 | * slots), then stop. 500 | */ 501 | end_offset = offset + 12; 502 | } 503 | } 504 | printf("\n"); 505 | } 506 | } 507 | 508 | static void 509 | usage(const char *name) 510 | { 511 | fprintf(stderr, "Usage: %s input.dump\n", name); 512 | exit(1); 513 | } 514 | 515 | static const struct { 516 | int bit; 517 | const char *name; 518 | } errstat_bits[] = { 519 | { 15, "L2CARE: L2C AXI receive FIFO overrun error" }, 520 | { 14, "VCMRE: VCM error (binner)" }, 521 | { 13, "VCMRE: VCM error (renderer)" }, 522 | { 12, "VCDI: VCD Idle" }, 523 | { 11, "VCDE: VCD error - FIFO pointers out of snyc" }, 524 | { 10, "VDWE: VDW error - address overflows" }, 525 | { 9, "VPMEAS: VPM error - allocated size error" }, 526 | { 8, "VPMEFNA: VPM error - free non-allocated" }, 527 | { 7, "VPMEWNA: VPM error - write non-allocated" }, 528 | { 6, "VPMERNA: VPM error - read non-allocated" }, 529 | { 5, "VPMERR: VPM error - read range" }, 530 | { 4, "VPMEWR: VPM error - write range" }, 531 | { 3, "VPAERRGL: VPM allocator error - renderer request greater than limit" }, 532 | { 2, "VPAEBRGL: VPM allocator 
error - binner request greater than limit" }, 533 | { 1, "VPAERGS: VPM allocator error - request too big" }, 534 | { 0, "VPAEABB: VPM allocator error - allocating base while busy" }, 535 | }; 536 | 537 | static void 538 | dump_registers(void) 539 | { 540 | printf("Bin CL: 0x%08x to 0x%08x\n", 541 | dump.state->start_bin, dump.state->ct0ea); 542 | printf("Bin current: 0x%08x\n", dump.state->ct0ca); 543 | printf("Render CL: 0x%08x to 0x%08x\n", 544 | dump.state->start_render, dump.state->ct1ea); 545 | printf("Render current: 0x%08x\n", dump.state->ct1ca); 546 | printf("\n"); 547 | 548 | printf("V3D_VPMBASE: 0x%08x\n", dump.state->vpmbase); 549 | printf("V3D_DBGE: 0x%08x\n", dump.state->dbge); 550 | printf("V3D_FDBGO: 0x%08x: %s\n", dump.state->fdbgo, 551 | (dump.state->fdbgo & ~((1 << 1) | 552 | (1 << 2) | 553 | (1 << 11))) ? 554 | "some errors" : "no errors"); 555 | printf("V3D_FDBGB: 0x%08x\n", dump.state->fdbgb); 556 | printf("V3D_FDBGR: 0x%08x\n", dump.state->fdbgr); 557 | printf("V3D_FDBGS: 0x%08x\n", dump.state->fdbgs); 558 | printf("\n"); 559 | printf("V3D_ERRSTAT: 0x%08x\n", dump.state->errstat); 560 | for (int i = 0; i < ARRAY_SIZE(errstat_bits); i++) { 561 | if (dump.state->errstat & (1 << errstat_bits[i].bit)) 562 | printf("V3D_ERRSTAT: %s\n", errstat_bits[i].name); 563 | } 564 | 565 | printf("\n"); 566 | } 567 | 568 | int 569 | main(int argc, char **argv) 570 | { 571 | void *input; 572 | 573 | list_inithead(&dump.mem_areas); 574 | 575 | if (argc != 2) 576 | usage(argv[0]); 577 | 578 | input = map_input(argv[1]); 579 | set_bo_maps(input); 580 | 581 | dump_registers(); 582 | parse_cls(); 583 | parse_sublists(); 584 | parse_shader_recs(); 585 | parse_shaders(); 586 | 587 | return 0; 588 | } 589 | -------------------------------------------------------------------------------- /tools/vc4_dump_parse.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby 
granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 
22 | */ 23 | 24 | #include 25 | #include 26 | 27 | struct vc4_mem_area_rec; 28 | 29 | enum vc4_mem_area_type { 30 | VC4_MEM_AREA_GL_SHADER_REC, 31 | VC4_MEM_AREA_NV_SHADER_REC, 32 | VC4_MEM_AREA_SUB_LIST, 33 | VC4_MEM_AREA_COMPRESSED_PRIM_LIST, 34 | VC4_MEM_AREA_CS, 35 | VC4_MEM_AREA_VS, 36 | VC4_MEM_AREA_FS, 37 | }; 38 | 39 | void vc4_dump_cl(uint32_t start, uint32_t end, bool is_render, 40 | bool in_compressed_list, uint8_t prim_mode); 41 | 42 | uint32_t vc4_pointer_to_paddr(void *p); 43 | void *vc4_paddr_to_pointer(uint32_t addr); 44 | 45 | struct vc4_mem_area_rec * 46 | vc4_parse_add_mem_area(enum vc4_mem_area_type type, uint32_t paddr); 47 | 48 | struct vc4_mem_area_rec * 49 | vc4_parse_add_mem_area_sized(enum vc4_mem_area_type type, uint32_t paddr, 50 | uint32_t size); 51 | 52 | void vc4_parse_add_sublist(uint32_t paddr, uint8_t prim_mode); 53 | void vc4_parse_add_compressed_list(uint32_t paddr, uint8_t prim_mode); 54 | void vc4_parse_add_gl_shader_rec(uint32_t paddr, uint8_t attributes, 55 | bool extended); 56 | void vc4_parse_add_nv_shader_rec(uint32_t paddr); 57 | -------------------------------------------------------------------------------- /tools/vc4_dump_parse_cl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 
14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include "vc4_dump_parse.h" 26 | #include "vc4_packet.h" 27 | #include "vc4_tools.h" 28 | 29 | struct cl_dump_state { 30 | void *cl; 31 | uint32_t offset; 32 | uint32_t end; 33 | 34 | uint8_t prim_mode; 35 | }; 36 | 37 | #define dump_VC4_PACKET_LINE_WIDTH dump_float 38 | #define dump_VC4_PACKET_POINT_SIZE dump_float 39 | 40 | static const char * const prim_name[] = { 41 | "points", 42 | "lines", 43 | "line_loop", 44 | "line_strip", 45 | "triangles", 46 | "triangle_strip", 47 | "triangle_fan" 48 | }; 49 | 50 | static void 51 | dump_printf(struct cl_dump_state *state, uint32_t offset, 52 | const char *format, ...) 53 | __attribute__ ((format(__printf__, 3, 4))); 54 | 55 | static void 56 | dump_printf(struct cl_dump_state *state, uint32_t offset, 57 | const char *format, ...) 
58 | { 59 | va_list ap; 60 | 61 | printf("0x%08x: ", state->offset + offset); 62 | va_start(ap, format); 63 | vprintf(format, ap); 64 | va_end(ap); 65 | } 66 | 67 | static void 68 | dump_float(struct cl_dump_state *state) 69 | { 70 | dump_printf(state, 0, "%f (0x%08x)\n", 71 | uif(*(uint32_t *)state->cl), *(uint32_t *)state->cl); 72 | } 73 | 74 | static void 75 | dump_VC4_PACKET_BRANCH(struct cl_dump_state *state) 76 | { 77 | uint32_t *addr = state->cl; 78 | 79 | dump_printf(state, 0, "addr 0x%08x\n", *addr); 80 | 81 | vc4_parse_add_sublist(*addr, state->prim_mode); 82 | } 83 | 84 | static void 85 | dump_VC4_PACKET_BRANCH_TO_SUB_LIST(struct cl_dump_state *state) 86 | { 87 | uint32_t *addr = state->cl; 88 | 89 | dump_printf(state, 0, "addr 0x%08x\n", *addr); 90 | 91 | vc4_parse_add_sublist(*addr, state->prim_mode); 92 | } 93 | 94 | static void 95 | dump_loadstore_full(struct cl_dump_state *state) 96 | { 97 | uint32_t bits = *(uint32_t *)state->cl; 98 | 99 | dump_printf(state, 0, "addr 0x%08x%s%s%s%s\n", 100 | bits & ~0xf, 101 | (bits & VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL) ? "" : " clear", 102 | (bits & VC4_LOADSTORE_FULL_RES_DISABLE_ZS) ? "" : " zs", 103 | (bits & VC4_LOADSTORE_FULL_RES_DISABLE_COLOR) ? "" : " color", 104 | (bits & VC4_LOADSTORE_FULL_RES_EOF) ? 
" eof" : ""); 105 | } 106 | 107 | static void 108 | dump_VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER(struct cl_dump_state *state) 109 | { 110 | dump_loadstore_full(state); 111 | } 112 | 113 | static void 114 | dump_VC4_PACKET_STORE_FULL_RES_TILE_BUFFER(struct cl_dump_state *state) 115 | { 116 | dump_loadstore_full(state); 117 | } 118 | 119 | static void 120 | dump_loadstore_general(struct cl_dump_state *state) 121 | { 122 | uint8_t *bytes = state->cl; 123 | uint32_t *addr = state->cl + 2; 124 | 125 | const char *fullvg = ""; 126 | const char *fullzs = ""; 127 | const char *fullcolor = ""; 128 | const char *buffer = "???"; 129 | 130 | switch ((bytes[0] & 0x7)){ 131 | case 0: 132 | buffer = "none"; 133 | break; 134 | case 1: 135 | buffer = "color"; 136 | break; 137 | case 2: 138 | buffer = "zs"; 139 | break; 140 | case 3: 141 | buffer = "z"; 142 | break; 143 | case 4: 144 | buffer = "vgmask"; 145 | break; 146 | case 5: 147 | buffer = "full"; 148 | if (*addr & (1 << 0)) 149 | fullcolor = " !color"; 150 | if (*addr & (1 << 1)) 151 | fullzs = " !zs"; 152 | if (*addr & (1 << 2)) 153 | fullvg = " !vgmask"; 154 | break; 155 | } 156 | 157 | const char *tiling = "???"; 158 | switch ((bytes[0] >> 4) & 7) { 159 | case 0: 160 | tiling = "linear"; 161 | break; 162 | case 1: 163 | tiling = "T"; 164 | break; 165 | case 2: 166 | tiling = "LT"; 167 | break; 168 | } 169 | 170 | const char *format = "???"; 171 | switch (bytes[1] & 3) { 172 | case 0: 173 | format = "RGBA8888"; 174 | break; 175 | case 1: 176 | format = "BGR565_DITHER"; 177 | break; 178 | case 2: 179 | format = "BGR565"; 180 | break; 181 | } 182 | 183 | dump_printf(state, 0, "0x%02x %s %s\n", bytes[0], buffer, tiling); 184 | dump_printf(state, 1, "0x%02x %s\n", bytes[1], format); 185 | dump_printf(state, 2, "addr 0x%08x %s%s%s%s\n", 186 | *addr & ~15, 187 | fullcolor, fullzs, fullvg, 188 | (*addr & (1 << 3)) ? 
" EOF" : ""); 189 | } 190 | 191 | static void 192 | dump_VC4_PACKET_STORE_TILE_BUFFER_GENERAL(struct cl_dump_state *state) 193 | { 194 | dump_loadstore_general(state); 195 | } 196 | 197 | static void 198 | dump_VC4_PACKET_LOAD_TILE_BUFFER_GENERAL(struct cl_dump_state *state) 199 | { 200 | dump_loadstore_general(state); 201 | } 202 | 203 | static void 204 | dump_VC4_PACKET_GL_INDEXED_PRIMITIVE(struct cl_dump_state *state) 205 | { 206 | uint8_t *b = state->cl; 207 | uint32_t *count = state->cl + 1; 208 | uint32_t *ib_offset = state->cl + 5; 209 | uint32_t *max_index = state->cl + 9; 210 | 211 | dump_printf(state, 0, "0x%02x %s %s\n", 212 | b[0], (b[0] & VC4_INDEX_BUFFER_U16) ? "16-bit" : "8-bit", 213 | prim_name[b[0] & 0x7]); 214 | dump_printf(state, 1, " %d verts\n", *count); 215 | dump_printf(state, 5, "0x%08x IB offset\n", *ib_offset); 216 | dump_printf(state, 9, "0x%08x max index\n", *max_index); 217 | } 218 | 219 | static void 220 | dump_VC4_PACKET_GL_ARRAY_PRIMITIVE(struct cl_dump_state *state) 221 | { 222 | uint8_t *b = state->cl; 223 | uint32_t *count = state->cl + 1; 224 | uint32_t *start = state->cl + 5; 225 | 226 | dump_printf(state, 0, "0x%02x %s\n", b[0], prim_name[b[0] & 0x7]); 227 | dump_printf(state, 1, "%d verts\n", *count); 228 | dump_printf(state, 5, "0x%08x start\n", *start); 229 | } 230 | 231 | static void 232 | dump_VC4_PACKET_PRIMITIVE_LIST_FORMAT(struct cl_dump_state *state) 233 | { 234 | uint8_t *b = state->cl; 235 | const char *prim_mode = "unknown"; 236 | const char *data_type = "unknown"; 237 | 238 | switch (*b & 0xf) { 239 | case 0: 240 | prim_mode = "points"; 241 | break; 242 | case 1: 243 | prim_mode = "lines"; 244 | break; 245 | case 2: 246 | prim_mode = "triangles"; 247 | break; 248 | case 3: 249 | prim_mode = "RHT"; 250 | break; 251 | } 252 | 253 | switch (*b >> 4) { 254 | case 1: 255 | data_type = "16-bit index"; 256 | break; 257 | case 3: 258 | prim_mode = "32-bit x/y"; 259 | break; 260 | } 261 | 262 | dump_printf(state, 0, 
"0x%02x: prim_mode %s, data_type %s\n", 263 | *b, prim_mode, data_type); 264 | 265 | state->prim_mode = *b & 0x0f; 266 | } 267 | 268 | static void 269 | dump_VC4_PACKET_GL_SHADER_STATE(struct cl_dump_state *state) 270 | { 271 | uint32_t *addr = state->cl; 272 | uint32_t paddr = *addr & ~0xf; 273 | uint8_t attributes = *addr & 7; 274 | bool extended; 275 | 276 | if (attributes == 0) 277 | attributes = 8; 278 | extended = *addr & (1 << 3); 279 | 280 | dump_printf(state, 0, "0x%08x %d attr count, %s\n", 281 | paddr, attributes, 282 | extended ? "extended" : "unextended"); 283 | 284 | vc4_parse_add_gl_shader_rec(paddr, attributes, extended); 285 | } 286 | 287 | static void 288 | dump_VC4_PACKET_NV_SHADER_STATE(struct cl_dump_state *state) 289 | { 290 | uint32_t *addr = state->cl; 291 | 292 | dump_printf(state, 0, "0x%08x\n", *addr); 293 | 294 | vc4_parse_add_nv_shader_rec(*addr); 295 | } 296 | 297 | static void 298 | dump_VC4_PACKET_CONFIGURATION_BITS(struct cl_dump_state *state) 299 | { 300 | uint8_t *b = state->cl; 301 | const char *msaa; 302 | 303 | switch (b[0] & VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_MASK) { 304 | case VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_NONE: 305 | msaa = "1x"; 306 | break; 307 | case VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X: 308 | msaa = "4x"; 309 | break; 310 | case VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_16X: 311 | msaa = "16x"; 312 | break; 313 | default: 314 | msaa = "unknownx"; 315 | break; 316 | } 317 | 318 | dump_printf(state, 0, 319 | "0x%02x f %d, b %d, %s, depthoff %d, aapointslines %d, %s\n", 320 | b[0], 321 | (b[0] & VC4_CONFIG_BITS_ENABLE_PRIM_FRONT) != 0, 322 | (b[0] & VC4_CONFIG_BITS_ENABLE_PRIM_BACK) != 0, 323 | (b[0] & VC4_CONFIG_BITS_CW_PRIMITIVES) ? 
"cw" : "ccw", 324 | (b[0] & VC4_CONFIG_BITS_ENABLE_DEPTH_OFFSET) != 0, 325 | (b[0] & VC4_CONFIG_BITS_AA_POINTS_AND_LINES) != 0, 326 | msaa); 327 | 328 | dump_printf(state, 1, "0x%02x z_upd %d, z_func %d\n", b[1], 329 | (b[1] & VC4_CONFIG_BITS_Z_UPDATE) != 0, 330 | ((b[1] >> VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT) & 0x7)); 331 | 332 | 333 | dump_printf(state, 2, "0x%02x ez %d, ezup %d\n", b[2], 334 | (b[2] & VC4_CONFIG_BITS_EARLY_Z) != 0, 335 | (b[2] & VC4_CONFIG_BITS_EARLY_Z_UPDATE) != 0); 336 | 337 | } 338 | 339 | static void 340 | dump_VC4_PACKET_FLAT_SHADE_FLAGS(struct cl_dump_state *state) 341 | { 342 | uint32_t *bits = state->cl; 343 | 344 | dump_printf(state, 0, "bits 0x%08x\n", *bits); 345 | } 346 | 347 | static void 348 | dump_VC4_PACKET_CLIP_WINDOW(struct cl_dump_state *state) 349 | { 350 | uint16_t *o = state->cl; 351 | 352 | dump_printf(state, 0, "%d, %d (b,l)\n", o[0], o[1]); 353 | dump_printf(state, 2, "%d, %d (w,h)\n", o[2], o[3]); 354 | } 355 | 356 | static void 357 | dump_VC4_PACKET_VIEWPORT_OFFSET(struct cl_dump_state *state) 358 | { 359 | uint16_t *o = state->cl; 360 | 361 | dump_printf(state, 0, "%f, %f (0x%04x, 0x%04x)\n", 362 | o[0] / 16.0, o[1] / 16.0, 363 | o[0], o[1]); 364 | } 365 | 366 | static void 367 | dump_VC4_PACKET_CLIPPER_XY_SCALING(struct cl_dump_state *state) 368 | { 369 | uint32_t *scale = state->cl; 370 | 371 | dump_printf(state, 0, "%f, %f (%f, %f, 0x%08x, 0x%08x)\n", 372 | uif(scale[0]) / 16.0, uif(scale[1]) / 16.0, 373 | uif(scale[0]), uif(scale[1]), 374 | scale[0], scale[1]); 375 | } 376 | 377 | static void 378 | dump_VC4_PACKET_CLIPPER_Z_SCALING(struct cl_dump_state *state) 379 | { 380 | uint32_t *translate = state->cl; 381 | uint32_t *scale = state->cl + 8; 382 | 383 | dump_printf(state, 0, "%f, %f (0x%08x, 0x%08x)\n", 384 | uif(translate[0]), uif(translate[1]), 385 | translate[0], translate[1]); 386 | 387 | dump_printf(state, 8, "%f, %f (0x%08x, 0x%08x)\n", 388 | uif(scale[0]), uif(scale[1]), 389 | scale[0], scale[1]); 390 | } 

/* Decodes the binning mode config: three 32-bit words (tile alloc address,
 * tile alloc size, tile state address) followed by single bytes at payload
 * offsets 12, 13 and 14 (tile counts and flags).
 */
static void
dump_VC4_PACKET_TILE_BINNING_MODE_CONFIG(struct cl_dump_state *state)
{
        uint32_t *tile_alloc_addr = state->cl;
        uint32_t *tile_alloc_size = state->cl + 4;
        uint32_t *tile_state_addr = state->cl + 8;
        uint8_t *bin_x = state->cl + 12;
        uint8_t *bin_y = state->cl + 13;
        uint8_t *flags = state->cl + 14;

        dump_printf(state, 0, " tile alloc addr 0x%08x\n", *tile_alloc_addr);
        dump_printf(state, 4, " tile alloc size %db\n", *tile_alloc_size);
        dump_printf(state, 8, " tile state addr 0x%08x\n", *tile_state_addr);
        dump_printf(state, 12, " tiles (%d, %d)\n", *bin_x, *bin_y);
        dump_printf(state, 14, " flags 0x%02x\n", *flags);
}

/* Decodes the rendering mode config: a 32-bit color offset, 16-bit width
 * and height, then a 16-bit flags word (shorts[2]) carrying the format,
 * tiling, early-Z, multisample and decimate settings.
 */
static void
dump_VC4_PACKET_TILE_RENDERING_MODE_CONFIG(struct cl_dump_state *state)
{
        uint32_t *render_offset = state->cl;
        uint16_t *shorts = state->cl + 4;

        dump_printf(state, 0, "color offset 0x%08x\n", *render_offset);
        dump_printf(state, 4, "width %d\n", shorts[0]);
        dump_printf(state, 6, "height %d\n", shorts[1]);

        /* "???" is kept when the field matches none of the known formats. */
        const char *format = "???";
        switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_FORMAT)) {
        case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:
                format = "BGR565_DITHERED";
                break;
        case VC4_RENDER_CONFIG_FORMAT_RGBA8888:
                format = "RGBA8888";
                break;
        case VC4_RENDER_CONFIG_FORMAT_BGR565:
                format = "BGR565";
                break;
        }
        /* The 64-bit tile buffer flag overrides whatever was decoded above. */
        if (shorts[2] & VC4_RENDER_CONFIG_TILE_BUFFER_64BIT)
                format = "64bit";

        const char *tiling = "???";
        switch (VC4_GET_FIELD(shorts[2], VC4_RENDER_CONFIG_MEMORY_FORMAT)) {
        case VC4_TILING_FORMAT_LINEAR:
                tiling = "linear";
                break;
        case VC4_TILING_FORMAT_T:
                tiling = "T";
                break;
        case VC4_TILING_FORMAT_LT:
                tiling = "LT";
                break;
        }

        /* The direction bit is only reported when early-Z is not disabled. */
        const char *earlyz = "";
        if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE) {
                earlyz = "early_z disabled";
        } else {
                if (shorts[2] & VC4_RENDER_CONFIG_EARLY_Z_DIRECTION_G)
                        earlyz = "early_z >";
                else
                        earlyz = "early_z <";
        }

        const char *decimate;
        switch (shorts[2] & VC4_RENDER_CONFIG_DECIMATE_MODE_MASK) {
        case VC4_RENDER_CONFIG_DECIMATE_MODE_1X:
                decimate = "1x";
                break;
        case VC4_RENDER_CONFIG_DECIMATE_MODE_4X:
                decimate = "4x";
                break;
        case VC4_RENDER_CONFIG_DECIMATE_MODE_16X:
                decimate = "16x";
                break;
        default:
                decimate = "unknown";
                break;
        }

        dump_printf(state, 8, "0x%04x %s, %s, %s, %s, decimate %s\n", shorts[2],
                    format, tiling,
                    earlyz,
                    (shorts[2] & VC4_RENDER_CONFIG_MS_MODE_4X) ? "ms_4x" : "ss",
                    decimate);
}

/* Prints three 32-bit clear words (labelled rgba8888[0], rgba8888[1], zs)
 * and one stencil byte at payload offset 12.
 */
static void
dump_VC4_PACKET_CLEAR_COLORS(struct cl_dump_state *state)
{
        uint32_t *colors = state->cl;
        uint8_t *s = state->cl + 12;

        dump_printf(state, 0, "0x%08x rgba8888[0]\n", colors[0]);
        dump_printf(state, 4, "0x%08x rgba8888[1]\n", colors[1]);
        dump_printf(state, 8, "0x%08x zs\n", colors[2]);
        dump_printf(state, 12, "0x%02x stencil\n", *s);
}

/* Prints the two tile coordinate bytes. */
static void
dump_VC4_PACKET_TILE_COORDINATES(struct cl_dump_state *state)
{
        uint8_t *tilecoords = state->cl;

        dump_printf(state, 0, "%d, %d\n",
                    tilecoords[0], tilecoords[1]);
}

/* Prints the two 32-bit GEM handles carried by the packet. */
static void
dump_VC4_PACKET_GEM_HANDLES(struct cl_dump_state *state)
{
        uint32_t *handles = state->cl;

        dump_printf(state, 0, "handle 0: %d, handle 1: %d\n",
                    handles[0], handles[1]);
}

/* Table-entry helpers: both index the table by the packet opcode and record
 * the stringified name and the <name>_SIZE constant.  PACKET_DUMP also wires
 * in the dump_<name>() decoder; PACKET leaves the decoder NULL, in which
 * case vc4_dump_cl() prints the payload as raw bytes.
 */
#define PACKET_DUMP(name) [name] = { #name, name ## _SIZE, dump_##name }
#define PACKET(name) [name] = { #name, name ## _SIZE, NULL }

/* Per-opcode description of control list packets.  Opcodes without an entry
 * are zero-initialized, so a NULL name marks an unknown packet.
 */
static const struct packet_info {
        const char *name;       /* stringified opcode constant */
        uint8_t size;           /* <name>_SIZE for this packet */
        void (*dump_func)(struct cl_dump_state *state); /* NULL: raw dump */
} packet_info[] = {
        PACKET(VC4_PACKET_HALT),
        PACKET(VC4_PACKET_NOP),

        PACKET(VC4_PACKET_FLUSH),
        PACKET(VC4_PACKET_FLUSH_ALL),
        PACKET(VC4_PACKET_START_TILE_BINNING),
        PACKET(VC4_PACKET_INCREMENT_SEMAPHORE),
        PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE),

        PACKET_DUMP(VC4_PACKET_BRANCH),
        PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST),
        PACKET(VC4_PACKET_RETURN_FROM_SUB_LIST),

        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER),
        PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF),
        PACKET_DUMP(VC4_PACKET_STORE_FULL_RES_TILE_BUFFER),
        PACKET_DUMP(VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER),
        PACKET_DUMP(VC4_PACKET_STORE_TILE_BUFFER_GENERAL),
        PACKET_DUMP(VC4_PACKET_LOAD_TILE_BUFFER_GENERAL),

        PACKET_DUMP(VC4_PACKET_GL_INDEXED_PRIMITIVE),
        PACKET_DUMP(VC4_PACKET_GL_ARRAY_PRIMITIVE),

        PACKET(VC4_PACKET_COMPRESSED_PRIMITIVE),
        PACKET(VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE),

        PACKET_DUMP(VC4_PACKET_PRIMITIVE_LIST_FORMAT),

        PACKET_DUMP(VC4_PACKET_GL_SHADER_STATE),
        PACKET_DUMP(VC4_PACKET_NV_SHADER_STATE),
        PACKET(VC4_PACKET_VG_SHADER_STATE),

        PACKET_DUMP(VC4_PACKET_CONFIGURATION_BITS),
        PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS),
        PACKET_DUMP(VC4_PACKET_POINT_SIZE),
        PACKET_DUMP(VC4_PACKET_LINE_WIDTH),
        PACKET(VC4_PACKET_RHT_X_BOUNDARY),
        PACKET(VC4_PACKET_DEPTH_OFFSET),
        PACKET_DUMP(VC4_PACKET_CLIP_WINDOW),
        PACKET_DUMP(VC4_PACKET_VIEWPORT_OFFSET),
        PACKET(VC4_PACKET_Z_CLIPPING),
        PACKET_DUMP(VC4_PACKET_CLIPPER_XY_SCALING),
        PACKET_DUMP(VC4_PACKET_CLIPPER_Z_SCALING),

        PACKET_DUMP(VC4_PACKET_TILE_BINNING_MODE_CONFIG),
        PACKET_DUMP(VC4_PACKET_TILE_RENDERING_MODE_CONFIG),
        PACKET_DUMP(VC4_PACKET_CLEAR_COLORS),
        PACKET_DUMP(VC4_PACKET_TILE_COORDINATES),

        PACKET_DUMP(VC4_PACKET_GEM_HANDLES),
};
569 | 570 | /* Prints a single entry from Table 39: Compressed Triangles List Indices, and 571 | * returns the length of the encoding. 572 | */ 573 | static uint32_t 574 | dump_compressed_triangle(struct cl_dump_state *state, uint32_t offset) 575 | { 576 | uint8_t *cl = state->cl; 577 | uint32_t index_size = 2; 578 | 579 | if (cl[offset] == 129) { 580 | uint16_t *index = (void *)(&cl[offset + 1]); 581 | dump_printf(state, offset, "0x%02x: 3 abs, 0 rel indices\n", 582 | cl[offset]); 583 | dump_printf(state, offset + 2, "index 0: 0x%04x\n", index[0]); 584 | dump_printf(state, offset + 4, "index 1: 0x%04x\n", index[1]); 585 | dump_printf(state, offset + 6, "index 2: 0x%04x\n", index[2]); 586 | return 1 + 3 * index_size; 587 | } else if ((cl[offset] & 0xf) == 15) { 588 | uint16_t *index = (void *)(&cl[offset + 2]); 589 | dump_printf(state, offset, "0x%02x: 1 abs, 2 rel indices\n", 590 | cl[offset]); 591 | dump_printf(state, offset + 2, "index 0: 0x%04x\n", *index); 592 | return 2 + index_size; 593 | } else if ((cl[offset] & 0x3) == 3) { 594 | dump_printf(state, offset, 595 | "0x%02x: 3 rel indices (%d, %d, %d)\n", 596 | cl[offset], 597 | (int8_t)cl[offset] >> 4, 598 | ((int8_t)cl[offset + 1] << 4) >> 4, 599 | (int8_t)cl[offset + 1] >> 4); 600 | return 2; 601 | } else { 602 | dump_printf(state, offset, "0x%02x: 1 rel index (%d)\n", 603 | cl[offset], (int8_t)cl[offset] >> 2); 604 | return 1; 605 | } 606 | } 607 | 608 | static uint32_t 609 | dump_compressed_primitive(struct cl_dump_state *state) 610 | { 611 | uint8_t *cl = state->cl; 612 | uint32_t offset = 0; 613 | 614 | while (state->offset + offset < state->end) { 615 | if (cl[offset] == 128) { 616 | dump_printf(state, offset, "0x%02x: escape\n", 617 | cl[offset]); 618 | return offset + 1; 619 | } else if (cl[offset] == 130) { 620 | /* The packet's offset is a 2's complement relative 621 | * branch. 
622 | */ 623 | int16_t branch = *(int16_t *)&cl[offset + 1]; 624 | uint32_t addr = (((state->offset + offset) & ~31) + 625 | (branch << 5)); 626 | dump_printf(state, offset, 627 | "0x%02x: relative branch 0x%08x (0x%04x)\n", 628 | cl[offset], addr, (uint16_t)branch); 629 | vc4_parse_add_compressed_list(addr, state->prim_mode); 630 | return ~0; 631 | } else { 632 | switch (state->prim_mode) { 633 | case VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES: 634 | offset += dump_compressed_triangle(state, 635 | offset) - 1; 636 | break; 637 | default: 638 | dump_printf(state, offset, 639 | "0x%02x: unknown (UNPARSED!)\n", 640 | cl[offset]); 641 | } 642 | } 643 | 644 | offset++; 645 | } 646 | 647 | printf("0x%08x: CL overflow!\n", offset); 648 | return offset; 649 | } 650 | 651 | static uint32_t 652 | dump_clipped_compressed_primitive(struct cl_dump_state *state) 653 | { 654 | uint32_t *addr = state->cl; 655 | 656 | dump_printf(state, 0, "clipped verts at 0x%08x, clip 0x%1x\n", 657 | *addr & ~0x7, *addr & 0x7); 658 | 659 | state->offset += 4; 660 | state->cl += 4; 661 | uint32_t compressed_len = dump_compressed_primitive(state); 662 | if (compressed_len == ~0) 663 | return compressed_len; 664 | else 665 | return compressed_len + 4; 666 | } 667 | 668 | void 669 | vc4_dump_cl(uint32_t start, uint32_t end, bool is_render, 670 | bool in_compressed_list, uint8_t start_prim_mode) 671 | { 672 | uint32_t offset = start; 673 | uint8_t *cmds = vc4_paddr_to_pointer(start); 674 | struct cl_dump_state state; 675 | 676 | if (!cmds) { 677 | fprintf(stderr, "No mapping found\n"); 678 | return; 679 | } 680 | 681 | state.end = end; 682 | state.prim_mode = start_prim_mode; 683 | 684 | /* A relative branch in a compressed list will continue at the branch 685 | * target still in a compressed list. 
686 | */ 687 | if (in_compressed_list) { 688 | state.cl = cmds; 689 | state.offset = offset; 690 | uint32_t len = dump_compressed_primitive(&state); 691 | if (len == ~0) 692 | return; 693 | 694 | cmds = state.cl + len; 695 | offset = state.offset + len; 696 | } 697 | 698 | while (offset < end) { 699 | uint8_t header = *cmds; 700 | uint32_t size; 701 | 702 | if (header > ARRAY_SIZE(packet_info) || 703 | !packet_info[header].name) { 704 | printf("0x%08x: Unknown packet 0x%02x (%d)!\n", 705 | offset, header, header); 706 | return; 707 | } 708 | 709 | const struct packet_info *p = packet_info + header; 710 | printf("0x%08x: 0x%02x %s\n", 711 | offset, 712 | header, p->name); 713 | 714 | /* Use the per-packet size, unless it's variable length. */ 715 | size = p->size; 716 | 717 | state.cl = cmds + 1; 718 | state.offset = offset + 1; 719 | if (header == VC4_PACKET_COMPRESSED_PRIMITIVE) { 720 | uint32_t len = dump_compressed_primitive(&state); 721 | if (len == ~0) 722 | return; 723 | size = len + 1; 724 | } else if (header == VC4_PACKET_CLIPPED_COMPRESSED_PRIMITIVE) { 725 | uint32_t len = dump_clipped_compressed_primitive(&state); 726 | if (len == ~0) 727 | return; 728 | size = len + 1; 729 | } else if (offset + size <= end && p->dump_func) { 730 | p->dump_func(&state); 731 | } else { 732 | for (uint32_t i = 1; i < size; i++) { 733 | if (offset + i >= end) { 734 | printf("0x%08x: CL overflow!\n", 735 | offset + i); 736 | return; 737 | } 738 | printf("0x%08x: 0x%02x\n", 739 | offset + i, 740 | cmds[i]); 741 | } 742 | } 743 | 744 | switch (header) { 745 | case VC4_PACKET_HALT: 746 | case VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF: 747 | case VC4_PACKET_RETURN_FROM_SUB_LIST: 748 | case VC4_PACKET_BRANCH: 749 | return; 750 | default: 751 | break; 752 | } 753 | 754 | offset += size; 755 | cmds += size; 756 | } 757 | } 758 | 759 | -------------------------------------------------------------------------------- /tools/vc4_dump_to_clif.c: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2015 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | * and/or sell copies of the Software, and to permit persons to whom the 9 | * Software is furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice (including the next 12 | * paragraph) shall be included in all copies or substantial portions of the 13 | * Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | * IN THE SOFTWARE. 
22 | */ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | #include "vc4_drm.h" 37 | 38 | static void * 39 | map_input(const char *filename) 40 | { 41 | int fd; 42 | void *map; 43 | struct stat stat; 44 | int ret; 45 | 46 | fd = open(filename, 0); 47 | if (fd == -1) 48 | err(1, "Couldn't open input file %s", filename); 49 | 50 | ret = fstat(fd, &stat); 51 | if (ret) 52 | err(1, "Couldn't get size of input file %s", filename); 53 | 54 | map = mmap(NULL, stat.st_size, PROT_READ, MAP_SHARED, fd, 0); 55 | if (map == MAP_FAILED) 56 | err(1, "Couldn't map input file %s", filename); 57 | 58 | return map; 59 | } 60 | 61 | static struct { 62 | struct drm_vc4_get_hang_state *state; 63 | struct drm_vc4_get_hang_state_bo *bo_state; 64 | void **map; 65 | } dump; 66 | 67 | static void 68 | dump_bo_list(void) 69 | { 70 | fprintf(stderr, "BOs:\n"); 71 | 72 | for (int i = 0; i < dump.state->bo_count; i++) { 73 | uint32_t paddr = dump.bo_state[i].paddr; 74 | fprintf(stderr, "0x%08x..0x%08x (%p)\n", 75 | paddr, paddr + dump.bo_state[i].size - 1, 76 | dump.map[i]); 77 | } 78 | } 79 | 80 | #include "autoclif/autoclif.h" 81 | static void *from_addr(V3D_ADDR_T addr) 82 | { 83 | for (int i = 0; i < dump.state->bo_count; i++) { 84 | uint32_t paddr = dump.bo_state[i].paddr; 85 | if (addr >= paddr && addr < paddr + dump.bo_state[i].size) 86 | return dump.map[i] + (addr - paddr); 87 | } 88 | 89 | fprintf(stderr, "Couldn't translate address 0x%08x\n", addr); 90 | dump_bo_list(); 91 | 92 | return NULL; 93 | } 94 | 95 | static V3D_ADDR_T to_addr(void *p) 96 | { 97 | for (int i = 0; i < dump.state->bo_count; i++) { 98 | void *map = dump.map[i]; 99 | if (p >= map && p < map + dump.bo_state[i].size) 100 | return dump.bo_state[i].paddr + (p - map); 101 | } 102 | 103 | fprintf(stderr, "Couldn't translate pointer %p\n", p); 104 | dump_bo_list(); 105 | 106 | return NULL; 107 | 
} 108 | 109 | static void 110 | parse_input(void *input) 111 | { 112 | uint32_t *version = input; 113 | if (*version != 0) { 114 | fprintf(stderr, "Input had wrong version %d\n", *version); 115 | exit(1); 116 | } 117 | 118 | dump.state = (void *)&version[1]; 119 | dump.bo_state = (void *)&dump.state[1]; 120 | 121 | dump.map = calloc(dump.state->bo_count, sizeof(*dump.map)); 122 | if (!dump.map) 123 | err(1, "malloc failure"); 124 | 125 | void *next_map = &dump.bo_state[dump.state->bo_count]; 126 | for (int i = 0; i < dump.state->bo_count; i++) { 127 | dump.map[i] = next_map; 128 | next_map += dump.bo_state[i].size; 129 | } 130 | } 131 | 132 | static void 133 | write_clif(const char *filename) 134 | { 135 | V3D_IDENT_T ident = { 136 | .tlb_w = 64, 137 | .tlb_h = 64, 138 | }; 139 | 140 | autoclif_begin(from_addr, to_addr, &ident); 141 | 142 | if (dump.state->start_bin != dump.state->ct0ea) 143 | autoclif_bin(dump.state->start_bin, dump.state->ct0ea); 144 | autoclif_render(dump.state->start_render, dump.state->ct1ea); 145 | autoclif_end(filename); 146 | } 147 | 148 | static void 149 | usage(const char *name) 150 | { 151 | fprintf(stderr, "Usage: %s input.dump output.clif\n", name); 152 | exit(1); 153 | } 154 | 155 | int 156 | main(int argc, char **argv) 157 | { 158 | void *input; 159 | 160 | if (argc != 3) 161 | usage(argv[0]); 162 | 163 | input = map_input(argv[1]); 164 | parse_input(input); 165 | write_clif(argv[2]); 166 | 167 | return 0; 168 | } 169 | -------------------------------------------------------------------------------- /tools/vc4_qpu_disasm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright © 2014 Broadcom 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining a 5 | * copy of this software and associated documentation files (the "Software"), 6 | * to deal in the Software without restriction, including without limitation 7 | * the rights to use, copy, modify, merge, 
 * publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/* NOTE(review): the header names on the three bare #include lines below were
 * lost when this listing was generated (the <...> parts are missing).  The
 * code in this file uses FILE/fprintf, bool and uint32_t/uint64_t; restore
 * the exact list from the repository.
 */
#include
#include
#include

#include "vc4_qpu_defines.h"
#include "vc4_tools.h"

/* Mnemonics for the add ALU, indexed by the QPU_OP_ADD field. */
static const char *qpu_add_opcodes[] = {
        [QPU_A_NOP] = "nop",
        [QPU_A_FADD] = "fadd",
        [QPU_A_FSUB] = "fsub",
        [QPU_A_FMIN] = "fmin",
        [QPU_A_FMAX] = "fmax",
        [QPU_A_FMINABS] = "fminabs",
        [QPU_A_FMAXABS] = "fmaxabs",
        [QPU_A_FTOI] = "ftoi",
        [QPU_A_ITOF] = "itof",
        [QPU_A_ADD] = "add",
        [QPU_A_SUB] = "sub",
        [QPU_A_SHR] = "shr",
        [QPU_A_ASR] = "asr",
        [QPU_A_ROR] = "ror",
        [QPU_A_SHL] = "shl",
        [QPU_A_MIN] = "min",
        [QPU_A_MAX] = "max",
        [QPU_A_AND] = "and",
        [QPU_A_OR] = "or",
        [QPU_A_XOR] = "xor",
        [QPU_A_NOT] = "not",
        [QPU_A_CLZ] = "clz",
        [QPU_A_V8ADDS] = "v8adds",
        [QPU_A_V8SUBS] = "v8subs",
};

/* Mnemonics for the mul ALU, indexed by the QPU_OP_MUL field. */
static const char *qpu_mul_opcodes[] = {
        [QPU_M_NOP] = "nop",
        [QPU_M_FMUL] = "fmul",
        [QPU_M_MUL24] = "mul24",
        [QPU_M_V8MULD] = "v8muld",
        [QPU_M_V8MIN] = "v8min",
        [QPU_M_V8MAX] = "v8max",
        [QPU_M_V8ADDS] = "v8adds",
        [QPU_M_V8SUBS] = "v8subs",
};

/* Names of the signalling values, indexed by the QPU_SIG field.
 * QPU_SIG_NONE maps to an empty string.
 */
static const char *qpu_sig[] = {
        [QPU_SIG_SW_BREAKPOINT] = "sig_brk",
        [QPU_SIG_NONE] = "",
        [QPU_SIG_THREAD_SWITCH] = "sig_switch",
        [QPU_SIG_PROG_END] = "sig_end",
        [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score",
        [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score",
        [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch",
        [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load",
        [QPU_SIG_COLOR_LOAD] = "sig_color_load",
        [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end",
        [QPU_SIG_LOAD_TMU0] = "load_tmu0",
        [QPU_SIG_LOAD_TMU1] = "load_tmu1",
        [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load",
        [QPU_SIG_SMALL_IMM] = "sig_small_imm",
        [QPU_SIG_LOAD_IMM] = "sig_load_imm",
        [QPU_SIG_BRANCH] = "sig_branch",
};

/* Suffixes for the mul-ALU pack modes, printed after a "." by
 * vc4_qpu_disasm_pack_mul().
 */
static const char *qpu_pack_mul[] = {
        [QPU_PACK_MUL_NOP] = "",
        [QPU_PACK_MUL_8888] = "8888",
        [QPU_PACK_MUL_8A] = "8a",
        [QPU_PACK_MUL_8B] = "8b",
        [QPU_PACK_MUL_8C] = "8c",
        [QPU_PACK_MUL_8D] = "8d",
};

/* The QPU unpack for A and R4 files can be described the same, it's just that
 * the R4 variants are convert-to-float only, with no int support.
 */
static const char *qpu_unpack[] = {
        [QPU_UNPACK_NOP] = "",
        [QPU_UNPACK_16A] = "16a",
        [QPU_UNPACK_16B] = "16b",
        [QPU_UNPACK_8D_REP] = "8d_rep",
        [QPU_UNPACK_8A] = "8a",
        [QPU_UNPACK_8B] = "8b",
        [QPU_UNPACK_8C] = "8c",
        [QPU_UNPACK_8D] = "8d",
};

/* Names of the special A-file read addresses; print_alu_src() indexes this
 * with raddr - 32.  NULL entries are printed as "???" by DESC().
 */
static const char *special_read_a[] = {
        "uni",
        NULL,
        NULL,
        "vary",
        NULL,
        NULL,
        "elem",
        "nop",
        NULL,
        "x_pix",
        "ms_flags",
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        "vpm_read",
        "vpm_ld_busy",
        "vpm_ld_wait",
        "mutex_acq"
};

/* B-file counterpart of special_read_a, indexed the same way. */
static const char *special_read_b[] = {
        "uni",
        NULL,
        NULL,
        "vary",
        NULL,
        NULL,
        "qpu",
        "nop",
        NULL,
        "y_pix",
        "rev_flag",
        NULL,
        NULL,
        NULL,
        NULL,
        NULL,
        "vpm_read",
        "vpm_st_busy",
        "vpm_st_wait",
        "mutex_acq"
};

/**
 * This has the B-file descriptions for register writes.
 *
 * Since only a couple of regs are different between A and B, the A overrides
 * are in get_special_write_desc().
 */
static const char *special_write[] = {
        [QPU_W_ACC0] = "r0",
        [QPU_W_ACC1] = "r1",
        [QPU_W_ACC2] = "r2",
        [QPU_W_ACC3] = "r3",
        [QPU_W_TMU_NOSWAP] = "tmu_noswap",
        [QPU_W_ACC5] = "r5",
        [QPU_W_HOST_INT] = "host_int",
        [QPU_W_NOP] = "nop",
        [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
        [QPU_W_QUAD_XY] = "quad_y",
        [QPU_W_MS_FLAGS] = "ms_flags",
        [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
        [QPU_W_TLB_Z] = "tlb_z",
        [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
        [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
        [QPU_W_VPM] = "vpm",
        [QPU_W_VPMVCD_SETUP] = "vw_setup",
        [QPU_W_VPM_ADDR] = "vw_addr",
        [QPU_W_MUTEX_RELEASE] = "mutex_release",
        [QPU_W_SFU_RECIP] = "sfu_recip",
        [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
        [QPU_W_SFU_EXP] = "sfu_exp",
        [QPU_W_SFU_LOG] = "sfu_log",
        [QPU_W_TMU0_S] = "tmu0_s",
        [QPU_W_TMU0_T] = "tmu0_t",
        [QPU_W_TMU0_R] = "tmu0_r",
        [QPU_W_TMU0_B] = "tmu0_b",
        [QPU_W_TMU1_S] = "tmu1_s",
        [QPU_W_TMU1_T] = "tmu1_t",
        [QPU_W_TMU1_R] = "tmu1_r",
        [QPU_W_TMU1_B] = "tmu1_b",
};

/* Suffixes for the A-file pack modes; each entry carries its own leading
 * ".", unlike qpu_pack_mul.
 */
static const char *qpu_pack_a[] = {
        [QPU_PACK_A_NOP] = "",
        [QPU_PACK_A_16A] = ".16a",
        [QPU_PACK_A_16B] = ".16b",
        [QPU_PACK_A_8888] = ".8888",
        [QPU_PACK_A_8A] = ".8a",
        [QPU_PACK_A_8B] = ".8b",
        [QPU_PACK_A_8C] = ".8c",
        [QPU_PACK_A_8D] = ".8d",

        [QPU_PACK_A_32_SAT] = ".sat",
        [QPU_PACK_A_16A_SAT] = ".16a.sat",
        [QPU_PACK_A_16B_SAT] = ".16b.sat",
        [QPU_PACK_A_8888_SAT] = ".8888.sat",
        [QPU_PACK_A_8A_SAT] = ".8a.sat",
        [QPU_PACK_A_8B_SAT] = ".8b.sat",
        [QPU_PACK_A_8C_SAT] = ".8c.sat",
        [QPU_PACK_A_8D_SAT] = ".8d.sat",
};

/* Condition-code suffixes; QPU_COND_ALWAYS prints nothing. */
static const char *qpu_condflags[] = {
        [QPU_COND_NEVER] = ".never",
        [QPU_COND_ALWAYS] = "",
        [QPU_COND_ZS] = ".zs",
        [QPU_COND_ZC] = ".zc",
[QPU_COND_NS] = ".ns", 223 | [QPU_COND_NC] = ".nc", 224 | [QPU_COND_CS] = ".cs", 225 | [QPU_COND_CC] = ".cc", 226 | }; 227 | 228 | #define DESC(array, index) \ 229 | ((index >= ARRAY_SIZE(array) || !(array)[index]) ? \ 230 | "???" : (array)[index]) 231 | 232 | static const char * 233 | get_special_write_desc(int reg, bool is_a) 234 | { 235 | if (is_a) { 236 | switch (reg) { 237 | case QPU_W_QUAD_XY: 238 | return "quad_x"; 239 | case QPU_W_VPMVCD_SETUP: 240 | return "vr_setup"; 241 | case QPU_W_VPM_ADDR: 242 | return "vr_addr"; 243 | } 244 | } 245 | 246 | return special_write[reg]; 247 | } 248 | 249 | static void 250 | vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack) 251 | { 252 | fprintf(out, ".%s", DESC(qpu_pack_mul, pack)); 253 | } 254 | 255 | static void 256 | vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack) 257 | { 258 | fprintf(out, "%s", DESC(qpu_pack_a, pack)); 259 | } 260 | 261 | static void 262 | vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack) 263 | { 264 | if (unpack != QPU_UNPACK_NOP) 265 | fprintf(out, ".%s", DESC(qpu_unpack, unpack)); 266 | } 267 | 268 | static void 269 | print_alu_dst(FILE *out, uint64_t inst, bool is_mul) 270 | { 271 | bool is_a = is_mul == ((inst & QPU_WS) != 0); 272 | uint32_t waddr = (is_mul ? 273 | QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 274 | QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 275 | const char *file = is_a ? 
"a" : "b"; 276 | uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK); 277 | 278 | if (waddr <= 31) 279 | fprintf(out, "r%s%d", file, waddr); 280 | else if (get_special_write_desc(waddr, is_a)) 281 | fprintf(out, "%s", get_special_write_desc(waddr, is_a)); 282 | else 283 | fprintf(out, "%s%d?", file, waddr); 284 | 285 | if (is_mul && (inst & QPU_PM)) { 286 | vc4_qpu_disasm_pack_mul(out, pack); 287 | } else if (is_a && !(inst & QPU_PM)) { 288 | vc4_qpu_disasm_pack_a(out, pack); 289 | } 290 | } 291 | 292 | static void 293 | print_alu_src(FILE *out, uint64_t inst, uint32_t mux) 294 | { 295 | bool is_a = mux != QPU_MUX_B; 296 | const char *file = is_a ? "a" : "b"; 297 | uint32_t raddr = (is_a ? 298 | QPU_GET_FIELD(inst, QPU_RADDR_A) : 299 | QPU_GET_FIELD(inst, QPU_RADDR_B)); 300 | uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK); 301 | 302 | if (mux <= QPU_MUX_R5) 303 | fprintf(out, "r%d", mux); 304 | else if (!is_a && 305 | QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) { 306 | uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM); 307 | if (si <= 15) 308 | fprintf(out, "%d", si); 309 | else if (si <= 31) 310 | fprintf(out, "%d", -16 + (si - 16)); 311 | else if (si <= 39) 312 | fprintf(out, "%.1f", (float)(1 << (si - 32))); 313 | else if (si <= 47) 314 | fprintf(out, "%f", 1.0f / (1 << (48 - si))); 315 | else 316 | fprintf(out, "", si); 317 | } else if (raddr <= 31) 318 | fprintf(out, "r%s%d", file, raddr); 319 | else { 320 | if (is_a) 321 | fprintf(out, "%s", DESC(special_read_a, raddr - 32)); 322 | else 323 | fprintf(out, "%s", DESC(special_read_b, raddr - 32)); 324 | } 325 | 326 | if (((mux == QPU_MUX_A && !(inst & QPU_PM)) || 327 | (mux == QPU_MUX_R4 && (inst & QPU_PM)))) { 328 | vc4_qpu_disasm_unpack(out, unpack); 329 | } 330 | } 331 | 332 | static void 333 | print_add_op(FILE *out, uint64_t inst) 334 | { 335 | uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 336 | uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD); 337 | bool is_mov = (op_add == QPU_A_OR && 338 | 
QPU_GET_FIELD(inst, QPU_ADD_A) == 339 | QPU_GET_FIELD(inst, QPU_ADD_B)); 340 | 341 | fprintf(out, "%s%s%s ", 342 | is_mov ? "mov" : DESC(qpu_add_opcodes, op_add), 343 | ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "", 344 | op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : ""); 345 | 346 | print_alu_dst(out, inst, false); 347 | fprintf(out, ", "); 348 | 349 | print_alu_src(out, inst, QPU_GET_FIELD(inst, QPU_ADD_A)); 350 | 351 | if (!is_mov) { 352 | fprintf(out, ", "); 353 | 354 | print_alu_src(out, inst, QPU_GET_FIELD(inst, QPU_ADD_B)); 355 | } 356 | } 357 | 358 | static void 359 | print_mul_op(FILE *out, uint64_t inst) 360 | { 361 | uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 362 | uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL); 363 | uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL); 364 | bool is_mov = (op_mul == QPU_M_V8MIN && 365 | QPU_GET_FIELD(inst, QPU_MUL_A) == 366 | QPU_GET_FIELD(inst, QPU_MUL_B)); 367 | 368 | fprintf(out, "%s%s%s ", 369 | is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul), 370 | ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "", 371 | op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : ""); 372 | 373 | print_alu_dst(out, inst, true); 374 | fprintf(out, ", "); 375 | 376 | print_alu_src(out, inst, QPU_GET_FIELD(inst, QPU_MUL_A)); 377 | 378 | if (!is_mov) { 379 | fprintf(out, ", "); 380 | print_alu_src(out, inst, QPU_GET_FIELD(inst, QPU_MUL_B)); 381 | } 382 | } 383 | 384 | static void 385 | print_load_imm(FILE *out, uint64_t inst) 386 | { 387 | uint32_t imm = inst; 388 | uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 389 | uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 390 | uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 391 | uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 392 | 393 | fprintf(out, "load_imm "); 394 | print_alu_dst(out, inst, false); 395 | fprintf(out, "%s, ", (waddr_add != QPU_W_NOP ? 
396 | DESC(qpu_condflags, cond_add) : "")); 397 | print_alu_dst(out, inst, true); 398 | fprintf(out, "%s, ", (waddr_mul != QPU_W_NOP ? 399 | DESC(qpu_condflags, cond_mul) : "")); 400 | fprintf(out, "0x%08x (%f)", imm, uif(imm)); 401 | } 402 | 403 | void 404 | vc4_qpu_disasm(FILE *out, const uint64_t *instructions, int num_instructions) 405 | { 406 | for (int i = 0; i < num_instructions; i++) { 407 | uint64_t inst = instructions[i]; 408 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 409 | 410 | switch (sig) { 411 | case QPU_SIG_BRANCH: 412 | fprintf(out, "branch"); 413 | break; 414 | case QPU_SIG_LOAD_IMM: 415 | print_load_imm(out, inst); 416 | break; 417 | default: 418 | if (sig != QPU_SIG_NONE) 419 | fprintf(out, "%s ", DESC(qpu_sig, sig)); 420 | print_add_op(out, inst); 421 | fprintf(out, " ; "); 422 | print_mul_op(out, inst); 423 | break; 424 | } 425 | 426 | if (num_instructions != 1) 427 | fprintf(out, "\n"); 428 | } 429 | } 430 | --------------------------------------------------------------------------------