└── tools ├── .gitignore ├── Doxyfile ├── Makefile ├── README.txt ├── README_asm2bin.txt ├── README_bin2asm.txt ├── README_decode.txt └── src ├── asm2bin.hpp ├── asm2bin.l ├── asm2bin.ypp ├── binary.cpp ├── binary.hpp ├── binary20.cpp ├── binary20.hpp ├── binary35.cpp ├── binary35.hpp ├── binary50.cpp ├── binary50.hpp ├── cfghelpers.cpp ├── cfghelpers.hpp ├── common.cpp ├── common.hpp ├── cudacommon.cpp ├── cudacommon.hpp ├── decode.hpp ├── decode.l ├── decode.ypp ├── decode_common.cpp ├── decode_common.hpp ├── decode_output.cpp ├── decode_output.hpp ├── doxygen.dox ├── elf.cpp ├── elf.hpp ├── elf2asm.cpp ├── elfmanip.cpp ├── elfmanip.hpp ├── output.cpp └── output.hpp /tools/.gitignore: -------------------------------------------------------------------------------- 1 | objs 2 | asm2bin 3 | bin2asm 4 | decode 5 | -------------------------------------------------------------------------------- /tools/Makefile: -------------------------------------------------------------------------------- 1 | CC = @g++ 2 | LEXER = @flex 3 | PARSER = @bison 4 | 5 | CFLAGS = -Isrc -O3 -Wall 6 | rm = @rm 7 | OBJs = objs/asm2bin.tab.o \ 8 | objs/asm2bin.yy.o \ 9 | objs/binary.o \ 10 | objs/binary20.o \ 11 | objs/binary35.o \ 12 | objs/binary50.o \ 13 | objs/cfghelpers.o \ 14 | objs/common.o \ 15 | objs/cudacommon.o \ 16 | objs/elf.o \ 17 | objs/elfmanip.o \ 18 | objs/output.o 19 | ELFOBJs = objs/elf.o \ 20 | objs/elfmanip.o 21 | DECODEOBJS = objs/decode.tab.o \ 22 | objs/decode.yy.o \ 23 | objs/decode_common.o \ 24 | objs/decode_output.o 25 | 26 | ifeq ($(OS),Windows_NT) 27 | rm = @del 28 | CFLAGS += -D WINDOWS 29 | MKDIR_FLAGS = "" 30 | 31 | cuobjdump_location := $(shell where cuobjdump) 32 | ifneq ($(cuobjdump_location),) 33 | CFLAGS += -D HAS_CUOBJDUMP 34 | endif 35 | else 36 | MKDIR_FLAGS = -p 37 | UNAME_S := $(shell uname -s) 38 | UNAME_P := $(shell uname -p) 39 | 40 | cuobjdump_location := $(shell which cuobjdump) 41 | ifneq ($(cuobjdump_location),) 42 | CFLAGS += -D 
HAS_CUOBJDUMP 43 | endif 44 | 45 | ifeq ($(UNAME_S),Linux) 46 | CFLAGS += -D LINUX 47 | ifeq ($(UNAME_P),x86_64) 48 | CFLAGS += -D AMD64 49 | CFLAGS += -D LINUX64 50 | endif 51 | else ifeq ($(UNAME_P),x86_64) 52 | CFLAGS += -D AMD64 53 | endif 54 | endif 55 | 56 | default: objs bin2asm asm2bin decode 57 | 58 | debug: CFLAGS += -g -O0 59 | debug: default 60 | 61 | objs: 62 | @mkdir ${MKDIR_FLAGS} "objs" 63 | 64 | objs/asm2bin.yy.o: src/asm2bin.l src/common.hpp 65 | ${LEXER} -oobjs/asm2bin.yy.c src/asm2bin.l 66 | ${CC} ${CFLAGS} -c objs/asm2bin.yy.c -o $@ 67 | 68 | objs/asm2bin.tab.o: src/asm2bin.ypp src/asm2bin.hpp src/cfghelpers.hpp src/common.hpp 69 | ${PARSER} -oobjs/asm2bin.tab.cpp -dv src/asm2bin.ypp 70 | ${CC} ${CFLAGS} -c objs/asm2bin.tab.cpp -o $@ 71 | 72 | objs/binary.o: src/binary*.cpp src/binary*.hpp src/common.hpp 73 | ${CC} ${CFLAGS} -c src/binary.cpp -o $@ 74 | 75 | objs/binary20.o: src/binary20.cpp src/binary20.hpp src/binary.hpp src/common.hpp 76 | ${CC} ${CFLAGS} -c src/binary20.cpp -o $@ 77 | 78 | objs/binary35.o: src/binary35.cpp src/binary35.hpp src/binary.hpp src/common.hpp 79 | ${CC} ${CFLAGS} -c src/binary35.cpp -o $@ 80 | 81 | objs/binary50.o: src/binary50.cpp src/binary50.hpp src/binary.hpp src/common.hpp 82 | ${CC} ${CFLAGS} -c src/binary50.cpp -o $@ 83 | 84 | objs/cfghelpers.o: src/cfghelpers.hpp src/cfghelpers.cpp src/common.hpp src/cudacommon.hpp src/output.hpp 85 | ${CC} ${CFLAGS} -c src/cfghelpers.cpp -o $@ 86 | 87 | objs/common.o: src/common.cpp src/common.hpp 88 | ${CC} ${CFLAGS} -c src/common.cpp -o $@ 89 | 90 | objs/cudacommon.o: src/cudacommon.cpp src/cfghelpers.hpp src/cudacommon.hpp src/common.hpp 91 | ${CC} ${CFLAGS} -c src/cudacommon.cpp -o $@ 92 | 93 | objs/elf.o: src/elf.cpp src/elf.hpp 94 | ${CC} ${CFLAGS} -c src/elf.cpp -o $@ 95 | 96 | objs/elfmanip.o: src/elfmanip.cpp src/elfmanip.hpp src/common.hpp src/elf.hpp 97 | ${CC} ${CFLAGS} -c src/elfmanip.cpp -o $@ 98 | 99 | objs/output.o: src/output.cpp src/output.hpp 
src/common.hpp src/cfghelpers.hpp src/cudacommon.hpp 100 | ${CC} ${CFLAGS} -c src/output.cpp -o $@ 101 | 102 | bin2asm: ${ELFOBJs} objs/common.o src/elf2asm.cpp 103 | ${CC} ${CFLAGS} ${ELFOBJs} objs/common.o src/elf2asm.cpp -o bin2asm ${ELFLinks} 104 | 105 | asm2bin: ${OBJs} 106 | ${CC} ${CFLAGS} ${OBJs} -o asm2bin ${ELFLinks} 107 | 108 | 109 | objs/decode.yy.o: src/decode.l src/decode_common.hpp 110 | ${LEXER} -oobjs/decode.yy.c src/decode.l 111 | ${CC} ${CFLAGS} -c objs/decode.yy.c -o $@ 112 | 113 | objs/decode.tab.o: src/decode.ypp src/decode.hpp src/decode_common.hpp 114 | ${PARSER} -oobjs/decode.tab.cpp -dv src/decode.ypp 115 | ${CC} ${CFLAGS} -c objs/decode.tab.cpp -o $@ 116 | 117 | objs/decode_common.o: src/decode_common.cpp src/decode_common.hpp 118 | ${CC} ${CFLAGS} -c src/decode_common.cpp -o $@ 119 | 120 | objs/decode_output.o: src/decode_output.cpp src/decode_output.hpp src/decode_common.hpp 121 | ${CC} ${CFLAGS} -c src/decode_output.cpp -o $@ 122 | 123 | decode: ${DECODEOBJS} 124 | ${CC} ${CFLAGS} ${DECODEOBJS} -o $@ 125 | 126 | clean: 127 | ${rm} -f asm2bin asm2bin.exe bin2asm bin2asm.exe decode decode.exe *.o .temp* 128 | ${rm} -f objs/* 129 | 130 | -------------------------------------------------------------------------------- /tools/README.txt: -------------------------------------------------------------------------------- 1 | ======================== 2 | =====THIS DIRECTORY===== 3 | ======================== 4 | This directory contains source code for our assembler generator tool, plus related tools. 5 | We hope to continue updating this, such as to add support for the Volta generation of NVIDIA devices. 6 | 7 | ======================== 8 | =======OUR TOOLS======== 9 | ======================== 10 | The bin2asm tool extracts kernel functions' assembly code from nvcc-generated executables. 11 | The asm2bin tool can assemble assembly code for several architectures, overwriting binary in an executable. 
12 | The decode tool can generate assembler code for use with the asm2bin tool. 13 | 14 | If writing functions to analyze/modify GPU code, we recommend calling them in asm2bin. 15 | There's a line in asm2bin.ypp that says "//This is where we might call functions to modify or optimize the GPU code." 16 | 17 | ======================== 18 | ======DEPENDENCIES====== 19 | ======================== 20 | Our tools are all compiled with g++. 21 | The asm2bin and decode tools are dependent on flex and bison. 22 | The bin2asm tool expects that the CUDA Toolkit be installed and part of your PATH. 23 | 24 | ======================== 25 | =====COMPATIBILITY====== 26 | ======================== 27 | The asm2bin and decode tools should be run on a Linux machine for full compatibility. 28 | The decode tool has been tested with compute capabilities between 3.2 and 6.2, inclusive. 29 | 30 | ======================== 31 | ======COMPILATION======= 32 | ======================== 33 | Our tools can be compiled by running make in this directory. 34 | 35 | ======================== 36 | =COMMAND LINE ARGUMENTS= 37 | ======================== 38 | Run any of the three tools with the -h flag for some details on runtime arguments. 39 | See separate readme files for more info about the distinct tools: bin2asm, asm2bin, and decode. 40 | 41 | ======================== 42 | =====DOCUMENTATION====== 43 | ======================== 44 | Documentation can be generated with Doxygen. 45 | -------------------------------------------------------------------------------- /tools/README_asm2bin.txt: -------------------------------------------------------------------------------- 1 | ======================== 2 | ========ASM2BIN========= 3 | ======================== 4 | 5 | Our asm2bin program can parse assembly code in the format generated by our bin2asm tool, and modify CUDA binaries. 
6 | If you want to modify the GPU code programmatically, consider doing so from inside this tool; in asm2bin.ypp, you'll 7 | find a comment that says: //This is where we might call functions to modify or optimize the GPU code. 8 | 9 | Below are some examples of how to run the program, and brief descriptions of the results. 10 | 11 | ./asm2bin assembly.txt -printBlocks 12 | This parses the GPU code in assembly.txt, and outputs it to stdout in a more human-readable format. 13 | Note that asm2bin can also accept the human-readable format as input. 14 | 15 | ./asm2bin assembly.txt -write program.exe 16 | This parses the GPU code in assembly.txt, and overwrites the matching kernel function in program.exe. 17 | Warning: if the new assembly is larger than the original, and you're on a non-Linux machine, 18 | then our tool will terminate without modifying the binary. 19 | -------------------------------------------------------------------------------- /tools/README_bin2asm.txt: -------------------------------------------------------------------------------- 1 | ======================== 2 | ========BIN2ASM========= 3 | ======================== 4 | 5 | Our bin2asm program is used to extract assembly code and some metadata, using NVIDIA's cuobjdump tool for assistance. 6 | 7 | Here are some examples of how to run the program, and brief descriptions of the results. 8 | 9 | ./bin2asm a.out 10 | This outputs a list of CUDA kernel functions in a.out, prepended with unique ID numbers for identification. 11 | 12 | ./bin2asm a.out 3 13 | This outputs the assembly code of the kernel with ID number 3. 14 | 15 | ./bin2asm -hex a.out 3 16 | This outputs the assembly code of the kernel with ID number 3, including binary code. 17 | This format is necessary when gathering assembly and binary code for our decode tool. 
-------------------------------------------------------------------------------- /tools/README_decode.txt: -------------------------------------------------------------------------------- 1 | ======================== 2 | =========DECODE========= 3 | ======================== 4 | 5 | This program is used to analyze NVIDIA ISA encodings, and generate assembler code. 6 | 7 | This tool has been tested on ISAs with versions between 3.2 and 6.2. 8 | It is not yet compatible with newer architectures, as we need to update our structures to support 16-byte instructions. 9 | 10 | This program has three running modes: 11 | 12 | ./decode assembly.txt < oldPersistent.dat > newPersistent.dat 13 | The above command will analyze the code in file assembly.txt (generated by our bin2asm tool with the -hex flag). 14 | It will combine the analysis with encodings in oldPersistent.dat, and output new encodings to newPersistent.dat. 15 | 16 | ./decode -probe assembly.txt < persistent.dat > bitflip.txt 17 | The above command will generate bit-flipped code based on analysis of assembly.txt plus encodings in persistent.dat. 18 | The bit-flipped code in bitflip.txt can be injected into an executable using our asm2bin tool. 19 | After injection, new assembly for analysis can be retrieved with bin2asm using the -hex flag. 20 | 21 | ./decode -final assembly.txt < persistent.dat > assembler.cpp 22 | The above command will generate an assembler based on analysis of assembly.txt plus encodings in persistent.dat. 23 | This assembler can be placed inside a C++ function; see our binary50.cpp and binary35.cpp files in src/ for examples. 24 | 25 | Note that during the first run for a given architecture, since no persistent data exists, you should just enter -1 into stdin. 
-------------------------------------------------------------------------------- /tools/src/asm2bin.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ASM2BIN_PARSE_HPP 2 | #define ASM2BIN_PARSE_HPP 3 | 4 | #include "common.hpp" 5 | 6 | /** 7 | * @file asm2bin.hpp 8 | * Defines functions and constants used by the asm2bin parser. 9 | */ 10 | 11 | /** 12 | * Deletes scheduling metadata instructions from all of the basic blocks. 13 | */ 14 | void stripSCHI(); 15 | 16 | /** 17 | * Inserts scheduling metadata instructions throughout code. 18 | * Note that this assumes there are no such instructions already present. 19 | */ 20 | void insertSCHIs(); 21 | 22 | /** 23 | * Creates an array of binary code equivalent to the assembly. 24 | * @return a structure containing the array of code 25 | */ 26 | kernelData * generateByteCode(); 27 | 28 | /** 29 | * Gets a list of functions within the code, indicated by CAL instructions. 30 | * Useful for fixing up the ELF after adding/removing code. 31 | * @return a list of ascending addresses corresponding to function starts 32 | */ 33 | node * getCallTargets(); 34 | 35 | /** 36 | * Called by parser after reaching the end of the assembly code. 37 | */ 38 | void finishParsing(); 39 | 40 | #endif 41 | 42 | -------------------------------------------------------------------------------- /tools/src/asm2bin.l: -------------------------------------------------------------------------------- 1 | %{ 2 | #include 3 | #include "common.hpp" 4 | #include "asm2bin.tab.hpp" 5 | %} 6 | %option nounput 7 | %option noyywrap 8 | extern YYSTYPE yylval; 9 | extern int isatty(); 10 | 11 | hex [-~]?(0x[0-9a-fA-F]+)|(\|0x[0-9a-fA-F]+\|) 12 | bitlist \{([0-9]*,)*[0-9]*\} 13 | float \-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)? 14 | pred !?(P[0-6])|!?[pP][tT] 15 | reg -?[-~]?\|?(RZ|R[0-9]+)\|? 16 | specialName SR_[\_\.A-Za-z0-9]+ 17 | specialID SR[0-9]+ 18 | const ([-~]\ *)?\|?c\|?\ *\[.*\]\ *\[.*\]\|? 
19 | memory ([-~]\ *)?\|?\[[^\[\]]*\]\|? 20 | hexBinaryInst [0-9a-f]{16}\ \/\/[\ ] 21 | decimal_line_number [\_a-zA-Z0-9]+\ \([0-9]+\) 22 | 23 | %% 24 | {hexBinaryInst} {} 25 | "//Shared memory usage: "-?0[xX][a-fA-F0-9]+[^\n]* { 26 | yylval.token_.lexeme = yytext + 23; 27 | return(METADATA_SHAREDMEM); 28 | } 29 | "//Shared memory usage: "-?[0-9]+[^\n]* { 30 | yylval.token_.lexeme = yytext + 23; 31 | return(METADATA_SHAREDMEM); 32 | } 33 | "//Frame Size: "-?0[xX][a-fA-F0-9]+[^\n]* { 34 | yylval.token_.lexeme = yytext + 14; 35 | return(METADATA_FRAME_SIZE); 36 | } 37 | "//Frame Size: "-?[0-9]+[^\n]* { 38 | yylval.token_.lexeme = yytext + 14; 39 | return(METADATA_FRAME_SIZE); 40 | } 41 | "//Min Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* { 42 | yylval.token_.lexeme = yytext + 18; 43 | return(METADATA_MIN_STACK_SIZE); 44 | } 45 | "//Min Stack Size: "-?[0-9]+[^\n]* { 46 | yylval.token_.lexeme = yytext + 18; 47 | return(METADATA_MIN_STACK_SIZE); 48 | } 49 | "//Max Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* { 50 | yylval.token_.lexeme = yytext + 18; 51 | return(METADATA_MAX_STACK_SIZE); 52 | } 53 | "//Max Stack Size: "-?[0-9]+[^\n]* { 54 | yylval.token_.lexeme = yytext + 18; 55 | return(METADATA_MAX_STACK_SIZE); 56 | } 57 | "//Name: "[0-9a-zA-Z_]+[^\n]* { 58 | yylval.token_.lexeme = (char*) malloc(strlen(yytext) - 7); 59 | strcpy(yylval.token_.lexeme, yytext + 8); 60 | int x = 0; 61 | while(true) { 62 | if(yylval.token_.lexeme[x] == 0 || yylval.token_.lexeme[x] == '\n' || yylval.token_.lexeme[x] == '\r') { 63 | yylval.token_.lexeme[x] = 0; 64 | break; 65 | } 66 | x++; 67 | } 68 | return(METADATA_KERNELNAME); 69 | } 70 | "//Arch: sm_"[0-9]+[^\n]* { 71 | yylval.token_.lexeme = yytext + 11; 72 | return(METADATA_ARCH); 73 | } 74 | "//Function count: "[0-9]+[^\n]* { 75 | yylval.token_.lexeme = yytext + 18; 76 | return(METADATA_FUNCTIONCOUNT); 77 | } 78 | "//Function: "[0-9a-zA-Z\_\$]+[^\n]* { 79 | yylval.token_.lexeme = (char*) malloc(strlen(yytext) - 11); 80 | 
strcpy(yylval.token_.lexeme, yytext + 12); 81 | int x = 0; 82 | while(true) { 83 | if(yylval.token_.lexeme[x] == 0 || yylval.token_.lexeme[x] == '\n' || yylval.token_.lexeme[x] == '\r') { 84 | yylval.token_.lexeme[x] = 0; 85 | break; 86 | } 87 | x++; 88 | } 89 | return(METADATA_FUNCTIONNAME); 90 | } 91 | "//cuobjdump: "[0-9]+[^\n]* { 92 | yylval.token_.lexeme = yytext + 13; 93 | return(METADATA_CUOBJDUMP); 94 | } 95 | "//SCHI: 0x"[0-9a-fA-F]+[^\n]*|"SCHI: 0x"[0-9a-fA-F]+[^\n]* { 96 | /* This rule matches two spellings: "//SCHI: 0x" (10 chars) and bare "SCHI: 0x" (8 chars). Skip exactly the matched prefix so the lexeme starts at the first hex digit in both cases and the copy never reads past yytext's terminator. */ const int schiPrefixLen = (yytext[0] == '/') ? 10 : 8; yylval.token_.lexeme = (char*) malloc(strlen(yytext) - schiPrefixLen + 1); 97 | strcpy(yylval.token_.lexeme, yytext + schiPrefixLen); 98 | int x = 0; 99 | while(true) { 100 | if(yylval.token_.lexeme[x] == 0 || yylval.token_.lexeme[x] == '\n' || yylval.token_.lexeme[x] == '\r') { 101 | yylval.token_.lexeme[x] = 0; 102 | break; 103 | } 104 | x++; 105 | } 106 | return(INLINE_SCHI_VALUE); 107 | } 108 | "SCHI50:" { 109 | return(SCHI50); 110 | } 111 | "//"[^\n]* { 112 | //return(COMMENT); 113 | } 114 | label[a-zA-Z0-9\_]*\: { 115 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)-5); 116 | yytext[strlen(yytext)-1] = 0; 117 | strcpy(yylval.token_.lexeme, yytext+5); 118 | return(LABEL); 119 | } 120 | label[a-zA-Z0-9\_]* { 121 | yylval.token_.type = type_texture_operand; 122 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)-4); 123 | strcpy(yylval.token_.lexeme, yytext+5); 124 | return(LABEL_OP); 125 | } 126 | 1D|ARRAY_1D|RECT|2D|ARRAY_2D|3D|CUBE|ARRAY_CUBE { 127 | yylval.token_.type = type_texture_operand; 128 | yylval.token_.lexeme = yytext; 129 | return(TEXOP); 130 | } 131 | R|G|B|A|RG|RA|GA|BA|RGB|RGA|RBA|GBA|RGBA|INVALID5|INVALID6|INVALID7 { 132 | yylval.token_.type = type_channel; 133 | yylval.token_.lexeme = yytext; 134 | return(CHANNEL); 135 | } 136 | SB[0-9] { 137 | yylval.token_.type = type_sb; 138 | yylval.token_.lexeme = yytext + 2; 139 | return(SB_OPERAND); 140 | } 141 | {decimal_line_number} { 142 | yylval.token_.lexeme = strstr(yytext, "(") + 1; 143 | return(LINENUMBER); 144 | } 145 | {hex} { 
146 | yylval.token_.type = type_hex; 147 | yylval.token_.lexeme = yytext; 148 | return(HEXVAL); 149 | } 150 | {bitlist} { 151 | yylval.token_.type = type_bit_list; 152 | yylval.token_.lexeme = yytext; 153 | return(BITLIST); 154 | } 155 | {float} { 156 | yylval.token_.type = type_hex; 157 | yylval.token_.lexeme = yytext; 158 | return(DECIMAL); 159 | } 160 | \+INF { 161 | return(PLUSINF); 162 | } 163 | \-INF { 164 | return(NEGINF); 165 | } 166 | \+SNAN { 167 | return(PLUSSNAN); 168 | } 169 | \+QNAN { 170 | return(PLUSQNAN); 171 | } 172 | @ { 173 | return(GUARD); 174 | } 175 | {pred} { 176 | yylval.token_.type = type_predicate; 177 | yylval.token_.lexeme = yytext; 178 | return(PREDICATE); 179 | } 180 | {reg} { 181 | yylval.token_.type = type_register; 182 | yylval.token_.lexeme = yytext; 183 | return(REG); 184 | } 185 | {const} { 186 | yylval.token_.type = type_const_mem; 187 | yylval.token_.lexeme = yytext; 188 | return(CONST); 189 | } 190 | {memory} { 191 | yylval.token_.type = type_mem; 192 | yylval.token_.lexeme = yytext; 193 | return(MEMORY); 194 | } 195 | {specialName} { 196 | yylval.token_.type = type_special_reg; 197 | yylval.token_.lexeme = yytext; 198 | return(SPECIALNAME); 199 | } 200 | {specialID} { 201 | yylval.token_.type = type_special_reg; 202 | yylval.token_.lexeme = yytext; 203 | return(SPECIALID); 204 | } 205 | \.[\?0-9A-Za-z\_]* { 206 | yylval.token_.type = type_mod; 207 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)); 208 | strcpy(yylval.token_.lexeme,yytext+1); 209 | return(MOD); 210 | } 211 | CC { 212 | yylval.token_.type = type_other_operand; 213 | return(CC); 214 | } 215 | PR { 216 | yylval.token_.type = type_other_operand; 217 | return(PR); 218 | } 219 | MOV { 220 | return(MOV); 221 | } 222 | MOV32I { 223 | return(MOV32I); 224 | } 225 | LD { 226 | return(LD); 227 | } 228 | LDU { 229 | return(LDU); 230 | } 231 | LDL { 232 | return(LDL); 233 | } 234 | LDS { 235 | return(LDS); 236 | } 237 | LDC { 238 | return(LDC); 239 | } 240 | ST { 
241 | return(ST); 242 | } 243 | STL { 244 | return(STL); 245 | } 246 | STS { 247 | return(STS); 248 | } 249 | LDLK { 250 | return(LDLK); 251 | } 252 | LDSLK { 253 | return(LDSLK); 254 | } 255 | STUL { 256 | return(STUL); 257 | } 258 | STSUL { 259 | return(STSUL); 260 | } 261 | FADD { 262 | return(FADD); 263 | } 264 | FADD32I { 265 | return(FADD32I); 266 | } 267 | FMUL { 268 | return(FMUL); 269 | } 270 | FMUL32I { 271 | return(FMUL32I); 272 | } 273 | FFMA { 274 | return(FFMA); 275 | } 276 | FSET { 277 | return(FSET); 278 | } 279 | FSETP { 280 | return(FSETP); 281 | } 282 | DSETP { 283 | return(DSETP); 284 | } 285 | FCMP { 286 | return(FCMP); 287 | } 288 | MUFU { 289 | return(MUFU); 290 | } 291 | DADD { 292 | return(DADD); 293 | } 294 | DMUL { 295 | return(DMUL); 296 | } 297 | DFMA { 298 | return(DFMA); 299 | } 300 | IADD { 301 | return(IADD); 302 | } 303 | ISUB { 304 | return(ISUB); 305 | } 306 | IADD32I { 307 | return(IADD32I); 308 | } 309 | IMNMX { 310 | return(IMNMX); 311 | } 312 | IMUL { 313 | return(IMUL); 314 | } 315 | IMUL32I { 316 | return(IMUL32I); 317 | } 318 | IMAD { 319 | return(IMAD); 320 | } 321 | ISCADD { 322 | return(ISCADD); 323 | } 324 | ISET { 325 | return(ISET); 326 | } 327 | ISETP { 328 | return(ISETP); 329 | } 330 | ICMP { 331 | return(ICMP); 332 | } 333 | I2F { 334 | return(I2F); 335 | } 336 | I2I { 337 | return(I2I); 338 | } 339 | F2I { 340 | return(F2I); 341 | } 342 | F2F { 343 | return(F2F); 344 | } 345 | LOP { 346 | return(LOP); 347 | } 348 | LOP32I { 349 | return(LOP32I); 350 | } 351 | SHL { 352 | return(SHL); 353 | } 354 | SHR { 355 | return(SHR); 356 | } 357 | BFE { 358 | return(BFE); 359 | } 360 | BFI { 361 | return(BFI); 362 | } 363 | SEL { 364 | return(SEL); 365 | } 366 | SCHI { 367 | return(SCHI); 368 | } 369 | SSY { 370 | return(SSY); 371 | } 372 | BRA { 373 | return(BRA); 374 | } 375 | BRX { 376 | return(BRX); 377 | } 378 | PCNT { 379 | return(PCNT); 380 | } 381 | CONT { 382 | return(CONT); 383 | } 384 | PBK { 385 | return(PBK); 
386 | } 387 | BRK { 388 | return(BRK); 389 | } 390 | CAL { 391 | return(CAL); 392 | } 393 | RET { 394 | return(RET); 395 | } 396 | EXIT { 397 | return(EXIT); 398 | } 399 | NOP { 400 | return(NOP); 401 | } 402 | BAR { 403 | return(BAR); 404 | } 405 | BPT { 406 | return(BPT); 407 | } 408 | B2R { 409 | return(B2R); 410 | } 411 | S2R { 412 | return(S2R); 413 | } 414 | PSETP { 415 | return(PSETP); 416 | } 417 | PSET { 418 | return(PSET); 419 | } 420 | FLO { 421 | return(FLO); 422 | } 423 | P2R { 424 | return(P2R); 425 | } 426 | R2P { 427 | return(R2P); 428 | } 429 | TEX { 430 | return(TEX); 431 | } 432 | TEXDEPBAR { 433 | return(TEXDEPBAR); 434 | } 435 | RRO { 436 | return(RRO); 437 | } 438 | PRMT { 439 | return(PRMT); 440 | } 441 | VADD { 442 | return(VADD); 443 | } 444 | DMNMX { 445 | return(DMNMX); 446 | } 447 | FMNMX { 448 | return(FMNMX); 449 | } 450 | RED { 451 | return(RED); 452 | } 453 | VOTE { 454 | return(VOTE); 455 | } 456 | POPC { 457 | return(POPC); 458 | } 459 | MEMBAR { 460 | return(MEMBAR); 461 | } 462 | STSCUL { 463 | return(STSCUL); 464 | } 465 | LEPC { 466 | return(LEPC); 467 | } 468 | CSETP { 469 | return(CSETP); 470 | } 471 | ISCADD32I { 472 | return(ISCADD32I); 473 | } 474 | VMNMX { 475 | return(VMNMX); 476 | } 477 | TLD { 478 | return(TLD); 479 | } 480 | SHF { 481 | return(SHF); 482 | } 483 | FCHK { 484 | return(FCHK); 485 | } 486 | JCAL { 487 | return(JCAL); 488 | } 489 | SHFL { 490 | return(SHFL); 491 | } 492 | LDG { 493 | return(LDG); 494 | } 495 | LD_LDU { 496 | return(LD_LDU); 497 | } 498 | ATOM { 499 | return(ATOM); 500 | } 501 | CCTL { 502 | return(CCTL); 503 | } 504 | XMAD { 505 | return(XMAD); 506 | } 507 | SYNC { 508 | return(SYNC); 509 | } 510 | STG { 511 | return(STG); 512 | } 513 | IADD3 { 514 | return(IADD3); 515 | } 516 | VABSDIFF { 517 | return(VABSDIFF); 518 | } 519 | DEPBAR { 520 | return(DEPBAR); 521 | } 522 | LOP3 { 523 | return(LOP3); 524 | } 525 | TLDS { 526 | return(TLDS); 527 | } 528 | TEXS { 529 | return(TEXS); 530 | } 531 
| LEA { 532 | return(LEA); 533 | } 534 | DSET { 535 | return(DSET); 536 | } 537 | PHI { 538 | return(PHI); 539 | } 540 | BINCODE { 541 | return(BINCODE); 542 | } 543 | 544 | [a-fA-F0-9]{16}\: { 545 | //garbage 546 | } 547 | 548 | [ \t\n\r;,] { 549 | } 550 | [A-Z0-9]* { 551 | printf("\n\nERROR: %s is an unrecognized string here\n\n",yytext); 552 | yyterminate(); 553 | } 554 | . { 555 | printf("\n\nERROR: %c is an illegal character here\n\n",yytext[0]); 556 | yyterminate(); 557 | } 558 | %% 559 | #ifdef WINDOWS 560 | int yylex_destroy() { 561 | yy_delete_buffer(YY_CURRENT_BUFFER); 562 | return 0; 563 | } 564 | #endif 565 | -------------------------------------------------------------------------------- /tools/src/binary.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "binary.hpp" 6 | #include "binary20.hpp" 7 | #include "binary35.hpp" 8 | #include "binary50.hpp" 9 | #include "common.hpp" 10 | using namespace std; 11 | 12 | long long getBinaryDifference(unsigned long long val1, int val2, char sizeLimit) { 13 | long long answer = 0; 14 | if(val1 > (unsigned long long)val2) { 15 | answer = val1 - val2; 16 | } 17 | else { 18 | answer = val2 - val1; 19 | unsigned long long temp = 0xffffffffffffffffLL >> (64 - sizeLimit); 20 | temp ^= answer; 21 | temp++; 22 | temp &= 0xffffffffffffffffLL >> (64 - sizeLimit); 23 | answer = temp; 24 | } 25 | return answer; 26 | } 27 | 28 | void setBinary(unsigned long long * binary, int location, long long value, int size, char overwrite) { 29 | int x; 30 | if(overwrite) { 31 | for(x = 0; x < size; x++) { 32 | if(value & (0x1LL << x)) { 33 | *binary |= 0x1LL << location; 34 | } 35 | else if(*binary & (0x1LL << location)) { 36 | *binary ^= 0x1LL << location; 37 | } 38 | location--; 39 | } 40 | } 41 | else { 42 | for(x = 0; x < size; x++) { 43 | if(value & (0x1LL << x)) { 44 | *binary |= 0x1LL << location; 45 | } 46 | location--; 47 | } 48 | 
} 49 | } 50 | 51 | bool printedStatusMessage = false; 52 | char* instructionToHexString(instruction * inst, int arch, int cuobjdump_version) { 53 | if(arch < 20) { 54 | cerr << "FATAL ERROR: The sm_1x architecture is not supported.\n"; 55 | exit(0); 56 | } else if(arch == 20 || arch == 21 || arch == 30) { 57 | return instructionToHexString20(inst, arch, cuobjdump_version); 58 | } else if(arch == 32 || arch == 35 || arch == 37) { 59 | return instructionToHexString35(inst, cuobjdump_version); 60 | } else if(arch == 50 || arch == 52 || arch == 53 || arch == 60 || arch == 61 || arch == 62) { 61 | return instructionToHexString50(inst, cuobjdump_version); 62 | } else { 63 | cerr << "FATAL ERROR: No assembler found for architecture " << arch << ".\n"; 64 | exit(0); 65 | } 66 | } 67 | 68 | -------------------------------------------------------------------------------- /tools/src/binary.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BINARY_HPP 2 | #define BINARY_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file binary.hpp 7 | * Defines functions used to convert assembly-style instructions to hex/binary. 8 | */ 9 | 10 | /** 11 | * Gets the difference between two values. 12 | * Used to determine jump sizes when converting instructions to binary/hex. 13 | * @param val1 The first value 14 | * @param val2 The second value 15 | * @param sizeLimit The number of bits in the difference 16 | * @return a 64-bit integer whose lowest bits equate to (val1-val2) 17 | */ 18 | long long getBinaryDifference(unsigned long long val1, int val2, char sizeLimit); 19 | 20 | /** 21 | * Sets bits inside of a 64-bit unsigned integer, which represents a binary instruction. 
22 | * @param binary The address of the value being altered 23 | * @param location The index of the first bit being set; it receives the lowest bit of value, and each following bit of value goes to the next lower index 24 | * @param value The bits used to set binary 25 | * @param size The number of bits in value 26 | * @param overwrite Indicates whether to overwrite 1's with 0's 27 | */ 28 | void setBinary(unsigned long long * binary, int location, long long value, int size, char overwrite); 29 | 30 | /** 31 | * Converts an assembly instruction to a hex string, equivalent to binary code. 32 | * @param inst The instruction being converted 33 | * @param arch The kernel function's architecture (the value of xx in sm_xx). 34 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated. 35 | * @return a char array containing 16 hexadecimal characters 36 | */ 37 | char* instructionToHexString(instruction * inst, int arch, int cuobjdump_version); 38 | 39 | #endif 40 | 41 | -------------------------------------------------------------------------------- /tools/src/binary20.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BINARY20_HPP 2 | #define BINARY20_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file binary20.hpp 7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_20, sm_21, sm_30. 8 | */ 9 | 10 | /** 11 | * Get the binary value of an 'imme' operand. 12 | * An imme is usually an operand that could be either a hex value, constant memory, or a register. 
13 | * @param op is the operand 14 | * @param sizeLimit is the number of bits the value can use (excluding the two bits that identify the type) 15 | * @param special indicates a unique value type: special & 1 means ignore negative; special & 2 means its the 4th operand and constant memory 16 | * @return a 64-bit integer whose lowest bits are the binary value 17 | */ 18 | long long getImme20(operand * op, char sizeLimit, char special); 19 | 20 | /** 21 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_20, sm_21, sm_30. 22 | * @param inst The instruction being converted 23 | * @param arch The sm_xx architecture of the kernel code. 24 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated. 25 | * @return a char array containing 16 hexadecimal characters 26 | */ 27 | char* instructionToHexString20(instruction * inst, int arch, int cuobjdump_version); 28 | 29 | #endif 30 | 31 | -------------------------------------------------------------------------------- /tools/src/binary35.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BINARY35_HPP 2 | #define BINARY35_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file binary35.hpp 7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_3x. 8 | */ 9 | 10 | /** 11 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_3x. 12 | * @param inst The instruction being converted 13 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated. 
14 | * @return a char array containing 16 hexadecimal characters 15 | */ 16 | char* instructionToHexString35(instruction * inst, int cuobjdump_version); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /tools/src/binary50.hpp: -------------------------------------------------------------------------------- 1 | #ifndef BINARY50_HPP 2 | #define BINARY50_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file binary50.hpp 7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_5x and sm_6x. 8 | */ 9 | 10 | /** 11 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_5x or sm_6x. 12 | * @param inst The instruction being converted 13 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated. 14 | * @return a char array containing 16 hexadecimal characters 15 | */ 16 | char* instructionToHexString50(instruction * inst, int cuobjdump_version); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /tools/src/cfghelpers.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "cfghelpers.hpp" 8 | #include "common.hpp" 9 | #include "cudacommon.hpp" 10 | #include "output.hpp" 11 | using namespace std; 12 | 13 | extern node * _blocks; 14 | extern char* _kernel_name; 15 | extern int _arch; 16 | extern int _shared_memory; 17 | extern char** _function_names; 18 | extern int _num_functions; 19 | extern node * _functions; 20 | extern int _tag; 21 | extern int _num_registers; 22 | extern int _counter; 23 | extern bool hasMod(instruction* inst, char* mod); 24 | 25 | bool addBranchToLine(blockNode * a, instruction * line) { 26 | blockNode * b; 27 | node * j = _blocks; 28 | bool added = false; 29 | while(j) { 30 | b = (blockNode*) j->value; 31 | if(line == 
b->instructions->value) { 32 | if(a) { 33 | added = added || addNode(&a->branches, (void*)b, &blockorder); 34 | addNode(&b->predecessors, (void*)a, &blockorder); 35 | } else { 36 | return false; 37 | } 38 | break; 39 | } else { 40 | node * insts = b->instructions; 41 | while(insts->next) { 42 | if(insts->next->value == line) { 43 | break; 44 | } 45 | insts = insts->next; 46 | } 47 | 48 | if(insts->next) { 49 | added = true; 50 | 51 | //split block 52 | blockNode * newBlock; 53 | initBlock(&newBlock); 54 | newBlock->first = line->line; 55 | newBlock->forward = b->forward; 56 | newBlock->branches = b->branches; 57 | newBlock->last = b->last; 58 | newBlock->guarded = b->guarded; 59 | newBlock->target = b->target; 60 | newBlock->end = b->end; 61 | b->end = end_generic; 62 | b->last = line->line - 1; 63 | b->branches = 0; 64 | b->forward = 0; 65 | 66 | //split instruction lists 67 | newBlock->instructions = insts->next; 68 | insts->next = 0; 69 | 70 | //Fix addresses: 71 | newBlock->address = ((instruction*)newBlock->instructions->value)->address; 72 | int orgSize = b->size; 73 | b->size = newBlock->address - b->address; 74 | newBlock->size = orgSize - b->size; 75 | 76 | //Add appropriate successors: 77 | addNode(&b->forward, (void*)newBlock, &blockorder); 78 | addNode(&newBlock->predecessors, (void*)b, &blockorder); 79 | if(a) { 80 | addNode(&a->branches, (void*)newBlock, &blockorder); 81 | addNode(&newBlock->predecessors, (void*)a, &blockorder); 82 | } 83 | 84 | //Change predecessors for other blocks appropriately 85 | node * blocks = _blocks; 86 | while(blocks) { 87 | blockNode * block = (blockNode*) blocks->value; 88 | if(containsValue(block->predecessors, (int*)b)) { 89 | if(!newBlock->guarded) { 90 | deleteNode(&block->predecessors, b, &blockorder); 91 | } 92 | addNode(&block->predecessors, newBlock, &blockorder); 93 | } 94 | blocks = blocks->next; 95 | } 96 | 97 | //Add newBlock to blocks list: 98 | addNode(&_blocks, (void*)newBlock, &blockorder); 99 | break; 100 
| } 101 | } 102 | j = j->next; 103 | } 104 | 105 | return added; 106 | } 107 | 108 | bool addBranchToLine(blockNode * a, int line) { 109 | blockNode * b; 110 | node * k; 111 | node * j = _blocks; 112 | bool added = false; 113 | while(j) { 114 | b = (blockNode*) j->value; 115 | if(line == b->first && a) { 116 | added = added || addNode(&a->branches, (void*)b, &blockorder); 117 | 118 | addNode(&b->predecessors, (void*)a, &blockorder); 119 | break; 120 | } else if(line > b->first && line <= b->last) { 121 | added = true; 122 | 123 | //split block 124 | blockNode * newBlock; 125 | initBlock(&newBlock); 126 | newBlock->first = line; 127 | newBlock->forward = b->forward; 128 | newBlock->branches = b->branches; 129 | newBlock->last = b->last; 130 | newBlock->guarded = b->guarded; 131 | newBlock->target = b->target; 132 | newBlock->end = b->end; 133 | b->end = end_generic; 134 | b->last = line - 1; 135 | b->branches = 0; 136 | b->forward = 0; 137 | 138 | //split instruction lists 139 | k = b->instructions; 140 | int kline = b->first + 1; 141 | while(kline < line) { 142 | k = k->next; 143 | kline++; 144 | } 145 | newBlock->instructions = k->next; 146 | k->next = 0; 147 | 148 | //Fix addresses: 149 | newBlock->address = ((instruction*)newBlock->instructions->value)->address; 150 | int orgSize = b->size; 151 | b->size = newBlock->address - b->address; 152 | newBlock->size = orgSize - b->size; 153 | 154 | //Add appropriate successors: 155 | addNode(&b->forward, (void*)newBlock, &blockorder); 156 | addNode(&newBlock->predecessors, (void*)b, &blockorder); 157 | if(a) { 158 | addNode(&a->branches, (void*)newBlock, &blockorder); 159 | addNode(&newBlock->predecessors, (void*)a, &blockorder); 160 | } 161 | 162 | //Change predecessors for other blocks appropriately 163 | node * blocks = _blocks; 164 | while(blocks) { 165 | blockNode * block = (blockNode*) blocks->value; 166 | if(containsValue(block->predecessors, (int*)b)) { 167 | if(!newBlock->guarded) { 168 | 
deleteNode(&block->predecessors, b, &blockorder); 169 | } 170 | addNode(&block->predecessors, newBlock, &blockorder); 171 | } 172 | blocks = blocks->next; 173 | } 174 | 175 | //Add newBlock to blocks list: 176 | addNode(&_blocks, (void*)newBlock, &blockorder); 177 | break; 178 | } 179 | j = j->next; 180 | } 181 | 182 | return added; 183 | } 184 | 185 | bool addBranchToAddress(blockNode * a, int address) { 186 | blockNode * b; 187 | node * j = _blocks; 188 | while(j) { 189 | b = (blockNode*) j->value; 190 | if(address >= b->address && address < b->address + b->size) { 191 | node * insts = b->instructions; 192 | while(insts) { 193 | instruction * inst = (instruction*) insts->value; 194 | if(inst->address == address) { 195 | int lineNumber = inst->line; 196 | return addBranchToLine(a, lineNumber); 197 | } 198 | 199 | insts = insts->next; 200 | } 201 | } 202 | 203 | j = j->next; 204 | } 205 | 206 | cerr << "ERROR ch~132: can't find branch target at address 0x" << std::hex << address << ".\n"; 207 | cerr << "\tThis error may mean that a branch jumps to a nonexistent instruction.\n"; 208 | cerr << "\tSince we use pointers in our IR, this will cause problems.\n"; 209 | return false; 210 | } 211 | 212 | /** 213 | * Gets the address corresponding to a given label. 214 | * //TODO: this would be better with some preprocessing 215 | * @param label The label to look for 216 | * @return The address of an instruction with matching label, or -1. 
217 | */ 218 | int labelToAddress(char * label) { 219 | node * blocks = _blocks; 220 | while(blocks) { 221 | blockNode * block = (blockNode*) blocks->value; 222 | node * instructions = block->instructions; 223 | while(instructions) { 224 | instruction * inst = (instruction*) instructions->value; 225 | if(inst->label) { 226 | if(!strcmp(inst->label, label)) { 227 | return inst->address; 228 | } 229 | } 230 | instructions = instructions->next; 231 | } 232 | blocks = blocks->next; 233 | } 234 | return -1; 235 | } 236 | 237 | void labelsToAddresses() { 238 | node * blocks = _blocks; 239 | while(blocks) { 240 | blockNode * block = (blockNode*) blocks->value; 241 | node * instructions = block->instructions; 242 | while(instructions) { 243 | instruction * inst = (instruction*) instructions->value; 244 | for(int x = 0; x < inst->num_operands; x++) { 245 | if(inst->operands[x]->type == type_label) { 246 | inst->operands[x]->val1 = labelToAddress(inst->operands[x]->strval); 247 | if(inst->operands[x]->val1 >= 0) { 248 | inst->operands[x]->type = type_hex; 249 | if(!instructions->next) { 250 | if(block->end == end_call || block->end == end_branch) { 251 | block->target = inst->operands[x]->val1; 252 | } 253 | } 254 | } else { 255 | cerr << "ERROR ch~261: can't find label " << inst->operands[x]->strval << ".\n"; 256 | cerr << "\tThis error may mean that a branch jumps to a nonexistent instruction.\n"; 257 | inst->operands[x]->val1 = 0x0; 258 | inst->operands[x]->type = type_hex; 259 | block->target = 0x0; 260 | } 261 | } 262 | } 263 | instructions = instructions->next; 264 | } 265 | blocks = blocks->next; 266 | } 267 | } 268 | 269 | void addressesToLabels() { 270 | //Apply labels to blocks, overwriting existing labels: 271 | int counter = 0; 272 | node * blocks = _blocks; 273 | while(blocks) { 274 | blockNode * block = (blockNode*) blocks->value; 275 | node * instructions = block->instructions; 276 | instruction * firstInst = (instruction*) instructions->value; 277 | 
if(firstInst->label) { 278 | free(firstInst->label); 279 | } 280 | firstInst->label = (char*) malloc(sizeof(char) * sizeof(int) * 4); 281 | sprintf(firstInst->label, "%d", counter); 282 | blocks = blocks->next; 283 | counter++; 284 | } 285 | 286 | //Convert addresses to labels for branches, synchronizations instructions, etcetera: 287 | blocks = _blocks; 288 | while(blocks) { 289 | blockNode * block = (blockNode*) blocks->value; 290 | node * instructions = block->instructions; 291 | while(instructions) { 292 | instruction * inst = (instruction*) instructions->value; 293 | if(inst->ptr) { 294 | for(int x = 0; x < inst->num_operands; x++) { 295 | if(inst->operands[x]->type == type_hex && ((blockNode*)inst->ptr)->address == (long long)inst->operands[x]->val1) { 296 | blockNode * ptr = ((blockNode*)inst->ptr); 297 | instruction * target = (instruction*) ptr->instructions->value; 298 | inst->operands[x]->type = type_label; 299 | if(inst->operands[x]->strval) { 300 | free(inst->operands[x]->strval); 301 | } 302 | inst->operands[x]->strval = (char*) malloc(sizeof(char) * (strlen(target->label) + 1)); 303 | strcpy(inst->operands[x]->strval, target->label); 304 | } 305 | } 306 | } 307 | instructions = instructions->next; 308 | } 309 | blocks = blocks->next; 310 | } 311 | } 312 | 313 | //Helper for combine blocks 314 | void combineBlocksHelper(node * blocks, blockNode * deleted, blockNode * combined) { 315 | while(blocks) { 316 | blockNode * block = (blockNode *) blocks->value; 317 | deleteNode(&block->forward, deleted, &intorder); 318 | deleteNode(&block->branches, deleted, &intorder); 319 | deleteNode(&block->predecessors, deleted, &intorder); 320 | blocks = blocks->next; 321 | } 322 | } 323 | 324 | //This function combines adjacent blocks that have different predicate guards. 325 | //In other words, this redefines basic blocks in terms of control flow, instead of execution. 
326 | void combineBlocks() { 327 | node * blocks = _blocks; 328 | while(blocks->next) { 329 | bool combined = false; 330 | blockNode * block1 = (blockNode*) blocks->value; 331 | blockNode * block2 = (blockNode*) blocks->next->value; 332 | 333 | if(block1->end == end_generic && !block2->isBranchTarget) { 334 | combined = true; 335 | while(block2->instructions) { 336 | addLast(&block1->instructions, block2->instructions->value); 337 | node * temp = block2->instructions; 338 | block2->instructions = block2->instructions->next; 339 | free(temp); 340 | } 341 | 342 | block1->end = block2->end; 343 | cleanNodes(block1->forward); 344 | block1->forward = block2->forward; 345 | block2->forward = 0; 346 | cleanNodes(block1->branches); 347 | block1->branches = block2->branches; 348 | block2->branches = 0; 349 | 350 | combineBlocksHelper(_blocks, block2, block1); 351 | 352 | node * sucs = block1->forward; 353 | while(sucs) { 354 | blockNode * suc = (blockNode *) sucs->value; 355 | addNode(&suc->predecessors, block1, &blockorder); 356 | sucs = sucs->next; 357 | } 358 | sucs = block1->branches; 359 | while(sucs) { 360 | blockNode * suc = (blockNode *) sucs->value; 361 | addNode(&suc->predecessors, block1, &blockorder); 362 | sucs = sucs->next; 363 | } 364 | 365 | cleanBlock(block2); 366 | node * temp = blocks->next; 367 | blocks->next = blocks->next->next; 368 | free(temp); 369 | } 370 | 371 | if(!combined) { 372 | blocks = blocks->next; 373 | } 374 | } 375 | 376 | blocks = _blocks; 377 | while(blocks->next) { 378 | blockNode * block = (blockNode *) blocks->value; 379 | if(!block->forward) { 380 | addFirst(&block->forward, blocks->next->value); 381 | } 382 | if(block->forward->value != blocks->next->value) { 383 | cerr << "SANITY CHECK ERROR ch~294.\n"; 384 | cerr << "\tThis error absolutely should not ever happen.\n"; 385 | cerr << "\tThis error means the assembly code is now non-linear, even without branches.\n"; 386 | } 387 | blocks = blocks->next; 388 | } 389 | } 390 | 391 | 
void changePtrs(blockNode* oldVal, blockNode* newVal) { 392 | blockNode * a; 393 | instruction * inst; 394 | node * i = _blocks; 395 | while(i) { 396 | a = (blockNode*) i->value; 397 | node * j = a->instructions; 398 | while(j) { 399 | inst = (instruction*) j->value; 400 | 401 | if(inst->op == opcode_SSY || 402 | inst->op == opcode_BRA || 403 | inst->op == opcode_CAL || 404 | inst->op == opcode_PBK || 405 | inst->op == opcode_PCNT || 406 | inst->op == opcode_BRX) {//found relevant instruction 407 | if(inst->ptr == oldVal) { 408 | inst->ptr = newVal; 409 | } 410 | } 411 | 412 | j = j->next; 413 | } 414 | 415 | i = i->next; 416 | } 417 | } 418 | 419 | void deleteBlock(blockNode* deleted) { 420 | //Fix up successors: 421 | node * iter = _blocks; 422 | while(iter) { 423 | blockNode* block = (blockNode*) iter->value; 424 | 425 | if(block == deleted) { 426 | if(iter->next) { 427 | //Change target block for appropriate SSYS, BRAs, CALs, etcetera 428 | changePtrs(deleted, (blockNode*) iter->next->value); 429 | 430 | //If deleted block is start of function, fix function list: 431 | node * iter2 = _functions; 432 | while(iter2) { 433 | blockNode* func = (blockNode*) iter2->value; 434 | if(func == deleted) { 435 | if(func->end == end_return) { 436 | cerr << "SANITY CHECK ERROR ~5432 deleted subroutine\n"; 437 | cerr << "\tThis error suggests we (accidentally?) deleted a subroutine in the assembly.\n"; 438 | } 439 | else if(func->end == end_generic || func->guarded) { 440 | iter2->value = iter->next->value; 441 | } 442 | } 443 | 444 | iter2 = iter2->next; 445 | } 446 | } 447 | } 448 | else { 449 | //If deleted block is a successor, adds its successor(s?) 
to this block's lists: 450 | if((block->end == end_call || block->end == end_branch)) { 451 | node * i = block->branches; 452 | while(i != 0) { 453 | if(i->value == deleted) { 454 | node * j = deleted->forward; 455 | if(listSize(deleted->forward)) { 456 | while(j) { 457 | addNode(&block->branches, j->value, &blockorder); 458 | addNode(&((blockNode*)j->value)->predecessors, block, &blockorder); 459 | j = j->next; 460 | } 461 | } 462 | else { 463 | node * temp = iter; 464 | blockNode * b; 465 | do { 466 | b = (blockNode*) temp->value; 467 | addNode(&block->branches, b, &blockorder); 468 | addNode(&b->predecessors, block, &blockorder); 469 | temp = temp->next; 470 | } while(b->guarded); 471 | } 472 | } 473 | i = i->next; 474 | } 475 | } 476 | if(block->end == end_generic || block->end == end_call) { 477 | node * i = block->forward; 478 | while(i != 0) { 479 | if(i->value == deleted) { 480 | node * j = deleted->forward; 481 | while(j) { 482 | addNode(&block->forward, j->value, &blockorder); 483 | addNode(&((blockNode*)j->value)->predecessors, block, &blockorder); 484 | j = j->next; 485 | } 486 | } 487 | i = i->next; 488 | } 489 | } 490 | 491 | //Remove deleted block from this block's successor & predecessor lists: 492 | deleteNode(&block->forward, deleted, &intorder); 493 | deleteNode(&block->branches, deleted, &intorder); 494 | deleteNode(&block->predecessors, deleted, &intorder); 495 | //deleteNode(&block->pseudoSuccessors, deleted, &intorder); 496 | } 497 | 498 | iter = iter->next; 499 | } 500 | 501 | //Remove deleted block from block list: 502 | deleteNode(&_blocks, deleted, &intorder); 503 | 504 | //Cleanup: 505 | cleanBlock(deleted); 506 | } 507 | 508 | void fixAssembly() { 509 | //Delete empty blocks: 510 | node * iter = _blocks; 511 | while(iter) { 512 | blockNode* block = (blockNode*) iter->value; 513 | if(block->instructions == 0) { 514 | deleteBlock(block); 515 | } 516 | else { 517 | break; 518 | } 519 | iter = _blocks; 520 | } 521 | iter = _blocks; 522 | 
while(iter && iter->next) { 523 | blockNode* block = (blockNode*) iter->next->value; 524 | if(block->instructions == 0) { 525 | deleteBlock(block); 526 | iter = _blocks; 527 | continue; 528 | } 529 | iter = iter->next; 530 | } 531 | 532 | //Fix line numbers and addresses: 533 | iter = _blocks; 534 | int trueLine = 0; 535 | int blockid = -1; 536 | int addr = 0; 537 | _num_registers = 0; 538 | while(iter) { 539 | blockNode* block = (blockNode*) iter->value; 540 | block->isBranchTarget = false; 541 | block->address = addr; 542 | block->first = trueLine; 543 | block->id = ++blockid; 544 | block->size = 0; 545 | 546 | if(!block->instructions) { 547 | cerr << "SANITY CHECK ERROR ~2951 empty block\n"; 548 | cerr << "\tThis error means the assembly has a basic block with no instructions.\n"; 549 | } 550 | 551 | node * iter2 = block->instructions; 552 | while(iter2) { 553 | instruction * inst = (instruction*) iter2->value; 554 | 555 | inst->address = addr; 556 | addr += inst->size; 557 | block->size += inst->size; 558 | 559 | inst->line = trueLine++; 560 | inst->blockID = blockid; 561 | iter2 = iter2->next; 562 | 563 | if(inst->size != 8) { 564 | cerr << "WARNING: non-8byte instruction code for: " << opcodes[inst->op] << "\n"; 565 | cerr << "\t(Currently, this tool is not equipped to handle instructions of other sizes.)\n"; 566 | } 567 | 568 | for(int x = 0; x < inst->num_operands; x++) { 569 | int reg = inst->operands[x]->reg; 570 | if(reg >= 0) { 571 | inst->operands[x]->reg = inst->operands[x]->val1; 572 | reg = inst->operands[x]->reg; 573 | int align = inst->operands[x]->width; 574 | if(align == 3) { 575 | align = 4; 576 | } 577 | int tempreg = reg - (reg % align); 578 | if(tempreg != reg) { 579 | cerr << "WARNING ch~481: a register has incorrect alignment: R" << reg << " in operand[" << x << "] of "; 580 | fprintfInstruction(cerr, inst); 581 | cerr << ".\n"; 582 | static bool seenError = false; 583 | if(!seenError) { 584 | cerr << "\tNote that instructions with 64-bit 
or larger operands expect registers to be aligned to locations divisible by 2 or 4.\n"; 585 | seenError = true; 586 | } 587 | } 588 | if(tempreg + inst->operands[x]->width > _num_registers) { 589 | _num_registers = tempreg + inst->operands[x]->width; 590 | } 591 | } 592 | } 593 | } 594 | 595 | block->last = trueLine - 1; 596 | iter = iter->next; 597 | } 598 | 599 | //Fix operands for BRA, CAL, SSY, etcetera 600 | iter = _blocks; 601 | while(iter) { 602 | blockNode* block = (blockNode*) iter->value; 603 | 604 | node * iter2 = block->instructions; 605 | while(iter2) { 606 | instruction* inst = (instruction*) iter2->value; 607 | if(inst->ptr) { 608 | if(inst->op != opcode_BRX) { 609 | blockNode* targetBlock = (blockNode*) inst->ptr; 610 | for(int x = 0; x < inst->num_operands; x++) { 611 | if(inst->operands[x]->type == type_hex) { 612 | inst->operands[x]->val1 = targetBlock->address; 613 | } 614 | } 615 | targetBlock->isBranchTarget = true; 616 | } 617 | else { 618 | static bool seenError = false; 619 | if(!seenError) { 620 | cerr << "ERROR ch~620: This tool is not able to handle addressing for the BRX instruction.\n"; 621 | seenError = true; 622 | } 623 | } 624 | } 625 | 626 | iter2 = iter2->next; 627 | } 628 | 629 | _counter = block->last; 630 | iter = iter->next; 631 | } 632 | 633 | //Fix function IDs: 634 | int funcID = 0; 635 | blockNode * previousBlock = 0; 636 | iter = _blocks; 637 | while(iter) { 638 | blockNode* block = (blockNode*) iter->value; 639 | if(previousBlock) { 640 | if(previousBlock->end != end_generic) { 641 | node * preds = block->predecessors; 642 | while(preds) { 643 | blockNode * pred = (blockNode*) preds->value; 644 | if(pred->end == end_call) { 645 | if(!containsValue(pred->forward, block)) { 646 | funcID++; 647 | break; 648 | } 649 | } 650 | preds = preds->next; 651 | } 652 | } 653 | } 654 | 655 | block->funcID = funcID; 656 | 657 | node * iter2 = block->instructions; 658 | while(iter2) { 659 | instruction* inst = (instruction*) 
iter2->value; 660 | inst->funcID = funcID; 661 | iter2 = iter2->next; 662 | } 663 | 664 | previousBlock = block; 665 | iter = iter->next; 666 | } 667 | } 668 | 669 | void propogateBranches(blockEnd branch_type) { 670 | blockNode * a; 671 | node * i = _blocks; 672 | while(i) { 673 | a = (blockNode*) i->value; 674 | if(a->end == branch_type && a->target) { 675 | addBranchToAddress(a, a->target); 676 | } 677 | i = i->next; 678 | } 679 | } 680 | 681 | bool propogateReturns(blockNode * search, int target, int tag) { 682 | search->tag = tag; 683 | bool changed = false; 684 | 685 | if(search->guarded) { 686 | blockNode * next = getBlock(search->last + 1); 687 | if(next->tag != tag) { 688 | changed = changed || propogateReturns(next, target, tag); 689 | } 690 | } 691 | 692 | //Check if this block ends in a return: 693 | if(search->end == end_return) { 694 | changed = changed || addBranchToAddress(search, target); 695 | if(!search->guarded) {//no guard; end of path 696 | return changed; 697 | } 698 | } 699 | 700 | node * n; 701 | 702 | //If appropriate, search following blocks for returns: 703 | if(search->end == end_generic || search->end == end_call) { 704 | n = search->forward; 705 | while(n) { 706 | if(((blockNode*)n->value)->tag != tag) { 707 | changed = changed || propogateReturns((blockNode*)n->value, target, tag); 708 | } 709 | n = n->next; 710 | } 711 | } 712 | 713 | //Search branching successors for returns: 714 | if(search->end == end_branch) { 715 | n = search->branches; 716 | while(n) { 717 | if(((blockNode*)n->value)->tag != tag) { 718 | changed = changed || propogateReturns((blockNode*)n->value, target, tag); 719 | } 720 | n = n->next; 721 | } 722 | } 723 | 724 | return changed; 725 | } 726 | 727 | bool propogateCallReturns() { 728 | blockNode * a; 729 | blockNode * b; 730 | node * j; 731 | node * i = _blocks; 732 | bool changed = false; 733 | while(i) { 734 | a = (blockNode*) i->value;//current block being checked 735 | 736 | if(a->end == end_call) {//block 
ends with a call 737 | //Search successors for returns: 738 | j = a->branches; 739 | while(j) { 740 | b = (blockNode*) j->value; 741 | if(a->target == b->address) {//b is target of call 742 | node * insts = a->instructions; 743 | while(insts->next) insts = insts->next; 744 | instruction * inst = (instruction*) insts->value; 745 | changed = changed || propogateReturns(b, inst->address + inst->size, ++_tag); 746 | break; 747 | } 748 | j = j->next; 749 | } 750 | 751 | if(!j) {//call's target is not among the successors 752 | printf("\nSANITY-CHECK FAILED ch~493: cannot find called address\n"); 753 | } 754 | } 755 | i = i->next; 756 | } 757 | 758 | return changed; 759 | } 760 | 761 | bool propogatePointersHelper(blockNode * search, stack > ptrs, int tag, bool firstCall) { 762 | if(search->tag == tag) { 763 | return false; 764 | } 765 | if(!firstCall) { 766 | search->tag = tag; 767 | } 768 | 769 | bool changed = false; 770 | if(search->guarded) { 771 | blockNode * next = getBlock(search->last + 1); 772 | changed = changed || propogatePointersHelper(next, ptrs, tag, false); 773 | } 774 | 775 | int addedPtrs = 0; 776 | 777 | node * iter = search->instructions; 778 | while(iter) { 779 | instruction * inst = (instruction*) iter->value;//current instruction being checked 780 | if(inst->op == opcode_SSY || inst->op == opcode_PBK || inst->op == opcode_PCNT) { 781 | long long target = inst->operands[0]->val1; 782 | ptrs.push(make_pair(inst->op, target)); 783 | addedPtrs++; 784 | } else if(!ptrs.empty()) { 785 | opcode mustMatch = opcode_NOP; 786 | if(hasMod(inst, "S") || inst->op == opcode_SYNC) {//mod S is used prior to arch 50 787 | mustMatch = opcode_SSY; 788 | } 789 | else if(inst->op == opcode_CONT) { 790 | mustMatch = opcode_PCNT; 791 | } 792 | else if(inst->op == opcode_BRK) { 793 | mustMatch = opcode_PBK; 794 | } 795 | if(mustMatch != opcode_NOP) { 796 | //look for desired instruction in stack: 797 | pair targetPair = ptrs.top(); 798 | ptrs.pop(); 799 | 
while(targetPair.first != mustMatch && !ptrs.empty()) { 800 | targetPair = ptrs.top(); 801 | ptrs.pop(); 802 | } 803 | //if found desired instruction, match it to this jump 804 | if(targetPair.first == mustMatch) { 805 | long long target = targetPair.second; 806 | if(addedPtrs) { 807 | addedPtrs--; 808 | } 809 | changed = changed || addBranchToAddress(search, target); 810 | } 811 | } 812 | } 813 | iter = iter->next; 814 | } 815 | 816 | if(!ptrs.empty()) { 817 | node * children = 0; 818 | if(search->end == end_generic || search->end == end_call) { 819 | children = search->forward; 820 | } else if(search->end == end_branch) { 821 | children = search->branches; 822 | } else if(search->end == end_return) { 823 | static bool subroutineWarning = false; 824 | if(!subroutineWarning) { 825 | cerr << "Warning ch~976; subroutine may modify the pointer stack\n"; 826 | subroutineWarning = true; 827 | } 828 | } 829 | while(children) { 830 | changed = changed || propogatePointersHelper((blockNode*)children->value, ptrs, tag, false); 831 | children = children->next; 832 | } 833 | } 834 | 835 | //Garbage removal: 836 | while(addedPtrs) { 837 | addedPtrs--; 838 | ptrs.pop(); 839 | } 840 | 841 | return changed; 842 | } 843 | 844 | bool propogatePointers() { 845 | node * blocks = _blocks; 846 | bool changed = false; 847 | stack > ptrs; 848 | while(blocks) { 849 | blockNode * block = (blockNode*) blocks->value;//current block being checked 850 | changed = changed || propogatePointersHelper(block, ptrs, ++_tag, true); 851 | blocks = blocks->next; 852 | } 853 | 854 | return changed; 855 | } 856 | 857 | void setPtr() { 858 | _functions = 0; 859 | blockNode * a; 860 | instruction * inst; 861 | node * i = _blocks; 862 | while(i) { 863 | a = (blockNode*) i->value; 864 | node * j = a->instructions; 865 | while(j) { 866 | inst = (instruction*) j->value; 867 | 868 | if(inst->op == opcode_SSY || 869 | inst->op == opcode_BRA || 870 | inst->op == opcode_CAL || 871 | inst->op == opcode_PBK || 872 | 
inst->op == opcode_PCNT) {//found relevant instruction 873 | int target = -1; 874 | for(int x = 0; x < inst->num_operands; x++) { 875 | if(inst->operands[x]->type == type_hex) { 876 | target = inst->operands[x]->val1; 877 | break; 878 | } 879 | } 880 | 881 | //Search for target at start of each block 882 | node * k = _blocks; 883 | while(k) { 884 | a = (blockNode*) k->value; 885 | if(a->address == target) { 886 | inst->ptr = a; 887 | if(inst->op == opcode_CAL) { 888 | addNode(&_functions, a, &blockorder); 889 | } 890 | break; 891 | } 892 | 893 | k = k->next; 894 | } 895 | 896 | if(!k) {//target is not at the start of a block 897 | addBranchToAddress(NULL, target); 898 | 899 | //Search for target 900 | k = _blocks; 901 | while(k) { 902 | a = (blockNode*) k->value; 903 | if(a->address == target) { 904 | inst->ptr = a; 905 | if(inst->op == opcode_CAL) { 906 | addNode(&_functions, a, &blockorder); 907 | } 908 | break; 909 | } 910 | 911 | k = k->next; 912 | } 913 | 914 | //Sanity check: 915 | if(!k) { 916 | fprintf(stderr, "SANITY CHECK ERROR ch~1203; pointer to non-existant address 0x%x\n", target); 917 | } 918 | } 919 | } 920 | else if(inst->op == opcode_BRX) { 921 | //TODO handle BRX 922 | inst->ptr = 0; 923 | } 924 | else { 925 | inst->ptr = 0; 926 | } 927 | 928 | j = j->next; 929 | } 930 | 931 | i = i->next; 932 | } 933 | 934 | if(_num_functions && listSize(_functions) < _num_functions) { 935 | cerr << "SANITY CHECK FAILED ~1783; found " << listSize(_functions) << "/" << _num_functions << " functions.\n"; 936 | cerr << "\tMismatch between actual and expected subroutine count.\n"; 937 | } 938 | else if(_num_functions && listSize(_functions) > _num_functions) { 939 | cerr << "WARNING: found " << listSize(_functions) << "/" << _num_functions << " functions.\n"; 940 | cerr << "\tMismatch between actual and expected subroutine count.\n"; 941 | } 942 | } 943 | -------------------------------------------------------------------------------- /tools/src/cfghelpers.hpp: 
-------------------------------------------------------------------------------- 1 | #ifndef CFGHELPERS_HPP 2 | #define CFGHELPERS_HPP 3 | #include "common.hpp" 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | /** 10 | * @file cfghelpers.hpp 11 | * Defines functions used to create and modify the control flow graph. 12 | */ 13 | 14 | /** 15 | * Adds a branch from a given block to a given line. 16 | * If the line is in the middle of a basic block, it will be split into two blocks. 17 | * If block is NULL, this function simply splits the block which contains the target line. 18 | * @param a The block being branched from, or NULL. 19 | * @param line The target instruction. 20 | * @return true if the branch was added; false if it already existed 21 | */ 22 | bool addBranchToLine(blockNode * a, instruction * line); 23 | 24 | /** 25 | * Adds a branch from a given block to a given line number. 26 | * If the line is in the middle of a basic block, it will be split into two blocks. 27 | * If block is NULL, this function simply splits the block which contains the target line. 28 | * @param a The block being branched from, or NULL. 29 | * @param line The target line number. 30 | * @return true if the branch was added; false if it already existed 31 | */ 32 | bool addBranchToLine(blockNode * a, int line); 33 | 34 | /** 35 | * Adds a branch from a given block to a given address. 36 | * Finds line number for address, then calls #addBranchToLine 37 | * @param a The block being branched from, or NULL. 38 | * @param address The target address. 39 | * @return true if the branch was added; false if it already existed 40 | */ 41 | bool addBranchToAddress(blockNode * a, int address); 42 | 43 | /** 44 | * Finds addresses for labels, and adds the addresses to metadata as appropriate. 45 | */ 46 | void labelsToAddresses(); 47 | 48 | /** 49 | * Sets up labels for blocks; replaces hex addresses with label operands. 50 | * Overwrites existing labels. 
51 | */ 52 | void addressesToLabels(); 53 | 54 | /** 55 | * Combines adjacent blocks that should together form a basic block if predicate guards are ignored. 56 | */ 57 | void combineBlocks(); 58 | 59 | /** 60 | * Helper to fix ptrs (jump targets) after a block is deleted. 61 | * @param oldVal The ptr value to change 62 | * @param newVal The ptr value to replace oldVal with 63 | */ 64 | void changePtrs(blockNode* oldVal, blockNode* newVal); 65 | 66 | /** 67 | * Deletes a basic block, adding its successors to its predecessors' successors. 68 | * @param deleted The block to delete 69 | */ 70 | void deleteBlock(blockNode* deleted); 71 | 72 | /** 73 | * Fixes line numbers & addresses after addition/deletion of instructions. 74 | */ 75 | void fixAssembly(); 76 | 77 | /** 78 | * Adds successors to blocks with the specified end type. 79 | * @param The type of instruction the blocks end with. 80 | */ 81 | void propogateBranches(blockEnd branch_type); 82 | 83 | /** 84 | * Helper for propogateCallReturns. 85 | * Recursively adds successors to blocks that end with a RET. 86 | * @param search The current block in which we're searching for a RET 87 | * @param target The address to jump to after a RET 88 | * @param tag A unique value to avoid infinite recursion 89 | * @return true iff changes were made 90 | */ 91 | bool propogateReturns(blockNode * search, int target, int tag); 92 | 93 | /** 94 | * Add successors to blocks that end in a CAL or RET. 95 | * @return true iff changes were made, false otherwise 96 | */ 97 | bool propogateCallReturns(); 98 | 99 | /** 100 | * Adds pointers based on thread divergence, break, and continue instructions. 
101 | * @param search The current block in which we're searching for SSY/.S|SYNC, PBK/BRK, and PCNT/CONT 102 | * @param ptrs A stack of pointers, as pairs 103 | * @param tag A unique value used to avoid double visiting any blocks 104 | * @param firstCall True iff at depth 0 of recursion 105 | * @return true iff changes were made, false otherwise 106 | */ 107 | bool propogatePointersHelper(blockNode * search, stack > ptrs, int tag, bool firstCall); 108 | 109 | /** 110 | * Adds pointers based on thread divergence, break, and continue instructions. 111 | * @return true iff changes were made, false otherwise 112 | */ 113 | bool propogatePointers(); 114 | 115 | /** 116 | * Sets target blocks for relevant instructions. 117 | * Call this after first setting up all the basic blocks. 118 | */ 119 | void setPtr(); 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /tools/src/common.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "common.hpp" 5 | 6 | /** 7 | * Names of special registers for sm_2x. 8 | * Corresponds one-to-one with the specialIDs array. 9 | */ 10 | std::string _specialNames20[] = {"laneid", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "prim_type", "invocation_id", "y_direction", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "tid", "tid.x", "tid.y", "tid.z", "ctaparam", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "ntid.x", "ntid.y", "ntid.z", "gridparam", "nctaid.x", "nctaid.y", "nctaid.z", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "clocklo", "clockhi"}; 11 | 12 | /** 13 | * Values of special registers for sm_2x. 14 | * Corresponds one-to-one with the specialNames array. 
15 | */ 16 | const int _specialIDs20[] = {0,2,3,4,5,6,7,8,9,10,11,16,17,18,24,25,26,27,28,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,80,81}; 17 | 18 | /** 19 | * Number of recognized special registers for sm_2x. 20 | */ 21 | const int _numSpecials20 = 62; 22 | 23 | /** 24 | * Names of special registers for sm_3x through sm_6x 25 | * //TODO deal with SM_SHADER_TYPE, which does not start with SR_ 26 | */ 27 | std::string _specialNames35[] = {"laneid", "clock", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "", "", "", "", "prim_type", "invocation_id", "y_direction", "thread_kill", "shader_type", "directcbewriteaddresslow", "directcbewriteaddresshigh", "directcbewriteenabled", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "invocation_info", "wscalefactor_xy", "wscalefactor_z", "tid", "tid.x", "tid.y", "tid.z", "cta_param", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "cirqueueincrminusone", "nlatc", "", "", "", "", "", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "regalloc", "ctxaddr", "", "globalerrorstatus", "", "warperrorstatus", "warperrorstatusclear", "", "", "", "", "pm_hi0", "pm_hi1", "pm_hi2", "pm_hi3", "pm_hi4", "pm_hi5", "pm_hi6", "pm_hi7", "clocklo", "clockhi", "globaltimerlo", "globaltimerhi", "", "", "", "", "", "", "", "", "", "", "", "", "hwtaskid", "circularqueueentryindex", "circularqueueentryaddresslow", "circularqueueentryaddresshigh"}; 28 | 29 | /** 30 | * Names of texture operands. 31 | * Corresponds one-to-one with the textureIDs array. 32 | */ 33 | const char * _textureNames[] = {"1D", "ARRAY_1D", "RECT", "2D", "ARRAY_2D", "3D", "CUBE", "ARRAY_CUBE"}; 34 | 35 | /** 36 | * Values of texture operands. 37 | * Corresponds one-to-one with the textureNames array. 
38 | */ 39 | const int _textureIDs[] = {0,1,2,2,3,4,6,7}; 40 | 41 | /** 42 | * Number of recognized texture operands. 43 | */ 44 | const int _numTextures = 8; 45 | 46 | /** 47 | * Names of texture operands. 48 | * Corresponds one-to-one with the textureIDs array. 49 | */ 50 | const char * _channelNames[] = {"R", "G", "B", "A", "RG", "RA", "GA", "BA", "RGB", "RGA", "RBA", "GBA", "RGBA", "INVALID5", "INVALID6", "INVALID7", }; 51 | 52 | /** 53 | * Values of texture operands. 54 | * Corresponds one-to-one with the textureNames array. 55 | */ 56 | const int _channelIDs[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}; 57 | 58 | /** 59 | * Number of recognized texture operands. 60 | */ 61 | const int _numChannels = 16; 62 | 63 | /** 64 | * Names of miscellaneous operands. 65 | */ 66 | const char * _otherNames[] = {"CC", "PR"}; 67 | 68 | /** 69 | * Number of items in otherNames array. 70 | */ 71 | const int _numOthers = 2; 72 | 73 | /** 74 | * List of opcode names. 75 | * An opcode's index in this array should be the same as the the corresponding 'opcode' enum value. 
76 | */ 77 | const char * opcodes[] = {"MOV", "MOV32I", "LD", "LDU", "LDL", "LDS", "LDC", "ST", "STL", "STS", "LDLK", "LDSLK", "STUL", "STSUL", "FADD", "FADD32I", "FMUL", "FMUL32I", "FFMA", "FSET", "FSETP", "DSETP", "FCMP", "MUFU", "DADD", "DMUL", "DFMA", "IADD", "IADD32I", "IMNMX", "IMUL", "IMUL32I", "IMAD", "ISCADD", "ISET", "ISETP", "ICMP", "I2F", "I2I", "F2I", "F2F", "LOP", "LOP32I", "SHL", "SHR", "BFE", "BFI", "SEL", "SCHI", "SSY", "BRA", "BRX", "PCNT", "CONT", "PBK", "BRK", "CAL", "RET", "EXIT", "NOP", "BAR", "BPT", "B2R", "S2R", "PSETP", "PSET", "FLO", "P2R", "R2P", "TEX", "TEXDEPBAR", "RRO", "PRMT", "VADD", "DMNMX", "FMNMX", "RED", "VOTE", "POPC", "MEMBAR", "STSCUL", "LEPC", "CSETP", "ISCADD32I", "VMNMX", "TLD", "SHF", "FCHK", "ISUB", "JCAL", "SHFL", "LDG", "LD_LDU", "ATOM", "CCTL", "XMAD", "SYNC", "STG", "IADD3", "VABSDIFF", "DEPBAR", "LOP3", "TLDS", "TEXS", "LEA", "DSET", "PHI", "BINCODE"}; 78 | 79 | int getSpecialID(const char * lexeme, bool format, int arch) { 80 | if(arch <= 30) { 81 | for(int x = 0; x < _numSpecials20; x++) { 82 | for(int y = 0; y >= 0; y++) { 83 | if(lexeme[y] == _specialNames20[x][y]) { 84 | if(lexeme[y] == 0) { 85 | //Make them match perfectly for printing out later: 86 | if(format) { 87 | for(int z = 0; z < y; z++) { 88 | _specialNames20[x][z] = lexeme[z]; 89 | } 90 | } 91 | 92 | //Return value: 93 | return _specialIDs20[x]; 94 | } 95 | } else if(lexeme[y] == '_' && _specialNames20[x][y] == '.') { 96 | //interchangeable; format varies depending on CUDA version 97 | } else if(lexeme[y] == '.' 
&& _specialNames20[x][y] == '_') { 98 | //interchangeable; format varies depending on CUDA version 99 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames20[x][y] - ('a' - 'A')) { 100 | //interchangeable; format varies depending on CUDA version 101 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames20[x][y] - ('A' - 'a')) { 102 | //interchangeable; format varies depending on CUDA version 103 | } else { 104 | break; 105 | } 106 | } 107 | } 108 | } else if(arch < 70) { 109 | int numSpecials = sizeof(_specialNames35) / sizeof(std::string); 110 | for(int x = 0; x < numSpecials; x++) { 111 | for(int y = 0; y >= 0; y++) { 112 | if(lexeme[y] == _specialNames35[x][y]) { 113 | if(lexeme[y] == 0) { 114 | //Make them match perfectly for printing out later: 115 | if(format) { 116 | for(int z = 0; z < y; z++) { 117 | _specialNames35[x][z] = lexeme[z]; 118 | } 119 | } 120 | 121 | //Return value: 122 | return x; 123 | } 124 | } else if(lexeme[y] == '_' && _specialNames35[x][y] == '.') { 125 | //interchangeable; format varies depending on CUDA version 126 | } else if(lexeme[y] == '.' 
&& _specialNames35[x][y] == '_') { 127 | //interchangeable; format varies depending on CUDA version 128 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames35[x][y] - ('a' - 'A')) { 129 | //interchangeable; format varies depending on CUDA version 130 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames35[x][y] - ('A' - 'a')) { 131 | //interchangeable; format varies depending on CUDA version 132 | } else { 133 | break; 134 | } 135 | } 136 | } 137 | } else { 138 | fprintf(stderr, "ERROR c~98: Unimplemented.\n"); 139 | } 140 | 141 | fprintf(stderr,"WARNING: Found unrecognized special register \"%s\".\n",lexeme); 142 | return -1; 143 | } 144 | 145 | int getOtherID(const char * lexeme) { 146 | for(int x = 0; x < _numOthers; x++) { 147 | if(!strcmp(lexeme, _otherNames[x])) { 148 | return x; 149 | } 150 | } 151 | 152 | fprintf(stderr,"\n*SANITY CHECK ERROR ~22* This message is unreachable! \"%s\".\n\n",lexeme); 153 | return -1; 154 | } 155 | 156 | int getTextureID(const char * lexeme) { 157 | for(int x = 0; x < _numTextures; x++) { 158 | if(!strcmp(lexeme, _textureNames[x])) { 159 | return _textureIDs[x]; 160 | } 161 | } 162 | 163 | fprintf(stderr,"\n*SANITY CHECK ERROR ~33* Found unrecognized texture operand \"%s\".\n\n",lexeme); 164 | return -1; 165 | } 166 | 167 | int getChannelID(const char * lexeme) { 168 | for(int x = 0; x < _numChannels; x++) { 169 | if(!strcmp(lexeme, _channelNames[x])) { 170 | return _channelIDs[x]; 171 | } 172 | } 173 | 174 | fprintf(stderr,"\n*SANITY CHECK ERROR c~162: Found unrecognized channel operand \"%s\".\n\n",lexeme); 175 | return -1; 176 | } 177 | 178 | void initBlock(blockNode** block) { 179 | *block = (blockNode*) malloc(sizeof(blockNode)); 180 | (*block)->forward = 0; 181 | (*block)->branches = 0; 182 | (*block)->predecessors = 0; 183 | (*block)->instructions = 0; 184 | (*block)->last = 65535; 185 | (*block)->size = 0; 186 | (*block)->end = end_generic; 187 | (*block)->guarded = 
false; 188 | (*block)->force_guard = false; 189 | (*block)->tag = 0; 190 | (*block)->target = 0; 191 | (*block)->isBranchTarget = false; 192 | } 193 | 194 | instruction* newInstruction(opcode op, node * mods, node * operands) { 195 | instruction * inst = (instruction*) malloc(sizeof(instruction)); 196 | inst->label = 0; 197 | inst->op = op; 198 | inst->mods = mods; 199 | inst->guard = 0; 200 | inst->num_operands = 0; 201 | inst->operands = 0; 202 | inst->ptr = 0; 203 | //inst->bar = 0; 204 | inst->SCHIVal = 0; 205 | inst->depBarrierW = 7; 206 | inst->depBarrierR = 7; 207 | inst->depBarrierMask = 0; 208 | inst->size = 8; 209 | inst->marked = 0; 210 | 211 | if(operands) { 212 | inst->num_operands = listSize(operands); 213 | inst->operands = (operand**) malloc(inst->num_operands*sizeof(operand*)); 214 | node *o = operands; 215 | int x = 0; 216 | while(o) { 217 | inst->operands[x] = (operand*) o->value; 218 | o = o->next; 219 | x++; 220 | } 221 | cleanNodes(operands); 222 | } 223 | 224 | return inst; 225 | } 226 | 227 | instruction* newInstruction(opcode op, node * mods) { 228 | return newInstruction(op, mods, (node*)0); 229 | } 230 | 231 | instruction* newInstruction(opcode op, node * mods, operand * o1) { 232 | instruction * inst = newInstruction(op, mods); 233 | inst->num_operands = 1; 234 | inst->operands = (operand**) malloc(1*sizeof(operand*)); 235 | inst->operands[0] = o1; 236 | 237 | return inst; 238 | } 239 | 240 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2) { 241 | instruction * inst = newInstruction(op, mods); 242 | inst->num_operands = 2; 243 | inst->operands = (operand**) malloc(2*sizeof(operand*)); 244 | inst->operands[0] = o1; 245 | inst->operands[1] = o2; 246 | 247 | return inst; 248 | } 249 | 250 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3) { 251 | instruction * inst = newInstruction(op, mods); 252 | inst->num_operands = 3; 253 | inst->operands = (operand**) 
malloc(3*sizeof(operand*)); 254 | inst->operands[0] = o1; 255 | inst->operands[1] = o2; 256 | inst->operands[2] = o3; 257 | 258 | return inst; 259 | } 260 | 261 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4) { 262 | instruction * inst = newInstruction(op, mods); 263 | inst->num_operands = 4; 264 | inst->operands = (operand**) malloc(4*sizeof(operand*)); 265 | inst->operands[0] = o1; 266 | inst->operands[1] = o2; 267 | inst->operands[2] = o3; 268 | inst->operands[3] = o4; 269 | return inst; 270 | } 271 | 272 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4, operand * o5) { 273 | instruction * inst = newInstruction(op, mods); 274 | inst->num_operands = 5; 275 | inst->operands = (operand**) malloc(5 * sizeof(operand*)); 276 | inst->operands[0] = o1; 277 | inst->operands[1] = o2; 278 | inst->operands[2] = o3; 279 | inst->operands[3] = o4; 280 | inst->operands[4] = o5; 281 | return inst; 282 | } 283 | 284 | operand* newOperand(token_type type, long long val1, int val2, int val3) { 285 | operand * op = (operand*) malloc(sizeof(operand)); 286 | op->type = type; 287 | op->strval = 0; 288 | op->val1 = val1; 289 | op->val2 = val2; 290 | op->val3 = val3; 291 | op->reg = -1; 292 | op->dest = 0; 293 | op->mods = 0; 294 | op->width = 1; 295 | op->properties = (operand_prop) 0; 296 | op->converted = false; 297 | 298 | if(type == type_register || type == type_const_mem || type == type_mem) { 299 | if(val1 >= 0) { 300 | op->reg = val1; 301 | } 302 | } 303 | 304 | return op; 305 | } 306 | 307 | operand * newOperand(token_type type, operand_prop props, long long val1, int val2, int val3) { 308 | operand * op = newOperand(type, val1, val2, val3); 309 | op->properties = props; 310 | return op; 311 | } 312 | 313 | node * stringsToList(const char * str1, const char * str2, const char * str3) { 314 | node * list = 0; 315 | 316 | if(!str1) { 317 | return list; 318 | } 
319 | char * str = (char *) malloc(strlen(str1) + 1); 320 | memcpy(str, str1, strlen(str1) + 1); 321 | addLast(&list, str); 322 | 323 | if(!str2) { 324 | return list; 325 | } 326 | str = (char *) malloc(strlen(str2) + 1); 327 | memcpy(str, str2, strlen(str2) + 1); 328 | addLast(&list, str); 329 | 330 | if(!str3) { 331 | return list; 332 | } 333 | str = (char *) malloc(strlen(str3) + 1); 334 | memcpy(str, str3, strlen(str3) + 1); 335 | addLast(&list, str); 336 | 337 | return list; 338 | } 339 | 340 | node * addNode(node **first, void * value, long long (*comparator)(void*, void*)) { 341 | //check if new node replaces first 342 | if(!*first || comparator((*first)->value,value) > 0) { 343 | node * newNode = (node*) malloc(sizeof(node)); 344 | newNode->value = value; 345 | newNode->next = *first; 346 | *first = newNode; 347 | return newNode; 348 | } 349 | 350 | if(!comparator((*first)->value,value)) { 351 | return 0; 352 | } 353 | 354 | //find location to place new node 355 | node* iterator = *first; 356 | while(iterator->next && comparator(iterator->next->value,value) < 0) { 357 | iterator = iterator->next; 358 | } 359 | 360 | //if already in list, return 361 | if(iterator->next && !comparator(iterator->next->value,value)) { 362 | return 0; 363 | } 364 | 365 | //add new node to list 366 | node * newNode = (node*) malloc(sizeof(node)); 367 | newNode->value = value; 368 | newNode->next = iterator->next; 369 | iterator->next = newNode; 370 | return newNode; 371 | } 372 | 373 | void addLast(node ** first, void * value) { 374 | //check if new node replaces first 375 | if(!*first) { 376 | node * newNode = (node*) malloc(sizeof(node)); 377 | newNode->value = value; 378 | newNode->next = *first; 379 | *first = newNode; 380 | return; 381 | } 382 | 383 | //find last 384 | node* iterator = *first; 385 | while(iterator->next != 0) { 386 | iterator = iterator->next; 387 | } 388 | 389 | //add new node to list 390 | node * newNode = (node*) malloc(sizeof(node)); 391 | newNode->value 
= value; 392 | newNode->next = iterator->next; 393 | iterator->next = newNode; 394 | return; 395 | } 396 | 397 | void addFirst(node ** first, void * value) { 398 | node * newNode = (node*) malloc(sizeof(node)); 399 | newNode->value = value; 400 | newNode->next = *first; 401 | *first = newNode; 402 | } 403 | 404 | void addAfter(node **first, node * afterMe, void * value) { 405 | node * newNode = (node*) malloc(sizeof(node)); 406 | newNode->value = value; 407 | if(afterMe != 0) { 408 | newNode->next = afterMe->next; 409 | afterMe->next = newNode; 410 | } 411 | else { 412 | newNode->next = *first; 413 | *first = newNode; 414 | } 415 | } 416 | 417 | void addBefore(node **first, node * beforeMe, void * value) { 418 | if(!*first || (*first) == beforeMe) { 419 | node * newNode = (node*) malloc(sizeof(node)); 420 | newNode->value = value; 421 | newNode->next = *first; 422 | *first = newNode; 423 | return; 424 | } 425 | 426 | //find location to place new node 427 | node* iterator = *first; 428 | while(iterator->next != beforeMe) { 429 | iterator = iterator->next; 430 | } 431 | 432 | //add new node to list 433 | node * newNode = (node*) malloc(sizeof(node)); 434 | newNode->value = value; 435 | newNode->next = iterator->next; 436 | iterator->next = newNode; 437 | } 438 | 439 | void addAtIndex(node ** first, void * value, int index) { 440 | if(index < 1) { 441 | addFirst(first, value); 442 | return; 443 | } 444 | 445 | //Find place to add to: 446 | node* iterator = *first; 447 | while(iterator->next != 0 && index > 1) { 448 | iterator = iterator->next; 449 | index--; 450 | } 451 | 452 | //add new node to list 453 | node * newNode = (node*) malloc(sizeof(node)); 454 | newNode->value = value; 455 | newNode->next = iterator->next; 456 | iterator->next = newNode; 457 | return; 458 | } 459 | 460 | node * removeNode(node **first, void * value, long long (*comparator)(void*, void*)) { 461 | //make sure list is nonempty 462 | if(!*first) { 463 | return 0; 464 | } 465 | 466 | //check 
if first node is deleted 467 | if(!comparator((*first)->value,value)) { 468 | node * n = *first; 469 | *first = (*first)->next; 470 | return n; 471 | } 472 | 473 | //find location of node to delete 474 | node* iterator = *first; 475 | while(iterator->next != 0 && comparator(iterator->next->value,value)) { 476 | iterator = iterator->next; 477 | } 478 | 479 | //if node not found, return 0 480 | if(iterator->next == 0) { 481 | return 0; 482 | } 483 | 484 | //delete node 485 | node * n = iterator->next; 486 | iterator->next = iterator->next->next; 487 | n->next = 0; 488 | return n; 489 | } 490 | 491 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*)) { 492 | //make sure list is nonempty 493 | if(!*first) { 494 | return false; 495 | } 496 | 497 | //check if first node is deleted 498 | if(!comparator((*first)->value,value)) { 499 | node * n = *first; 500 | *first = (*first)->next; 501 | free(n); 502 | return true; 503 | } 504 | 505 | //find location of node to delete 506 | node* iterator = *first; 507 | while(iterator->next && comparator(iterator->next->value,value)) { 508 | iterator = iterator->next; 509 | } 510 | 511 | //if node not found, return 0 512 | if(!iterator->next) { 513 | return false; 514 | } 515 | 516 | //delete node 517 | node * n = iterator->next; 518 | iterator->next = iterator->next->next; 519 | free(n); 520 | return true; 521 | } 522 | 523 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *)) { 524 | //make sure list is nonempty 525 | if(!*first) { 526 | return false; 527 | } 528 | 529 | //check if first node is deleted 530 | if(!comparator((*first)->value,value)) { 531 | node * n = *first; 532 | *first = (*first)->next; 533 | valueCleaner(n->value); 534 | free(n); 535 | return true; 536 | } 537 | 538 | //find location of node to delete 539 | node* iterator = *first; 540 | while(iterator->next && comparator(iterator->next->value,value)) { 541 | iterator = 
iterator->next; 542 | } 543 | 544 | //if node not found, return 0 545 | if(!iterator->next) { 546 | return false; 547 | } 548 | 549 | //delete node 550 | node * n = iterator->next; 551 | iterator->next = iterator->next->next; 552 | valueCleaner(n->value); 553 | free(n); 554 | return true; 555 | } 556 | 557 | bool deleteNode(node **first, node * del) { 558 | //make sure list is nonempty 559 | if(!*first) { 560 | return false; 561 | } 562 | 563 | //check if first node is deleted 564 | if(*first == del) { 565 | node * n = *first; 566 | *first = (*first)->next; 567 | free(n); 568 | return true; 569 | } 570 | 571 | //find location of node to delete 572 | node* iterator = *first; 573 | while(iterator->next && iterator->next != del) { 574 | iterator = iterator->next; 575 | } 576 | 577 | //if node not found, return 0 578 | if(!iterator->next) { 579 | return false; 580 | } 581 | 582 | //delete node 583 | node * n = iterator->next; 584 | iterator->next = iterator->next->next; 585 | free(n); 586 | return true; 587 | } 588 | 589 | long long instructionorder(void * a, void * b) { 590 | instruction * s = (instruction*) a; 591 | instruction * t = (instruction*) b; 592 | return s->line - t->line; 593 | } 594 | 595 | long long blockorder(void * a, void * b) { 596 | blockNode * s = (blockNode*) a; 597 | blockNode * t = (blockNode*) b; 598 | return s->first - t->first; 599 | } 600 | 601 | long long intorder(void * a, void * b) { 602 | long long i = (long long) a; 603 | long long j = (long long) b; 604 | return i - j; 605 | } 606 | 607 | long long charstarorder(void * a, void * b) { 608 | char * i = (char*) a; 609 | char * j = (char*) b; 610 | return (long long)strcmp(i, j); 611 | } 612 | 613 | long long lexicographicorder(void * a, void * b) { 614 | char * i = (char*) a; 615 | char * j = (char*) b; 616 | 617 | return strcmp(i,j); 618 | } 619 | 620 | int listSize(node *first) { 621 | int count = 0; 622 | 623 | while(first) { 624 | count++; 625 | first = first->next; 626 | } 627 | 628 
| return count; 629 | } 630 | 631 | node * containsValue(node *first, void * val) { 632 | while(first && first->value != val) { 633 | first = first->next; 634 | } 635 | 636 | return first; 637 | } 638 | 639 | void cleanNodes(node *first) { 640 | node * next; 641 | while(first) { 642 | next = first->next; 643 | free(first); 644 | first = next; 645 | } 646 | } 647 | 648 | void cleanNodesFully(node *first, void (*valueCleaner)(void *)) { 649 | node * next; 650 | while(first) { 651 | next = first->next; 652 | valueCleaner(first->value); 653 | free(first); 654 | first = next; 655 | } 656 | } 657 | 658 | void cleanOperand(operand * o) { 659 | if(o->strval) { 660 | free(o->strval); 661 | } 662 | cleanNodesFully(o->mods,&free); 663 | free(o); 664 | } 665 | 666 | void cleanInstruction(instruction* i) { 667 | if(i->label) { 668 | free(i->label); 669 | } 670 | if(i->guard) { 671 | free(i->guard); 672 | } 673 | cleanNodesFully(i->mods, &free); 674 | if(i->num_operands > 0) { 675 | int x; 676 | for(x = 0; x < i->num_operands; x++) { 677 | cleanOperand(i->operands[x]); 678 | } 679 | } 680 | free(i->operands); 681 | 682 | free(i); 683 | } 684 | 685 | void cleanBlock(blockNode* block) { 686 | node * n = block->instructions; 687 | node * next; 688 | while(n != 0) { 689 | next = n->next; 690 | instruction * i = (instruction*) n->value; 691 | cleanInstruction(i); 692 | free(n); 693 | n = next; 694 | } 695 | cleanNodes(block->forward); 696 | cleanNodes(block->branches); 697 | cleanNodes(block->predecessors); 698 | 699 | free(block); 700 | } 701 | -------------------------------------------------------------------------------- /tools/src/cudacommon.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "cfghelpers.hpp" 6 | #include "cudacommon.hpp" 7 | using namespace std; 8 | 9 | extern node* _blocks; 10 | extern node* _functions; 11 | extern int _tag; 12 | extern bool _verbose; 13 | 14 | 
bool hasMod(instruction* inst, const char* mod) { 15 | node * n = inst->mods; 16 | while(n) { 17 | if(!strcmp((char*)n->value, mod)) { 18 | return true; 19 | } 20 | n = n->next; 21 | } 22 | 23 | return false; 24 | } 25 | 26 | bool hasMod(operand* op, const char* mod) { 27 | node * n = op->mods; 28 | while(n) { 29 | if(!strcmp((char*)n->value, mod)) { 30 | return true; 31 | } 32 | n = n->next; 33 | } 34 | 35 | return false; 36 | } 37 | 38 | bool hasTypeMod(instruction* inst, const char* mod, char later) { 39 | node * n = inst->mods; 40 | bool seenType = 0; 41 | while(n) { 42 | if(!strcmp((char*)n->value, mod)) { 43 | if(seenType ^ !later) { 44 | return true; 45 | } 46 | } 47 | 48 | if(!strcmp((char*)n->value, "U16")) { 49 | seenType = true; 50 | } else if(!strcmp((char*)n->value, "U32")) { 51 | seenType = true; 52 | } else if(!strcmp((char*)n->value, "U64")) { 53 | seenType = true; 54 | } else if(!strcmp((char*)n->value, "S16")) { 55 | seenType = true; 56 | } else if(!strcmp((char*)n->value, "S32")) { 57 | seenType = true; 58 | } else if(!strcmp((char*)n->value, "S64")) { 59 | seenType = true; 60 | } else if(!strcmp((char*)n->value, "F16")) { 61 | seenType = true; 62 | } else if(!strcmp((char*)n->value, "F32")) { 63 | seenType = true; 64 | } else if(!strcmp((char*)n->value, "F64")) { 65 | seenType = true; 66 | } else if(!strcmp((char*)n->value, "32")) { 67 | seenType = true; 68 | } else if(!strcmp((char*)n->value, "64")) { 69 | seenType = true; 70 | } else if(!strcmp((char*)n->value, "128")) { 71 | seenType = true; 72 | } 73 | 74 | n = n->next; 75 | } 76 | 77 | return false; 78 | } 79 | 80 | blockNode * getBlock(int line) { 81 | blockNode * b; 82 | node * n = _blocks; 83 | 84 | while(n) { 85 | b = (blockNode*) n->value; 86 | if(b->first <= line && b->last >= line) { 87 | return b; 88 | } 89 | n = n->next; 90 | } 91 | 92 | return 0; 93 | } 94 | 95 | blockNode * getBlock(instruction * inst) { 96 | blockNode * b; 97 | node * n = _blocks; 98 | 99 | while(n) { 100 | b = 
(blockNode*) n->value; 101 | if(inst->blockID == b->id) { 102 | return b; 103 | } 104 | n = n->next; 105 | } 106 | 107 | return 0; 108 | } 109 | 110 | instruction * getLine(int line) { 111 | blockNode * b = getBlock(line); 112 | node * n = b->instructions; 113 | int counter = b->first; 114 | while(counter < line) { 115 | n = n->next; 116 | counter++; 117 | } 118 | 119 | return (instruction*) n->value; 120 | } 121 | -------------------------------------------------------------------------------- /tools/src/cudacommon.hpp: -------------------------------------------------------------------------------- 1 | #ifndef CUDACOMMON_HPP 2 | #define CUDACOMMON_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file cudacommon.hpp 7 | * Defines functions used to analyze CUDA code. 8 | */ 9 | 10 | /** 11 | * Checks if instruction has given mod attached to opcode. 12 | * @param inst The instruction 13 | * @param mod The mod string 14 | * @return true iff mod is present, false iff not 15 | */ 16 | bool hasMod(instruction* inst, const char* mod); 17 | 18 | /** 19 | * Checks if operand has given mod. 20 | * @param op The operand 21 | * @param mod The mod string 22 | * @return true iff mod is present, false iff not 23 | */ 24 | bool hasMod(operand* op, const char* mod); 25 | 26 | /** 27 | * Checks if instruction has given mod in appropriate place, associated with the opcode. 28 | * @param inst The instruction 29 | * @param mod The mod string 30 | * @param later Should be 1 iff target mod is after a type mod; 0 if before 31 | * @return true iff mod is present, false iff not 32 | */ 33 | bool hasTypeMod(instruction* inst, const char* mod, char later); 34 | 35 | /** 36 | * Gets the basic block which contains the given line. 37 | * @param line The line number for an instruction 38 | * @return the blockNode which contains the line, or 0 if no such line exists 39 | */ 40 | blockNode * getBlock(int line); 41 | 42 | /** 43 | * Gets the basic block which contains the given instruction. 
 * @param inst The instruction
 * @return the blockNode which contains the instruction, or 0 if no such block exists
 */
blockNode * getBlock(instruction * inst);

/**
 * Gets the instruction with the given line number.
 * @param line The line number for an instruction
 * @return the desired instruction
 */
instruction * getLine(int line);

#endif

--------------------------------------------------------------------------------
/tools/src/decode.hpp:
--------------------------------------------------------------------------------
#ifndef DECODE_PARSE_HPP
#define DECODE_PARSE_HPP

#include "decode_common.hpp"

/**
 * @file decode.hpp
 * Defines some functions, structs, and constants used to analyze encodings and generate assemblers.
 */

//Various modifier types (NUM_MOD_TYPES is the number of MOD_* values below):
#define NUM_MOD_TYPES 12
#define MOD_FLAG 0
#define MOD_TYPE 1
#define MOD_PROPOSITION 2
#define MOD_COMPARE 3
#define MOD_ROUND 4
#define MOD_SHIFTDIRECTION 5
#define MOD_INVALIDCHKMODE 6
#define MOD_IADD3 7
#define MOD_MUFU 8
#define MOD_XMAD 9
#define MOD_IMNMX 10
#define MOD_FTZ 11

/**
 * Struct representing a modifier's encoding info.
 * The 64-element arrays presumably hold one entry per bit of a 64-bit
 * instruction word - TODO confirm against decode.ypp.
 */
typedef struct {
    /**
     * The modifier's ASCII name inside the assembly.
     */
    char * token;

    /**
     * The value of the binary instruction when this modifier was first observed.
     */
    bool vals[64];

    /**
     * Whether or not each bit is part of this modifier's encoding.
     * Initialized to all true, then set to false for bits confirmed not to matter.
     */
    bool matters[64];

    /**
     * Possible value of the binary instruction when this modifier was *not* present.
     */
    bool antivals[64];

    /**
     * True iff the antivals array has been initialized.
     */
    bool seenAntiVal;

    /**
     * The modifier's type, such as MOD_FLAG, MOD_TYPE, MOD_ROUND, etc.
     * MOD_FLAG is used for modifiers not known to be associated with a particular type.
     */
    int type;

    /**
     * The relative order of this modifier, with respect to its type.
     * For example, the second MOD_ROUND type modifier has count=1 (the default value is count=0).
     */
    int count;//0 for generic or 1st non-generic of its own type, 1 for second non-generic of its own type

    /**
     * Whether we want to optimize number of lines for this modifier in generated assembler.
     * If true, then this is combined with matching modifier from operations that have the same opcode.
     */
    bool combine;//false iff we should avoid combining this with same mod from other versions of same operation
} operationMod;

/**
 * Struct representing an operand's encoding info.
 * An operand has up to three components; each component has its own pair of
 * possibleStartN / maxBitsN arrays.
 */
typedef struct {
    /**
     * The operand's type.
     */
    token_type type;

    /**
     * Possible starting locations for the encoding of the operand's first component.
     */
    bool possibleStart1[64];

    /**
     * At each location in possibleStart1, the max number of bits that seem to match.
     */
    int maxBits1[64];

    /**
     * Possible starting locations for the encoding of the operand's second component.
     * (I treat literal offset in memory operands as the second component.)
     */
    bool possibleStart2[64];

    /**
     * At each location in possibleStart2, the max number of bits that seem to match.
     */
    int maxBits2[64];

    /**
     * Possible starting locations for the encoding of the operand's third component.
     * (I treat constant memory bank in const memory operands as the third component.)
     */
    bool possibleStart3[64];

    /**
     * At each location in possibleStart3, the max number of bits that seem to match.
     */
    int maxBits3[64];

    /**
     * The number of components in the operand.
     * Set to 1 for most types, 2 for memory, 3 for constant memory.
     */
    int components;

    /**
     * A linked list of modifier encodings attached to this operand.
     */
    node * mods;

    /**
     * Indicates which unary functions we've seen attached to this operand.
     * Has 1 bit for each unary function.
     */
    operand_prop properties;

    /**
     * For the four known unary functions, which bits are used in its encoding.
     */
    bool propMatters[4][64];

    /**
     * For the four known unary functions, the instruction's binary when they were seen.
     */
    bool propVals[4][64];

    /**
     * True if this (hex literal) operand was actually written as a float/double literal in decimal.
     */
    bool decimal;

    /**
     * True if this (hex literal) operand is encoded as a relative address.
     */
    bool relative;

    /**
     * True if the arithmetic negation unary function uses twos complement.
     * False otherwise (e.g. if it's encoded as a single bit somewhere).
     */
    bool incNegative;//true iff negative prop is applied to hex operand instead of flipping a bit

    /**
     * True if this (hex literal) operand is encoded as its opposite.
     * We've seen this happen with the last operand in ISUB instructions, since ISUB is really a special case of IADD.
     */
    bool addlNegative;

    /**
     * True if the encoding for the third component is bit-shifted due to limited space in binary.
     */
    bool shiftComp3;
} operationOperand;

/**
 * Struct representing an operation's encoding.
 * If two instructions have the same opcode name AND the same operand types, they are considered the same operation.
 */
typedef struct {
    /**
     * The opcode, using our own arbitrary identifiers.
     */
    int op;//uses enum id from common.hpp

    /**
     * True iff we've confirmed that this instruction can have predicate guards.
     * Some instructions, such as CAL, cannot.
     * Note that depending on the nvcc compiler version, NVIDIA's encoding will vary for cases where guards are disallowed.
     * (I.e. it's sometimes encoded the same as the null predicate PT, and sometimes the same as P0).
     */
    bool confirmedGuard;

    /**
     * The instruction's binary the first time that we saw this operation.
     */
    bool binid[64];

    /**
     * Which bits control the opcode for this operation.
     */
    bool binidmatters[64];

    /**
     * Array of operand encodings.
     * Only the first numOperands entries are meaningful (presumably; the
     * array itself always has room for 8).
     */
    operationOperand * operands[8];

    /**
     * The number of operands for this operation.
     */
    int numOperands;

    /**
     * Linked list of modifier encodings that can be attached to the opcode.
     */
    node * mods;

    /**
     * Value of instruction's binary in cases where each particular modifier type was completely absent.
     * The first index is one of the MOD_* type constants.
     */
    int noModBits[NUM_MOD_TYPES][64];
} operation;

/**
 * Returns the kind of mod a mod string corresponds to.
 * @param modstr The mod string
 * @return modifier's type if known, or MOD_FLAG if assumed to be generic
 */
int getModType(const char * modstr);

/**
 * Returns the unique integer ID we associate with the given opcode string.
 * @param opname The opcode's string
 * @return opcode's integer ID, or -1 on failure
 */
int getOpcode(const char * opname);

/**
 * Performs analysis of an instruction, updating the operation list.
%{
/* NOTE(review): the header name of this first include was lost in the dump; <string.h> is assumed
 * because strlen/strcpy are used below - confirm against the repository. */
#include <string.h>
#include "decode_common.hpp"
#include "decode.tab.hpp"

extern YYSTYPE yylval;
extern int isatty();

/* Returns a freshly malloc'ed copy of a NUL-terminated string. */
static char * copyLexeme(const char * text) {
	char * copy = (char *) malloc(strlen(text) + 1);
	strcpy(copy, text);
	return copy;
}

/* Returns a freshly malloc'ed copy of text, cut off at the first CR or LF (if any). */
static char * copyUntilLineEnd(const char * text) {
	char * copy = copyLexeme(text);
	for(int pos = 0; copy[pos] != 0; pos++) {
		if(copy[pos] == '\n' || copy[pos] == '\r') {
			copy[pos] = 0;
			break;
		}
	}
	return copy;
}
%}
%option nounput
%option noyywrap

hex			[-~]?(0x[0-9a-fA-F]+)|(\|0x[0-9a-fA-F]+\|)
bitlist			\{([0-9]*,)*[0-9]*\}
float			\-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)?
pred			!?(P[0-6])|!?[pP][tT]
reg			-?[-~]?\|?(RZ|R[0-9]+)\|?
specialName		SR_[\_\.A-Za-z0-9]+
specialID		SR[0-9]+
const			([-~]\ *)?\|?c\|?\ *\[.*\]\ *\[.*\]\|?
memory			([-~]\ *)?\|?\[.*\]\|?
decimal_line_number	[\_a-zA-Z0-9]+\ \([0-9]+\)

%%
"//Shared memory usage: "-?0[xX][a-fA-F0-9]+[^\n]* {
	/* metadata lexemes point just past the fixed prefix, directly into yytext */
	yylval.token_.lexeme = yytext + 23;
	return(METADATA_SHAREDMEM);
}
"//Shared memory usage: "-?[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 23;
	return(METADATA_SHAREDMEM);
}
"//Frame Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 14;
	return(METADATA_FRAME_SIZE);
}
"//Frame Size: "-?[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 14;
	return(METADATA_FRAME_SIZE);
}
"//Min Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 18;
	return(METADATA_MIN_STACK_SIZE);
}
"//Min Stack Size: "-?[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 18;
	return(METADATA_MIN_STACK_SIZE);
}
"//Max Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 18;
	return(METADATA_MAX_STACK_SIZE);
}
"//Max Stack Size: "-?[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 18;
	return(METADATA_MAX_STACK_SIZE);
}
"//Name: "[0-9a-zA-Z_]+[^\n]* {
	/* the kernel name is copied to the heap, without any trailing CR/LF */
	yylval.token_.lexeme = copyUntilLineEnd(yytext + 8);
	return(METADATA_KERNELNAME);
}
"//Arch: sm_"[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 11;
	return(METADATA_ARCH);
}
"//Function count: "[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 18;
	return(METADATA_FUNCTIONCOUNT);
}
"//Function: "[0-9a-zA-Z\_\$]+[^\n]* {
	/* the function name is copied to the heap, without any trailing CR/LF */
	yylval.token_.lexeme = copyUntilLineEnd(yytext + 12);
	return(METADATA_FUNCTIONNAME);
}
"//cuobjdump: "[0-9]+[^\n]* {
	yylval.token_.lexeme = yytext + 13;
	return(METADATA_CUOBJDUMP);
}
"//"[^\n]* {
	//return(COMMENT);
}
1D|ARRAY_1D|RECT|2D|ARRAY_2D|3D|CUBE|ARRAY_CUBE {
	yylval.token_.type = type_texture_operand;
	yylval.token_.lexeme = copyLexeme(yytext);
	return(TEXOP);
}
R|G|B|A|RG|RA|GA|BA|RGB|RGA|RBA|GBA|RGBA|INVALID5|INVALID6|INVALID7 {
	yylval.token_.type = type_channel;
	yylval.token_.lexeme = yytext;
	return(CHANNEL);
}
SB[0-9] {
	/* lexeme is the digit that follows "SB" */
	yylval.token_.type = type_sb;
	yylval.token_.lexeme = yytext + 2;
	return(SB_OPERAND);
}
{hex} {
	yylval.token_.type = type_hex;
	yylval.token_.lexeme = yytext;
	return(HEXVAL);
}
{bitlist} {
	yylval.token_.type = type_bit_list;
	yylval.token_.lexeme = yytext;
	return(BITLIST);
}
{float} {
	/* decimal literals are tagged with the hex token type */
	yylval.token_.type = type_hex;
	yylval.token_.lexeme = yytext;
	return(DECIMAL);
}
\+INF {
	return(PLUSINF);
}
\-INF {
	return(NEGINF);
}
\+QNAN {
	return(PLUSQNAN);
}
\+SNAN {
	return(PLUSSNAN);
}
@ {
	return(GUARD);
}
{pred} {
	yylval.token_.type = type_predicate;
	yylval.token_.lexeme = yytext;
	return(PREDICATE);
}
{reg} {
	yylval.token_.type = type_register;
	yylval.token_.lexeme = yytext;
	return(REG);
}
{const} {
	yylval.token_.type = type_const_mem;
	yylval.token_.lexeme = yytext;
	return(CONST);
}
{memory} {
	yylval.token_.type = type_mem;
	yylval.token_.lexeme = yytext;
	return(MEMORY);
}
{specialName} {
	yylval.token_.type = type_special_reg;
	yylval.token_.lexeme = yytext;
	return(SPECIALNAME);
}
{specialID} {
	yylval.token_.type = type_special_reg;
	yylval.token_.lexeme = yytext;
	return(SPECIALID);
}
\.[\?0-9A-Za-z\_]* {
	/* heap copy of the modifier without its leading '.' */
	yylval.token_.type = type_mod;
	yylval.token_.lexeme = copyLexeme(yytext + 1);
	return(MOD);
}
CC {
	yylval.token_.type = type_other_operand;
	return(CC);
}
PR {
	yylval.token_.type = type_other_operand;
	return(PR);
}
[0-9a-fA-F]{16}\: {
	yylval.token_.lexeme = copyLexeme(yytext);
	return(HEXCODE);
}
[A-Z0-9_]* {
	yylval.token_.lexeme = copyLexeme(yytext);
	return(OPCODE);
}
; {
	return(SEMICOLON);
}
[ \t\n\r,] {
}
\| {
	return (STRAYPIPE);
}
. {
	printf("\n\nERROR: %c is an illegal character here\n\n",yytext[0]);
	yyterminate();
}
%%
#ifdef WINDOWS
int yylex_destroy() {
	yy_delete_buffer(YY_CURRENT_BUFFER);
	return 0;
}
#endif
/**
 * Names of special registers for sm_2x.
 * Corresponds one-to-one with the _specialIDs20 array.
 * Deliberately not const: getSpecialID() rewrites an entry's spelling when called with format == true.
 */
std::string _specialNames20[] = {"laneid", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "prim_type", "invocation_id", "y_direction", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "tid", "tid.x", "tid.y", "tid.z", "ctaparam", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "ntid.x", "ntid.y", "ntid.z", "gridparam", "nctaid.x", "nctaid.y", "nctaid.z", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "clocklo", "clockhi"};

/**
 * Values of special registers for sm_2x.
 * Corresponds one-to-one with the _specialNames20 array.
 */
const int _specialIDs20[] = {0,2,3,4,5,6,7,8,9,10,11,16,17,18,24,25,26,27,28,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,80,81};

/**
 * Number of recognized special registers for sm_2x.
 * Computed from the table itself: the previous hard-coded 62 exceeded the 50 entries
 * that both sm_2x tables actually hold, so a failed lookup read past the end of the arrays.
 */
const int _numSpecials20 = sizeof(_specialIDs20) / sizeof(_specialIDs20[0]);

/**
 * Names of special registers for sm_3x through sm_6x.
 * A register's value is its index in this array; empty strings fill the indices that have no name here.
 * //TODO deal with SM_SHADER_TYPE, which does not start with SR_
 */
std::string _specialNames35[] = {"laneid", "clock", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "", "", "", "", "prim_type", "invocation_id", "y_direction", "thread_kill", "shader_type", "directcbewriteaddresslow", "directcbewriteaddresshigh", "directcbewriteenabled", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "invocation_info", "wscalefactor_xy", "wscalefactor_z", "tid", "tid.x", "tid.y", "tid.z", "cta_param", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "cirqueueincrminusone", "nlatc", "", "", "", "", "", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "regalloc", "ctxaddr", "", "globalerrorstatus", "", "warperrorstatus", "warperrorstatusclear", "", "", "", "", "pm_hi0", "pm_hi1", "pm_hi2", "pm_hi3", "pm_hi4", "pm_hi5", "pm_hi6", "pm_hi7", "clocklo", "clockhi", "globaltimerlo", "globaltimerhi", "", "", "", "", "", "", "", "", "", "", "", "", "hwtaskid", "circularqueueentryindex", "circularqueueentryaddresslow", "circularqueueentryaddresshigh"};

/**
 * Names of texture operands.
 * Corresponds one-to-one with the _textureIDs array.
 */
const char * _textureNames[] = {"1D", "ARRAY_1D", "RECT", "2D", "ARRAY_2D", "3D", "CUBE", "ARRAY_CUBE"};

/**
 * Values of texture operands.
 * Corresponds one-to-one with the _textureNames array.
 */
const int _textureIDs[] = {0,1,2,2,3,4,6,7};

/**
 * Number of recognized texture operands.
 */
const int _numTextures = 8;

/**
 * Names of channel operands.
 * Corresponds one-to-one with the _channelIDs array.
 */
const char * _channelNames[] = {"R", "G", "B", "A", "RG", "RA", "GA", "BA", "RGB", "RGA", "RBA", "GBA", "RGBA", "INVALID5", "INVALID6", "INVALID7", };

/**
 * Values of channel operands.
 * Corresponds one-to-one with the _channelNames array.
 */
const int _channelIDs[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

/**
 * Number of recognized channel operands.
 */
const int _numChannels = 16;

/**
 * Names of miscellaneous operands.
 */
const char * _otherNames[] = {"CC", "PR"};

/**
 * Number of items in the _otherNames array.
 */
const int _numOthers = 2;

/**
 * List of opcode names, terminated by a null pointer.
 * An opcode's index in this array should be the same as the corresponding 'opcode' enum value.
 */
const char * opcodes[] = {"MOV", "MOV32I", "LD", "LDU", "LDL", "LDS", "LDC", "ST", "STL", "STS", "LDLK", "LDSLK", "STUL", "STSUL", "FADD", "FADD32I", "FMUL", "FMUL32I", "FFMA", "FSET", "FSETP", "DSETP", "FCMP", "MUFU", "DADD", "DMUL", "DFMA", "IADD", "IADD32I", "IMNMX", "IMUL", "IMUL32I", "IMAD", "ISCADD", "ISET", "ISETP", "ICMP", "I2F", "I2I", "F2I", "F2F", "LOP", "LOP32I", "SHL", "SHR", "BFE", "BFI", "SEL", "SCHI", "SSY", "BRA", "BRX", "PCNT", "CONT", "PBK", "BRK", "CAL", "RET", "EXIT", "NOP", "BAR", "BPT", "B2R", "S2R", "PSETP", "PSET", "FLO", "P2R", "R2P", "TEX", "TEXDEPBAR", "RRO", "PRMT", "VADD", "DMNMX", "FMNMX", "RED", "VOTE", "POPC", "MEMBAR", "STSCUL", "LEPC", "CSETP", "ISCADD32I", "VMNMX", "TLD", "SHF", "FCHK", "ISUB", "JCAL", "SHFL", "LDG", "LD_LDU", "ATOM", "CCTL", "XMAD", "SYNC", "STG", "IADD3", "VABSDIFF", "DEPBAR", "LOP3", "TLDS", "TEXS", "LEA", "DSET", 0};

/**
 * Tells whether two characters count as the same inside a special register name.
 * Besides exact equality, '_' and '.' are interchangeable and ASCII letters match regardless
 * of case, because the spelling varies depending on the CUDA version.
 */
static bool specialCharsEquivalent(char given, char known) {
	if(given == known) {
		return true;
	}
	if((given == '_' && known == '.') || (given == '.' && known == '_')) {
		return true;
	}
	if(given >= 'A' && given <= 'Z' && given == known - ('a' - 'A')) {
		return true;
	}
	if(given >= 'a' && given <= 'z' && given == known - ('A' - 'a')) {
		return true;
	}
	return false;
}

/**
 * Compares a lexeme with one table entry using specialCharsEquivalent().
 * @param lexeme The name being looked up
 * @param name The table entry; overwritten with the lexeme's spelling on a match when format is true
 * @param format Whether to adopt the lexeme's spelling
 * @return true iff the two names have the same length and every character is equivalent
 */
static bool matchSpecialName(const char * lexeme, std::string & name, bool format) {
	//name[name.size()] reads as '\0', so the terminators are compared too and
	//the loop always stops before indexing past either string.
	int pos = 0;
	while(specialCharsEquivalent(lexeme[pos], name[pos])) {
		if(lexeme[pos] == 0) {
			//Make them match perfectly for printing out later:
			if(format) {
				for(int z = 0; z < pos; z++) {
					name[z] = lexeme[z];
				}
			}
			return true;
		}
		pos++;
	}
	return false;
}

/**
 * Returns the value which represents a given special register.
 * For arch <= 30 the sm_2x tables are searched and the matching _specialIDs20 entry is returned;
 * for arch < 70 the sm_3x..sm_6x table is searched and the entry's index is returned.
 * Other architectures are reported as unimplemented.
 * @param lexeme The special register's name
 * @param format If true, change the stored name to match the given lexeme
 * @param arch The architecture (e.g. 20 for sm_20)
 * @return the register's value, or -1 (after a warning on stderr) if the name is not recognized
 */
int getSpecialID(const char * lexeme, bool format, int arch) {
	if(arch <= 30) {
		for(int x = 0; x < _numSpecials20; x++) {
			if(matchSpecialName(lexeme, _specialNames20[x], format)) {
				return _specialIDs20[x];
			}
		}
	} else if(arch < 70) {
		const int numSpecials = sizeof(_specialNames35) / sizeof(_specialNames35[0]);
		for(int x = 0; x < numSpecials; x++) {
			if(matchSpecialName(lexeme, _specialNames35[x], format)) {
				return x;
			}
		}
	} else {
		fprintf(stderr, "ERROR c~98: Unimplemented.\n");
	}

	fprintf(stderr,"WARNING: Found unrecognized special register \"%s\".\n",lexeme);
	return -1;
}

/**
 * Returns a unique ID (the index into _otherNames) for certain miscellaneous operand types.
 * @param lexeme The operand's name
 * @return the ID, or -1 (after an error on stderr) if the name is unknown
 */
int getOtherID(const char * lexeme) {
	for(int x = 0; x < _numOthers; x++) {
		if(strcmp(lexeme, _otherNames[x]) == 0) {
			return x;
		}
	}

	fprintf(stderr,"\n*SANITY CHECK ERROR ~22* This message is unreachable! \"%s\".\n\n",lexeme);
	return -1;
}

/**
 * Returns the value which represents a given texture operand.
 * @param lexeme The texture operand's name
 * @return the matching _textureIDs entry, or -1 (after an error on stderr) if the name is unknown
 */
int getTextureID(const char * lexeme) {
	for(int x = 0; x < _numTextures; x++) {
		if(strcmp(lexeme, _textureNames[x]) == 0) {
			return _textureIDs[x];
		}
	}

	fprintf(stderr,"\n*SANITY CHECK ERROR ~33* Found unrecognized texture operand \"%s\".\n\n",lexeme);
	return -1;
}

/**
 * Returns the value which represents a given channel operand.
 * @param lexeme The channel operand's name
 * @return the matching _channelIDs entry, or -1 (after an error on stderr) if the name is unknown
 */
int getChannelID(const char * lexeme) {
	for(int x = 0; x < _numChannels; x++) {
		if(strcmp(lexeme, _channelNames[x]) == 0) {
			return _channelIDs[x];
		}
	}

	fprintf(stderr,"\n*SANITY CHECK ERROR c~162: Found unrecognized channel operand \"%s\".\n\n",lexeme);
	return -1;
}
inst->num_operands = listSize(operands); 189 | inst->operands = (operand**) malloc(inst->num_operands*sizeof(operand*)); 190 | node *o = operands; 191 | int x = 0; 192 | while(o) { 193 | inst->operands[x] = (operand*) o->value; 194 | o = o->next; 195 | x++; 196 | } 197 | cleanNodes(operands); 198 | } 199 | 200 | return inst; 201 | } 202 | 203 | instruction* newInstruction(opcode op, node * mods) { 204 | return newInstruction(op, mods, (node*)0); 205 | } 206 | 207 | instruction* newInstruction(opcode op, node * mods, operand * o1) { 208 | instruction * inst = newInstruction(op, mods); 209 | inst->num_operands = 1; 210 | inst->operands = (operand**) malloc(1*sizeof(operand*)); 211 | inst->operands[0] = o1; 212 | 213 | return inst; 214 | } 215 | 216 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2) { 217 | instruction * inst = newInstruction(op, mods); 218 | inst->num_operands = 2; 219 | inst->operands = (operand**) malloc(2*sizeof(operand*)); 220 | inst->operands[0] = o1; 221 | inst->operands[1] = o2; 222 | 223 | return inst; 224 | } 225 | 226 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3) { 227 | instruction * inst = newInstruction(op, mods); 228 | inst->num_operands = 3; 229 | inst->operands = (operand**) malloc(3*sizeof(operand*)); 230 | inst->operands[0] = o1; 231 | inst->operands[1] = o2; 232 | inst->operands[2] = o3; 233 | 234 | return inst; 235 | } 236 | 237 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4) { 238 | instruction * inst = newInstruction(op, mods); 239 | inst->num_operands = 4; 240 | inst->operands = (operand**) malloc(4*sizeof(operand*)); 241 | inst->operands[0] = o1; 242 | inst->operands[1] = o2; 243 | inst->operands[2] = o3; 244 | inst->operands[3] = o4; 245 | return inst; 246 | } 247 | 248 | operand* newOperand(token_type type, long long val1, int val2, int val3) { 249 | operand * op = (operand*) 
malloc(sizeof(operand)); 250 | op->type = type; 251 | op->val1 = val1; 252 | op->val2 = val2; 253 | op->val3 = val3; 254 | op->reg = -1; 255 | op->mods = 0; 256 | op->properties = (operand_prop) 0; 257 | op->converted = false; 258 | op->bad = false; 259 | op->decimal = false; 260 | 261 | if(type == type_register || type == type_const_mem || type == type_mem) { 262 | if(val1 >= 0) { 263 | op->reg = val1; 264 | } 265 | } 266 | 267 | return op; 268 | } 269 | 270 | operand* newOperand(token_type type, operand_prop props, long long val1, int val2, int val3) { 271 | operand * op = newOperand(type, val1, val2, val3); 272 | op->properties = props; 273 | return op; 274 | } 275 | 276 | node *addNode(node **first, void * value, long (*comparator)(void*, void*)) { 277 | //check if new node replaces first 278 | if(!*first || comparator((*first)->value,value) > 0) { 279 | node * newNode = (node*) malloc(sizeof(node)); 280 | newNode->value = value; 281 | newNode->next = *first; 282 | *first = newNode; 283 | return newNode; 284 | } 285 | 286 | if(!comparator((*first)->value,value)) { 287 | return 0; 288 | } 289 | 290 | //find location to place new node 291 | node* iterator = *first; 292 | while(iterator->next && comparator(iterator->next->value,value) < 0) { 293 | iterator = iterator->next; 294 | } 295 | 296 | //if already in list, return 297 | if(iterator->next && !comparator(iterator->next->value,value)) { 298 | return 0; 299 | } 300 | 301 | //add new node to list 302 | node * newNode = (node*) malloc(sizeof(node)); 303 | newNode->value = value; 304 | newNode->next = iterator->next; 305 | iterator->next = newNode; 306 | return newNode; 307 | } 308 | 309 | void addLast(node ** first, void * value) { 310 | //check if new node replaces first 311 | if(!*first) { 312 | node * newNode = (node*) malloc(sizeof(node)); 313 | newNode->value = value; 314 | newNode->next = *first; 315 | *first = newNode; 316 | return; 317 | } 318 | 319 | //find last 320 | node* iterator = *first; 321 | 
while(iterator->next != 0) { 322 | iterator = iterator->next; 323 | } 324 | 325 | //add new node to list 326 | node * newNode = (node*) malloc(sizeof(node)); 327 | newNode->value = value; 328 | newNode->next = iterator->next; 329 | iterator->next = newNode; 330 | return; 331 | } 332 | 333 | void addFirst(node ** first, void * value) { 334 | node * newNode = (node*) malloc(sizeof(node)); 335 | newNode->value = value; 336 | newNode->next = *first; 337 | *first = newNode; 338 | } 339 | 340 | void addAfter(node **first, node * afterMe, void * value) { 341 | node * newNode = (node*) malloc(sizeof(node)); 342 | newNode->value = value; 343 | if(afterMe != 0) { 344 | newNode->next = afterMe->next; 345 | afterMe->next = newNode; 346 | } 347 | else { 348 | newNode->next = *first; 349 | *first = newNode; 350 | } 351 | } 352 | 353 | void addBefore(node **first, node * beforeMe, void * value) { 354 | if(!*first || (*first) == beforeMe) { 355 | node * newNode = (node*) malloc(sizeof(node)); 356 | newNode->value = value; 357 | newNode->next = *first; 358 | *first = newNode; 359 | return; 360 | } 361 | 362 | //find location to place new node 363 | node* iterator = *first; 364 | while(iterator->next != beforeMe) { 365 | iterator = iterator->next; 366 | } 367 | 368 | //add new node to list 369 | node * newNode = (node*) malloc(sizeof(node)); 370 | newNode->value = value; 371 | newNode->next = iterator->next; 372 | iterator->next = newNode; 373 | } 374 | 375 | void addAtIndex(node ** first, void * value, int index) { 376 | if(index < 1) { 377 | addFirst(first, value); 378 | return; 379 | } 380 | 381 | //Find place to add to: 382 | node* iterator = *first; 383 | while(iterator->next != 0 && index > 1) { 384 | iterator = iterator->next; 385 | index--; 386 | } 387 | 388 | //add new node to list 389 | node * newNode = (node*) malloc(sizeof(node)); 390 | newNode->value = value; 391 | newNode->next = iterator->next; 392 | iterator->next = newNode; 393 | return; 394 | } 395 | 396 | node * 
removeNode(node **first, void * value, long (*comparator)(void*, void*)) { 397 | //make sure list is nonempty 398 | if(!*first) { 399 | return 0; 400 | } 401 | 402 | //check if first node is deleted 403 | if(!comparator((*first)->value,value)) { 404 | node * n = *first; 405 | *first = (*first)->next; 406 | return n; 407 | } 408 | 409 | //find location of node to delete 410 | node* iterator = *first; 411 | while(iterator->next != 0 && comparator(iterator->next->value,value)) { 412 | iterator = iterator->next; 413 | } 414 | 415 | //if node not found, return 0 416 | if(iterator->next == 0) { 417 | return 0; 418 | } 419 | 420 | //delete node 421 | node * n = iterator->next; 422 | iterator->next = iterator->next->next; 423 | n->next = 0; 424 | return n; 425 | } 426 | 427 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*)) { 428 | //make sure list is nonempty 429 | if(!*first) { 430 | return false; 431 | } 432 | 433 | //check if first node is deleted 434 | if(!comparator((*first)->value,value)) { 435 | node * n = *first; 436 | *first = (*first)->next; 437 | free(n); 438 | return true; 439 | } 440 | 441 | //find location of node to delete 442 | node* iterator = *first; 443 | while(iterator->next && comparator(iterator->next->value,value)) { 444 | iterator = iterator->next; 445 | } 446 | 447 | //if node not found, return 0 448 | if(!iterator->next) { 449 | return false; 450 | } 451 | 452 | //delete node 453 | node * n = iterator->next; 454 | iterator->next = iterator->next->next; 455 | free(n); 456 | return true; 457 | } 458 | 459 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *)) { 460 | //make sure list is nonempty 461 | if(!*first) { 462 | return false; 463 | } 464 | 465 | //check if first node is deleted 466 | if(!comparator((*first)->value,value)) { 467 | node * n = *first; 468 | *first = (*first)->next; 469 | valueCleaner(n->value); 470 | free(n); 471 | return true; 472 | 
} 473 | 474 | //find location of node to delete 475 | node* iterator = *first; 476 | while(iterator->next && comparator(iterator->next->value,value)) { 477 | iterator = iterator->next; 478 | } 479 | 480 | //if node not found, return 0 481 | if(!iterator->next) { 482 | return false; 483 | } 484 | 485 | //delete node 486 | node * n = iterator->next; 487 | iterator->next = iterator->next->next; 488 | valueCleaner(n->value); 489 | free(n); 490 | return true; 491 | } 492 | 493 | bool deleteNode(node **first, node * del) { 494 | //make sure list is nonempty 495 | if(!*first) { 496 | return false; 497 | } 498 | 499 | //check if first node is deleted 500 | if(*first == del) { 501 | node * n = *first; 502 | *first = (*first)->next; 503 | free(n); 504 | return true; 505 | } 506 | 507 | //find location of node to delete 508 | node* iterator = *first; 509 | while(iterator->next && iterator->next != del) { 510 | iterator = iterator->next; 511 | } 512 | 513 | //if node not found, return 0 514 | if(!iterator->next) { 515 | return false; 516 | } 517 | 518 | //delete node 519 | node * n = iterator->next; 520 | iterator->next = iterator->next->next; 521 | free(n); 522 | return true; 523 | } 524 | 525 | long long intorder(void * a, void * b) { 526 | long long i = (long long) a; 527 | long long j = (long long) b; 528 | return i - j; 529 | } 530 | 531 | long long charstarorder(void * a, void * b) { 532 | char * i = (char*) a; 533 | char * j = (char*) b; 534 | return (long)strcmp(i, j); 535 | } 536 | 537 | long long lexicographicorder(void * a, void * b) { 538 | char * i = (char*) a; 539 | char * j = (char*) b; 540 | 541 | return strcmp(i,j); 542 | } 543 | 544 | int listSize(node *first) { 545 | int count = 0; 546 | 547 | while(first) { 548 | count++; 549 | first = first->next; 550 | } 551 | 552 | return count; 553 | } 554 | 555 | node * containsValue(node *first, void * val) { 556 | while(first && first->value != val) { 557 | first = first->next; 558 | } 559 | 560 | return first; 561 | 
} 562 | 563 | void cleanNodes(node *first) { 564 | node * next; 565 | while(first) { 566 | next = first->next; 567 | free(first); 568 | first = next; 569 | } 570 | } 571 | 572 | void cleanNodesFully(node *first, void (*valueCleaner)(void *)) { 573 | node * next; 574 | while(first) { 575 | next = first->next; 576 | valueCleaner(first->value); 577 | free(first); 578 | first = next; 579 | } 580 | } 581 | 582 | void cleanOperand(operand * o) { 583 | cleanNodesFully(o->mods,&free); 584 | free(o); 585 | } 586 | 587 | void cleanInstruction(instruction* i) { 588 | if(i->guard) { 589 | free(i->guard); 590 | } 591 | cleanNodesFully(i->mods, &free); 592 | if(i->num_operands > 0) { 593 | int x; 594 | for(x = 0; x < i->num_operands; x++) { 595 | cleanOperand(i->operands[x]); 596 | } 597 | free(i->operands); 598 | } 599 | 600 | free(i); 601 | } 602 | 603 | -------------------------------------------------------------------------------- /tools/src/decode_common.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DECODE_COMMON_HPP 2 | #define DECODE_COMMON_HPP 3 | #include 4 | #include 5 | 6 | /** 7 | * @file decode_common.hpp 8 | * Defines enums, structs, and functions that are used in multiple parts of the program. 9 | */ 10 | 11 | #define MAXREGISTERS 255 12 | 13 | /** 14 | * Identifies a token's 'type' when parsing. 15 | * Also used to keep track of operand types. 16 | */ 17 | typedef enum { 18 | type_opcode, 19 | type_register, 20 | type_special_reg, 21 | type_predicate, 22 | type_guard, 23 | type_sb, 24 | type_hex, 25 | type_bit_list, 26 | type_mod, 27 | type_const_mem, 28 | type_mem, 29 | type_texture_operand, 30 | type_channel, 31 | type_other_operand, 32 | type_label, 33 | } token_type; 34 | 35 | /** 36 | * Used to build linked lists. 37 | */ 38 | typedef struct node { 39 | void * value; 40 | struct node * next; 41 | } node; 42 | 43 | /** 44 | * Mostly used to hold data during parsing. 
/**
 * @file decode_common.hpp
 * Defines enums, structs, and functions that are used in multiple parts of the program.
 */

#define MAXREGISTERS 255

/**
 * Identifies a token's 'type' when parsing.
 * Also used to keep track of operand types.
 */
typedef enum {
	type_opcode,
	type_register,
	type_special_reg,
	type_predicate,
	type_guard,
	type_sb,
	type_hex,
	type_bit_list,
	type_mod,
	type_const_mem,
	type_mem,
	type_texture_operand,
	type_channel,
	type_other_operand,
	type_label,
} token_type;

/**
 * Used to build linked lists.
 */
typedef struct node {
	void * value;
	struct node * next;
} node;

/**
 * Mostly used to hold data during parsing.
 */
typedef struct {
	token_type type;
	char* lexeme;
} token;

/**
 * Used to note attributes and unary operations of an operand.
 * Each value is a distinct bit, so several can be combined in one variable.
 */
typedef enum {
	prop_minus = 1,
	prop_bitwise_complement = 2,
	prop_absolute_value = 4,
	prop_not = 8,

	prop_float = 128,//not a real property; used here for accurate asm->asm
	prop_double = 256,//not a real property; used here for accurate asm->asm
	prop_exp = 512,//not a real property; used here for accurate asm->asm
	prop_float32i = 1024,//not a real property; used here for accurate asm->asm
} operand_prop;

/**
 * Combines two property sets (bitwise OR of the underlying integers).
 */
inline operand_prop operator|(operand_prop a, operand_prop b) {
	return static_cast<operand_prop>(static_cast<int>(a) | static_cast<int>(b));
}

/**
 * Subtracts the underlying integer of b from that of a.
 * This clears the flags in b only when all of them are set in a.
 */
inline operand_prop operator-(operand_prop a, operand_prop b) {
	return static_cast<operand_prop>(static_cast<int>(a) - static_cast<int>(b));
}

/**
 * Represents a parsed operand.
 */
typedef struct {
	/**
	 * The list of mods applied to this operand, such as ".CC".
	 */
	node * mods;

	/**
	 * The operand's type.
	 */
	token_type type;

	/**
	 * Part of the operand's value.
	 * Used for label name for labels.
	 */
	char * strval;

	/**
	 * Part of the operand's value.
	 * This is the register ID in register, predicate, and memory operands.
	 * This is an integer or bit-shifted float value for hex operands.
	 */
	long long val1;

	/**
	 * Part of the operand's value.
	 * This is the offset for global/local/shared memory operands.
	 * This is the memory bank value (the first hex value) for constant memory operands.
	 */
	int val2;

	/**
	 * Part of the operand's value.
	 * This is the offset (the second hex value) for constant memory operands.
	 */
	int val3;

	/**
	 * The ID of the first register inside this operand, or -1.
	 * The RZ register results in a value of -1 for this.
	 */
	int reg;

	/**
	 * The operand's properties, such as negative and/or absolute value.
	 * There are some fake properties used for accurate output of assembly code.
	 */
	operand_prop properties;

	/**
	 * For base-10 operands (used in float/double instructions), this is the number of digits after the decimal point.
	 * Used for accurate assembly output.
	 */
	int precision;

	/**
	 * Set to true for the last operand in I2F, F2I, I2I, or F2F.
	 * Used for accurate assembly output.
	 */
	bool converted;

	bool bad;

	bool decimal;
} operand;

/**
 * Unique ID for each opcode.
 */
typedef enum {
	opcode_MOV = 0,
	opcode_MOV32I = 1,
	opcode_LD = 2,
	opcode_LDU = 3,
	opcode_LDL = 4,
	opcode_LDS = 5,
	opcode_LDC = 6,
	opcode_ST = 7,
	opcode_STL = 8,
	opcode_STS = 9,
	opcode_LDLK = 10,
	opcode_LDSLK = 11,
	opcode_STUL = 12,
	opcode_STSUL = 13,
	opcode_FADD = 14,
	opcode_FADD32I = 15,
	opcode_FMUL = 16,
	opcode_FMUL32I = 17,
	opcode_FFMA = 18,
	opcode_FSET = 19,
	opcode_FSETP = 20,
	opcode_DSETP = 21,
	opcode_FCMP = 22,
	opcode_MUFU = 23,
	opcode_DADD = 24,
	opcode_DMUL = 25,
	opcode_DFMA = 26,
	opcode_IADD = 27,
	opcode_IADD32I = 28,
	opcode_IMNMX = 29,
	opcode_IMUL = 30,
	opcode_IMUL32I = 31,
	opcode_IMAD = 32,
	opcode_ISCADD = 33,
	opcode_ISET = 34,
	opcode_ISETP = 35,
	opcode_ICMP = 36,
	opcode_I2F = 37,
	opcode_I2I = 38,
	opcode_F2I = 39,
	opcode_F2F = 40,
	opcode_LOP = 41,
	opcode_LOP32I = 42,
	opcode_SHL = 43,
	opcode_SHR = 44,
	opcode_BFE = 45,
	opcode_BFI = 46,
	opcode_SEL = 47,
	opcode_SCHI = 48,//this is asfermi's name for the fake instructions which contain scheduling information in sm_30 and up
	opcode_SSY = 49,
	opcode_BRA = 50,
	opcode_BRX = 51,
	opcode_PCNT = 52,
	opcode_CONT = 53,
	opcode_PBK = 54,
	opcode_BRK = 55,
	opcode_CAL = 56,
	opcode_RET = 57,
	opcode_EXIT = 58,
	opcode_NOP = 59,
	opcode_BAR = 60,
	opcode_BPT = 61,
	opcode_B2R = 62,
	opcode_S2R = 63,
	opcode_PSETP = 64,
	opcode_PSET = 65,
	opcode_FLO = 66,
	opcode_P2R = 67,
	opcode_R2P = 68,
	opcode_TEX = 69,
	opcode_TEXDEPBAR = 70,
	opcode_RRO = 71,
	opcode_PRMT = 72,
	opcode_VADD = 73,
	opcode_DMNMX = 74,
	opcode_FMNMX = 75,
	opcode_RED = 76,
	opcode_VOTE = 77,
	opcode_POPC = 78,
	opcode_MEMBAR = 79,
	opcode_STSCUL = 80,
	opcode_LEPC = 81,
	opcode_CSETP = 82,
	opcode_ISCADD32I = 83,
	opcode_VMNMX = 84,
	opcode_TLD = 85,
	opcode_SHF = 86,
	opcode_FCHK = 87,
	opcode_ISUB = 88,
	opcode_JCAL = 89, //calls extern functions like printf
	opcode_SHFL = 90,
	opcode_LDG = 91,
	opcode_LD_LDU = 92,
	opcode_ATOM = 93,
	opcode_CCTL = 94,
	opcode_XMAD = 95,
	opcode_SYNC = 96,
	opcode_STG = 97,
	opcode_IADD3 = 98,
	opcode_VABSDIFF = 99,
	opcode_DEPBAR = 100,
	opcode_LOP3 = 101,
	opcode_TLDS = 102,
	opcode_TEXS = 103,
	opcode_LEA = 104,
	opcode_DSET = 105,
} opcode;

/**
 * Represents an assembly instruction.
 */
typedef struct {
	/**
	 * A label representing this instruction's address, or 0.
	 */
	char * label;

	/**
	 * The predicate guard, or 0 if there isn't one.
	 */
	operand * guard;

	/**
	 * A unique ID representing the instruction's opcode.
	 */
	opcode op;

	/**
	 * A list of mods (such as ".LU" or ".128") attached to the opcode.
	 */
	node * mods;

	/**
	 * The operands for this instruction.
	 */
	operand ** operands;

	/**
	 * The number of operands for this instruction.
	 */
	int num_operands;

	int address;
} instruction;

/**
 * Names of special registers.
 * Corresponds one-to-one with the _specialIDs20 array.
 */
extern std::string _specialNames20[];

/**
 * Values of special registers.
 * Corresponds one-to-one with the _specialNames20 array.
 */
extern const int _specialIDs20[];

/**
 * Number of recognized special registers.
 */
extern const int _numSpecials20;

/**
 * Names of texture operands.
 * Corresponds one-to-one with the _textureIDs array.
 */
extern const char * _textureNames[];

/**
 * Values of texture operands.
 * Corresponds one-to-one with the _textureNames array.
 */
extern const int _textureIDs[];

/**
 * Number of recognized texture operands.
 */
extern const int _numTextures;

/**
 * Names of channel operands.
 * Corresponds one-to-one with the _channelIDs array.
 */
extern const char * _channelNames[];

/**
 * Values of channel operands.
 * Corresponds one-to-one with the _channelNames array.
 */
extern const int _channelIDs[];

/**
 * Number of recognized channel operands.
 */
extern const int _numChannels;

/**
 * Names of miscellaneous operands.
 */
extern const char * _otherNames[];

/**
 * Number of items in the _otherNames array.
 */
extern const int _numOthers;

/**
 * List of opcode names.
 */
extern const char * opcodes[];

/**
 * Returns the value which represents a given special register.
 * @param lexeme The special register's name
 * @param format If true, change special register's name to match given lexeme
 * @param arch The architecture (e.g. 20 for sm_20)
 */
int getSpecialID(const char * lexeme, bool format, int arch);

/**
 * Returns a unique ID for certain miscellaneous operand types.
 * @param lexeme The operand's name
 */
int getOtherID(const char * lexeme);

/**
 * Returns the value which represents a given texture operand.
 * @param lexeme The texture operand's name
 */
int getTextureID(const char * lexeme);

/**
 * Returns the value which represents a given channel operand.
 * @param lexeme The channel operand's name
 */
int getChannelID(const char * lexeme);


/**
 * Create an instruction with given values.
 * Frees operand nodes after putting data into proper array.
 * @param op The opcode's ID
 * @param mods The list of mods for the opcode
 * @param operands The list of operands
 * @return an instruction
 */
instruction* newInstruction(opcode op, node * mods, node * operands);

/**
 * Create an instruction with no operands.
 * @param op The opcode's ID
 * @param mods The list of mods for the opcode
 * @return an instruction
 */
instruction* newInstruction(opcode op, node * mods);

/**
 * Create an instruction with one operand.
 * @param op The opcode's ID
 * @param mods The list of mods for the opcode
 * @param o1 The operand
 * @return an instruction
 */
instruction* newInstruction(opcode op, node * mods, operand * o1);
414 | * Frees operand nodes after putting data into proper array. 415 | * @param op The opcode's ID 416 | * @param mods The list of mods for the opcode 417 | * @param o1 The first operand 418 | * @param o2 The second operand 419 | * @return an instruction 420 | */ 421 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2); 422 | 423 | /** 424 | * Create an instruction with three operands. 425 | * Frees operand nodes after putting data into proper array. 426 | * @param op The opcode's ID 427 | * @param mods The list of mods for the opcode 428 | * @param o1 The first operand 429 | * @param o2 The second operand 430 | * @param o3 The third operand 431 | * @return an instruction 432 | */ 433 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3); 434 | 435 | /** 436 | * Create an instruction with four operands. 437 | * Frees operand nodes after putting data into proper array. 438 | * @param op The opcode's ID 439 | * @param mods The list of mods for the opcode 440 | * @param o1 The first operand 441 | * @param o2 The second operand 442 | * @param o3 The third operand 443 | * @param o4 The fourth operand 444 | * @return an instruction 445 | */ 446 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4); 447 | 448 | /** 449 | * Create an operand with given values. 450 | * Sets unspecified values to appropriate defaults. 451 | * @param type The operand type 452 | * @param val1 (defaults to 0) 453 | * @param val2 (defaults to 0) 454 | * @param val3 (defaults to 0) 455 | * @return the new operand 456 | */ 457 | operand* newOperand(token_type type, long long val1 = 0, int val2 = 0, int val3 = 0); 458 | 459 | /** 460 | * Create an operand with given values. 461 | * Sets unspecified values to appropriate defaults. 
462 | * @param type The operand type 463 | * @param props The operand's properties 464 | * @param val1 465 | * @param val2 466 | * @param val3 467 | * @return the new operand 468 | */ 469 | operand* newOperand(token_type type, operand_prop props, long long val1, int val2, int val3); 470 | 471 | /** 472 | * Add a value to a sorted list of nodes, treated as a set. 473 | * Cannot add same value a second time (unless comparator erroneously varies). 474 | * @param first The address of the list 475 | * @param value The item to add to the list 476 | * @param comparator A comparison function, for sorting the list's items 477 | * @return the added node, or 0 if no change was made 478 | */ 479 | node* addNode(node **first, void * value, long long (*comparator)(void*, void*)); 480 | 481 | /** 482 | * Adds node to end of list, no questions asked. 483 | * @param first The address of the list 484 | * @param value The item to add to the list 485 | */ 486 | void addLast(node ** first, void * value); 487 | 488 | /** 489 | * Adds node to start of list, no questions asked. 490 | * @param first The address of the list 491 | * @param value The item to add to the list 492 | */ 493 | void addFirst(node ** first, void * value); 494 | 495 | /** 496 | * Adds value to list, immediately after specified predecessor. 497 | * If predecessor is 0, value is added to start of list instead. 498 | * @param first The address of the list 499 | * @param afterMe The node we're inserting things after 500 | * @param value The item to add to the list 501 | */ 502 | void addAfter(node ** first, node * afterMe, void * value); 503 | 504 | /** 505 | * Adds value to list, immediately before specified node. 506 | * If node is 0, value is added to start of list. 
507 | * @param first The address of the list 508 | * @param beforeMe The node we're inserting things before 509 | * @param value The item to add to the list 510 | */ 511 | void addBefore(node **first, node * beforeMe, void * value); 512 | 513 | /** 514 | * Adds node to list, at specified zero-based index 515 | * @param first The address of the list 516 | * @param value The item to add to the list 517 | * @param index The location in the list to add the element to 518 | */ 519 | void addAtIndex(node ** first, void * value, int index); 520 | 521 | /** 522 | * Removes a node from a list. 523 | * @param first The address of the list 524 | * @param value The item to remove from the list 525 | * @param comparator A comparison function, used to identify the correct item 526 | * @return the removed node on success, or 0 if the node does not exist 527 | */ 528 | node * removeNode(node **first, void * value, long long (*comparator)(void*, void*)); 529 | 530 | /** 531 | * Removes a node from a list and frees it. 532 | * @param first The address of the list 533 | * @param value The item to remove from the list 534 | * @param comparator A comparison function, to identify the correct item 535 | * @return true on success, false if node does not exist 536 | */ 537 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*)); 538 | 539 | /** 540 | * Removes a node from a list, and frees it and its contents. 541 | * @param first The address of the list 542 | * @param value The item to remove from the list 543 | * @param comparator A comparison function, to identify the correct item 544 | * @param valueCleaner The free function to use on the node's contents 545 | * @return true on success, false if node does not exist 546 | */ 547 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *)); 548 | 549 | /** 550 | * Removes a node from a list and frees it.
551 | * @param first The address of the list 552 | * @param del The node to delete 553 | * @return true on success, false if node is not in list 554 | */ 555 | bool deleteNode(node **first, node * del); 556 | 557 | /** 558 | * Comparator for addNode; compares long integers by value. 559 | * @param a The first long being compared 560 | * @param b The second long being compared 561 | * @return negative number iff a 2 | #include 3 | #include 4 | #include 5 | #include "decode_common.hpp" 6 | #include "decode_output.hpp" 7 | #include "decode.hpp" 8 | using namespace std; 9 | 10 | #ifdef WINDOWS 11 | #include 12 | #define LLx "I64x" 13 | #else 14 | #define LLx "llx" 15 | #endif 16 | 17 | extern node * _blocks; 18 | extern char* _kernel_name; 19 | extern int _arch; 20 | extern int _cuobjdump_version; 21 | extern int _shared_memory; 22 | extern int _min_stack_size; 23 | extern int _frame_size; 24 | extern char** _function_names; 25 | extern int _num_functions; 26 | extern node * _functions; 27 | extern std::string _specialNames35[]; 28 | 29 | void printfMetadata() { 30 | if(_kernel_name) { 31 | cout << "//Name: " << _kernel_name << "\n"; 32 | } 33 | cout << "//Arch: sm_" << _arch << "\n"; 34 | cout << "//cuobjdump: " << _cuobjdump_version << "\n"; 35 | cout << "//Shared memory usage: " << _shared_memory << "\n"; 36 | cout << "//Min Stack Size: " << _min_stack_size << "\n"; 37 | cout << "//Frame Size: " << _frame_size << "\n"; 38 | if(_function_names) { 39 | cout << "//Function count: " << _num_functions << "\n"; 40 | for(int x = 0; x < _num_functions; x++) { 41 | cout << "//Function: " << _function_names[x] << "\n"; 42 | } 43 | } 44 | } 45 | 46 | char * operandToString(instruction * inst, operand * op) { 47 | char * answer = (char*) malloc(sizeof(char) * 256); 48 | char * pointer = answer; 49 | bool mustShowHex = false; //used for mem operand 50 | 51 | int counter;//because g++ gets mad if anything is defined inside a case 52 | 53 | switch (op->type) { 54 | case 
type_register: 55 | if(op->properties & prop_not) { 56 | sprintf(pointer, "!"); 57 | pointer++; 58 | } else if(op->properties & prop_minus) { 59 | sprintf(pointer, "-"); 60 | pointer++; 61 | } else if(op->properties & prop_bitwise_complement) { 62 | sprintf(pointer, "~"); 63 | pointer++; 64 | } 65 | if(op->properties & prop_absolute_value) { 66 | sprintf(pointer, "|"); 67 | pointer++; 68 | } 69 | 70 | if(op->reg < 0) { 71 | sprintf(pointer, "RZ"); 72 | pointer+=2; 73 | } else { 74 | sprintf(pointer, "R%d",(int) op->val1); 75 | pointer += strlen(pointer); 76 | } 77 | 78 | if(op->properties & prop_absolute_value) { 79 | sprintf(pointer, "|"); 80 | pointer++; 81 | } 82 | 83 | break; 84 | 85 | case type_special_reg: 86 | if(op->properties & prop_not) { 87 | sprintf(pointer, "!"); 88 | pointer++; 89 | } else if(op->properties & prop_minus) { 90 | sprintf(pointer, "-"); 91 | pointer++; 92 | } else if(op->properties & prop_bitwise_complement) { 93 | sprintf(pointer, "~"); 94 | pointer++; 95 | } 96 | if(op->properties & prop_absolute_value) { 97 | sprintf(pointer, "|"); 98 | pointer++; 99 | } 100 | 101 | sprintf(pointer, "SR"); 102 | pointer += 2; 103 | if(_arch >= 35 && _cuobjdump_version < 55) { 104 | sprintf(pointer, "%d", (int) op->val1); 105 | pointer += strlen(pointer); 106 | } else if(_arch < 35) { 107 | sprintf(pointer, "_"); 108 | pointer++; 109 | for(int x = 0; x < _numSpecials20; x++) { 110 | if(_specialIDs20[x] == (int) op->val1) { 111 | const char * name = _specialNames20[x].c_str(); 112 | sprintf(pointer, "%s", name); 113 | pointer += strlen(pointer); 114 | break; 115 | } 116 | } 117 | } else if(_arch <= 50) { 118 | if(_specialNames35[op->val1].length() > 0) { 119 | sprintf(pointer, "_"); 120 | pointer++; 121 | sprintf(pointer, "%s", _specialNames35[op->val1].c_str()); 122 | pointer += strlen(pointer); 123 | } else { 124 | sprintf(pointer, "%d", (int) op->val1); 125 | pointer += strlen(pointer); 126 | } 127 | } else { 128 | cerr << "WARNING o~120: 
unimplemented arch for special operands.\n"; 129 | sprintf(pointer, "%d", (int) op->val1); 130 | pointer += strlen(pointer); 131 | } 132 | 133 | if(op->properties & prop_absolute_value) { 134 | sprintf(pointer, "|"); 135 | pointer++; 136 | } 137 | 138 | break; 139 | 140 | case type_predicate: 141 | if(op->properties & prop_not) { 142 | sprintf(pointer, "!"); 143 | pointer++; 144 | } 145 | 146 | if(op->val1 == 7) { 147 | if(_cuobjdump_version <= 50 && _arch < 35) { 148 | sprintf(pointer, "pt"); 149 | pointer += 2; 150 | } else { 151 | sprintf(pointer, "PT"); 152 | pointer += 2; 153 | } 154 | } else { 155 | sprintf(pointer, "P%d", (int) op->val1); 156 | pointer += strlen(pointer); 157 | } 158 | 159 | break; 160 | 161 | case type_label: 162 | sprintf(pointer, "label"); 163 | pointer += 5; 164 | sprintf(pointer, "%s", op->strval); 165 | pointer += strlen(pointer); 166 | break; 167 | 168 | case type_sb: 169 | sprintf(pointer, "SB"); 170 | pointer += 2; 171 | sprintf(pointer, "%d", (int) op->val1); 172 | pointer += strlen(pointer); 173 | break; 174 | 175 | case type_hex: 176 | if(op->properties & prop_not) { 177 | sprintf(pointer, "!"); 178 | pointer++; 179 | } else if(op->properties & prop_minus && !(op->properties & prop_float) && !(op->properties & prop_double)) { 180 | sprintf(pointer, "-"); 181 | pointer++; 182 | } else if(op->properties & prop_bitwise_complement) { 183 | sprintf(pointer, "~"); 184 | pointer++; 185 | } 186 | if(op->properties & prop_absolute_value) { 187 | sprintf(pointer, "|"); 188 | pointer++; 189 | } 190 | 191 | if((op->properties & prop_float) || (op->properties & prop_float32i)) { 192 | if(op->val1 == 0x7f80000000000000LL >> 44) { 193 | sprintf(pointer, "+INF "); 194 | pointer += 5; 195 | } else if(op->val1 == 0xff80000000000000LL >> 44) { 196 | sprintf(pointer, "-INF "); 197 | pointer += 5; 198 | } else { 199 | union {unsigned int i; float f;} u; 200 | u.i = (int)(op->val1 & 0xffffffff); 201 | if(!(op->properties & prop_float32i)) { 202 | u.i 
= u.i << 12; 203 | } 204 | if(op->properties & prop_exp) { 205 | sprintf(pointer, "%.*e", op->precision, (double)u.f); 206 | pointer += strlen(pointer); 207 | } else { 208 | sprintf(pointer, "%.*f", op->precision, u.f); 209 | pointer += strlen(pointer); 210 | } 211 | } 212 | } 213 | else if(op->properties & prop_double) { 214 | if(op->val1 == 0x7ff0000000000000LL >> 44) { 215 | sprintf(pointer, "+INF "); 216 | pointer += 5; 217 | } else if(op->val1 == 0xfff0000000000000LL >> 44) { 218 | sprintf(pointer, "-INF "); 219 | pointer += 5; 220 | } else { 221 | union {unsigned long long int i; double d;} u; 222 | u.i = op->val1 << 32; 223 | if(!(op->properties & prop_float32i)) { 224 | u.i = u.i << 12; 225 | } 226 | if(op->properties & prop_exp) { 227 | sprintf(pointer, "%.*e", op->precision, u.d); 228 | pointer += strlen(pointer); 229 | } else { 230 | sprintf(pointer, "%.*f", op->precision, u.d); 231 | pointer += strlen(pointer); 232 | } 233 | } 234 | } else { 235 | sprintf(pointer, "0x%" LLx, op->val1); 236 | pointer += strlen(pointer); 237 | } 238 | 239 | if(op->properties & prop_absolute_value) { 240 | sprintf(pointer, "|"); 241 | pointer++; 242 | } 243 | 244 | break; 245 | 246 | case type_bit_list: 247 | sprintf(pointer, "{"); 248 | pointer += 1; 249 | 250 | counter = 0; 251 | for(unsigned int x = 0; x < 10; x++) { 252 | if(op->val1 & (0x1 << x)) { 253 | counter++; 254 | } 255 | } 256 | 257 | for(unsigned int x = 9; x >= 0; x--) { 258 | if(op->val1 & (0x1 << x)) { 259 | counter--; 260 | sprintf(pointer, "%d", x); 261 | pointer += strlen(pointer); 262 | if(counter) { 263 | sprintf(pointer, ","); 264 | pointer++; 265 | } else { 266 | break; 267 | } 268 | } 269 | } 270 | 271 | sprintf(pointer, "}"); 272 | pointer += 1; 273 | 274 | break; 275 | 276 | case type_const_mem: 277 | if(op->properties & prop_not) { 278 | sprintf(pointer, "!"); 279 | pointer++; 280 | } else if(op->properties & prop_minus) { 281 | sprintf(pointer, "-"); 282 | pointer++; 283 | } else 
if(op->properties & prop_bitwise_complement) { 284 | sprintf(pointer, "~"); 285 | pointer++; 286 | } 287 | if(op->properties & prop_absolute_value) { 288 | sprintf(pointer, "|"); 289 | pointer++; 290 | } 291 | 292 | sprintf(pointer, "c"); 293 | pointer++; 294 | 295 | if(_cuobjdump_version <= 50) { 296 | if(op->properties & prop_absolute_value) { 297 | sprintf(pointer, "|"); 298 | pointer++; 299 | } 300 | } 301 | 302 | if(_cuobjdump_version <= 50) { 303 | sprintf(pointer, " [0x%x] [", op->val2); 304 | pointer += strlen(pointer); 305 | } 306 | else { 307 | if((op->converted && inst->op != opcode_F2F) || (inst->op == opcode_XMAD && op != inst->operands[inst->num_operands - 1])) { 308 | sprintf(pointer, "[0x%x] [", op->val2); 309 | pointer += strlen(pointer); 310 | } else { 311 | sprintf(pointer, "[0x%x][", op->val2); 312 | pointer += strlen(pointer); 313 | } 314 | } 315 | 316 | if(op->reg >= 0) { 317 | sprintf(pointer, "R%d", (int) op->val1); 318 | pointer += strlen(pointer); 319 | if(op->val3) { 320 | sprintf(pointer, "+"); 321 | pointer++; 322 | } 323 | } 324 | if(op->val3 || op->reg < 0) { 325 | if(op->val3 < 0) { 326 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val3)) + 1); 327 | pointer += strlen(pointer); 328 | } else { 329 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val3); 330 | pointer += strlen(pointer); 331 | } 332 | } 333 | sprintf(pointer, "]"); 334 | pointer++; 335 | 336 | if(_cuobjdump_version >= 55) { 337 | if(op->properties & prop_absolute_value) { 338 | sprintf(pointer, "|"); 339 | pointer++; 340 | } 341 | } 342 | 343 | break; 344 | 345 | case type_mem: 346 | if(inst->op == opcode_LD_LDU) { 347 | mustShowHex = true; 348 | } 349 | 350 | if(op->properties & prop_not) { 351 | sprintf(pointer, "!"); 352 | pointer++; 353 | } else if(op->properties & prop_minus) { 354 | sprintf(pointer, "-"); 355 | pointer++; 356 | } else if(op->properties & prop_bitwise_complement) { 357 | sprintf(pointer, "~"); 358 | pointer++; 359 | } else 
if(op->properties & prop_absolute_value) { 360 | sprintf(pointer, "|"); 361 | pointer++; 362 | } 363 | 364 | sprintf(pointer, "["); 365 | pointer++; 366 | if(op->reg >= 0) { 367 | sprintf(pointer, "R%d", (int) op->val1); 368 | pointer += strlen(pointer); 369 | if(op->val2 || mustShowHex) { 370 | sprintf(pointer, "+"); 371 | pointer++; 372 | } 373 | } 374 | if(mustShowHex || op->val2 || op->reg < 0) { 375 | if(op->val2 == 0) { 376 | if(_cuobjdump_version <= 50) { 377 | sprintf(pointer, "0x0"); 378 | pointer += 3; 379 | } else { 380 | sprintf(pointer, "RZ"); 381 | pointer += 2; 382 | } 383 | } else if(op->val2 < 0) { 384 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val2)) + 1); 385 | pointer += strlen(pointer); 386 | } else { 387 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val2); 388 | pointer += strlen(pointer); 389 | } 390 | } 391 | sprintf(pointer, "]"); 392 | pointer++; 393 | 394 | if(op->properties & prop_absolute_value) { 395 | sprintf(pointer, "|"); 396 | pointer++; 397 | } 398 | 399 | break; 400 | 401 | case type_texture_operand: 402 | for(int x = 0; x < _numTextures; x++) { 403 | if(_textureIDs[x] == (int) op->val1) { 404 | if(_cuobjdump_version >= 55 && _textureIDs[x+1] == _textureIDs[x]) { 405 | sprintf(pointer, "%s", _textureNames[x+1]); 406 | pointer += strlen(pointer); 407 | } else { 408 | sprintf(pointer, "%s", _textureNames[x]); 409 | pointer += strlen(pointer); 410 | } 411 | break; 412 | } 413 | } 414 | 415 | break; 416 | 417 | case type_channel: 418 | for(int x = 0; x < _numChannels; x++) { 419 | if(_channelIDs[x] == (int) op->val1) { 420 | sprintf(pointer, "%s", _channelNames[x]); 421 | pointer += strlen(pointer); 422 | break; 423 | } 424 | } 425 | 426 | break; 427 | 428 | case type_other_operand: 429 | sprintf(pointer, "%s", _otherNames[op->val1]); 430 | pointer += strlen(pointer); 431 | 432 | break; 433 | 434 | default: 435 | cerr << "SANITY CHECK ERROR o~1871: operand is of non-operand type.\n"; 436 | } 437 | return 
answer; 438 | } 439 | 440 | void printfOperand(instruction * inst, operand * op) { 441 | char * operand = operandToString(inst, op); 442 | printf(operand); 443 | free(operand); 444 | } 445 | 446 | void fprintfInstruction(ostream & out, instruction * inst) { 447 | //printf predicate guard if it exists 448 | if(inst->guard) { 449 | out << "@"; 450 | char * operand = operandToString(inst, inst->guard); 451 | out << operand << " "; 452 | if(!(inst->guard->properties & prop_not)) { 453 | out << " "; 454 | } 455 | free(operand); 456 | } else { 457 | out << " "; 458 | } 459 | 460 | //printf opcode + mods 461 | out << opcodes[inst->op]; 462 | node * iter3 = inst->mods; 463 | while(iter3 != 0) { 464 | out << "." << (char*)iter3->value; 465 | iter3 = iter3->next; 466 | } 467 | 468 | //printf operands + mods 469 | int x; 470 | for(x = 0; x < inst->num_operands; x++) { 471 | if(x > 0) { 472 | out << ","; 473 | } 474 | out << " "; 475 | char * operand = operandToString(inst, inst->operands[x]); 476 | out << operand; 477 | free(operand); 478 | 479 | iter3 = inst->operands[x]->mods; 480 | while(iter3 != 0) { 481 | out << "." << (char*)iter3->value; 482 | iter3 = iter3->next; 483 | } 484 | } 485 | 486 | out << ";"; 487 | } 488 | -------------------------------------------------------------------------------- /tools/src/decode_output.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DECODE_OUTPUT_HPP 2 | #define DECODE_OUTPUT_HPP 3 | 4 | #include "decode_common.hpp" 5 | 6 | /** 7 | * @file decode_output.hpp 8 | * Defines functions used for output to stdout. 9 | */ 10 | 11 | /** 12 | * Outputs an operand to stdout 13 | * @param inst The operand's instruction 14 | * @param op The operand 15 | */ 16 | void printfOperand(instruction * inst, operand * op); 17 | 18 | /** 19 | * Outputs an instruction. 20 | * @param out The output stream. 21 | * @param inst The instruction to output. 
22 | */ 23 | void fprintfInstruction(std::ostream & out, instruction * inst); 24 | 25 | /** 26 | * Prints the assembly's metadata to stdout, in appropriate format for parsing. 27 | */ 28 | void printfMetadata(); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /tools/src/doxygen.dox: -------------------------------------------------------------------------------- 1 | /*! \mainpage Decoding CUDA Binary 2 | 3 | \section intro Overview 4 | 5 | The code here is what we used in our Decoding CUDA Binary paper for CGO 2019. 

6 | 7 | The decode program is the assembler generator tool.
8 | It can analyze binary encodings, and output bit-flipped code for more analysis, or output assembler code.

9 | 10 | The asm2bin tool contains our existing assemblers, and can be used to inject bit-flipped code into executables.
11 | It can also change the assembly code to a more human-readable format.

12 | 13 | The bin2asm tool can extract assembly code in a format our other tools understand.
14 | Use the -hex flag when extracting assembly for use with the decode tool, so binary is included.

15 | 16 | */ 17 | 18 | -------------------------------------------------------------------------------- /tools/src/elf.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ELF_HPP 2 | #define ELF_HPP 3 | 4 | #include 5 | 6 | /** 7 | * @file elf.hpp 8 | * Defines structures and functions used for analyzing/modifying ELFs. 9 | * This should (hopefully) replace the libelf library in cubin-analyze. 10 | */ 11 | 12 | //Section types: 13 | #define SHT_NULL 0 14 | #define SHT_PROGBITS 1 15 | #define SHT_SYMTAB 2 16 | #define SHT_STRTAB 3 17 | #define SHT_RELA 4 18 | #define SHT_HASH 5 19 | #define SHT_DYNAMIC 6 20 | #define SHT_NOTE 7 21 | #define SHT_NOBITS 8 22 | #define SHT_REL 9 23 | #define SHT_SHLIB 10 24 | #define SHT_DYNSYM 11 25 | #define SHT_NUM 12 26 | #define SHT_LOPROC 0x70000000 27 | #define SHT_HIPROC 0x7fffffff 28 | #define SHT_LOUSER 0x80000000 29 | #define SHT_HIUSER 0xffffffff 30 | 31 | //Program Header types: 32 | #define PT_NULL 0 33 | #define PT_LOAD 1 34 | #define PT_DYNAMIC 2 35 | #define PT_INTERP 3 36 | #define PT_NOTE 4 37 | #define PT_SHLIB 5 38 | #define PT_PHDR 6 39 | 40 | /** 41 | * Struct containing an ELF's identifier in its header. 42 | */ 43 | typedef struct ELF_Identifier { 44 | uint32_t ei_mag; 45 | uint8_t ei_class; 46 | uint8_t ei_data; 47 | uint8_t ei_version; 48 | uint8_t ei_osabi; 49 | uint8_t ei_abiversion; 50 | uint8_t ei_pad[7]; 51 | } ELF_Identifier; 52 | 53 | /** 54 | * Struct containing a 64-bit ELF's header. 55 | */ 56 | typedef struct ELF64_Header { 57 | ELF_Identifier e_ident; 58 | uint16_t e_type; 59 | uint16_t e_machine; 60 | uint32_t e_version; 61 | uint64_t e_entry; 62 | uint64_t e_phoff; 63 | uint64_t e_shoff; 64 | uint32_t e_flags; 65 | uint16_t e_ehsize; 66 | uint16_t e_phentsize; 67 | uint16_t e_phnum; 68 | uint16_t e_shentsize; 69 | uint16_t e_shnum; 70 | uint16_t e_shstrndx; 71 | } ELF64_Header; 72 | 73 | /** 74 | * Struct containing a 32-bit ELF's header. 
75 | */ 76 | typedef struct ELF32_Header { 77 | ELF_Identifier e_ident; 78 | uint16_t e_type; 79 | uint16_t e_machine; 80 | uint32_t e_version; 81 | uint32_t e_entry; 82 | uint32_t e_phoff; 83 | uint32_t e_shoff; 84 | uint32_t e_flags; 85 | uint16_t e_ehsize; 86 | uint16_t e_phentsize; 87 | uint16_t e_phnum; 88 | uint16_t e_shentsize; 89 | uint16_t e_shnum; 90 | uint16_t e_shstrndx; 91 | } ELF32_Header; 92 | 93 | /** 94 | * Struct containing one of a 64-bit ELF's section headers. 95 | */ 96 | typedef struct ELF64_SHeader { 97 | uint32_t sh_name; 98 | uint32_t sh_type; 99 | uint64_t sh_flags; 100 | uint64_t sh_addr; 101 | uint64_t sh_offset; 102 | uint64_t sh_size; 103 | uint32_t sh_link; 104 | uint32_t sh_info; 105 | uint64_t sh_addralign; 106 | uint64_t sh_entsize; 107 | } ELF64_SHeader; 108 | 109 | /** 110 | * Struct containing one of a 32-bit ELF's section headers. 111 | */ 112 | typedef struct ELF32_SHeader { 113 | uint32_t sh_name; 114 | uint32_t sh_type; 115 | uint32_t sh_flags; 116 | uint32_t sh_addr; 117 | uint32_t sh_offset; 118 | uint32_t sh_size; 119 | uint32_t sh_link; 120 | uint32_t sh_info; 121 | uint32_t sh_addralign; 122 | uint32_t sh_entsize; 123 | } ELF32_SHeader; 124 | 125 | /** 126 | * Struct containing one of an ELF's section headers. 127 | */ 128 | typedef struct ELF_SHeader { 129 | uint32_t sh_name; 130 | uint32_t sh_type; 131 | uint64_t sh_flags; 132 | uint64_t sh_addr; 133 | uint64_t sh_offset; 134 | uint64_t sh_size; 135 | uint32_t sh_link; 136 | uint32_t sh_info; 137 | uint64_t sh_addralign; 138 | uint64_t sh_entsize; 139 | } ELF_SHeader; 140 | 141 | /** 142 | * Struct containing one of a 64-bit ELF's sections. 143 | */ 144 | typedef struct ELF64_Section { 145 | ELF64_SHeader sheader; 146 | char * data; 147 | } ELF64_Section; 148 | 149 | /** 150 | * Struct containing one of a 32-bit ELF's sections. 
151 | */ 152 | typedef struct ELF32_Section { 153 | ELF32_SHeader sheader; 154 | char * data; 155 | } ELF32_Section; 156 | 157 | /** 158 | * Struct containing one of an ELF's sections. 159 | */ 160 | typedef struct ELF_Section { 161 | union { 162 | ELF64_Section * sec64; 163 | ELF32_Section * sec32; 164 | }; 165 | } ELF_Section; 166 | 167 | /** 168 | * Struct containing a program header for one of a 64-bit ELF's segments. 169 | */ 170 | typedef struct ELF64_PHeader { 171 | uint32_t p_type; 172 | uint32_t p_flags; 173 | uint64_t p_offset; 174 | uint64_t p_vaddr; 175 | uint64_t p_paddr; 176 | uint64_t p_filesz; //Reminder: unlike p_memsz, does not include NOBITS sections 177 | uint64_t p_memsz; 178 | uint64_t p_align; 179 | 180 | /** 181 | * List of sections contained by this memory region. 182 | * Set to 0 for type PHDR, which only contains pheaders. 183 | */ 184 | ELF64_Section ** sections; 185 | 186 | /** 187 | * Size of the sections array, or 0 for PHDR segment. 188 | */ 189 | int numSections; 190 | } ELF64_PHeader; 191 | 192 | /** 193 | * Struct containing a program header for one of a 32-bit ELF's segments. 194 | */ 195 | typedef struct ELF32_PHeader { 196 | uint32_t p_type; 197 | uint32_t p_flags; 198 | uint32_t p_offset; 199 | uint32_t p_vaddr; 200 | uint32_t p_paddr; 201 | uint32_t p_filesz; //Reminder: unlike p_memsz, does not include NOBITS sections 202 | uint32_t p_memsz; 203 | uint32_t p_align; 204 | 205 | /** 206 | * List of sections contained by this memory region. 207 | * Set to 0 for type PHDR, which only contains pheaders. 208 | */ 209 | ELF32_Section ** sections; 210 | 211 | /** 212 | * Size of the sections array, or 0 for PHDR segment. 213 | */ 214 | int numSections; 215 | } ELF32_PHeader; 216 | 217 | /** 218 | * Struct containing a 64-bit ELF. 219 | */ 220 | typedef struct ELF64 { 221 | ELF64_Header header; 222 | ELF64_Section ** sections; 223 | ELF64_PHeader ** pheaders; 224 | } ELF64; 225 | 226 | /** 227 | * Struct containing a 32-bit ELF. 
 */
typedef struct ELF32 {
	ELF32_Header header;
	ELF32_Section ** sections;
	ELF32_PHeader ** pheaders;
} ELF32;

/**
 * Struct containing an ELF, either 32-bit or 64-bit.
 * Only one member of the anonymous union is meaningful at a time.
 */
typedef struct ELF {
	union {
		ELF64 elf64;
		ELF32 elf32;
	};
	bool x64; // presumably true when elf64 is the active union member -- confirm in elf.cpp
} ELF;

/**
 * Struct containing a symbol from a 32-bit ELF's symbol table.
 * Note that the field order differs from the 64-bit variant.
 */
typedef struct {
	uint32_t st_name;
	uint32_t st_value;
	uint32_t st_size;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t st_shndx;
} ELF32_Sym;

/**
 * Struct containing a symbol from a 64-bit ELF's symbol table.
 */
typedef struct {
	uint32_t st_name;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t st_shndx;
	uint64_t st_value;
	uint64_t st_size;
} ELF64_Sym;

/**
 * Struct containing a symbol from an ELF's symbol table.
 * Has the same fields (and field widths) as #ELF64_Sym.
 */
typedef struct {
	uint32_t st_name;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t st_shndx;
	uint64_t st_value;
	uint64_t st_size;
} ELF_Sym;

/**
 * Struct containing a line from a 32-bit ELF's dynamic section.
 */
typedef struct ELF32_Dyn {
	int32_t d_tag;
	union {
		uint32_t d_val;
		uint32_t d_ptr;
	} d_un;
} ELF32_Dyn;

/**
 * Struct containing a line from a 64-bit ELF's dynamic section.
 */
typedef struct ELF64_Dyn {
	int64_t d_tag;
	union {
		uint64_t d_val;
		uint64_t d_ptr;
	} d_un;
} ELF64_Dyn;

/**
 * Struct containing a line from a 32-bit ELF's relocation section.
 */
typedef struct {
	uint32_t r_offset;
	uint32_t r_info;
	int32_t r_addend;
} ELF32_Rela;

/**
 * Struct containing a line from a 64-bit ELF's relocation section.
 */
typedef struct
{
	uint64_t r_offset;
	uint64_t r_info;
	int64_t r_addend;
} ELF64_Rela1;

/**
 * Struct containing a line from a 64-bit ELF's relocation section.
 * Splits the 64 bits that #ELF64_Rela1 stores in r_info into separate fields.
 * It doesn't seem like I used this struct anywhere?
 */
typedef struct {
	uint64_t r_offset;
	uint32_t r_sym;
	uint8_t r_ssym;
	uint8_t r_type3;
	uint8_t r_type2;
	uint8_t r_type;
	int64_t r_addend;
} ELF64_Rela2;

/**
 * Struct containing a line from a 64-bit ELF's relocation section.
 * Same fields as #ELF64_Rela2, but with the info fields in the opposite order.
 * It doesn't seem like I used this struct anywhere?
 */
typedef struct {
	uint64_t r_offset;
	uint8_t r_type;
	uint8_t r_type2;
	uint8_t r_type3;
	uint8_t r_ssym;
	uint32_t r_sym;
	int64_t r_addend;
} ELF64_Rela3;

/**
 * Encode an ELF into data structures.
 * @param elfbytes The entire ELF as a byte array.
 * @return a #ELF encoding the given ELF.
 */
ELF * bytes2ELF(const char * elfbytes);

/**
 * Convert an ELF to a byte array.
 * @param elf The #ELF struct which encodes the ELF.
 * @param size A by-reference integer to return the number of bytes.
 * @return a char array which, if written to file, results in the same ELF.
 */
char * ELF2bytes(ELF * elf, uint64_t & size);

/**
 * Returns the raw data for an ELF section.
 * @param elf The ELF containing the section.
 * @param section The section whose data is returned.
 * @return a const char array
 */
const char * getSectionData(ELF * elf, const ELF_Section section);

/**
 * Replace the data for an ELF section.
 * If section has NOBITS type, it is resized; data parameter is ignored.
 * @param elf The ELF.
 * @param sectionID The index of the section.
 * @param data The new data; a duplicate of this array is used.
 * @param dataSize The size of the new data.
 * @param oldElfEnd When modifying CUDA, the original address of the end of the modified ELF.
 */
void replaceSectionData(ELF * elf, int sectionID, char * data, unsigned int dataSize, unsigned int oldElfEnd = 0);

/**
 * Returns the number of sections in an #ELF.
 * @param elf The ELF.
 * @return an int
 */
int getNumSections(ELF * elf);

/**
 * Returns the number of segments (program headers) in an #ELF.
 * @param elf The ELF.
 * @return an int
 */
int getNumSegments(ELF * elf);

/**
 * Gets an ELF section by index.
 * @param elf The #ELF containing the section.
 * @param index The index of the section.
 * @pre There are more than index sections in the ELF.
 */
ELF_Section getSection(ELF * elf, int index);

/**
 * Gets a (read-only) symbol from a symbol table section in an elf.
 * @param elf The ELF containing the symbol table.
 * @param section The symbol table section.
 * @param index The index of the symbol inside the section.
 * @return an #ELF_Sym
 */
const ELF_Sym getSymbol(ELF * elf, const ELF_Section section, int index);

/**
 * Gets the header for an ELF section.
 * @param elf The ELF containing the section.
 * @param section The #ELF_Section
 * @return A read-only ELF_SHeader
 */
const ELF_SHeader getHeader(ELF * elf, const ELF_Section section);

/**
 * Returns the name of the given section.
 * @param elf The elf containing the section.
 * @param section The section for which the name is being retrieved.
 * @return a char pointer to somewhere in the elf's string table.
 */
const char * getName(ELF * elf, const ELF_Section section);

/**
 * Returns the name of the given symbol.
 * @param elf The elf containing the symbol.
 * @param symtab The header for the symtab section containing the symbol.
 * @param sym The symbol for which the name is being retrieved.
 * @return a char pointer to somewhere in the elf's string table.
 */
const char * getName(ELF * elf, ELF_SHeader symtab, const ELF_Sym sym);

/**
 * Adds a new symbol with specified name to a symbol table section in an ELF.
 * The st_name value is automatically set.
 * @param elf The ELF to modify.
 * @param sec The symbol table section that receives the symbol.
 * @param symbol The symbol to add; its st_name is overridden.
 * @param name The name of the new symbol.
 */
void addSymbol(ELF * elf, const ELF_Section sec, ELF_Sym symbol, char * name);

/**
 * Adds a new section with specified name to an ELF.
 * The sh_addr, sh_offset, and sh_name values are automatically set.
 * @param elf The ELF to modify.
 * @param shdr The header for the new section; the fields listed above are overridden.
 * @param name The name of the new section.
 * @param data The contents of the new section.
 */
void addSection(ELF * elf, ELF_SHeader shdr, char * name, char * data);

/**
 * Adds an existing section to an existing segment in an ELF.
 * @param elf The ELF to modify.
 * @param section The section to add.
 * @param segment The index of the segment (presumably into the program headers -- confirm in elf.cpp).
 */
void addSectionToSegment(ELF * elf, const ELF_Section section, int segment);

/**
 * Sets the sh_info attribute for an ELF section.
 * @param elf The ELF containing the section.
 * @param section The section whose header is modified.
 * @param info The new sh_info value.
 */
void setSH_info(ELF * elf, const ELF_Section section, uint32_t info);

/**
 * Sets the st_size attribute for an ELF symbol.
 * @param elf The ELF containing the symbol table.
 * @param symtab The symbol table section containing the symbol.
 * @param index The index of the symbol inside its section
 * @param size The new st_size value.
 */
void setST_size(ELF * elf, const ELF_Section symtab, int index, uint64_t size);

/**
 * Sets the st_value attribute for an ELF symbol.
 * @param elf The ELF containing the symbol table.
 * @param symtab The symbol table section containing the symbol.
 * @param index The index of the symbol inside its section
 * @param value The new st_value value.
 */
void setST_value(ELF * elf, const ELF_Section symtab, int index, uint64_t value);

/**
 * Frees memory in use by an #ELF struct.
497 | * @param elf 498 | */ 499 | void cleanELF(ELF * elf); 500 | 501 | #endif 502 | 503 | -------------------------------------------------------------------------------- /tools/src/elf2asm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "common.hpp" 8 | #include "elfmanip.hpp" 9 | using namespace std; 10 | 11 | int main(int argc, char ** argv) { 12 | int argElf = 0; 13 | int argKernel = 0; 14 | char * arch = 0; 15 | string sys; 16 | FILE* sass; 17 | char line[512+1]; 18 | 19 | bool flag_includeHex = false; 20 | bool flag_fallbackelf = false; 21 | 22 | //Compatbility check: 23 | #ifndef HAS_CUOBJDUMP 24 | cerr << "FATAL ERROR e2a~24: elf2asm requires cuobjdump in PATH.\n"; 25 | cerr << " Please install the CUDA SDK/Toolkit, fix your PATH variable, and then recompile.\n"; 26 | exit(0); 27 | #endif 28 | 29 | //Read arguments: 30 | for(int x = 1; x < argc; x++) { 31 | if(!strcmp(argv[x],"-h")) { 32 | argElf = 0; 33 | argKernel = 0; 34 | break; 35 | } else if(!strcasecmp(argv[x], "-sm") && x + 1 != argc) { 36 | arch = argv[++x]; 37 | } else if(!strcasecmp(argv[x], "-hex")) { 38 | flag_includeHex = true; 39 | } else if(!strcasecmp(argv[x], "-fallbackelf")) { 40 | flag_fallbackelf = true; 41 | } else if(!argElf) { 42 | argElf = x; 43 | } else if(!argKernel) { 44 | argKernel = x; 45 | } else { 46 | argElf = 0; 47 | argKernel = 0; 48 | break; 49 | } 50 | } 51 | bool flag_list = (argElf && !argKernel); 52 | 53 | if(!argElf || (!argKernel && !flag_list)) { 54 | cout << "Usage:\n"; 55 | cout << argv[0] << " [-sm ] \n"; 56 | cout << "\tLists the kernel functions in the specified file.\n"; 57 | cout << argv[0] << " \n"; 58 | cout << "\tTransform the specified kernel function to assembly.\n"; 59 | cout << argv[0] << " -hex \n"; 60 | cout << "\tTransform the specified kernel function to assembly & binary.\n"; 61 | cout << argv[0] << " -h\n"; 62 | cout 
<< "\tDisplays this usage information.\n"; 63 | return 0; 64 | } 65 | 66 | if(flag_list) { 67 | node * names = getKernelNames(argv[argElf], flag_fallbackelf); 68 | if(!names) { 69 | cerr << "FATAL ERROR e2a ~51; unable to find kernel functions in specified file.\n"; 70 | } 71 | node * iter = names; 72 | int index = 0; 73 | while(iter) { 74 | if(!arch) { 75 | cout << (index+1) << ": " << (char*) iter->value << "\n"; 76 | } 77 | else if(!strncmp(((char*)iter->value)+3, arch, strlen(arch))) { 78 | cout << (index+1) << ": " << (((char*)iter->value)+6) << "\n"; 79 | } 80 | index++; 81 | iter = iter->next; 82 | } 83 | cleanNodesFully(names, &free); 84 | return 0; 85 | } 86 | 87 | //Get kernel data from ELF 88 | kernelData * kern = getKernelFunction(argv[argElf], 0, atoi(argv[argKernel]) - 1, flag_fallbackelf); 89 | if(!kern) { 90 | cerr << "Fatal error e2a~46; could not retrieve specified kernel function.\n"; 91 | return 1; 92 | } 93 | 94 | //Prepare target strings used to find start of assembly code 95 | string strTarget = "Function : "; 96 | strTarget += kern->name; 97 | const char * target = strTarget.c_str(); 98 | 99 | ostringstream oss; 100 | oss << "sm_" << kern->arch; 101 | string strTargetArch = oss.str(); 102 | const char * targetArch = strTargetArch.c_str(); 103 | 104 | //Get cuobjdump version: 105 | char * vers; 106 | #ifdef WINDOWS 107 | sys = "cuobjdump --version"; 108 | sass = popen(sys.c_str(), "r"); 109 | while(fgets(line, 512, sass)) { 110 | vers = strstr(line, "release "); 111 | if(vers) { 112 | vers = vers + strlen("release "); 113 | break; 114 | } 115 | } 116 | #else 117 | sys = "cuobjdump --version | grep -E \"release [0-9]+\\.[0-9]\" | sed -r \"s/.*release ([0-9]+)\\.([0-9]).*/\\1\\2/\""; 118 | sass = popen(sys.c_str(), "r"); 119 | fgets(line, 512, sass); 120 | vers = line; 121 | #endif 122 | char *pos; 123 | if ((pos=strchr(vers, '\n')) != NULL) 124 | *pos = '\0'; 125 | if(!vers || vers[0] < '0' || vers[0] > '9') { 126 | cerr << "FATAL ERROR 
e2a~151: Unable to get cuobjdump version.\n"; 127 | exit(0); 128 | } 129 | pclose(sass); 130 | 131 | //Prepare cuobjdump output for reading: 132 | sys = "cuobjdump -sass -fun "; 133 | sys += kern->name; 134 | sys += " "; 135 | sys += argv[argElf]; 136 | sass = popen(sys.c_str(), "r"); 137 | 138 | cout << "//Name: " << (kern->name) << "\n"; 139 | cout << "//Arch: sm_" << kern->arch << "\n"; 140 | cout << "//cuobjdump: " << vers << "\n"; 141 | cout << "//Shared memory usage: " << kern->sharedMemory << "\n"; 142 | cout << "//Min Stack Size: " << kern->min_stack_size << "\n"; 143 | cout << "//Max Stack Size: " << kern->max_stack_size << "\n"; 144 | cout << "//Frame Size: " << kern->frame_size << "\n"; 145 | cout << "//Function count: " << listSize(kern->functionNames) << "\n"; 146 | node * iter = kern->functionNames; 147 | while(iter) { 148 | cout << "//Function: " << (char*) iter->value << "\n"; 149 | iter = iter->next; 150 | } 151 | 152 | int seenArch = 9999999; 153 | 154 | //Find function code inside cuobjdump output: 155 | while(fgets(line, 512, sass)) { 156 | if(strstr(line, targetArch)) { 157 | seenArch = 0; 158 | } else { 159 | seenArch++; 160 | } 161 | 162 | if(strstr(line, target) && seenArch < 10) { 163 | int nextaddr = 0;//address of next instruction 164 | 165 | //Write function's assembly to stdout: 166 | while(fgets(line, 512, sass)) { 167 | if(!nextaddr) { 168 | char * address = strstr(line,"/*"); 169 | if(!address) { 170 | continue; 171 | } 172 | } 173 | char* address = strstr(line,"/*"); 174 | if(!address) {//done with function 175 | break;//note: skips any SCHIs at end, but there shouldn't ever be any 176 | } else if(!strstr(address+1,"/*")) { 177 | if(kern->arch >= 50 && nextaddr % (8 * 4)) { 178 | //we use 'continue' here to stop CUDA 7.x's reordering of SCHI from tricking us about addresses 179 | continue; 180 | } 181 | 182 | unsigned long long hexval; 183 | if(kern->bytes) { 184 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]); 185 | hexval = 
hexval << 32; 186 | hexval += *((unsigned int*)&kern->bytes[nextaddr]); 187 | if(flag_includeHex) { 188 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": "; 189 | } 190 | } else { 191 | cerr << "SANITY CHECK ERROR e2a~187\n"; 192 | cerr << "\tThere is no assembly associated with the kernel function.\n"; 193 | exit(0); 194 | } 195 | 196 | if(kern->arch == 30) { 197 | hexval = hexval >> 4; 198 | } else if(kern->arch == 32 || kern->arch == 35 || kern->arch == 37) { 199 | hexval = hexval >> 2; 200 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) { 201 | //no opcode; SCHI identified by address, and (almost?) all bits are used 202 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) { 203 | //no opcode; SCHI identified by address, and (almost?) all bits are used 204 | } else { 205 | cerr << "ERROR: Don't know how to handle scheduling instructions in this architecture.\n"; 206 | } 207 | cout << "SCHI"; 208 | if(kern->arch < 50) { 209 | for(int x = 0; x < 7; x++) { 210 | if(x) cout << ","; 211 | cout << " 0x"; 212 | cout << std::hex << (hexval & 0xff); 213 | hexval = hexval >> 8; 214 | } 215 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) { 216 | for(int x = 0; x < 3; x++) { 217 | if(x) cout << ","; 218 | cout << " 0x"; 219 | cout << std::hex << (hexval & 0x1fffff); 220 | hexval = hexval >> 21; 221 | } 222 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) { 223 | for(int x = 0; x < 3; x++) { 224 | if(x) cout << ","; 225 | cout << " 0x"; 226 | cout << std::hex << (hexval & 0x1fffff); 227 | hexval = hexval >> 21; 228 | } 229 | } else { 230 | //unrecognized arch, but already printed an error 231 | for(int x = 0; x < 3; x++) { 232 | if(x) cout << ","; 233 | cout << " 0x"; 234 | cout << std::hex << (hexval & 0x1fffff); 235 | hexval = hexval >> 21; 236 | } 237 | } 238 | cout << std::dec << ";\n"; 239 | nextaddr += 8; 240 | continue; 241 | } else if(kern->bytes && nextaddr >= 
kern->numBytes) { 242 | cerr << "FATAL ERROR e2a~77: out-of-bounds assembly code (larger than expected size)\n"; 243 | return 1; 244 | } 245 | 246 | address += 2; 247 | int currentaddr = strtoul(address, 0, 16); 248 | 249 | //Deal with SCHI being skipped in cuobjdump (cuobjdump version 5.0) or reordered (cuobjdump version 7.x): 250 | while(currentaddr > nextaddr) { 251 | unsigned long long hexval; 252 | if(kern->bytes) { 253 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]); 254 | hexval = hexval << 32; 255 | hexval += *((unsigned int*)&kern->bytes[nextaddr]); 256 | if(flag_includeHex) { 257 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": "; 258 | } 259 | } else { 260 | cerr << "SANITY CHECK ERROR e2a~248\n"; 261 | cerr << "\tNo assembly code is associated with this kernel function.\n"; 262 | exit(0); 263 | } 264 | 265 | if(kern->arch == 30) { 266 | hexval = hexval >> 4; 267 | } else if(kern->arch == 32 || kern->arch == 35 || kern->arch == 37) { 268 | hexval = hexval >> 2; 269 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) { 270 | //no opcode; SCHI identified by address, and (almost?) all bits are used 271 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) { 272 | //no opcode; SCHI identified by address, and (almost?) 
all bits are used 273 | } else { 274 | cerr << "ERROR: Don't know how to handle scheduling meta-instructions on this architecture.\n"; 275 | } 276 | cout << "SCHI"; 277 | if(kern->arch < 50) { 278 | for(int x = 0; x < 7; x++) { 279 | if(x) cout << ","; 280 | cout << " 0x"; 281 | cout << std::hex << (hexval & 0xff); 282 | hexval = hexval >> 8; 283 | } 284 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) { 285 | for(int x = 0; x < 3; x++) { 286 | if(x) cout << ","; 287 | cout << " 0x"; 288 | cout << std::hex << (hexval & 0x1fffff); 289 | hexval = hexval >> 21; 290 | } 291 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) { 292 | for(int x = 0; x < 3; x++) { 293 | if(x) cout << ","; 294 | cout << " 0x"; 295 | cout << std::hex << (hexval & 0x1fffff); 296 | hexval = hexval >> 21; 297 | } 298 | } else { 299 | //unrecognized arch, but already printed an error 300 | for(int x = 0; x < 3; x++) { 301 | if(x) cout << ","; 302 | cout << " 0x"; 303 | cout << std::hex << (hexval & 0x1fffff); 304 | hexval = hexval >> 21; 305 | } 306 | } 307 | cout << std::dec << ";\n"; 308 | 309 | nextaddr += 8; 310 | } 311 | if(currentaddr < nextaddr) { 312 | continue; 313 | } 314 | 315 | if(flag_includeHex) { 316 | unsigned long long hexval; 317 | if(kern->bytes) { 318 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]); 319 | hexval = hexval << 32; 320 | hexval += *((unsigned int*)&kern->bytes[nextaddr]); 321 | if(flag_includeHex) { 322 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": "; 323 | } 324 | } else { 325 | cerr << "SANITY CHECK ERROR e2a~295: no assembly code associated with kernel function\n"; 326 | exit(0); 327 | } 328 | } 329 | 330 | //Write assembly code for instruction: 331 | int x = address - line; 332 | while(line[x] != '*' || line[x+1] != '/') { 333 | x++; 334 | } 335 | x += 2; 336 | while(line[x] == ' ' || line[x] == '\t' || line[x] == '{' || line[x] == '}') { 337 | x++; 338 | } 339 | if(line[x] == '/') { 
340 | while(line[x] != '*' || line[x+1] != '/') { 341 | x++; 342 | } 343 | x += 2; 344 | while(line[x] == ' ' || line[x] == '\t' || line[x] == '{' || line[x] == '}') { 345 | x++; 346 | } 347 | } 348 | while(line[x] != ';') { 349 | cout << line[x++]; 350 | } 351 | cout << ";\n"; 352 | 353 | //Increment instruction address: 354 | int increment = 0; 355 | char * bytes = strstr(address, "/*"); 356 | if(!bytes) { 357 | cerr << "FATAL ERROR e2a~206: invalid instruction address\n"; 358 | return 1; 359 | } 360 | bytes += 2;//skip "/*" 361 | while((*bytes) != '*' && (*bytes) != '/') { 362 | if((*bytes) == ' ') { 363 | } else if((*bytes) == 'x') { 364 | increment--; 365 | } else { 366 | increment++; 367 | } 368 | bytes++; 369 | } 370 | if(increment % 2) { 371 | cerr << "FATAL ERROR e2a~215: fractional instruction size.\n"; 372 | } 373 | increment = increment/2; 374 | nextaddr += increment; 375 | } 376 | 377 | //Get out of loop: 378 | break; 379 | } 380 | } 381 | 382 | //Cleanup & return: 383 | pclose(sass); 384 | free(kern->name); 385 | if(kern->bytes) { 386 | free(kern->bytes); 387 | } 388 | cleanNodesFully(kern->functionNames, &free); 389 | free(kern); 390 | return 0; 391 | } 392 | -------------------------------------------------------------------------------- /tools/src/elfmanip.hpp: -------------------------------------------------------------------------------- 1 | #ifndef ELFMANIP_HPP 2 | #define ELFMANIP_HPP 3 | #include "common.hpp" 4 | 5 | /** 6 | * @file elfmanip.hpp 7 | * Defines structures and functions used for reading/writing to ELF. 8 | */ 9 | 10 | /** 11 | * This structure can hold the first 0x10 bytes of the .nv_fatbin section. 12 | * After bytes, either the section ends, or another fatHeader is found. 13 | */ 14 | typedef struct { 15 | unsigned int magic; 16 | unsigned int unknown; 17 | unsigned long long size; 18 | } fatHeader; 19 | 20 | /** 21 | * This structure holds a CUDA_ELF object inside an .nv.info* section. 
 */
typedef struct {
	char format;     // presumably one of the #Format values -- confirm against users of CUDA_INFO
	char attribute;  // presumably one of the #Attribute values -- confirm against users of CUDA_INFO
	char data[];     // variable-length payload (flexible array member)
} CUDA_INFO;

/**
 * Enum for attribute IDs in CUDA_INFO sections
 */
typedef enum {
	EIATTR_ERROR = 0x00,
	EIATTR_PAD = 0x01,
	EIATTR_IMAGE_SLOT = 0x2,
	EIATTR_JUMPTABLE_RELOCS = 0x3,
	EIATTR_CTAIDZ_USED = 0x4,
	EIATTR_MAX_THREADS = 0x5,
	EIATTR_IMAGE_OFFSET = 0x6,
	EIATTR_IMAGE_SIZE = 0x07,
	EIATTR_TEXTURE_NORMALIZED = 0x08,
	EIATTR_SAMPLER_INIT = 0x09,
	EIATTR_PARAM_CBANK = 0x0a,
	EIATTR_SMEM_PARAM_OFFSETS = 0x0b,
	EIATTR_CBANK_PARAM_OFFSETS = 0x0c,
	EIATTR_SYNC_STACK = 0x0d,
	EIATTR_TEXID_SAMPID_MAP = 0x0e,
	EIATTR_EXTERNS = 0x0f,
	EIATTR_REQNTID = 0x10,

	/**
	 * Seems to indicate local-memory stack size.
	 * After two byte size (equal to 0x8), has 4byte function ID, then 4byte frame size.
	 * Function ID is based on index in symbol table.
	 */
	EIATTR_FRAME_SIZE = 0x11,

	/**
	 * Seems to indicate local-memory stack size.
	 * After two byte size (equal to 0x8), has 4byte function ID, then 4byte stack size.
	 * Function ID is based on index in symbol table.
	 */
	EIATTR_MIN_STACK_SIZE = 0x12,

	EIATTR_SAMPLER_FORCE_UNNORMALIZED = 0x13,
	EIATTR_BINDLESS_IMAGE_OFFSETS = 0x14,
	EIATTR_BINDLESS_TEXTURE_BANK = 0x15,
	EIATTR_BINDLESS_SURFACE_BANK = 0x16,
	EIATTR_KPARAM_INFO = 0x17, //I think this describes a parameter for a kernel function
	EIATTR_SMEM_PARAM_SIZE = 0x18,
	EIATTR_CBANK_PARAM_SIZE = 0x19,
	EIATTR_QUERY_NUMATTRIB = 0x1a,
	EIATTR_MAXREG_COUNT = 0x1b,
	EIATTR_EXIT_INSTR_OFFSETS = 0x1c,
	EIATTR_S2RCTAID_INSTR_OFFSETS = 0x1d,

	//error "unknown attribute" for 0x1e and above as of cuobjdump 5.0

	EIATTR_CRS_STACK_SIZE = 0x1e,
	EIATTR_NEED_CNP_WRAPPER = 0x1f,
	EIATTR_NEED_CNP_PATCH = 0x20,
	EIATTR_EXPLICIT_CACHING = 0x21,

	//error "unknown attribute" for 0x22 and above as of cuobjdump 5.5

	EIATTR_ISTYPEP_USED = 0x22,

	//error "unknown attribute" for 0x23 and above as of cuobjdump 6.0

	EIATTR_MAX_STACK_SIZE = 0x23, //Another attribute that indicates local-memory stack size.
	EIATTR_SUQ_USED = 0x24,
	EIATTR_LD_CACHEMOD_INSTR_OFFSETS = 0x25,

	//error "unknown attribute" for 0x26 and above as of cuobjdump 6.5

	EIATTR_LOAD_CACHE_REQUEST = 0x26,
	EIATTR_ATOM_SYS_INSTR_OFFSETS = 0x27,

	//error "unknown attribute" for 0x28 and above as of cuobjdump 8.0

	EIATTR_COOP_GROUP_INSTR_OFFSETS = 0x28,
	EIATTR_COOP_GROUP_MASK_REGIDS = 0x29,
	EIATTR_SW1850030_WAR = 0x2a,
	EIATTR_WMMA_USED = 0x2b,

	//error "unknown attribute" for 0x2c and above as of cuobjdump 9.2

	//Constants to help with error checking (smallest and largest attribute ID listed above):
	minAttribute = 0x00,
	maxAttribute = 0x2b
} Attribute;

/**
 * Enum for 'Format' values in CUDA_INFO
 */
typedef enum {
	//error "unknown Format" for 0x00

	/**
	 * I haven't seen the following value occur naturally.
	 * Object's data is of size 0.
	 * Speculation: NVAL stands for "no value" or "null value" or similar.
	 */
	EIFMT_NVAL = 0x01,

	/**
	 * I haven't seen the following value occur naturally.
	 * Object's data is of size 1.
	 * Speculation: BVAL stands for "byte value".
	 */
	EIFMT_BVAL = 0x02,

	/**
	 * I haven't seen the following value occur naturally.
	 * Object's data is of size 2.
	 * Speculation: HVAL stands for "half-int value" or something similar.
	 */
	EIFMT_HVAL = 0x03,

	/**
	 * This is the value (usually?) used in naturally compiled CUDA binaries.
	 * Object's data starts with a two-byte value indicating number of additional bytes.
	 * (I suspect the size handling may vary depending on the attribute type.)
	 * Speculation: SVAL stands for "(variable-)size value", or perhaps "special value".
	 */
	EIFMT_SVAL = 0x04,

	//error "unknown Format" for 0x05 and above as of cuobjdump 5.0 through 8.0

	//Constants to help with error checking (smallest and largest format value listed above):
	minFormat = 0x01,
	maxFormat = 0x04
} Format;

/**
 * Overwrites a kernel function inside an ELF.
 * @param file The name of the executable ELF
 * @param kernel Contains the name of the function and the new bytecode
 * @param calls A list of function offsets
 * @param forceFallback Iff true, fallback function is used even on proper ELF files.
 * @return true iff the kernel is successfully overwritten
 */
bool overwriteKernel(char* file, kernelData * kernel, node * calls, bool forceFallback);

/**
 * Helper for overwriteKernel.
 * @param oldData The address of array containing nv_fatbin data
 * @param newData Passed by reference; presumably set to the new nv_fatbin data whose size is returned (confirm in elfmanip.cpp).
 * @param original_size The number of bytes in oldData
 * @param kernel Contains the kernel function's name & the new bytecode
 * @param calls A list of function offsets
 * @param targetFatOffset Is set to the offset (relative to oldData) of header for fatbin containing target ELF.
 * @param oldEndELF Is set to the offset (relative to oldData) of the end of the target ELF prior to modification.
 * @return the size of the new nv_fatbin data, or -1 on failure
 */
int overwriteKernelHelper(const char * oldData, char *& newData, unsigned int original_size, kernelData * kernel, node * calls, unsigned int & targetFatOffset, unsigned int & oldEndELF);

/**
 * Returns contents of a specified kernel function.
 * @param file The name of the executable ELF
 * @param kernel The name of the kernel function
 * @param kernelNumber The index of the kernel function; only used if kernel is NULL
 * @param forceFallback Iff true, fallback function is used even on proper ELF files.
 * @return a kernelData pointer, or 0 on failure
 */
kernelData* getKernelFunction(char * file, char * kernel, int kernelNumber, bool forceFallback);

/**
 * Helper for getKernelFunction.
 * @param bytes The data in the nv_fatbin section
 * @param size The number of bytes in the nv_fatbin section
 * @param kernel The name of the kernel function
 * @param kernelNumber The index of the kernel function; only used if kernel is NULL
 * @return a kernelData pointer, or 0 on failure
 */
kernelData* getKernelFunctionHelper(const char * bytes, unsigned int size, char * kernel, int kernelNumber);

/**
 * Returns a list of kernel function names within the specified ELF.
200 | * @param file is the location of the executable ELF 201 | * @param forceFallback Iff true, fallback function is used even on proper ELF files. 202 | * @return a list of char* 203 | */ 204 | node * getKernelNames(char * file, bool forceFallback); 205 | 206 | /** 207 | * Helper for getKernelNames. 208 | * Returns a list of kernel function names from within the fatbin section 209 | * @param bytes The data in the nv_fatbin section 210 | * @param size The number of bytes in nv_fatbin 211 | * @return a list of char* 212 | */ 213 | node * getKernelNamesHelper(const char * bytes, int size); 214 | 215 | #endif 216 | 217 | -------------------------------------------------------------------------------- /tools/src/output.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "binary.hpp" 6 | #include "cfghelpers.hpp" 7 | #include "common.hpp" 8 | #include "cudacommon.hpp" 9 | #include "output.hpp" 10 | #include "asm2bin.hpp" 11 | using namespace std; 12 | 13 | #ifdef WINDOWS 14 | #include 15 | #define LLx "I64x" 16 | #else 17 | #define LLx "llx" 18 | #endif 19 | 20 | extern node * _blocks; 21 | extern char* _kernel_name; 22 | extern int _arch; 23 | extern int _cuobjdump_version; 24 | extern int _shared_memory; 25 | extern int _min_stack_size; 26 | extern int _max_stack_size; 27 | extern int _frame_size; 28 | extern char** _function_names; 29 | extern int _num_functions; 30 | extern node * _functions; 31 | extern int _tag; 32 | extern std::string _specialNames35[]; 33 | 34 | void printfMetadata() { 35 | if(_kernel_name) { 36 | cout << "//Name: " << _kernel_name << "\n"; 37 | } 38 | cout << "//Arch: sm_" << _arch << "\n"; 39 | cout << "//cuobjdump: " << _cuobjdump_version << "\n"; 40 | cout << "//Shared memory usage: " << _shared_memory << "\n"; 41 | cout << "//Min Stack Size: " << _min_stack_size << "\n"; 42 | cout << "//Max Stack Size: " << _max_stack_size << "\n"; 43 | cout 
<< "//Frame Size: " << _frame_size << "\n"; 44 | if(_function_names) { 45 | cout << "//Function count: " << _num_functions << "\n"; 46 | for(int x = 0; x < _num_functions; x++) { 47 | cout << "//Function: " << _function_names[x] << "\n"; 48 | } 49 | } 50 | } 51 | 52 | char * operandToString(instruction * inst, operand * op) { 53 | char * answer = (char*) malloc(sizeof(char) * 256); 54 | char * pointer = answer; 55 | bool mustShowHex = false; //used for mem operand 56 | 57 | int counter;//because g++ gets mad if anything is defined inside a case 58 | 59 | switch (op->type) { 60 | case type_register: 61 | if(op->properties & prop_not) { 62 | sprintf(pointer, "!"); 63 | pointer++; 64 | } else if(op->properties & prop_minus) { 65 | sprintf(pointer, "-"); 66 | pointer++; 67 | } else if(op->properties & prop_bitwise_complement) { 68 | sprintf(pointer, "~"); 69 | pointer++; 70 | } 71 | if(op->properties & prop_absolute_value) { 72 | sprintf(pointer, "|"); 73 | pointer++; 74 | } 75 | 76 | if(op->reg < 0) { 77 | sprintf(pointer, "RZ"); 78 | pointer+=2; 79 | } else { 80 | sprintf(pointer, "R%d",(int) op->val1); 81 | pointer += strlen(pointer); 82 | } 83 | 84 | if(op->properties & prop_absolute_value) { 85 | sprintf(pointer, "|"); 86 | pointer++; 87 | } 88 | 89 | break; 90 | 91 | case type_special_reg: 92 | if(op->properties & prop_not) { 93 | sprintf(pointer, "!"); 94 | pointer++; 95 | } else if(op->properties & prop_minus) { 96 | sprintf(pointer, "-"); 97 | pointer++; 98 | } else if(op->properties & prop_bitwise_complement) { 99 | sprintf(pointer, "~"); 100 | pointer++; 101 | } 102 | if(op->properties & prop_absolute_value) { 103 | sprintf(pointer, "|"); 104 | pointer++; 105 | } 106 | 107 | sprintf(pointer, "SR"); 108 | pointer += 2; 109 | if(_arch >= 35 && _cuobjdump_version < 55) { 110 | sprintf(pointer, "%d", (int) op->val1); 111 | pointer += strlen(pointer); 112 | } else if(_arch < 35) { 113 | sprintf(pointer, "_"); 114 | pointer++; 115 | for(int x = 0; x < 
_numSpecials20; x++) { 116 | if(_specialIDs20[x] == (int) op->val1) { 117 | const char * name = _specialNames20[x].c_str(); 118 | sprintf(pointer, "%s", name); 119 | pointer += strlen(pointer); 120 | break; 121 | } 122 | } 123 | } else if(_arch < 70) { 124 | if(_specialNames35[op->val1].length() > 0) { 125 | sprintf(pointer, "_"); 126 | pointer++; 127 | sprintf(pointer, "%s", _specialNames35[op->val1].c_str()); 128 | pointer += strlen(pointer); 129 | } else { 130 | sprintf(pointer, "%d", (int) op->val1); 131 | pointer += strlen(pointer); 132 | } 133 | } else { 134 | cerr << "WARNINg o~120: unimplemented arch for special operands.\n"; 135 | sprintf(pointer, "%d", (int) op->val1); 136 | pointer += strlen(pointer); 137 | } 138 | 139 | if(op->properties & prop_absolute_value) { 140 | sprintf(pointer, "|"); 141 | pointer++; 142 | } 143 | 144 | break; 145 | 146 | case type_predicate: 147 | if(op->properties & prop_not) { 148 | sprintf(pointer, "!"); 149 | pointer++; 150 | } 151 | 152 | if(op->val1 == 7) { 153 | if(_cuobjdump_version <= 50 && _arch < 35) { 154 | sprintf(pointer, "pt"); 155 | pointer += 2; 156 | } else { 157 | sprintf(pointer, "PT"); 158 | pointer += 2; 159 | } 160 | } else { 161 | sprintf(pointer, "P%d", (int) op->val1); 162 | pointer += strlen(pointer); 163 | } 164 | 165 | break; 166 | 167 | case type_label: 168 | sprintf(pointer, "label"); 169 | pointer += 5; 170 | sprintf(pointer, "%s", op->strval); 171 | pointer += strlen(pointer); 172 | break; 173 | 174 | case type_sb: 175 | sprintf(pointer, "SB"); 176 | pointer += 2; 177 | sprintf(pointer, "%d", (int) op->val1); 178 | pointer += strlen(pointer); 179 | break; 180 | 181 | case type_hex: 182 | if(op->properties & prop_not) { 183 | sprintf(pointer, "!"); 184 | pointer++; 185 | } else if(op->properties & prop_minus && !(op->properties & prop_float) && !(op->properties & prop_double)) { 186 | sprintf(pointer, "-"); 187 | pointer++; 188 | } else if(op->properties & prop_bitwise_complement) { 189 | 
sprintf(pointer, "~"); 190 | pointer++; 191 | } 192 | if(op->properties & prop_absolute_value) { 193 | sprintf(pointer, "|"); 194 | pointer++; 195 | } 196 | 197 | if((op->properties & prop_float) || (op->properties & prop_float32i)) { 198 | if(op->val1 == 0x7f80000000000000LL >> 44) { 199 | sprintf(pointer, "+INF "); 200 | pointer += 5; 201 | } else if(op->val1 == 0xff80000000000000LL >> 44) { 202 | sprintf(pointer, "-INF "); 203 | pointer += 5; 204 | } else { 205 | union {unsigned int i; float f;} u; 206 | u.i = (int)(op->val1 & 0xffffffff); 207 | if(!(op->properties & prop_float32i)) { 208 | u.i = u.i << 12; 209 | } 210 | if(op->properties & prop_exp) { 211 | sprintf(pointer, "%.*e", op->precision, (double)u.f); 212 | pointer += strlen(pointer); 213 | } else { 214 | sprintf(pointer, "%.*f", op->precision, u.f); 215 | pointer += strlen(pointer); 216 | } 217 | } 218 | } 219 | else if(op->properties & prop_double) { 220 | if(op->val1 == 0x7ff0000000000000LL >> 44) { 221 | sprintf(pointer, "+INF "); 222 | pointer += 5; 223 | } else if(op->val1 == 0xfff0000000000000LL >> 44) { 224 | sprintf(pointer, "-INF "); 225 | pointer += 5; 226 | } else { 227 | union {unsigned long long int i; double d;} u; 228 | u.i = op->val1 << 32; 229 | if(!(op->properties & prop_float32i)) { 230 | u.i = u.i << 12; 231 | } 232 | if(op->properties & prop_exp) { 233 | sprintf(pointer, "%.*e", op->precision, u.d); 234 | pointer += strlen(pointer); 235 | } else { 236 | sprintf(pointer, "%.*f", op->precision, u.d); 237 | pointer += strlen(pointer); 238 | } 239 | } 240 | } else { 241 | sprintf(pointer, "0x%" LLx, op->val1); 242 | pointer += strlen(pointer); 243 | } 244 | 245 | if(op->properties & prop_absolute_value) { 246 | sprintf(pointer, "|"); 247 | pointer++; 248 | } 249 | 250 | break; 251 | 252 | case type_bit_list: 253 | sprintf(pointer, "{"); 254 | pointer += 1; 255 | 256 | counter = 0; 257 | for(unsigned int x = 0; x < 10; x++) { 258 | if(op->val1 & (0x1 << x)) { 259 | counter++; 260 | } 
261 | } 262 | 263 | for(unsigned int x = 9; x >= 0; x--) { 264 | if(op->val1 & (0x1 << x)) { 265 | counter--; 266 | sprintf(pointer, "%d", x); 267 | pointer += strlen(pointer); 268 | if(counter) { 269 | sprintf(pointer, ","); 270 | pointer++; 271 | } else { 272 | break; 273 | } 274 | } 275 | } 276 | 277 | sprintf(pointer, "}"); 278 | pointer += 1; 279 | 280 | break; 281 | 282 | case type_const_mem: 283 | if(op->properties & prop_not) { 284 | sprintf(pointer, "!"); 285 | pointer++; 286 | } else if(op->properties & prop_minus) { 287 | sprintf(pointer, "-"); 288 | pointer++; 289 | } else if(op->properties & prop_bitwise_complement) { 290 | sprintf(pointer, "~"); 291 | pointer++; 292 | } 293 | if(op->properties & prop_absolute_value) { 294 | sprintf(pointer, "|"); 295 | pointer++; 296 | } 297 | 298 | sprintf(pointer, "c"); 299 | pointer++; 300 | 301 | if(_cuobjdump_version <= 50) { 302 | if(op->properties & prop_absolute_value) { 303 | sprintf(pointer, "|"); 304 | pointer++; 305 | } 306 | } 307 | 308 | if(_cuobjdump_version <= 50) { 309 | sprintf(pointer, " [0x%x] [", op->val2); 310 | pointer += strlen(pointer); 311 | } 312 | else { 313 | if((op->converted && inst->op != opcode_F2F) || (inst->op == opcode_XMAD && op != inst->operands[inst->num_operands - 1])) { 314 | sprintf(pointer, "[0x%x] [", op->val2); 315 | pointer += strlen(pointer); 316 | } else { 317 | sprintf(pointer, "[0x%x][", op->val2); 318 | pointer += strlen(pointer); 319 | } 320 | } 321 | 322 | if(op->reg >= 0) { 323 | sprintf(pointer, "R%d", (int) op->val1); 324 | pointer += strlen(pointer); 325 | if(op->val3) { 326 | sprintf(pointer, "+"); 327 | pointer++; 328 | } 329 | } 330 | if(op->val3 || op->reg < 0) { 331 | if(op->val3 < 0) { 332 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val3)) + 1); 333 | pointer += strlen(pointer); 334 | } else { 335 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val3); 336 | pointer += strlen(pointer); 337 | } 338 | } 339 | sprintf(pointer, "]"); 340 | 
pointer++; 341 | 342 | if(_cuobjdump_version >= 55) { 343 | if(op->properties & prop_absolute_value) { 344 | sprintf(pointer, "|"); 345 | pointer++; 346 | } 347 | } 348 | 349 | break; 350 | 351 | case type_mem: 352 | if(inst->op == opcode_LD_LDU) { 353 | mustShowHex = true; 354 | } 355 | 356 | if(op->properties & prop_not) { 357 | sprintf(pointer, "!"); 358 | pointer++; 359 | } else if(op->properties & prop_minus) { 360 | sprintf(pointer, "-"); 361 | pointer++; 362 | } else if(op->properties & prop_bitwise_complement) { 363 | sprintf(pointer, "~"); 364 | pointer++; 365 | } else if(op->properties & prop_absolute_value) { 366 | sprintf(pointer, "|"); 367 | pointer++; 368 | } 369 | 370 | sprintf(pointer, "["); 371 | pointer++; 372 | if(op->reg >= 0) { 373 | sprintf(pointer, "R%d", (int) op->val1); 374 | pointer += strlen(pointer); 375 | if(op->val2 || mustShowHex) { 376 | sprintf(pointer, "+"); 377 | pointer++; 378 | } 379 | } 380 | if(mustShowHex || op->val2 || op->reg < 0) { 381 | if(op->val2 == 0) { 382 | if(_cuobjdump_version <= 50) { 383 | sprintf(pointer, "0x0"); 384 | pointer += 3; 385 | } else { 386 | sprintf(pointer, "RZ"); 387 | pointer += 2; 388 | } 389 | } else if(op->val2 < 0) { 390 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val2)) + 1); 391 | pointer += strlen(pointer); 392 | } else { 393 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val2); 394 | pointer += strlen(pointer); 395 | } 396 | } 397 | sprintf(pointer, "]"); 398 | pointer++; 399 | 400 | if(op->properties & prop_absolute_value) { 401 | sprintf(pointer, "|"); 402 | pointer++; 403 | } 404 | 405 | break; 406 | 407 | case type_texture_operand: 408 | for(int x = 0; x < _numTextures; x++) { 409 | if(_textureIDs[x] == (int) op->val1) { 410 | if(_cuobjdump_version >= 55 && _textureIDs[x+1] == _textureIDs[x]) { 411 | sprintf(pointer, "%s", _textureNames[x+1]); 412 | pointer += strlen(pointer); 413 | } else { 414 | sprintf(pointer, "%s", _textureNames[x]); 415 | pointer += 
strlen(pointer); 416 | } 417 | break; 418 | } 419 | } 420 | 421 | break; 422 | 423 | case type_channel: 424 | for(int x = 0; x < _numChannels; x++) { 425 | if(_channelIDs[x] == (int) op->val1) { 426 | sprintf(pointer, "%s", _channelNames[x]); 427 | pointer += strlen(pointer); 428 | break; 429 | } 430 | } 431 | 432 | break; 433 | 434 | case type_other_operand: 435 | sprintf(pointer, "%s", _otherNames[op->val1]); 436 | pointer += strlen(pointer); 437 | 438 | break; 439 | 440 | default: 441 | cerr << "SANITY CHECK ERROR ~1871: operand is of non-operand type.\n"; 442 | } 443 | return answer; 444 | } 445 | 446 | void fprintfInstruction(ostream & out, instruction * inst) { 447 | //printf predicate guard if it exists 448 | if(inst->guard) { 449 | out << "@"; 450 | char * operand = operandToString(inst, inst->guard); 451 | out << operand << " "; 452 | if(!(inst->guard->properties & prop_not)) { 453 | out << " "; 454 | } 455 | free(operand); 456 | } 457 | else { 458 | out << " "; 459 | } 460 | 461 | //printf opcode + mods 462 | out << opcodes[inst->op]; 463 | node * iter3 = inst->mods; 464 | while(iter3 != 0) { 465 | out << "." << (char*)iter3->value; 466 | iter3 = iter3->next; 467 | } 468 | 469 | //printf operands + mods 470 | int x; 471 | for(x = 0; x < inst->num_operands; x++) { 472 | if(x > 0) { 473 | out << ","; 474 | } 475 | out << " "; 476 | char * operand = operandToString(inst, inst->operands[x]); 477 | out << operand; 478 | free(operand); 479 | 480 | iter3 = inst->operands[x]->mods; 481 | while(iter3 != 0) { 482 | out << "." 
<< (char*)iter3->value; 483 | iter3 = iter3->next; 484 | } 485 | } 486 | 487 | out << ";"; 488 | 489 | //print SCHI value: 490 | if(_arch < 30) { 491 | //do nothing 492 | } else if(_arch < 50) { 493 | out << " //SCHI: 0x" << std::hex << inst->SCHIVal << std::dec; 494 | } else if(_arch >= 50 && _arch < 70) { 495 | out << " SCHI50: 0x" << std::hex << inst->SCHIVal << ", " << std::dec << inst->depBarrierW << ", " << inst->depBarrierR << ", "; 496 | for(int x = 0; x < 6; x++) { 497 | if(inst->depBarrierMask & (1 << x)) { 498 | out << "1"; 499 | } else { 500 | out << "0"; 501 | } 502 | } 503 | } else { 504 | cerr << "ERROR UNIMPLEMENTED o~507: don't know how to output scheduling metadata\n"; 505 | } 506 | } 507 | 508 | void printfBlocks(bool metadata) { 509 | int maxLength = 40; 510 | 511 | if(metadata) { 512 | printfMetadata(); 513 | } 514 | 515 | node * iter = _blocks; 516 | node * iter2; 517 | while(iter) { 518 | blockNode * currentBlock = (blockNode*) iter->value; 519 | 520 | //Print label for block: 521 | printf("label"); 522 | printf("%s", ((instruction*)currentBlock->instructions->value)->label); 523 | printf(":"); 524 | 525 | //if(currentBlock->canLoop) { 526 | // printf("\tCAN LOOP"); 527 | //} 528 | 529 | //printf instructions inside the basic block: 530 | iter2 = currentBlock->instructions; 531 | node * iter3; 532 | while(iter2) { 533 | printf("\n\t"); 534 | instruction* inst = (instruction*) iter2->value; 535 | 536 | //printf predicate guard if it exists 537 | if(inst->guard) { 538 | printf("@"); 539 | char * operand = operandToString(inst, inst->guard); 540 | printf("%s ", operand); 541 | if(!(inst->guard->properties & prop_not)) { 542 | printf(" "); 543 | } 544 | free(operand); 545 | } 546 | else { 547 | printf(" "); 548 | } 549 | 550 | int length = 0; 551 | 552 | //printf opcode + mods 553 | printf("%s", opcodes[inst->op]); 554 | length += strlen(opcodes[inst->op]); 555 | iter3 = inst->mods; 556 | while(iter3 != 0) { 557 | printf(".%s", 
(char*)iter3->value); 558 | length += 1 + strlen((char*)iter3->value); 559 | iter3 = iter3->next; 560 | } 561 | 562 | //printf operands + mods 563 | int x; 564 | for(x = 0; x < inst->num_operands; x++) { 565 | if(x > 0) { 566 | printf(","); 567 | length++; 568 | } 569 | printf(" "); 570 | length++; 571 | char * operand = operandToString(inst, inst->operands[x]); 572 | printf("%s", operand); 573 | length += strlen(operand); 574 | free(operand); 575 | 576 | iter3 = inst->operands[x]->mods; 577 | while(iter3 != 0) { 578 | //if(strcmp((char*)iter3->value, "reuse")) { 579 | printf(".%s", (char*)iter3->value); 580 | length += 1 + strlen((char*)iter3->value); 581 | //} 582 | iter3 = iter3->next; 583 | } 584 | } 585 | 586 | 587 | printf(";"); 588 | length++; 589 | 590 | if(length > maxLength) { 591 | maxLength = length; 592 | } 593 | 594 | //print SCHI value: 595 | if(_arch < 30) { 596 | //do nothing 597 | } else if(_arch < 50) { 598 | for(int x = length; x < maxLength; x++) { 599 | printf(" "); 600 | } 601 | printf("//SCHI: 0x%" LLx, (unsigned long long) inst->SCHIVal); 602 | } else if(_arch < 70) { 603 | for(int x = length; x < maxLength; x++) { 604 | printf(" "); 605 | } 606 | printf(" SCHI50: 0x%x, %d, %d, " , inst->SCHIVal, inst->depBarrierW, inst->depBarrierR); 607 | for(int x = 0; x < 6; x++) { 608 | if(inst->depBarrierMask & (1 << x)) { 609 | printf("1"); 610 | } else { 611 | printf("0"); 612 | } 613 | } 614 | } else { 615 | cerr << "ERROR UNIMPLEMENTED o~798: don't know how to handle scheduling metadata on this architecture\n"; 616 | } 617 | 618 | iter2 = iter2->next; 619 | } 620 | printf("\n\n"); 621 | 622 | iter = iter->next; 623 | } 624 | } 625 | 626 | void printfHex() { 627 | node * iter = _blocks; 628 | node * iter2; 629 | while(iter) { 630 | blockNode* currentBlock = (blockNode*) iter->value; 631 | //printf instructions inside the basic block: 632 | iter2 = currentBlock->instructions; 633 | node * iter3; 634 | while(iter2) { 635 | instruction* inst = 
(instruction*) iter2->value; 636 | 637 | char * hex = instructionToHexString(inst, _arch, _cuobjdump_version); 638 | printf("%s", hex); 639 | free(hex); 640 | 641 | printf(" // "); 642 | 643 | //printf predicate guard if it exists 644 | if(inst->guard) { 645 | printf("@"); 646 | printf("%s", operandToString(inst, inst->guard)); 647 | printf(" "); 648 | } 649 | 650 | //printf opcode + mods 651 | printf("%s", opcodes[inst->op]); 652 | iter3 = inst->mods; 653 | while(iter3) { 654 | printf(".%s", (char*)iter3->value); 655 | iter3 = iter3->next; 656 | } 657 | 658 | //printf operands + mods 659 | int x; 660 | for(x = 0; x < inst->num_operands; x++) { 661 | if(x > 0 && inst->op != opcode_BRX) { 662 | printf(","); 663 | } 664 | printf(" "); 665 | printf("%s", operandToString(inst, inst->operands[x])); 666 | 667 | iter3 = inst->operands[x]->mods; 668 | while(iter3 != 0) { 669 | printf(".%s", (char*)iter3->value); 670 | iter3 = iter3->next; 671 | } 672 | } 673 | 674 | printf("\n"); 675 | iter2 = iter2->next; 676 | } 677 | 678 | iter = iter->next; 679 | } 680 | } 681 | 682 | void printfAssembly(bool metadata) { 683 | if(metadata) { 684 | printfMetadata(); 685 | } 686 | 687 | node * iter = _blocks; 688 | node * iter2; 689 | while(iter) { 690 | blockNode* currentBlock = (blockNode*) iter->value; 691 | int line = currentBlock->first; 692 | 693 | //printf instructions inside the basic block: 694 | iter2 = currentBlock->instructions; 695 | node * iter3; 696 | while(iter2) { 697 | instruction* inst = (instruction*) iter2->value; 698 | 699 | //printf predicate guard if it exists 700 | if(inst->guard) { 701 | printf("@"); 702 | printf("%s", operandToString(inst, inst->guard)); 703 | printf(" "); 704 | } 705 | 706 | //printf opcode + mods 707 | printf("%s", opcodes[inst->op]); 708 | iter3 = inst->mods; 709 | while(iter3) { 710 | printf(".%s", (char*)iter3->value); 711 | iter3 = iter3->next; 712 | } 713 | 714 | //printf operands + mods 715 | int x; 716 | for(x = 0; x < 
inst->num_operands; x++) { 717 | if(x > 0 && inst->op != opcode_BRX) { 718 | printf(","); 719 | } 720 | printf(" "); 721 | 722 | printf("%s", operandToString(inst, inst->operands[x])); 723 | 724 | iter3 = inst->operands[x]->mods; 725 | while(iter3 != 0) { 726 | printf(".%s", (char*)iter3->value); 727 | iter3 = iter3->next; 728 | } 729 | } 730 | 731 | if(inst->num_operands == 0) { 732 | if(_cuobjdump_version > 55 && (inst->op == opcode_EXIT || inst->op == opcode_RET || inst->op == opcode_BRK || inst->op == opcode_CONT || inst->op == opcode_SYNC)) { 733 | /* empty */ 734 | } else if(_cuobjdump_version > 50 && !inst->mods && inst->op != opcode_NOP) { 735 | printf(" "); 736 | } 737 | } 738 | 739 | printf(";"); 740 | 741 | cout << "\n"; 742 | line++; 743 | iter2 = iter2->next; 744 | } 745 | 746 | iter = iter->next; 747 | } 748 | } 749 | -------------------------------------------------------------------------------- /tools/src/output.hpp: -------------------------------------------------------------------------------- 1 | #ifndef OUTPUT_HPP 2 | #define OUTPUT_HPP 3 | 4 | #include "common.hpp" 5 | 6 | /** 7 | * @file output.hpp 8 | * Defines functions used for output to stdout. 9 | */ 10 | 11 | /** 12 | * Outputs an instruction. 13 | * @param out The output stream. 14 | * @param inst The instruction to output. 15 | */ 16 | void fprintfInstruction(std::ostream & out, instruction * inst); 17 | 18 | /** 19 | * Outputs the assembly code, split into basic blocks, to stdout 20 | * @param metadata If true, prints out the metadata. 21 | */ 22 | void printfBlocks(bool metadata); 23 | 24 | /** 25 | * Outputs assembly code in a format optimized for ease of reading the dispatch values. 26 | */ 27 | void printfSCHIVals(); 28 | 29 | /** 30 | * Outputs the assembly code, together with hexadecimal equivalent, to stdout 31 | */ 32 | void printfHex(); 33 | 34 | /** 35 | * Prints the assembly to stdout, in appropriate format for parsing. 
36 | * @param metadata True iff metadata for parsing should be included 37 | */ 38 | void printfAssembly(bool metadata); 39 | 40 | #endif 41 | --------------------------------------------------------------------------------