└── tools
├── .gitignore
├── src
├── binary35.hpp
├── binary50.hpp
├── decode_output.hpp
├── doxygen.dox
├── output.hpp
├── asm2bin.hpp
├── binary20.hpp
├── binary.hpp
├── cudacommon.hpp
├── binary.cpp
├── cudacommon.cpp
├── cfghelpers.hpp
├── decode.l
├── decode.hpp
├── elfmanip.hpp
├── asm2bin.l
├── elf2asm.cpp
├── elf.hpp
├── decode_output.cpp
├── decode_common.hpp
├── decode_common.cpp
├── output.cpp
├── common.cpp
└── cfghelpers.cpp
├── README_bin2asm.txt
├── README_asm2bin.txt
├── README_decode.txt
├── README.txt
└── Makefile
/tools/.gitignore:
--------------------------------------------------------------------------------
1 | objs
2 | asm2bin
3 | bin2asm
4 | decode
5 |
--------------------------------------------------------------------------------
/tools/src/binary35.hpp:
--------------------------------------------------------------------------------
1 | #ifndef BINARY35_HPP
2 | #define BINARY35_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file binary35.hpp
7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_3x.
8 | */
9 |
10 | /**
11 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_3x.
12 | * @param inst The instruction being converted
13 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated.
14 | * @return a char array containing 16 hexadecimal characters
15 | */
16 | char* instructionToHexString35(instruction * inst, int cuobjdump_version);
17 |
18 | #endif
19 |
--------------------------------------------------------------------------------
/tools/src/binary50.hpp:
--------------------------------------------------------------------------------
1 | #ifndef BINARY50_HPP
2 | #define BINARY50_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file binary50.hpp
7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_5x and sm_6x.
8 | */
9 |
10 | /**
11 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_5x or sm_6x.
12 | * @param inst The instruction being converted
13 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated.
14 | * @return a char array containing 16 hexadecimal characters
15 | */
16 | char* instructionToHexString50(instruction * inst, int cuobjdump_version);
17 |
18 | #endif
19 |
--------------------------------------------------------------------------------
/tools/README_bin2asm.txt:
--------------------------------------------------------------------------------
1 | ========================
2 | ========BIN2ASM=========
3 | ========================
4 |
5 | Our bin2asm program is used to extract assembly code and some metadata, using NVIDIA's cuobjdump tool for assistance.
6 |
7 | Here are some examples of how to run the program, and brief descriptions of the results.
8 |
9 | ./bin2asm a.out
10 | This outputs a list of the CUDA kernel functions in a.out, each prefixed with a unique ID number for identification.
11 |
12 | ./bin2asm a.out 3
13 | This outputs the assembly code of the kernel with ID number 3.
14 |
15 | ./bin2asm -hex a.out 3
16 | This outputs the assembly code of the kernel with ID number 3, including binary code.
17 | This format is necessary when gathering assembly and binary code for our decode tool.
--------------------------------------------------------------------------------
/tools/src/decode_output.hpp:
--------------------------------------------------------------------------------
1 | #ifndef DECODE_OUTPUT_HPP
2 | #define DECODE_OUTPUT_HPP
3 |
4 | #include "decode_common.hpp"
5 |
6 | /**
7 | * @file decode_output.hpp
8 | * Defines functions used for output to stdout.
9 | */
10 |
11 | /**
12 | * Outputs an operand to stdout
13 | * @param inst The operand's instruction
14 | * @param op The operand
15 | */
16 | void printfOperand(instruction * inst, operand * op);
17 |
18 | /**
19 | * Outputs an instruction.
20 | * @param out The output stream.
21 | * @param inst The instruction to output.
22 | */
23 | void fprintfInstruction(std::ostream & out, instruction * inst);
24 |
25 | /**
26 | * Prints the assembly's metadata to stdout, in appropriate format for parsing.
27 | */
28 | void printfMetadata();
29 |
30 | #endif
31 |
--------------------------------------------------------------------------------
/tools/src/doxygen.dox:
--------------------------------------------------------------------------------
1 | /*! \mainpage Decoding CUDA Binary
2 |
3 | \section intro Overview
4 |
5 | The code here is what we used in our Decoding CUDA Binary paper for CGO 2019.
6 |
7 | The decode program is the assembler generator tool.
8 | It can analyze binary encodings, and output bit-flipped code for more analysis, or output assembler code.
9 |
10 | The asm2bin tool contains our existing assemblers, and can be used to inject bit-flipped code into executables.
11 | It can also change the assembly code to a more human-readable format.
12 |
13 | The bin2asm tool can extract assembly code in a format our other tools understand.
14 | Use the -hex flag when extracting assembly for use with the decode tool, so binary is included.
15 |
16 | */
17 |
18 |
--------------------------------------------------------------------------------
/tools/src/output.hpp:
--------------------------------------------------------------------------------
1 | #ifndef OUTPUT_HPP
2 | #define OUTPUT_HPP
3 |
4 | #include "common.hpp"
5 |
6 | /**
7 | * @file output.hpp
8 | * Defines functions used for output to stdout.
9 | */
10 |
11 | /**
12 | * Outputs an instruction.
13 | * @param out The output stream.
14 | * @param inst The instruction to output.
15 | */
16 | void fprintfInstruction(std::ostream & out, instruction * inst);
17 |
18 | /**
19 | * Outputs the assembly code, split into basic blocks, to stdout
20 | * @param metadata If true, prints out the metadata.
21 | */
22 | void printfBlocks(bool metadata);
23 |
24 | /**
25 | * Outputs assembly code in a format optimized for ease of reading the dispatch values.
26 | */
27 | void printfSCHIVals();
28 |
29 | /**
30 | * Outputs the assembly code, together with hexadecimal equivalent, to stdout
31 | */
32 | void printfHex();
33 |
34 | /**
35 | * Prints the assembly to stdout, in appropriate format for parsing.
36 | * @param metadata True iff metadata for parsing should be included
37 | */
38 | void printfAssembly(bool metadata);
39 |
40 | #endif
41 |
--------------------------------------------------------------------------------
/tools/README_asm2bin.txt:
--------------------------------------------------------------------------------
1 | ========================
2 | ========ASM2BIN=========
3 | ========================
4 |
5 | Our asm2bin program can parse assembly code in the format generated by our bin2asm tool, and modify CUDA binaries.
6 | If you want to modify the GPU code programmatically, consider doing so from inside this tool; in asm2bin.ypp, you'll
7 | find a comment that says: //This is where we might call functions to modify or optimize the GPU code.
8 |
9 | Below are some examples of how to run the program, and brief descriptions of the results.
10 |
11 | ./asm2bin assembly.txt -printBlocks
12 | This parses the GPU code in assembly.txt, and outputs it to stdout in a more human-readable format.
13 | Note that asm2bin can also accept the human-readable format as input.
14 |
15 | ./asm2bin assembly.txt -write program.exe
16 | This parses the GPU code in assembly.txt, and overwrites the matching kernel function in program.exe.
17 | Warning: if new assembly is larger than the original, and you're on a non-Linux machine,
18 | then our tool will terminate without modifying the binary.
19 |
--------------------------------------------------------------------------------
/tools/src/asm2bin.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ASM2BIN_PARSE_HPP
2 | #define ASM2BIN_PARSE_HPP
3 |
4 | #include "common.hpp"
5 |
6 | /**
7 | * @file asm2bin.hpp
8 | * Defines functions and constants used by the asm2bin parser.
9 | */
10 |
11 | /**
12 | * Deletes scheduling metadata instructions from all of the basic blocks.
13 | */
14 | void stripSCHI();
15 |
16 | /**
17 | * Inserts scheduling metadata instructions throughout code.
18 | * Note that this assumes there are no such instructions already present.
19 | */
20 | void insertSCHIs();
21 |
22 | /**
23 | * Creates an array of binary code equivalent to the assembly.
24 | * @return a structure containing the array of code
25 | */
26 | kernelData * generateByteCode();
27 |
28 | /**
29 | * Gets a list of functions within the code, indicated by CAL instructions.
30 | * Useful for fixing up the ELF after adding/removing code.
31 | * @return a list of ascending addresses corresponding to function starts
32 | */
33 | node * getCallTargets();
34 |
35 | /**
36 | * Called by parser after reaching the end of the assembly code.
37 | */
38 | void finishParsing();
39 |
40 | #endif
41 |
42 |
--------------------------------------------------------------------------------
/tools/src/binary20.hpp:
--------------------------------------------------------------------------------
1 | #ifndef BINARY20_HPP
2 | #define BINARY20_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file binary20.hpp
7 | * Defines functions used to convert assembly-style instructions to hex/binary for sm_20, sm_21, sm_30.
8 | */
9 |
10 | /**
11 | * Get the binary value of an 'imme' operand.
12 | * An imme is usually an operand that could be either a hex value, constant memory, or a register.
13 | * @param op is the operand
14 | * @param sizeLimit is the number of bits the value can use (excluding the two bits that identify the type)
15 | * @param special indicates a unique value type: special & 1 means ignore negative; special & 2 means it's the 4th operand and constant memory
16 | * @return a 64-bit integer whose lowest bits are the binary value
17 | */
18 | long long getImme20(operand * op, char sizeLimit, char special);
19 |
20 | /**
21 | * Converts an assembly instruction to a hex string, equivalent to binary code for sm_20, sm_21, sm_30.
22 | * @param inst The instruction being converted
23 | * @param arch The sm_xx architecture of the kernel code.
24 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated.
25 | * @return a char array containing 16 hexadecimal characters
26 | */
27 | char* instructionToHexString20(instruction * inst, int arch, int cuobjdump_version);
28 |
29 | #endif
30 |
31 |
--------------------------------------------------------------------------------
/tools/README_decode.txt:
--------------------------------------------------------------------------------
1 | ========================
2 | =========DECODE=========
3 | ========================
4 |
5 | This program is used to analyze NVIDIA ISA encodings, and generate assembler code.
6 |
7 | This tool has been tested on ISAs with versions between 3.2 and 6.2.
8 | It is not yet compatible with newer architectures, as we need to update our structures to support 16-byte instructions.
9 |
10 | This program has three running modes:
11 |
12 | ./decode assembly.txt < oldPersistent.dat > newPersistent.dat
13 | The above command will analyze the code in file assembly.txt (generated by our bin2asm tool with the -hex flag).
14 | It will combine the analysis with encodings in oldPersistent.dat, and output new encodings to newPersistent.dat.
15 |
16 | ./decode -probe assembly.txt < persistent.dat > bitflip.txt
17 | The above command will generate bit-flipped code based on analysis of assembly.txt plus encodings in persistent.dat.
18 | The bit-flipped code in bitflip.txt can be injected into an executable using our asm2bin tool.
19 | After injection, new assembly for analysis can be retrieved with bin2asm using the -hex flag.
20 |
21 | ./decode -final assembly.txt < persistent.dat > assembler.cpp
22 | The above command will generate an assembler based on analysis of assembly.txt plus encodings in persistent.dat.
23 | This assembler can be placed inside a C++ function; see our binary50.cpp and binary35.cpp files in src/ for examples.
24 |
25 | Note that during the first run for a given architecture, since no persistent data exists, you should just enter -1 into stdin.
--------------------------------------------------------------------------------
/tools/src/binary.hpp:
--------------------------------------------------------------------------------
1 | #ifndef BINARY_HPP
2 | #define BINARY_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file binary.hpp
7 | * Defines functions used to convert assembly-style instructions to hex/binary.
8 | */
9 |
10 | /**
11 | * Gets the difference between two values.
12 | * Used to determine jump sizes when converting instructions to binary/hex.
13 | * @param val1 The first value
14 | * @param val2 The second value
15 | * @param sizeLimit The number of bits in the difference
16 | * @return a 64-bit integer whose lowest bits equate to (val1-val2)
17 | */
18 | long long getBinaryDifference(unsigned long long val1, int val2, char sizeLimit);
19 |
20 | /**
21 | * Sets bits inside of a 64-bit unsigned integer, which represents a binary instruction.
22 | * @param binary The address of the value being altered
23 | * @param location The index of the first bit being set
24 | * @param value The bits used to set binary
25 | * @param size The number of bits in value
26 | * @param overwrite Indicates whether to overwrite 1's with 0's
27 | */
28 | void setBinary(unsigned long long * binary, int location, long long value, int size, char overwrite);
29 |
30 | /**
31 | * Converts an assembly instruction to a hex string, equivalent to binary code.
32 | * @param inst The instruction being converted
33 | * @param arch The kernel function's architecture (the value of xx in sm_xx).
34 | * @param cuobjdump_version The cuobjdump version with which the assembly code was generated.
35 | * @return a char array containing 16 hexadecimal characters
36 | */
37 | char* instructionToHexString(instruction * inst, int arch, int cubojdump_version);
38 |
39 | #endif
40 |
41 |
--------------------------------------------------------------------------------
/tools/src/cudacommon.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CUDACOMMON_HPP
2 | #define CUDACOMMON_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file cudacommon.hpp
7 | * Defines functions used to analyze CUDA code.
8 | */
9 |
10 | /**
11 | * Checks if instruction has given mod attached to opcode.
12 | * @param inst The instruction
13 | * @param mod The mod string
14 | * @return true iff mod is present, false iff not
15 | */
16 | bool hasMod(instruction* inst, const char* mod);
17 |
18 | /**
19 | * Checks if operand has given mod.
20 | * @param op The operand
21 | * @param mod The mod string
22 | * @return true iff mod is present, false iff not
23 | */
24 | bool hasMod(operand* op, const char* mod);
25 |
26 | /**
27 | * Checks if instruction has given mod in appropriate place, associated with the opcode.
28 | * @param inst The instruction
29 | * @param mod The mod string
30 | * @param later Should be 1 iff target mod is after a type mod; 0 if before
31 | * @return true iff mod is present, false iff not
32 | */
33 | bool hasTypeMod(instruction* inst, const char* mod, char later);
34 |
35 | /**
36 | * Gets the basic block which contains the given line.
37 | * @param line The line number for an instruction
38 | * @return the blockNode which contains the line, or 0 if no such line exists
39 | */
40 | blockNode * getBlock(int line);
41 |
42 | /**
43 | * Gets the basic block which contains the given instruction.
44 | * @param inst The instruction
45 | * @return the blockNode which contains the instruction, or 0 if no such block exists
46 | */
47 | blockNode * getBlock(instruction * inst);
48 |
49 | /**
50 | * Gets the instruction with the given line number.
51 | * @param line The line number for an instruction
52 | * @return the desired instruction
53 | */
54 | instruction * getLine(int line);
55 |
56 | #endif
57 |
--------------------------------------------------------------------------------
/tools/README.txt:
--------------------------------------------------------------------------------
1 | ========================
2 | =====THIS DIRECTORY=====
3 | ========================
4 | This directory contains source code for our assembler generator tool, plus related tools.
5 | We hope to keep updating it, for example by adding support for the Volta generation of NVIDIA devices.
6 |
7 | ========================
8 | =======OUR TOOLS========
9 | ========================
10 | The bin2asm tool extracts kernel functions' assembly code from nvcc-generated executables.
11 | The asm2bin tool can assemble assembly code for several architectures, overwriting binary in an executable.
12 | The decode tool can generate assembler code for use with the asm2bin tool.
13 |
14 | If writing functions to analyze/modify GPU code, we recommend calling them in asm2bin.
15 | There's a line in asm2bin.ypp that says "//This is where we might call functions to modify or optimize the GPU code."
16 |
17 | ========================
18 | ======DEPENDENCIES======
19 | ========================
20 | Our tools are all compiled with g++.
21 | The asm2bin and decode tools are dependent on flex and bison.
22 | The bin2asm tool expects that the CUDA Toolkit be installed and part of your PATH.
23 |
24 | ========================
25 | =====COMPATIBILITY======
26 | ========================
27 | The asm2bin and decode tools should be run on a Linux machine for full compatibility.
28 | The decode tool has been tested with compute capabilities between 3.2 and 6.2, inclusive.
29 |
30 | ========================
31 | ======COMPILATION=======
32 | ========================
33 | Our tools can be compiled by running make in this directory.
34 |
35 | ========================
36 | =COMMAND LINE ARGUMENTS=
37 | ========================
38 | Run any of the three tools with the -h flag for some details on runtime arguments.
39 | See separate readme files for more info about the distinct tools: bin2asm, asm2bin, and decode.
40 |
41 | ========================
42 | =====DOCUMENTATION======
43 | ========================
44 | Documentation can be generated with Doxygen.
45 |
--------------------------------------------------------------------------------
/tools/src/binary.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "binary.hpp"
6 | #include "binary20.hpp"
7 | #include "binary35.hpp"
8 | #include "binary50.hpp"
9 | #include "common.hpp"
10 | using namespace std;
11 |
/**
 * Computes (val1 - val2), used to determine jump sizes when encoding instructions.
 * A positive difference is returned unchanged (it is NOT truncated to sizeLimit bits).
 * A zero or negative difference is returned as a sizeLimit-bit two's complement
 * value held in the low bits of the result.
 * @param val1 The value being subtracted from
 * @param val2 The value to subtract; converted to unsigned long long before use
 * @param sizeLimit The number of bits available for a non-positive difference
 * @return a 64-bit integer whose lowest bits equate to (val1 - val2)
 */
long long getBinaryDifference(unsigned long long val1, int val2, char sizeLimit) {
    const unsigned long long subtrahend = (unsigned long long) val2;

    if(val1 > subtrahend) {
        return (long long) (val1 - subtrahend);
    }

    // Build the sizeLimit-bit mask without ever shifting by a negative count or
    // by the full width of the type, both of which are undefined behaviour.
    unsigned long long mask;
    if(sizeLimit <= 0) {
        mask = 0;
    } else if(sizeLimit >= 64) {
        mask = ~0ULL;
    } else {
        mask = ~0ULL >> (64 - sizeLimit);
    }

    // Two's complement negation restricted to the masked width: invert, add one, truncate.
    const unsigned long long magnitude = subtrahend - val1;
    return (long long) (((mask ^ magnitude) + 1) & mask);
}
27 |
/**
 * Sets bits inside of a 64-bit unsigned integer, which represents a binary instruction.
 * Bit x of value is written to bit (location - x) of *binary, so consecutive bits of
 * value land on descending bit positions starting at location.
 * Target positions outside 0..63 are skipped instead of triggering an undefined shift.
 * @param binary The address of the value being altered
 * @param location The index of the first bit being set
 * @param value The bits used to set binary
 * @param size The number of bits in value
 * @param overwrite If nonzero, 0 bits in value clear the target bit; if zero, only 1 bits are applied
 */
void setBinary(unsigned long long * binary, int location, long long value, int size, char overwrite) {
    // Work on an unsigned copy so that testing bit 63 never shifts into a sign bit.
    const unsigned long long bits = (unsigned long long) value;

    for(int x = 0; x < size && x < 64; x++, location--) {
        if(location < 0 || location > 63) {
            continue;
        }

        const unsigned long long target = 1ULL << location;
        if((bits >> x) & 1ULL) {
            *binary |= target;
        } else if(overwrite) {
            *binary &= ~target;
        }
    }
}
50 |
51 | bool printedStatusMessage = false;
52 | char* instructionToHexString(instruction * inst, int arch, int cuobjdump_version) {
53 | if(arch < 20) {
54 | cerr << "FATAL ERROR: The sm_1x architecture is not supported.\n";
55 | exit(0);
56 | } else if(arch == 20 || arch == 21 || arch == 30) {
57 | return instructionToHexString20(inst, arch, cuobjdump_version);
58 | } else if(arch == 32 || arch == 35 || arch == 37) {
59 | return instructionToHexString35(inst, cuobjdump_version);
60 | } else if(arch == 50 || arch == 52 || arch == 53 || arch == 60 || arch == 61 || arch == 62) {
61 | return instructionToHexString50(inst, cuobjdump_version);
62 | } else {
63 | cerr << "FATAL ERROR: No assembler found for architecture " << arch << ".\n";
64 | exit(0);
65 | }
66 | }
67 |
68 |
--------------------------------------------------------------------------------
/tools/src/cudacommon.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "cfghelpers.hpp"
6 | #include "cudacommon.hpp"
7 | using namespace std;
8 |
9 | extern node* _blocks;
10 | extern node* _functions;
11 | extern int _tag;
12 | extern bool _verbose;
13 |
14 | bool hasMod(instruction* inst, const char* mod) {
15 | node * n = inst->mods;
16 | while(n) {
17 | if(!strcmp((char*)n->value, mod)) {
18 | return true;
19 | }
20 | n = n->next;
21 | }
22 |
23 | return false;
24 | }
25 |
26 | bool hasMod(operand* op, const char* mod) {
27 | node * n = op->mods;
28 | while(n) {
29 | if(!strcmp((char*)n->value, mod)) {
30 | return true;
31 | }
32 | n = n->next;
33 | }
34 |
35 | return false;
36 | }
37 |
38 | bool hasTypeMod(instruction* inst, const char* mod, char later) {
39 | node * n = inst->mods;
40 | bool seenType = 0;
41 | while(n) {
42 | if(!strcmp((char*)n->value, mod)) {
43 | if(seenType ^ !later) {
44 | return true;
45 | }
46 | }
47 |
48 | if(!strcmp((char*)n->value, "U16")) {
49 | seenType = true;
50 | } else if(!strcmp((char*)n->value, "U32")) {
51 | seenType = true;
52 | } else if(!strcmp((char*)n->value, "U64")) {
53 | seenType = true;
54 | } else if(!strcmp((char*)n->value, "S16")) {
55 | seenType = true;
56 | } else if(!strcmp((char*)n->value, "S32")) {
57 | seenType = true;
58 | } else if(!strcmp((char*)n->value, "S64")) {
59 | seenType = true;
60 | } else if(!strcmp((char*)n->value, "F16")) {
61 | seenType = true;
62 | } else if(!strcmp((char*)n->value, "F32")) {
63 | seenType = true;
64 | } else if(!strcmp((char*)n->value, "F64")) {
65 | seenType = true;
66 | } else if(!strcmp((char*)n->value, "32")) {
67 | seenType = true;
68 | } else if(!strcmp((char*)n->value, "64")) {
69 | seenType = true;
70 | } else if(!strcmp((char*)n->value, "128")) {
71 | seenType = true;
72 | }
73 |
74 | n = n->next;
75 | }
76 |
77 | return false;
78 | }
79 |
80 | blockNode * getBlock(int line) {
81 | blockNode * b;
82 | node * n = _blocks;
83 |
84 | while(n) {
85 | b = (blockNode*) n->value;
86 | if(b->first <= line && b->last >= line) {
87 | return b;
88 | }
89 | n = n->next;
90 | }
91 |
92 | return 0;
93 | }
94 |
95 | blockNode * getBlock(instruction * inst) {
96 | blockNode * b;
97 | node * n = _blocks;
98 |
99 | while(n) {
100 | b = (blockNode*) n->value;
101 | if(inst->blockID == b->id) {
102 | return b;
103 | }
104 | n = n->next;
105 | }
106 |
107 | return 0;
108 | }
109 |
110 | instruction * getLine(int line) {
111 | blockNode * b = getBlock(line);
112 | node * n = b->instructions;
113 | int counter = b->first;
114 | while(counter < line) {
115 | n = n->next;
116 | counter++;
117 | }
118 |
119 | return (instruction*) n->value;
120 | }
121 |
--------------------------------------------------------------------------------
/tools/Makefile:
--------------------------------------------------------------------------------
1 | CC = @g++
2 | LEXER = @flex
3 | PARSER = @bison
4 |
5 | CFLAGS = -Isrc -O3 -Wall
6 | rm = @rm
7 | OBJs = objs/asm2bin.tab.o \
8 | objs/asm2bin.yy.o \
9 | objs/binary.o \
10 | objs/binary20.o \
11 | objs/binary35.o \
12 | objs/binary50.o \
13 | objs/cfghelpers.o \
14 | objs/common.o \
15 | objs/cudacommon.o \
16 | objs/elf.o \
17 | objs/elfmanip.o \
18 | objs/output.o
19 | ELFOBJs = objs/elf.o \
20 | objs/elfmanip.o
21 | DECODEOBJS = objs/decode.tab.o \
22 | objs/decode.yy.o \
23 | objs/decode_common.o \
24 | objs/decode_output.o
25 |
26 | ifeq ($(OS),Windows_NT)
27 | rm = @del
28 | CFLAGS += -D WINDOWS
29 | MKDIR_FLAGS = ""
30 |
31 | cuobjdump_location := $(shell where cuobjdump)
32 | ifneq ($(cuobjdump_location),)
33 | CFLAGS += -D HAS_CUOBJDUMP
34 | endif
35 | else
36 | MKDIR_FLAGS = -p
37 | UNAME_S := $(shell uname -s)
38 | UNAME_P := $(shell uname -p)
39 |
40 | cuobjdump_location := $(shell which cuobjdump)
41 | ifneq ($(cuobjdump_location),)
42 | CFLAGS += -D HAS_CUOBJDUMP
43 | endif
44 |
45 | ifeq ($(UNAME_S),Linux)
46 | CFLAGS += -D LINUX
47 | ifeq ($(UNAME_P),x86_64)
48 | CFLAGS += -D AMD64
49 | CFLAGS += -D LINUX64
50 | endif
51 | else ifeq ($(UNAME_P),x86_64)
52 | CFLAGS += -D AMD64
53 | endif
54 | endif
55 |
# default, debug and clean name actions, not files. Declaring them phony keeps
# make from skipping them when a file with the same name exists in this directory.
.PHONY: default debug clean

# Builds the object directory and all three tools.
default: objs bin2asm asm2bin decode

# Same as default, but with debug symbols and optimisation disabled.
debug: CFLAGS += -g -O0
debug: default
60 |
61 | objs:
62 | @mkdir ${MKDIR_FLAGS} "objs"
63 |
64 | objs/asm2bin.yy.o: src/asm2bin.l src/common.hpp
65 | ${LEXER} -oobjs/asm2bin.yy.c src/asm2bin.l
66 | ${CC} ${CFLAGS} -c objs/asm2bin.yy.c -o $@
67 |
68 | objs/asm2bin.tab.o: src/asm2bin.ypp src/asm2bin.hpp src/cfghelpers.hpp src/common.hpp
69 | ${PARSER} -oobjs/asm2bin.tab.cpp -dv src/asm2bin.ypp
70 | ${CC} ${CFLAGS} -c objs/asm2bin.tab.cpp -o $@
71 |
72 | objs/binary.o: src/binary*.cpp src/binary*.hpp src/common.hpp
73 | ${CC} ${CFLAGS} -c src/binary.cpp -o $@
74 |
75 | objs/binary20.o: src/binary20.cpp src/binary20.hpp src/binary.hpp src/common.hpp
76 | ${CC} ${CFLAGS} -c src/binary20.cpp -o $@
77 |
78 | objs/binary35.o: src/binary35.cpp src/binary35.hpp src/binary.hpp src/common.hpp
79 | ${CC} ${CFLAGS} -c src/binary35.cpp -o $@
80 |
81 | objs/binary50.o: src/binary50.cpp src/binary50.hpp src/binary.hpp src/common.hpp
82 | ${CC} ${CFLAGS} -c src/binary50.cpp -o $@
83 |
84 | objs/cfghelpers.o: src/cfghelpers.hpp src/cfghelpers.cpp src/common.hpp src/cudacommon.hpp src/output.hpp
85 | ${CC} ${CFLAGS} -c src/cfghelpers.cpp -o $@
86 |
87 | objs/common.o: src/common.cpp src/common.hpp
88 | ${CC} ${CFLAGS} -c src/common.cpp -o $@
89 |
90 | objs/cudacommon.o: src/cudacommon.cpp src/cfghelpers.hpp src/cudacommon.hpp src/common.hpp
91 | ${CC} ${CFLAGS} -c src/cudacommon.cpp -o $@
92 |
93 | objs/elf.o: src/elf.cpp src/elf.hpp
94 | ${CC} ${CFLAGS} -c src/elf.cpp -o $@
95 |
96 | objs/elfmanip.o: src/elfmanip.cpp src/elfmanip.hpp src/common.hpp src/elf.hpp
97 | ${CC} ${CFLAGS} -c src/elfmanip.cpp -o $@
98 |
99 | objs/output.o: src/output.cpp src/output.hpp src/common.hpp src/cfghelpers.hpp src/cudacommon.hpp
100 | ${CC} ${CFLAGS} -c src/output.cpp -o $@
101 |
102 | bin2asm: ${ELFOBJs} objs/common.o src/elf2asm.cpp
103 | ${CC} ${CFLAGS} ${ELFOBJs} objs/common.o src/elf2asm.cpp -o bin2asm ${ELFLinks}
104 |
105 | asm2bin: ${OBJs}
106 | ${CC} ${CFLAGS} ${OBJs} -o asm2bin ${ELFLinks}
107 |
108 |
109 | objs/decode.yy.o: src/decode.l src/decode_common.hpp
110 | ${LEXER} -oobjs/decode.yy.c src/decode.l
111 | ${CC} ${CFLAGS} -c objs/decode.yy.c -o $@
112 |
113 | objs/decode.tab.o: src/decode.ypp src/decode.hpp src/decode_common.hpp
114 | ${PARSER} -oobjs/decode.tab.cpp -dv src/decode.ypp
115 | ${CC} ${CFLAGS} -c objs/decode.tab.cpp -o $@
116 |
117 | objs/decode_common.o: src/decode_common.cpp src/decode_common.hpp
118 | ${CC} ${CFLAGS} -c src/decode_common.cpp -o $@
119 |
120 | objs/decode_output.o: src/decode_output.cpp src/decode_output.hpp src/decode_common.hpp
121 | ${CC} ${CFLAGS} -c src/decode_output.cpp -o $@
122 |
123 | decode: ${DECODEOBJS}
124 | ${CC} ${CFLAGS} ${DECODEOBJS} -o $@
125 |
126 | clean:
127 | ${rm} -f asm2bin asm2bin.exe bin2asm bin2asm.exe decode decode.exe *.o .temp*
128 | ${rm} -f objs/*
129 |
130 |
--------------------------------------------------------------------------------
/tools/src/cfghelpers.hpp:
--------------------------------------------------------------------------------
1 | #ifndef CFGHELPERS_HPP
2 | #define CFGHELPERS_HPP
3 | #include "common.hpp"
4 | #include
5 | #include
6 | #include
7 | using namespace std;
8 |
9 | /**
10 | * @file cfghelpers.hpp
11 | * Defines functions used to create and modify the control flow graph.
12 | */
13 |
14 | /**
15 | * Adds a branch from a given block to a given line.
16 | * If the line is in the middle of a basic block, it will be split into two blocks.
17 | * If block is NULL, this function simply splits the block which contains the target line.
18 | * @param a The block being branched from, or NULL.
19 | * @param line The target instruction.
20 | * @return true if the branch was added; false if it already existed
21 | */
22 | bool addBranchToLine(blockNode * a, instruction * line);
23 |
24 | /**
25 | * Adds a branch from a given block to a given line number.
26 | * If the line is in the middle of a basic block, it will be split into two blocks.
27 | * If block is NULL, this function simply splits the block which contains the target line.
28 | * @param a The block being branched from, or NULL.
29 | * @param line The target line number.
30 | * @return true if the branch was added; false if it already existed
31 | */
32 | bool addBranchToLine(blockNode * a, int line);
33 |
34 | /**
35 | * Adds a branch from a given block to a given address.
36 | * Finds line number for address, then calls #addBranchToLine
37 | * @param a The block being branched from, or NULL.
38 | * @param address The target address.
39 | * @return true if the branch was added; false if it already existed
40 | */
41 | bool addBranchToAddress(blockNode * a, int address);
42 |
43 | /**
44 | * Finds addresses for labels, and adds the addresses to metadata as appropriate.
45 | */
46 | void labelsToAddresses();
47 |
48 | /**
49 | * Sets up labels for blocks; replaces hex addresses with label operands.
50 | * Overwrites existing labels.
51 | */
52 | void addressesToLabels();
53 |
54 | /**
55 | * Combines adjacent blocks that should together form a basic block if predicate guards are ignored.
56 | */
57 | void combineBlocks();
58 |
59 | /**
60 | * Helper to fix ptrs (jump targets) after a block is deleted.
61 | * @param oldVal The ptr value to change
62 | * @param newVal The ptr value to replace oldVal with
63 | */
64 | void changePtrs(blockNode* oldVal, blockNode* newVal);
65 |
66 | /**
67 | * Deletes a basic block, adding its successors to its predecessors' successors.
68 | * @param deleted The block to delete
69 | */
70 | void deleteBlock(blockNode* deleted);
71 |
72 | /**
73 | * Fixes line numbers & addresses after addition/deletion of instructions.
74 | */
75 | void fixAssembly();
76 |
77 | /**
78 | * Adds successors to blocks with the specified end type.
79 | * @param branch_type The type of instruction the blocks end with.
80 | */
81 | void propogateBranches(blockEnd branch_type);
82 |
83 | /**
84 | * Helper for propogateCallReturns.
85 | * Recursively adds successors to blocks that end with a RET.
86 | * @param search The current block in which we're searching for a RET
87 | * @param target The address to jump to after a RET
88 | * @param tag A unique value to avoid infinite recursion
89 | * @return true iff changes were made
90 | */
91 | bool propogateReturns(blockNode * search, int target, int tag);
92 |
93 | /**
94 | * Add successors to blocks that end in a CAL or RET.
95 | * @return true iff changes were made, false otherwise
96 | */
97 | bool propogateCallReturns();
98 |
99 | /**
100 | * Adds pointers based on thread divergence, break, and continue instructions.
101 | * @param search The current block in which we're searching for SSY/.S|SYNC, PBK/BRK, and PCNT/CONT
102 | * @param ptrs A stack of pointers, as pairs
103 | * @param tag A unique value used to avoid double visiting any blocks
104 | * @param firstCall True iff at depth 0 of recursion
105 | * @return true iff changes were made, false otherwise
106 | */
107 | bool propogatePointersHelper(blockNode * search, stack > ptrs, int tag, bool firstCall);
108 |
109 | /**
110 | * Adds pointers based on thread divergence, break, and continue instructions.
111 | * @return true iff changes were made, false otherwise
112 | */
113 | bool propogatePointers();
114 |
115 | /**
116 | * Sets target blocks for relevant instructions.
117 | * Call this after first setting up all the basic blocks.
118 | */
119 | void setPtr();
120 |
121 | #endif
122 |
--------------------------------------------------------------------------------
/tools/src/decode.l:
--------------------------------------------------------------------------------
1 | %{
2 | #include
3 | #include "decode_common.hpp"
4 | #include "decode.tab.hpp"
5 | %}
6 | %option nounput
7 | %option noyywrap
8 | extern YYSTYPE yylval;
9 | extern int isatty();
10 |
11 | hex [-~]?(0x[0-9a-fA-F]+)|(\|0x[0-9a-fA-F]+\|)
12 | bitlist \{([0-9]*,)*[0-9]*\}
13 | float \-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)?
14 | pred !?(P[0-6])|!?[pP][tT]
15 | reg -?[-~]?\|?(RZ|R[0-9]+)\|?
16 | specialName SR_[\_\.A-Za-z0-9]+
17 | specialID SR[0-9]+
18 | const ([-~]\ *)?\|?c\|?\ *\[.*\]\ *\[.*\]\|?
19 | memory ([-~]\ *)?\|?\[.*\]\|?
20 | decimal_line_number [\_a-zA-Z0-9]+\ \([0-9]+\)
21 |
22 | %%
23 | "//Shared memory usage: "-?0[xX][a-fA-F0-9]+[^\n]* {
24 | yylval.token_.lexeme = yytext + 23;
25 | return(METADATA_SHAREDMEM);
26 | }
27 | "//Shared memory usage: "-?[0-9]+[^\n]* {
28 | yylval.token_.lexeme = yytext + 23;
29 | return(METADATA_SHAREDMEM);
30 | }
31 | "//Frame Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
32 | yylval.token_.lexeme = yytext + 14;
33 | return(METADATA_FRAME_SIZE);
34 | }
35 | "//Frame Size: "-?[0-9]+[^\n]* {
36 | yylval.token_.lexeme = yytext + 14;
37 | return(METADATA_FRAME_SIZE);
38 | }
39 | "//Min Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
40 | yylval.token_.lexeme = yytext + 18;
41 | return(METADATA_MIN_STACK_SIZE);
42 | }
43 | "//Min Stack Size: "-?[0-9]+[^\n]* {
44 | yylval.token_.lexeme = yytext + 18;
45 | return(METADATA_MIN_STACK_SIZE);
46 | }
47 | "//Max Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
48 | yylval.token_.lexeme = yytext + 18;
49 | return(METADATA_MAX_STACK_SIZE);
50 | }
51 | "//Max Stack Size: "-?[0-9]+[^\n]* {
52 | yylval.token_.lexeme = yytext + 18;
53 | return(METADATA_MAX_STACK_SIZE);
54 | }
55 | "//Name: "[0-9a-zA-Z_]+[^\n]* {
56 |     //Kernel-name metadata: duplicate the text after the 8-character "//Name: " prefix,
57 |     //then terminate the duplicate at the first '\n' or '\r' (if any).
58 |     const char * text = yytext + 8;
59 |     char * copy = (char*) malloc(strlen(text) + 1);
60 |     strcpy(copy, text);
61 |     char * end = copy;
62 |     while(*end != 0 && *end != '\n' && *end != '\r') {
63 |         end++;
64 |     }
65 |     *end = 0;
66 |     yylval.token_.lexeme = copy;
67 |     return(METADATA_KERNELNAME);
68 | }
68 | "//Arch: sm_"[0-9]+[^\n]* {
69 | yylval.token_.lexeme = yytext + 11;
70 | return(METADATA_ARCH);
71 | }
72 | "//Function count: "[0-9]+[^\n]* {
73 | yylval.token_.lexeme = yytext + 18;
74 | return(METADATA_FUNCTIONCOUNT);
75 | }
76 | "//Function: "[0-9a-zA-Z\_\$]+[^\n]* {
77 |     //Function-name metadata: duplicate the text after the 12-character "//Function: " prefix,
78 |     //then terminate the duplicate at the first '\n' or '\r' (if any).
79 |     const char * text = yytext + 12;
80 |     char * copy = (char*) malloc(strlen(text) + 1);
81 |     strcpy(copy, text);
82 |     char * end = copy;
83 |     while(*end != 0 && *end != '\n' && *end != '\r') {
84 |         end++;
85 |     }
86 |     *end = 0;
87 |     yylval.token_.lexeme = copy;
88 |     return(METADATA_FUNCTIONNAME);
89 | }
89 | "//cuobjdump: "[0-9]+[^\n]* {
90 | yylval.token_.lexeme = yytext + 13;
91 | return(METADATA_CUOBJDUMP);
92 | }
93 | "//"[^\n]* {
94 | //return(COMMENT);
95 | }
96 | 1D|ARRAY_1D|RECT|2D|ARRAY_2D|3D|CUBE|ARRAY_CUBE {
97 | yylval.token_.type = type_texture_operand;
98 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)+1);
99 | strcpy(yylval.token_.lexeme,yytext);
100 | return(TEXOP);
101 | }
102 | R|G|B|A|RG|RA|GA|BA|RGB|RGA|RBA|GBA|RGBA|INVALID5|INVALID6|INVALID7 {
103 | yylval.token_.type = type_channel;
104 | yylval.token_.lexeme = yytext;
105 | return(CHANNEL);
106 | }
107 | SB[0-9] {
108 | yylval.token_.type = type_sb;
109 | yylval.token_.lexeme = yytext + 2;
110 | return(SB_OPERAND);
111 | }
112 | {hex} {
113 | yylval.token_.type = type_hex;
114 | yylval.token_.lexeme = yytext;
115 | return(HEXVAL);
116 | }
117 | {bitlist} {
118 | yylval.token_.type = type_bit_list;
119 | yylval.token_.lexeme = yytext;
120 | return(BITLIST);
121 | }
122 | {float} {
123 | yylval.token_.type = type_hex;
124 | yylval.token_.lexeme = yytext;
125 | return(DECIMAL);
126 | }
127 | \+INF {
128 | return(PLUSINF);
129 | }
130 | \-INF {
131 | return(NEGINF);
132 | }
133 | \+QNAN {
134 | return(PLUSQNAN);
135 | }
136 | \+SNAN {
137 | return(PLUSSNAN);
138 | }
139 | @ {
140 | return(GUARD);
141 | }
142 | {pred} {
143 | yylval.token_.type = type_predicate;
144 | yylval.token_.lexeme = yytext;
145 | return(PREDICATE);
146 | }
147 | {reg} {
148 | yylval.token_.type = type_register;
149 | yylval.token_.lexeme = yytext;
150 | return(REG);
151 | }
152 | {const} {
153 | yylval.token_.type = type_const_mem;
154 | yylval.token_.lexeme = yytext;
155 | return(CONST);
156 | }
157 | {memory} {
158 | yylval.token_.type = type_mem;
159 | yylval.token_.lexeme = yytext;
160 | return(MEMORY);
161 | }
162 | {specialName} {
163 | yylval.token_.type = type_special_reg;
164 | yylval.token_.lexeme = yytext;
165 | return(SPECIALNAME);
166 | }
167 | {specialID} {
168 | yylval.token_.type = type_special_reg;
169 | yylval.token_.lexeme = yytext;
170 | return(SPECIALID);
171 | }
172 | \.[\?0-9A-Za-z\_]* {
173 | yylval.token_.type = type_mod;
174 | yylval.token_.lexeme = (char *) malloc(strlen(yytext));
175 | strcpy(yylval.token_.lexeme,yytext+1);
176 | return(MOD);
177 | }
178 | CC {
179 | yylval.token_.type = type_other_operand;
180 | return(CC);
181 | }
182 | PR {
183 | yylval.token_.type = type_other_operand;
184 | return(PR);
185 | }
186 | [0-9a-fA-F]{16}\: {
187 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)+1);
188 | strcpy(yylval.token_.lexeme,yytext);
189 | return(HEXCODE);
190 | }
191 | [A-Z0-9_]* {
192 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)+1);
193 | strcpy(yylval.token_.lexeme,yytext);
194 | return(OPCODE);
195 | }
196 | ; {
197 | return(SEMICOLON);
198 | }
199 | [ \t\n\r,] {
200 | }
201 | \| {
202 | return (STRAYPIPE);
203 | }
204 | . {
205 | printf("\n\nERROR: %c is an illegal character here\n\n",yytext[0]);
206 | yyterminate();
207 | }
208 | %%
209 | #ifdef WINDOWS
210 | int yylex_destroy() {
211 | yy_delete_buffer(YY_CURRENT_BUFFER);
212 | return 0;
213 | }
214 | #endif
215 |
--------------------------------------------------------------------------------
/tools/src/decode.hpp:
--------------------------------------------------------------------------------
1 | #ifndef DECODE_PARSE_HPP
2 | #define DECODE_PARSE_HPP
3 |
4 | #include "decode_common.hpp"
5 |
6 | /**
7 | * @file decode.hpp
8 | * Defines some functions, structs, and constants used to analyze encodings and generate assemblers.
9 | */
10 |
11 | //Various modifier types:
12 | #define NUM_MOD_TYPES 12
13 | #define MOD_FLAG 0
14 | #define MOD_TYPE 1
15 | #define MOD_PROPOSITION 2
16 | #define MOD_COMPARE 3
17 | #define MOD_ROUND 4
18 | #define MOD_SHIFTDIRECTION 5
19 | #define MOD_INVALIDCHKMODE 6
20 | #define MOD_IADD3 7
21 | #define MOD_MUFU 8
22 | #define MOD_XMAD 9
23 | #define MOD_IMNMX 10
24 | #define MOD_FTZ 11
25 |
26 | /**
27 | * Struct representing a modifier's encoding info.
28 | */
29 | typedef struct {
30 | /**
31 | * The modifier's ASCII name inside the assembly.
32 | */
33 | char * token;
34 |
35 | /**
36 | * The value of the binary instruction when this modifier was first observed.
37 | */
38 | bool vals[64];
39 |
40 | /**
41 | * Whether or not each bit is part of this modifier's encoding.
42 | * Initialized to all true, then set to false for bits confirmed not to matter.
43 | */
44 | bool matters[64];
45 |
46 | /**
47 | * Possible value of the binary instruction when this modifier was *not* present.
48 | */
49 | bool antivals[64];
50 |
51 | /**
52 | * True iff the antivals array has been initialized.
53 | */
54 | bool seenAntiVal;
55 |
56 | /**
57 | * The modifier's type, such as MOD_FLAG, MOD_TYPE, MOD_ROUND, etc.
58 | * MOD_FLAG is used for modifiers not known to be associated with a particular type.
59 | */
60 | int type;
61 |
62 | /**
63 | * The relative order of this modifier, with respect to its type.
64 | * For example, the second MOD_ROUND type modifier has count=1 (the default value is count=0).
65 | */
66 | int count;//0 for generic or 1st non-generic of its own type, 1 for second non-generic of its own type
67 |
68 | /**
69 | * Whether we want to optimize number of lines for this modifier in generated assembler.
70 | * If true, then this is combined with matching modifier from operations that have the same opcode.
71 | */
72 | bool combine;//false iff we should avoid combining this with same mod from other versions of same operation
73 | } operationMod;
74 |
75 | /**
76 | * Struct representing an operand's encoding info.
77 | */
78 | typedef struct {
79 | /**
80 | * The operand's type.
81 | */
82 | token_type type;
83 |
84 | /**
85 | * Possible starting locations for the encoding of the operand's first component.
86 | */
87 | bool possibleStart1[64];
88 |
89 | /**
90 | * At each location in possibleStart1, the max number of bits that seem to match.
91 | */
92 | int maxBits1[64];
93 |
94 | /**
95 | * Possible starting locations for the encoding of the operand's second component.
96 | * (I treat literal offset in memory operands as the second component.)
97 | */
98 | bool possibleStart2[64];
99 |
100 | /**
101 | * At each location in possibleStart2, the max number of bits that seem to match.
102 | */
103 | int maxBits2[64];
104 |
105 | /**
106 | * Possible starting locations for the encoding of the operand's third component.
107 | * (I treat constant memory bank in const memory operands as the third component.)
108 | */
109 | bool possibleStart3[64];
110 |
111 | /**
112 | * At each location in possibleStart3, the max number of bits that seem to match.
113 | */
114 | int maxBits3[64];
115 |
116 | /**
117 | * The number of components in the operand.
118 | * Set to 1 for most types, 2 for memory, 3 for constant memory.
119 | */
120 | int components;
121 |
122 | /**
123 | * A linked list of modifier encodings attached to this operand.
124 | */
125 | node * mods;
126 |
127 | /**
128 | * Indicates which unary functions we've seen attached to this operand.
129 | * Has 1 bit for each unary function.
130 | */
131 | operand_prop properties;
132 |
133 | /**
134 | * For the four known unary functions, which bits are used in its encoding.
135 | */
136 | bool propMatters[4][64];
137 |
138 | /**
139 | * For the four known unary functions, the instruction's binary when they were seen.
140 | */
141 | bool propVals[4][64];
142 |
143 | /**
144 | * True if this (hex literal) operand was actually written as a float/double literal in decimal.
145 | */
146 | bool decimal;
147 |
148 | /**
149 | * True if this (hex literal) operand is encoded as a relative address.
150 | */
151 | bool relative;
152 |
153 | /**
154 | * True if the arithmetic negation unary function uses twos complement.
155 | * False otherwise (e.g. if it's encoded as a single bit somewhere).
156 | */
157 | bool incNegative;//true iff negative prop is applied to hex operand instead of flipping a bit
158 |
159 | /**
160 | * True if this (hex literal) operand is encoded as its opposite.
161 | * We've seen this happen with the last operand in ISUB instructions, since ISUB is really a special case of IADD.
162 | */
163 | bool addlNegative;
164 |
165 | /**
166 | * True if the encoding for the third component is bit-shifted due to limited space in binary.
167 | */
168 | bool shiftComp3;
169 | } operationOperand;
170 |
171 | /**
172 | * Struct representing an operation's encoding.
173 | * If two instructions have the same opcode name AND the same operand types, they are considered the same operation.
174 | */
175 | typedef struct {
176 | /**
177 | * The opcode, using our own arbitrary identifiers.
178 | */
179 | int op;//uses enum id from common.hpp
180 |
181 | /**
182 | * True iff we've confirmed that this instruction can have predicate guards.
183 | * Some instructions, such as CAL, cannot.
184 | * Note that depending on the nvcc compiler version, NVIDIA's encoding will vary for cases where guards are disallowed.
185 | * (I.e. it's sometimes encoded the same as the null predicate PT, and sometimes the same as P0).
186 | */
187 | bool confirmedGuard;
188 |
189 | /**
190 | * The instruction's binary the first time that we saw this operation.
191 | */
192 | bool binid[64];
193 |
194 | /**
195 | * Which bits control the opcode for this operation.
196 | */
197 | bool binidmatters[64];
198 |
199 | /**
200 | * Array of operand encodings.
201 | */
202 | operationOperand * operands[8];
203 |
204 | /**
205 | * The number of operands for this operation.
206 | */
207 | int numOperands;
208 |
209 | /**
210 | * Linked list of modifier encodings that can be attached to the opcode.
211 | */
212 | node * mods;
213 |
214 | /**
215 | * Value of instruction's binary in cases where each particular modifier type was completely absent.
216 | */
217 | int noModBits[NUM_MOD_TYPES][64];
218 | } operation;
219 |
220 | /**
221 | * Returns the kind of mod a mod string corresponds to.
222 | * @param modstr The mod string
223 | * @return modifier's type if known, or MOD_FLAG if assumed to be generic
224 | */
225 | int getModType(const char * modstr);
226 |
227 | /**
228 | * Returns the unique integer ID we associate with the given opcode string.
229 | * @param opname The opcode's string
230 | * @return opcode's integer ID, or -1 on failure
231 | */
232 | int getOpcode(const char * opname);
233 |
234 | /**
235 | * Performs analysis of an instruction, updating the operation list.
236 | * @param inst The parsed assembly code
237 | * @param hexstring the binary code (in hexadecimal format)
238 | */
239 | void analyze(instruction * inst, char * hexstring);
240 |
241 | #endif
242 |
243 |
--------------------------------------------------------------------------------
/tools/src/elfmanip.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ELFMANIP_HPP
2 | #define ELFMANIP_HPP
3 | #include "common.hpp"
4 |
5 | /**
6 | * @file elfmanip.hpp
7 | * Defines structures and functions used for reading/writing to ELF.
8 | */
9 |
10 | /**
11 | * This structure can hold the first 0x10 bytes of the .nv_fatbin section.
12 | * After bytes, either the section ends, or another fatHeader is found.
13 | */
14 | typedef struct {
15 | unsigned int magic;
16 | unsigned int unknown;
17 | unsigned long long size;
18 | } fatHeader;
19 |
20 | /**
21 | * This structure holds a CUDA_ELF object inside an .nv.info* section.
22 | */
23 | typedef struct {
24 | char format;
25 | char attribute;
26 | char data[];
27 | } CUDA_INFO;
28 |
29 | /**
30 | * Enum for attribute IDs in CUDA_INFO sections
31 | */
32 | typedef enum {
33 | EIATTR_ERROR = 0x00,
34 | EIATTR_PAD = 0x01,
35 | EIATTR_IMAGE_SLOT = 0x2,
36 | EIATTR_JUMPTABLE_RELOCS = 0x3,
37 | EIATTR_CTAIDZ_USED = 0x4,
38 | EIATTR_MAX_THREADS = 0x5,
39 | EIATTR_IMAGE_OFFSET = 0x6,
40 | EIATTR_IMAGE_SIZE = 0x07,
41 | EIATTR_TEXTURE_NORMALIZED = 0x08,
42 | EIATTR_SAMPLER_INIT = 0x09,
43 | EIATTR_PARAM_CBANK = 0x0a,
44 | EIATTR_SMEM_PARAM_OFFSETS = 0x0b,
45 | EIATTR_CBANK_PARAM_OFFSETS = 0x0c,
46 | EIATTR_SYNC_STACK = 0x0d,
47 | EIATTR_TEXID_SAMPID_MAP = 0x0e,
48 | EIATTR_EXTERNS = 0x0f,
49 | EIATTR_REQNTID = 0x10,
50 |
51 | /**
52 | * Seems to indicate local-memory stack size.
53 | * After two byte size (equal to 0x8), has 4byte function ID, then 4byte frame size.
54 | * Function ID is based on index in symbol table.
55 | */
56 | EIATTR_FRAME_SIZE = 0x11,
57 |
58 | /**
59 | * Seems to indicate local-memory stack size.
60 | * After two byte size (equal to 0x8), has 4byte function ID, then 4byte stack size.
61 | * Function ID is based on index in symbol table.
62 | */
63 | EIATTR_MIN_STACK_SIZE = 0x12,
64 |
65 | EIATTR_SAMPLER_FORCE_UNNORMALIZED = 0x13,
66 | EIATTR_BINDLESS_IMAGE_OFFSETS = 0x14,
67 | EIATTR_BINDLESS_TEXTURE_BANK = 0x15,
68 | EIATTR_BINDLESS_SURFACE_BANK = 0x16,
69 | EIATTR_KPARAM_INFO = 0x17, //I think this describes a parameter for a kernel function
70 | EIATTR_SMEM_PARAM_SIZE = 0x18,
71 | EIATTR_CBANK_PARAM_SIZE = 0x19,
72 | EIATTR_QUERY_NUMATTRIB = 0x1a,
73 | EIATTR_MAXREG_COUNT = 0x1b,
74 | EIATTR_EXIT_INSTR_OFFSETS = 0x1c,
75 | EIATTR_S2RCTAID_INSTR_OFFSETS = 0x1d,
76 |
77 | //error "unknown attribute" for 0x1e and above as of cuobjdump 5.0
78 |
79 | EIATTR_CRS_STACK_SIZE = 0x1e,
80 | EIATTR_NEED_CNP_WRAPPER = 0x1f,
81 | EIATTR_NEED_CNP_PATCH = 0x20,
82 | EIATTR_EXPLICIT_CACHING = 0x21,
83 |
84 | //error "unknown attribute" for 0x22 and above as of cuobjdump 5.5
85 |
86 | EIATTR_ISTYPEP_USED = 0x22,
87 |
88 | //error "unknown attribute" for 0x23 and above as of cuobjdump 6.0
89 |
90 | EIATTR_MAX_STACK_SIZE = 0x23, //Another attribute that indicates local-memory stack size.
91 | EIATTR_SUQ_USED = 0x24,
92 | EIATTR_LD_CACHEMOD_INSTR_OFFSETS = 0x25,
93 |
94 | //error "unknown attribute" for 0x26 and above as of cuobjdump 6.5
95 |
96 | EIATTR_LOAD_CACHE_REQUEST = 0x26,
97 | EIATTR_ATOM_SYS_INSTR_OFFSETS = 0x27,
98 |
99 | //error "unknown attribute" for 0x28 and above as of cuobjdump 8.0
100 |
101 | EIATTR_COOP_GROUP_INSTR_OFFSETS = 0x28,
102 | EIATTR_COOP_GROUP_MASK_REGIDS = 0x29,
103 | EIATTR_SW1850030_WAR = 0x2a,
104 | EIATTR_WMMA_USED = 0x2b,
105 |
106 | //error "unknown attribute" for 0x2c and above as of cuobjdump 9.2
107 |
108 | //Constants to help with error checking:
109 | minAttribute = 0x00,
110 | maxAttribute = 0x2b
111 | } Attribute;
112 |
113 | /**
114 | * Enum for 'Format' values in CUDA_INFO
115 | */
116 | typedef enum {
117 | //error "unknown Format" for 0x00
118 |
119 | /**
120 | * I haven't seen the following value occur naturally.
121 | * Object's data is of size 0.
122 | * Speculation: NVAL stands for "no value" or "null value" or similar.
123 | */
124 | EIFMT_NVAL = 0x01,
125 |
126 | /**
127 | * I haven't seen the following value occur naturally.
128 | * Object's data is of size 1.
129 | * Speculation: BVAL stands for "byte value".
130 | */
131 | EIFMT_BVAL = 0x02,
132 |
133 | /**
134 | * I haven't seen the following value occur naturally.
135 | * Object's data is of size 2.
136 | * Speculation: HVAL stands for "half-int value" or something similar.
137 | */
138 | EIFMT_HVAL = 0x03,
139 |
140 | /**
141 | * This is the value (usually?) used in naturally compiled CUDA binaries.
142 | * Object's data starts with a two-byte value indicating number of additional bytes.
143 | * (I suspect the size handling may vary depending on the attribute type.)
144 | * Speculation: SVAL stands for "(variable-)size value", or perhaps "special value".
145 | */
146 | EIFMT_SVAL = 0x04,
147 |
148 | //error "unknown Format" for 0x05 and above as of cuobjdump 5.0 through 8.0
149 |
150 | //Constants to help with error checking:
151 | minFormat = 0x01,
152 | maxFormat = 0x04
153 | } Format;
154 |
155 | /**
156 | * Overwrites a kernel function inside an ELF.
157 | * @param file The name of the executable ELF
158 | * @param kernel Contains the name of the function and the new bytecode
159 | * @param calls A list of function offsets
160 | * @param forceFallback Iff true, fallback function is used even on proper ELF files.
161 | * @return true iff the kernel is successfully overwritten
162 | */
163 | bool overwriteKernel(char* file, kernelData * kernel, node * calls, bool forceFallback);
164 |
165 | /**
166 | * Helper for overwriteKernel.
167 | * @param oldData The address of array containing nv_fatbin data
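168 | * @param newData Reference that receives the address of the new nv_fatbin data (presumably allocated by this function; confirm in the implementation)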
169 | * @param original_size The number of bytes in oldData
170 | * @param kernel Contains the kernel function's name & the new bytecode
171 | * @param calls A list of function offsets
172 | * @param targetFatOffset Is set to the offset (relative to oldData) of header for fatbin containing target ELF.
173 | * @param oldEndELF Is set to the offset (relative to oldData) of the end of the target ELF prior to modification.
174 | * @return the size of the new nv_fatbin data, or -1 on failure
175 | */
176 | int overwriteKernelHelper(const char * oldData, char *& newData, unsigned int original_size, kernelData * kernel, node * calls, unsigned int & targetFatOffset, unsigned int & oldEndELF);
177 |
178 | /**
179 | * Returns contents of a specified kernel function.
180 | * @param file The name of the executable ELF
181 | * @param kernel The name of the kernel function
182 | * @param kernelNumber The index of the kernel function; only used if kernel is NULL
183 | * @param forceFallback Iff true, fallback function is used even on proper ELF files.
184 | * @return a kernelData pointer, or 0 on failure
185 | */
186 | kernelData* getKernelFunction(char * file, char * kernel, int kernelNumber, bool forceFallback);
187 |
188 | /**
189 | * Helper for getKernelFunction.
190 | * @param bytes The data in the nv_fatbin section
191 | * @param size The number of bytes in the nv_fatbin section
192 | * @param kernel The name of the kernel function
193 | * @param kernelNumber The index of the kernel function; only used if kernel is NULL
194 | * @return a kernelData pointer, or 0 on failure
195 | */
196 | kernelData* getKernelFunctionHelper(const char * bytes, unsigned int size, char * kernel, int kernelNumber);
197 |
198 | /**
199 | * Returns a list of kernel function names within the specified ELF.
200 | * @param file is the location of the executable ELF
201 | * @param forceFallback Iff true, fallback function is used even on proper ELF files.
202 | * @return a list of char*
203 | */
204 | node * getKernelNames(char * file, bool forceFallback);
205 |
206 | /**
207 | * Helper for getKernelNames.
208 | * Returns a list of kernel function names from within the fatbin section
209 | * @param bytes The data in the nv_fatbin section
210 | * @param size The number of bytes in nv_fatbin
211 | * @return a list of char*
212 | */
213 | node * getKernelNamesHelper(const char * bytes, int size);
214 |
215 | #endif
216 |
217 |
--------------------------------------------------------------------------------
/tools/src/asm2bin.l:
--------------------------------------------------------------------------------
1 | %{
2 | #include
3 | #include "common.hpp"
4 | #include "asm2bin.tab.hpp"
5 | %}
6 | %option nounput
7 | %option noyywrap
8 | extern YYSTYPE yylval;
9 | extern int isatty();
10 |
11 | hex [-~]?(0x[0-9a-fA-F]+)|(\|0x[0-9a-fA-F]+\|)
12 | bitlist \{([0-9]*,)*[0-9]*\}
13 | float \-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)?
14 | pred !?(P[0-6])|!?[pP][tT]
15 | reg -?[-~]?\|?(RZ|R[0-9]+)\|?
16 | specialName SR_[\_\.A-Za-z0-9]+
17 | specialID SR[0-9]+
18 | const ([-~]\ *)?\|?c\|?\ *\[.*\]\ *\[.*\]\|?
19 | memory ([-~]\ *)?\|?\[[^\[\]]*\]\|?
20 | hexBinaryInst [0-9a-f]{16}\ \/\/[\ ]
21 | decimal_line_number [\_a-zA-Z0-9]+\ \([0-9]+\)
22 |
23 | %%
24 | {hexBinaryInst} {}
25 | "//Shared memory usage: "-?0[xX][a-fA-F0-9]+[^\n]* {
26 | yylval.token_.lexeme = yytext + 23;
27 | return(METADATA_SHAREDMEM);
28 | }
29 | "//Shared memory usage: "-?[0-9]+[^\n]* {
30 | yylval.token_.lexeme = yytext + 23;
31 | return(METADATA_SHAREDMEM);
32 | }
33 | "//Frame Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
34 | yylval.token_.lexeme = yytext + 14;
35 | return(METADATA_FRAME_SIZE);
36 | }
37 | "//Frame Size: "-?[0-9]+[^\n]* {
38 | yylval.token_.lexeme = yytext + 14;
39 | return(METADATA_FRAME_SIZE);
40 | }
41 | "//Min Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
42 | yylval.token_.lexeme = yytext + 18;
43 | return(METADATA_MIN_STACK_SIZE);
44 | }
45 | "//Min Stack Size: "-?[0-9]+[^\n]* {
46 | yylval.token_.lexeme = yytext + 18;
47 | return(METADATA_MIN_STACK_SIZE);
48 | }
49 | "//Max Stack Size: "-?0[xX][a-fA-F0-9]+[^\n]* {
50 | yylval.token_.lexeme = yytext + 18;
51 | return(METADATA_MAX_STACK_SIZE);
52 | }
53 | "//Max Stack Size: "-?[0-9]+[^\n]* {
54 | yylval.token_.lexeme = yytext + 18;
55 | return(METADATA_MAX_STACK_SIZE);
56 | }
57 | "//Name: "[0-9a-zA-Z_]+[^\n]* {
58 |     //Kernel-name metadata: the lexeme is a heap copy of everything after the
59 |     //8-character "//Name: " prefix, cut off at the first line break.
60 |     const char * rest = yytext + 8;
61 |     char * name = (char*) malloc(strlen(rest) + 1);
62 |     strcpy(name, rest);
63 |     int cut;
64 |     for(cut = 0; name[cut] != 0 && name[cut] != '\n' && name[cut] != '\r'; cut++) {
65 |         //scanning only
66 |     }
67 |     name[cut] = 0;
68 |     yylval.token_.lexeme = name;
69 |     return(METADATA_KERNELNAME); }
70 | "//Arch: sm_"[0-9]+[^\n]* {
71 | yylval.token_.lexeme = yytext + 11;
72 | return(METADATA_ARCH);
73 | }
74 | "//Function count: "[0-9]+[^\n]* {
75 | yylval.token_.lexeme = yytext + 18;
76 | return(METADATA_FUNCTIONCOUNT);
77 | }
78 | "//Function: "[0-9a-zA-Z\_\$]+[^\n]* {
79 |     //Function-name metadata: the lexeme is a heap copy of everything after the
80 |     //12-character "//Function: " prefix, cut off at the first line break.
81 |     const char * rest = yytext + 12;
82 |     char * name = (char*) malloc(strlen(rest) + 1);
83 |     strcpy(name, rest);
84 |     int cut;
85 |     for(cut = 0; name[cut] != 0 && name[cut] != '\n' && name[cut] != '\r'; cut++) {
86 |         //scanning only
87 |     }
88 |     name[cut] = 0;
89 |     yylval.token_.lexeme = name;
90 |     return(METADATA_FUNCTIONNAME); }
91 | "//cuobjdump: "[0-9]+[^\n]* {
92 | yylval.token_.lexeme = yytext + 13;
93 | return(METADATA_CUOBJDUMP);
94 | }
95 | "//SCHI: 0x"[0-9a-fA-F]+[^\n]*|"SCHI: 0x"[0-9a-fA-F]+[^\n]* {
96 |     //Inline SCHI value; the lexeme is a heap copy of the text after the "0x".
97 |     //The alternatives differ in prefix length: "//SCHI: 0x" is 10 chars, "SCHI: 0x" is 8.
98 |     const size_t prefixLen = (yytext[0] == '/') ? 10 : 8;
99 |     yylval.token_.lexeme = (char*) malloc(strlen(yytext) - prefixLen + 1);
100 |     strcpy(yylval.token_.lexeme, yytext + prefixLen);
101 |     for(int x = 0; ; x++) { //cut at the first line break ([^\n]* can still match '\r')
102 |         if(yylval.token_.lexeme[x] == 0 || yylval.token_.lexeme[x] == '\n' || yylval.token_.lexeme[x] == '\r') {
103 |             yylval.token_.lexeme[x] = 0;
104 |             break;
105 |         }
106 |     }
107 |     return(INLINE_SCHI_VALUE); }
108 | "SCHI50:" {
109 | return(SCHI50);
110 | }
111 | "//"[^\n]* {
112 | //return(COMMENT);
113 | }
114 | label[a-zA-Z0-9\_]*\: {
115 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)-5);
116 | yytext[strlen(yytext)-1] = 0;
117 | strcpy(yylval.token_.lexeme, yytext+5);
118 | return(LABEL);
119 | }
120 | label[a-zA-Z0-9\_]* {
121 | yylval.token_.type = type_texture_operand;
122 | yylval.token_.lexeme = (char *) malloc(strlen(yytext)-4);
123 | strcpy(yylval.token_.lexeme, yytext+5);
124 | return(LABEL_OP);
125 | }
126 | 1D|ARRAY_1D|RECT|2D|ARRAY_2D|3D|CUBE|ARRAY_CUBE {
127 | yylval.token_.type = type_texture_operand;
128 | yylval.token_.lexeme = yytext;
129 | return(TEXOP);
130 | }
131 | R|G|B|A|RG|RA|GA|BA|RGB|RGA|RBA|GBA|RGBA|INVALID5|INVALID6|INVALID7 {
132 | yylval.token_.type = type_channel;
133 | yylval.token_.lexeme = yytext;
134 | return(CHANNEL);
135 | }
136 | SB[0-9] {
137 | yylval.token_.type = type_sb;
138 | yylval.token_.lexeme = yytext + 2;
139 | return(SB_OPERAND);
140 | }
141 | {decimal_line_number} {
142 | yylval.token_.lexeme = strstr(yytext, "(") + 1;
143 | return(LINENUMBER);
144 | }
145 | {hex} {
146 | yylval.token_.type = type_hex;
147 | yylval.token_.lexeme = yytext;
148 | return(HEXVAL);
149 | }
150 | {bitlist} {
151 | yylval.token_.type = type_bit_list;
152 | yylval.token_.lexeme = yytext;
153 | return(BITLIST);
154 | }
155 | {float} {
156 | yylval.token_.type = type_hex;
157 | yylval.token_.lexeme = yytext;
158 | return(DECIMAL);
159 | }
160 | \+INF {
161 | return(PLUSINF);
162 | }
163 | \-INF {
164 | return(NEGINF);
165 | }
166 | \+SNAN {
167 | return(PLUSSNAN);
168 | }
169 | \+QNAN {
170 | return(PLUSQNAN);
171 | }
172 | @ {
173 | return(GUARD);
174 | }
175 | {pred} {
176 | yylval.token_.type = type_predicate;
177 | yylval.token_.lexeme = yytext;
178 | return(PREDICATE);
179 | }
180 | {reg} {
181 | yylval.token_.type = type_register;
182 | yylval.token_.lexeme = yytext;
183 | return(REG);
184 | }
185 | {const} {
186 | yylval.token_.type = type_const_mem;
187 | yylval.token_.lexeme = yytext;
188 | return(CONST);
189 | }
190 | {memory} {
191 | yylval.token_.type = type_mem;
192 | yylval.token_.lexeme = yytext;
193 | return(MEMORY);
194 | }
195 | {specialName} {
196 | yylval.token_.type = type_special_reg;
197 | yylval.token_.lexeme = yytext;
198 | return(SPECIALNAME);
199 | }
200 | {specialID} {
201 | yylval.token_.type = type_special_reg;
202 | yylval.token_.lexeme = yytext;
203 | return(SPECIALID);
204 | }
205 | \.[\?0-9A-Za-z\_]* {
206 | yylval.token_.type = type_mod;
207 | yylval.token_.lexeme = (char *) malloc(strlen(yytext));
208 | strcpy(yylval.token_.lexeme,yytext+1);
209 | return(MOD);
210 | }
211 | CC {
212 | yylval.token_.type = type_other_operand;
213 | return(CC);
214 | }
215 | PR {
216 | yylval.token_.type = type_other_operand;
217 | return(PR);
218 | }
219 | MOV {
220 | return(MOV);
221 | }
222 | MOV32I {
223 | return(MOV32I);
224 | }
225 | LD {
226 | return(LD);
227 | }
228 | LDU {
229 | return(LDU);
230 | }
231 | LDL {
232 | return(LDL);
233 | }
234 | LDS {
235 | return(LDS);
236 | }
237 | LDC {
238 | return(LDC);
239 | }
240 | ST {
241 | return(ST);
242 | }
243 | STL {
244 | return(STL);
245 | }
246 | STS {
247 | return(STS);
248 | }
249 | LDLK {
250 | return(LDLK);
251 | }
252 | LDSLK {
253 | return(LDSLK);
254 | }
255 | STUL {
256 | return(STUL);
257 | }
258 | STSUL {
259 | return(STSUL);
260 | }
261 | FADD {
262 | return(FADD);
263 | }
264 | FADD32I {
265 | return(FADD32I);
266 | }
267 | FMUL {
268 | return(FMUL);
269 | }
270 | FMUL32I {
271 | return(FMUL32I);
272 | }
273 | FFMA {
274 | return(FFMA);
275 | }
276 | FSET {
277 | return(FSET);
278 | }
279 | FSETP {
280 | return(FSETP);
281 | }
282 | DSETP {
283 | return(DSETP);
284 | }
285 | FCMP {
286 | return(FCMP);
287 | }
288 | MUFU {
289 | return(MUFU);
290 | }
291 | DADD {
292 | return(DADD);
293 | }
294 | DMUL {
295 | return(DMUL);
296 | }
297 | DFMA {
298 | return(DFMA);
299 | }
300 | IADD {
301 | return(IADD);
302 | }
303 | ISUB {
304 | return(ISUB);
305 | }
306 | IADD32I {
307 | return(IADD32I);
308 | }
309 | IMNMX {
310 | return(IMNMX);
311 | }
312 | IMUL {
313 | return(IMUL);
314 | }
315 | IMUL32I {
316 | return(IMUL32I);
317 | }
318 | IMAD {
319 | return(IMAD);
320 | }
321 | ISCADD {
322 | return(ISCADD);
323 | }
324 | ISET {
325 | return(ISET);
326 | }
327 | ISETP {
328 | return(ISETP);
329 | }
330 | ICMP {
331 | return(ICMP);
332 | }
333 | I2F {
334 | return(I2F);
335 | }
336 | I2I {
337 | return(I2I);
338 | }
F2I		{ /* Each opcode mnemonic below is matched literally and handed to the parser as the token of the same name. */ return F2I; }
F2F		{ return F2F; }
LOP		{ return LOP; }
LOP32I		{ return LOP32I; }
SHL		{ return SHL; }
SHR		{ return SHR; }
BFE		{ return BFE; }
BFI		{ return BFI; }
SEL		{ return SEL; }
SCHI		{ return SCHI; }
SSY		{ return SSY; }
BRA		{ return BRA; }
BRX		{ return BRX; }
PCNT		{ return PCNT; }
CONT		{ return CONT; }
PBK		{ return PBK; }
BRK		{ return BRK; }
CAL		{ return CAL; }
RET		{ return RET; }
EXIT		{ return EXIT; }
NOP		{ return NOP; }
BAR		{ return BAR; }
BPT		{ return BPT; }
B2R		{ return B2R; }
S2R		{ return S2R; }
PSETP		{ return PSETP; }
PSET		{ return PSET; }
FLO		{ return FLO; }
P2R		{ return P2R; }
R2P		{ return R2P; }
TEX		{ return TEX; }
TEXDEPBAR	{ return TEXDEPBAR; }
RRO		{ return RRO; }
PRMT		{ return PRMT; }
VADD		{ return VADD; }
DMNMX		{ return DMNMX; }
FMNMX		{ return FMNMX; }
RED		{ return RED; }
VOTE		{ return VOTE; }
POPC		{ return POPC; }
MEMBAR		{ return MEMBAR; }
STSCUL		{ return STSCUL; }
LEPC		{ return LEPC; }
CSETP		{ return CSETP; }
ISCADD32I	{ return ISCADD32I; }
VMNMX		{ return VMNMX; }
TLD		{ return TLD; }
SHF		{ return SHF; }
FCHK		{ return FCHK; }
JCAL		{ return JCAL; }
SHFL		{ return SHFL; }
LDG		{ return LDG; }
LD_LDU		{ return LD_LDU; }
ATOM		{ return ATOM; }
CCTL		{ return CCTL; }
XMAD		{ return XMAD; }
SYNC		{ return SYNC; }
STG		{ return STG; }
IADD3		{ return IADD3; }
VABSDIFF	{ return VABSDIFF; }
DEPBAR		{ return DEPBAR; }
LOP3		{ return LOP3; }
TLDS		{ return TLDS; }
TEXS		{ return TEXS; }
LEA		{ return LEA; }
DSET		{ return DSET; }
PHI		{ return PHI; }
BINCODE		{ return BINCODE; }
543 |
[a-fA-F0-9]{16}\:	{ /* sixteen hex digits followed by a colon: treated as garbage and dropped */ }

[ \t\n\r;,]		{ /* whitespace and separator characters produce no token */ }

[A-Z0-9]*		{
				/* an upper-case/digit word that none of the earlier rules claimed: report it and stop scanning */
				printf("\n\nERROR: %s is an unrecognized string here\n\n", yytext);
				yyterminate();
			}

.			{
				/* any other single character is illegal: report it and stop scanning */
				printf("\n\nERROR: %c is an illegal character here\n\n", yytext[0]);
				yyterminate();
			}
558 | %%
#ifdef WINDOWS
/*
 * Windows-only definition of yylex_destroy().
 * Releases the scanner's current input buffer and always reports success (0).
 */
int yylex_destroy()
{
	YY_BUFFER_STATE active = YY_CURRENT_BUFFER;
	yy_delete_buffer(active);
	return 0;
}
#endif
565 |
--------------------------------------------------------------------------------
/tools/src/elf2asm.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "common.hpp"
8 | #include "elfmanip.hpp"
9 | using namespace std;
10 |
11 | int main(int argc, char ** argv) {
12 | int argElf = 0;
13 | int argKernel = 0;
14 | char * arch = 0;
15 | string sys;
16 | FILE* sass;
17 | char line[512+1];
18 |
19 | bool flag_includeHex = false;
20 | bool flag_fallbackelf = false;
21 |
22 | //Compatbility check:
23 | #ifndef HAS_CUOBJDUMP
24 | cerr << "FATAL ERROR e2a~24: elf2asm requires cuobjdump in PATH.\n";
25 | cerr << " Please install the CUDA SDK/Toolkit, fix your PATH variable, and then recompile.\n";
26 | exit(0);
27 | #endif
28 |
29 | //Read arguments:
30 | for(int x = 1; x < argc; x++) {
31 | if(!strcmp(argv[x],"-h")) {
32 | argElf = 0;
33 | argKernel = 0;
34 | break;
35 | } else if(!strcasecmp(argv[x], "-sm") && x + 1 != argc) {
36 | arch = argv[++x];
37 | } else if(!strcasecmp(argv[x], "-hex")) {
38 | flag_includeHex = true;
39 | } else if(!strcasecmp(argv[x], "-fallbackelf")) {
40 | flag_fallbackelf = true;
41 | } else if(!argElf) {
42 | argElf = x;
43 | } else if(!argKernel) {
44 | argKernel = x;
45 | } else {
46 | argElf = 0;
47 | argKernel = 0;
48 | break;
49 | }
50 | }
51 | bool flag_list = (argElf && !argKernel);
52 |
53 | if(!argElf || (!argKernel && !flag_list)) {
54 | cout << "Usage:\n";
55 | cout << argv[0] << " [-sm ] \n";
56 | cout << "\tLists the kernel functions in the specified file.\n";
57 | cout << argv[0] << " \n";
58 | cout << "\tTransform the specified kernel function to assembly.\n";
59 | cout << argv[0] << " -hex \n";
60 | cout << "\tTransform the specified kernel function to assembly & binary.\n";
61 | cout << argv[0] << " -h\n";
62 | cout << "\tDisplays this usage information.\n";
63 | return 0;
64 | }
65 |
66 | if(flag_list) {
67 | node * names = getKernelNames(argv[argElf], flag_fallbackelf);
68 | if(!names) {
69 | cerr << "FATAL ERROR e2a ~51; unable to find kernel functions in specified file.\n";
70 | }
71 | node * iter = names;
72 | int index = 0;
73 | while(iter) {
74 | if(!arch) {
75 | cout << (index+1) << ": " << (char*) iter->value << "\n";
76 | }
77 | else if(!strncmp(((char*)iter->value)+3, arch, strlen(arch))) {
78 | cout << (index+1) << ": " << (((char*)iter->value)+6) << "\n";
79 | }
80 | index++;
81 | iter = iter->next;
82 | }
83 | cleanNodesFully(names, &free);
84 | return 0;
85 | }
86 |
87 | //Get kernel data from ELF
88 | kernelData * kern = getKernelFunction(argv[argElf], 0, atoi(argv[argKernel]) - 1, flag_fallbackelf);
89 | if(!kern) {
90 | cerr << "Fatal error e2a~46; could not retrieve specified kernel function.\n";
91 | return 1;
92 | }
93 |
94 | //Prepare target strings used to find start of assembly code
95 | string strTarget = "Function : ";
96 | strTarget += kern->name;
97 | const char * target = strTarget.c_str();
98 |
99 | ostringstream oss;
100 | oss << "sm_" << kern->arch;
101 | string strTargetArch = oss.str();
102 | const char * targetArch = strTargetArch.c_str();
103 |
104 | //Get cuobjdump version:
105 | char * vers;
106 | #ifdef WINDOWS
107 | sys = "cuobjdump --version";
108 | sass = popen(sys.c_str(), "r");
109 | while(fgets(line, 512, sass)) {
110 | vers = strstr(line, "release ");
111 | if(vers) {
112 | vers = vers + strlen("release ");
113 | break;
114 | }
115 | }
116 | #else
117 | sys = "cuobjdump --version | grep -E \"release [0-9]+\\.[0-9]\" | sed -r \"s/.*release ([0-9]+)\\.([0-9]).*/\\1\\2/\"";
118 | sass = popen(sys.c_str(), "r");
119 | fgets(line, 512, sass);
120 | vers = line;
121 | #endif
122 | char *pos;
123 | if ((pos=strchr(vers, '\n')) != NULL)
124 | *pos = '\0';
125 | if(!vers || vers[0] < '0' || vers[0] > '9') {
126 | cerr << "FATAL ERROR e2a~151: Unable to get cuobjdump version.\n";
127 | exit(0);
128 | }
129 | pclose(sass);
130 |
131 | //Prepare cuobjdump output for reading:
132 | sys = "cuobjdump -sass -fun ";
133 | sys += kern->name;
134 | sys += " ";
135 | sys += argv[argElf];
136 | sass = popen(sys.c_str(), "r");
137 |
138 | cout << "//Name: " << (kern->name) << "\n";
139 | cout << "//Arch: sm_" << kern->arch << "\n";
140 | cout << "//cuobjdump: " << vers << "\n";
141 | cout << "//Shared memory usage: " << kern->sharedMemory << "\n";
142 | cout << "//Min Stack Size: " << kern->min_stack_size << "\n";
143 | cout << "//Max Stack Size: " << kern->max_stack_size << "\n";
144 | cout << "//Frame Size: " << kern->frame_size << "\n";
145 | cout << "//Function count: " << listSize(kern->functionNames) << "\n";
146 | node * iter = kern->functionNames;
147 | while(iter) {
148 | cout << "//Function: " << (char*) iter->value << "\n";
149 | iter = iter->next;
150 | }
151 |
152 | int seenArch = 9999999;
153 |
154 | //Find function code inside cuobjdump output:
155 | while(fgets(line, 512, sass)) {
156 | if(strstr(line, targetArch)) {
157 | seenArch = 0;
158 | } else {
159 | seenArch++;
160 | }
161 |
162 | if(strstr(line, target) && seenArch < 10) {
163 | int nextaddr = 0;//address of next instruction
164 |
165 | //Write function's assembly to stdout:
166 | while(fgets(line, 512, sass)) {
167 | if(!nextaddr) {
168 | char * address = strstr(line,"/*");
169 | if(!address) {
170 | continue;
171 | }
172 | }
173 | char* address = strstr(line,"/*");
174 | if(!address) {//done with function
175 | break;//note: skips any SCHIs at end, but there shouldn't ever be any
176 | } else if(!strstr(address+1,"/*")) {
177 | if(kern->arch >= 50 && nextaddr % (8 * 4)) {
178 | //we use 'continue' here to stop CUDA 7.x's reordering of SCHI from tricking us about addresses
179 | continue;
180 | }
181 |
182 | unsigned long long hexval;
183 | if(kern->bytes) {
184 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]);
185 | hexval = hexval << 32;
186 | hexval += *((unsigned int*)&kern->bytes[nextaddr]);
187 | if(flag_includeHex) {
188 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": ";
189 | }
190 | } else {
191 | cerr << "SANITY CHECK ERROR e2a~187\n";
192 | cerr << "\tThere is no assembly associated with the kernel function.\n";
193 | exit(0);
194 | }
195 |
196 | if(kern->arch == 30) {
197 | hexval = hexval >> 4;
198 | } else if(kern->arch == 32 || kern->arch == 35 || kern->arch == 37) {
199 | hexval = hexval >> 2;
200 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) {
201 | //no opcode; SCHI identified by address, and (almost?) all bits are used
202 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) {
203 | //no opcode; SCHI identified by address, and (almost?) all bits are used
204 | } else {
205 | cerr << "ERROR: Don't know how to handle scheduling instructions in this architecture.\n";
206 | }
207 | cout << "SCHI";
208 | if(kern->arch < 50) {
209 | for(int x = 0; x < 7; x++) {
210 | if(x) cout << ",";
211 | cout << " 0x";
212 | cout << std::hex << (hexval & 0xff);
213 | hexval = hexval >> 8;
214 | }
215 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) {
216 | for(int x = 0; x < 3; x++) {
217 | if(x) cout << ",";
218 | cout << " 0x";
219 | cout << std::hex << (hexval & 0x1fffff);
220 | hexval = hexval >> 21;
221 | }
222 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) {
223 | for(int x = 0; x < 3; x++) {
224 | if(x) cout << ",";
225 | cout << " 0x";
226 | cout << std::hex << (hexval & 0x1fffff);
227 | hexval = hexval >> 21;
228 | }
229 | } else {
230 | //unrecognized arch, but already printed an error
231 | for(int x = 0; x < 3; x++) {
232 | if(x) cout << ",";
233 | cout << " 0x";
234 | cout << std::hex << (hexval & 0x1fffff);
235 | hexval = hexval >> 21;
236 | }
237 | }
238 | cout << std::dec << ";\n";
239 | nextaddr += 8;
240 | continue;
241 | } else if(kern->bytes && nextaddr >= kern->numBytes) {
242 | cerr << "FATAL ERROR e2a~77: out-of-bounds assembly code (larger than expected size)\n";
243 | return 1;
244 | }
245 |
246 | address += 2;
247 | int currentaddr = strtoul(address, 0, 16);
248 |
249 | //Deal with SCHI being skipped in cuobjdump (cuobjdump version 5.0) or reordered (cuobjdump version 7.x):
250 | while(currentaddr > nextaddr) {
251 | unsigned long long hexval;
252 | if(kern->bytes) {
253 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]);
254 | hexval = hexval << 32;
255 | hexval += *((unsigned int*)&kern->bytes[nextaddr]);
256 | if(flag_includeHex) {
257 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": ";
258 | }
259 | } else {
260 | cerr << "SANITY CHECK ERROR e2a~248\n";
261 | cerr << "\tNo assembly code is associated with this kernel function.\n";
262 | exit(0);
263 | }
264 |
265 | if(kern->arch == 30) {
266 | hexval = hexval >> 4;
267 | } else if(kern->arch == 32 || kern->arch == 35 || kern->arch == 37) {
268 | hexval = hexval >> 2;
269 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) {
270 | //no opcode; SCHI identified by address, and (almost?) all bits are used
271 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) {
272 | //no opcode; SCHI identified by address, and (almost?) all bits are used
273 | } else {
274 | cerr << "ERROR: Don't know how to handle scheduling meta-instructions on this architecture.\n";
275 | }
276 | cout << "SCHI";
277 | if(kern->arch < 50) {
278 | for(int x = 0; x < 7; x++) {
279 | if(x) cout << ",";
280 | cout << " 0x";
281 | cout << std::hex << (hexval & 0xff);
282 | hexval = hexval >> 8;
283 | }
284 | } else if(kern->arch == 50 || kern->arch == 52 || kern->arch == 53) {
285 | for(int x = 0; x < 3; x++) {
286 | if(x) cout << ",";
287 | cout << " 0x";
288 | cout << std::hex << (hexval & 0x1fffff);
289 | hexval = hexval >> 21;
290 | }
291 | } else if(kern->arch == 60 || kern->arch == 61 || kern->arch == 62) {
292 | for(int x = 0; x < 3; x++) {
293 | if(x) cout << ",";
294 | cout << " 0x";
295 | cout << std::hex << (hexval & 0x1fffff);
296 | hexval = hexval >> 21;
297 | }
298 | } else {
299 | //unrecognized arch, but already printed an error
300 | for(int x = 0; x < 3; x++) {
301 | if(x) cout << ",";
302 | cout << " 0x";
303 | cout << std::hex << (hexval & 0x1fffff);
304 | hexval = hexval >> 21;
305 | }
306 | }
307 | cout << std::dec << ";\n";
308 |
309 | nextaddr += 8;
310 | }
311 | if(currentaddr < nextaddr) {
312 | continue;
313 | }
314 |
315 | if(flag_includeHex) {
316 | unsigned long long hexval;
317 | if(kern->bytes) {
318 | hexval = *((unsigned int*)&kern->bytes[nextaddr+4]);
319 | hexval = hexval << 32;
320 | hexval += *((unsigned int*)&kern->bytes[nextaddr]);
321 | if(flag_includeHex) {
322 | cout << std::hex << setfill('0') << setw(16) << hexval << std::dec << ": ";
323 | }
324 | } else {
325 | cerr << "SANITY CHECK ERROR e2a~295: no assembly code associated with kernel function\n";
326 | exit(0);
327 | }
328 | }
329 |
330 | //Write assembly code for instruction:
331 | int x = address - line;
332 | while(line[x] != '*' || line[x+1] != '/') {
333 | x++;
334 | }
335 | x += 2;
336 | while(line[x] == ' ' || line[x] == '\t' || line[x] == '{' || line[x] == '}') {
337 | x++;
338 | }
339 | if(line[x] == '/') {
340 | while(line[x] != '*' || line[x+1] != '/') {
341 | x++;
342 | }
343 | x += 2;
344 | while(line[x] == ' ' || line[x] == '\t' || line[x] == '{' || line[x] == '}') {
345 | x++;
346 | }
347 | }
348 | while(line[x] != ';') {
349 | cout << line[x++];
350 | }
351 | cout << ";\n";
352 |
353 | //Increment instruction address:
354 | int increment = 0;
355 | char * bytes = strstr(address, "/*");
356 | if(!bytes) {
357 | cerr << "FATAL ERROR e2a~206: invalid instruction address\n";
358 | return 1;
359 | }
360 | bytes += 2;//skip "/*"
361 | while((*bytes) != '*' && (*bytes) != '/') {
362 | if((*bytes) == ' ') {
363 | } else if((*bytes) == 'x') {
364 | increment--;
365 | } else {
366 | increment++;
367 | }
368 | bytes++;
369 | }
370 | if(increment % 2) {
371 | cerr << "FATAL ERROR e2a~215: fractional instruction size.\n";
372 | }
373 | increment = increment/2;
374 | nextaddr += increment;
375 | }
376 |
377 | //Get out of loop:
378 | break;
379 | }
380 | }
381 |
382 | //Cleanup & return:
383 | pclose(sass);
384 | free(kern->name);
385 | if(kern->bytes) {
386 | free(kern->bytes);
387 | }
388 | cleanNodesFully(kern->functionNames, &free);
389 | free(kern);
390 | return 0;
391 | }
392 |
--------------------------------------------------------------------------------
/tools/src/elf.hpp:
--------------------------------------------------------------------------------
1 | #ifndef ELF_HPP
2 | #define ELF_HPP
3 |
4 | #include
5 |
6 | /**
7 | * @file elf.hpp
8 | * Defines structures and functions used for analyzing/modifying ELFs.
9 | * This should (hopefully) replace the libelf library in cubin-analyze.
10 | */
11 |
12 | //Section types:
13 | #define SHT_NULL 0
14 | #define SHT_PROGBITS 1
15 | #define SHT_SYMTAB 2
16 | #define SHT_STRTAB 3
17 | #define SHT_RELA 4
18 | #define SHT_HASH 5
19 | #define SHT_DYNAMIC 6
20 | #define SHT_NOTE 7
21 | #define SHT_NOBITS 8
22 | #define SHT_REL 9
23 | #define SHT_SHLIB 10
24 | #define SHT_DYNSYM 11
25 | #define SHT_NUM 12
26 | #define SHT_LOPROC 0x70000000
27 | #define SHT_HIPROC 0x7fffffff
28 | #define SHT_LOUSER 0x80000000
29 | #define SHT_HIUSER 0xffffffff
30 |
31 | //Program Header types:
32 | #define PT_NULL 0
33 | #define PT_LOAD 1
34 | #define PT_DYNAMIC 2
35 | #define PT_INTERP 3
36 | #define PT_NOTE 4
37 | #define PT_SHLIB 5
38 | #define PT_PHDR 6
39 |
/**
 * Identification block that opens both the 32-bit and the 64-bit ELF header.
 * Occupies 16 bytes: a 32-bit magic word, five single-byte fields and 7 padding bytes.
 */
struct ELF_Identifier {
	uint32_t ei_mag;
	uint8_t  ei_class;
	uint8_t  ei_data;
	uint8_t  ei_version;
	uint8_t  ei_osabi;
	uint8_t  ei_abiversion;
	uint8_t  ei_pad[7];
};

/**
 * Header of a 64-bit ELF.
 * Same fields as #ELF32_Header, except that e_entry, e_phoff and e_shoff are 64 bits wide.
 */
struct ELF64_Header {
	ELF_Identifier e_ident;
	uint16_t e_type;
	uint16_t e_machine;
	uint32_t e_version;
	uint64_t e_entry;
	uint64_t e_phoff;
	uint64_t e_shoff;
	uint32_t e_flags;
	uint16_t e_ehsize;
	uint16_t e_phentsize;
	uint16_t e_phnum;
	uint16_t e_shentsize;
	uint16_t e_shnum;
	uint16_t e_shstrndx;
};

/**
 * Header of a 32-bit ELF.
 * Same fields as #ELF64_Header, except that e_entry, e_phoff and e_shoff are 32 bits wide.
 */
struct ELF32_Header {
	ELF_Identifier e_ident;
	uint16_t e_type;
	uint16_t e_machine;
	uint32_t e_version;
	uint32_t e_entry;
	uint32_t e_phoff;
	uint32_t e_shoff;
	uint32_t e_flags;
	uint16_t e_ehsize;
	uint16_t e_phentsize;
	uint16_t e_phnum;
	uint16_t e_shentsize;
	uint16_t e_shnum;
	uint16_t e_shstrndx;
};
92 |
/**
 * Section header as laid out in a 64-bit ELF.
 */
struct ELF64_SHeader {
	uint32_t sh_name;
	uint32_t sh_type;
	uint64_t sh_flags;
	uint64_t sh_addr;
	uint64_t sh_offset;
	uint64_t sh_size;
	uint32_t sh_link;
	uint32_t sh_info;
	uint64_t sh_addralign;
	uint64_t sh_entsize;
};

/**
 * Section header as laid out in a 32-bit ELF (every field is 32 bits wide).
 */
struct ELF32_SHeader {
	uint32_t sh_name;
	uint32_t sh_type;
	uint32_t sh_flags;
	uint32_t sh_addr;
	uint32_t sh_offset;
	uint32_t sh_size;
	uint32_t sh_link;
	uint32_t sh_info;
	uint32_t sh_addralign;
	uint32_t sh_entsize;
};

/**
 * Section header independent of the ELF's bitness.
 * Uses the same field widths as #ELF64_SHeader.
 */
struct ELF_SHeader {
	uint32_t sh_name;
	uint32_t sh_type;
	uint64_t sh_flags;
	uint64_t sh_addr;
	uint64_t sh_offset;
	uint64_t sh_size;
	uint32_t sh_link;
	uint32_t sh_info;
	uint64_t sh_addralign;
	uint64_t sh_entsize;
};

/**
 * One section of a 64-bit ELF: its header plus a pointer to its data.
 */
struct ELF64_Section {
	ELF64_SHeader sheader;
	char * data;
};

/**
 * One section of a 32-bit ELF: its header plus a pointer to its data.
 */
struct ELF32_Section {
	ELF32_SHeader sheader;
	char * data;
};

/**
 * Handle to one section of an ELF of either bitness.
 * Both pointers share the same storage.
 */
struct ELF_Section {
	union {
		ELF64_Section * sec64;
		ELF32_Section * sec32;
	};
};
166 |
167 | /**
168 | * Struct containing a program header for one of a 64-bit ELF's segments.
169 | */
170 | typedef struct ELF64_PHeader {
171 | uint32_t p_type;
172 | uint32_t p_flags;
173 | uint64_t p_offset;
174 | uint64_t p_vaddr;
175 | uint64_t p_paddr;
176 | uint64_t p_filesz; //Reminder: unlike p_memsz, does not include NOBITS sections
177 | uint64_t p_memsz;
178 | uint64_t p_align;
179 |
180 | /**
181 | * List of sections contained by this memory region.
182 | * Set to 0 for type PHDR, which only contains pheaders.
183 | */
184 | ELF64_Section ** sections;
185 |
186 | /**
187 | * Size of the sections array, or 0 for PHDR segment.
188 | */
189 | int numSections;
190 | } ELF64_PHeader;
191 |
192 | /**
193 | * Struct containing a program header for one of a 32-bit ELF's segments.
194 | */
195 | typedef struct ELF32_PHeader {
196 | uint32_t p_type;
197 | uint32_t p_flags;
198 | uint32_t p_offset;
199 | uint32_t p_vaddr;
200 | uint32_t p_paddr;
201 | uint32_t p_filesz; //Reminder: unlike p_memsz, does not include NOBITS sections
202 | uint32_t p_memsz;
203 | uint32_t p_align;
204 |
205 | /**
206 | * List of sections contained by this memory region.
207 | * Set to 0 for type PHDR, which only contains pheaders.
208 | */
209 | ELF32_Section ** sections;
210 |
211 | /**
212 | * Size of the sections array, or 0 for PHDR segment.
213 | */
214 | int numSections;
215 | } ELF32_PHeader;
216 |
217 | /**
218 | * Struct containing a 64-bit ELF.
219 | */
220 | typedef struct ELF64 {
221 | ELF64_Header header;
222 | ELF64_Section ** sections;
223 | ELF64_PHeader ** pheaders;
224 | } ELF64;
225 |
226 | /**
227 | * Struct containing a 32-bit ELF.
228 | */
229 | typedef struct ELF32 {
230 | ELF32_Header header;
231 | ELF32_Section ** sections;
232 | ELF32_PHeader ** pheaders;
233 | } ELF32;
234 |
235 | /**
236 | * Struct containing an ELF, either 32-bit or 64-bit.
237 | */
238 | typedef struct ELF {
239 | union {
240 | ELF64 elf64;
241 | ELF32 elf32;
242 | };
243 | bool x64;
244 | } ELF;
245 |
/**
 * Symbol table entry as laid out in a 32-bit ELF (16 bytes).
 * Note that value and size come before the info bytes here, unlike in #ELF64_Sym.
 */
struct ELF32_Sym {
	uint32_t      st_name;
	uint32_t      st_value;
	uint32_t      st_size;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t      st_shndx;
};

/**
 * Symbol table entry as laid out in a 64-bit ELF (24 bytes).
 */
struct ELF64_Sym {
	uint32_t      st_name;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t      st_shndx;
	uint64_t      st_value;
	uint64_t      st_size;
};

/**
 * Symbol table entry independent of the ELF's bitness.
 * Uses the same layout as #ELF64_Sym.
 */
struct ELF_Sym {
	uint32_t      st_name;
	unsigned char st_info;
	unsigned char st_other;
	uint16_t      st_shndx;
	uint64_t      st_value;
	uint64_t      st_size;
};
281 |
/**
 * One entry of a 32-bit ELF's dynamic section: a tag plus a value/pointer sharing storage.
 */
struct ELF32_Dyn {
	int32_t d_tag;
	union {
		uint32_t d_val;
		uint32_t d_ptr;
	} d_un;
};

/**
 * One entry of a 64-bit ELF's dynamic section: a tag plus a value/pointer sharing storage.
 */
struct ELF64_Dyn {
	int64_t d_tag;
	union {
		uint64_t d_val;
		uint64_t d_ptr;
	} d_un;
};
303 |
/**
 * One entry of a 32-bit ELF's relocation section.
 */
struct ELF32_Rela {
	uint32_t r_offset;
	uint32_t r_info;
	int32_t  r_addend;
};

/**
 * One entry of a 64-bit ELF's relocation section, with r_info kept as a single 64-bit value.
 */
struct ELF64_Rela1 {
	uint64_t r_offset;
	uint64_t r_info;
	int64_t  r_addend;
};

/**
 * One entry of a 64-bit ELF's relocation section, with the 8 bytes between offset and
 * addend split into a 32-bit symbol index followed by four single-byte fields.
 * NOTE: according to the original author, this struct may not be used anywhere.
 */
struct ELF64_Rela2 {
	uint64_t r_offset;
	uint32_t r_sym;
	uint8_t  r_ssym;
	uint8_t  r_type3;
	uint8_t  r_type2;
	uint8_t  r_type;
	int64_t  r_addend;
};

/**
 * One entry of a 64-bit ELF's relocation section, with the 8 bytes between offset and
 * addend split into four single-byte fields followed by a 32-bit symbol index
 * (the reverse field order of #ELF64_Rela2).
 * NOTE: according to the original author, this struct may not be used anywhere.
 */
struct ELF64_Rela3 {
	uint64_t r_offset;
	uint8_t  r_type;
	uint8_t  r_type2;
	uint8_t  r_type3;
	uint8_t  r_ssym;
	uint32_t r_sym;
	int64_t  r_addend;
};
350 |
351 | /**
352 | * Encode an ELF into data structures.
353 | * @param elfbytes The entire ELF as a byte array.
354 | * @return a #ELF encoding the given ELF.
355 | */
356 | ELF * bytes2ELF(const char * elfbytes);
357 |
358 | /**
359 | * Convert an ELF to a byte array.
360 | * @param elf The #ELF struct which encodes the ELF.
361 | * @param size A by-reference integer to return the number of bytes.
362 | * @return a char array which, if written to file, results in the same ELF.
363 | */
364 | char * ELF2bytes(ELF * elf, uint64_t & size);
365 |
366 | /**
367 | * Returns the raw data for an ELF section.
368 | * @param elf
369 | * @param section
370 | * @return a const char array
371 | */
372 | const char * getSectionData(ELF * elf, const ELF_Section section);
373 |
374 | /**
375 | * Replace the data for an ELF section.
376 | * If section has NOBITS type, it is resized; data parameter is ignored.
377 | * @param elf The ELF.
378 | * @param sectionID The index of the section.
379 | * @param data The new data; a duplicate of this array is used.
380 | * @param dataSize The size of the new data.
381 | * @param oldElfEnd When modifying CUDA, the original address of the end of the modified ELF.
382 | */
383 | void replaceSectionData(ELF * elf, int sectionID, char * data, unsigned int dataSize, unsigned int oldElfEnd = 0);
384 |
385 | /**
386 | * Returns the number of sections in an #ELF.
387 | * @param elf The ELF.
388 | * @return an int
389 | */
390 | int getNumSections(ELF * elf);
391 |
392 | /**
393 | * Returns the number of segments (program headers) in an #ELF.
394 | * @param elf The ELF.
395 | * @return an int
396 | */
397 | int getNumSegments(ELF * elf);
398 |
399 | /**
400 | * Gets an ELF section by index.
401 | * @param elf The #ELF containing the section.
402 | * @param index The index of the section.
403 | * @pre There are more than index sections in the ELF.
404 | */
405 | ELF_Section getSection(ELF * elf, int index);
406 |
407 | /**
408 | * Gets a (read-only) symbol from a symbol table section in an elf.
409 | * @param elf
410 | * @param section
411 | * @param index
412 | * @return an #ELF_Sym
413 | */
414 | const ELF_Sym getSymbol(ELF * elf, const ELF_Section section, int index);
415 |
416 | /**
417 | * Gets the header for an ELF section.
418 | * @param elf
419 | * @param section The #ELF_Section
420 | * @return A read-only ELF_SHeader
421 | */
422 | const ELF_SHeader getHeader(ELF * elf, const ELF_Section section);
423 |
424 | /**
425 | * Returns the name of the given section.
426 | * @param elf The elf containing the section.
427 | * @param section The section for which the name is being retrieved.
428 | * @return a char pointer to somewhere in the elf's string table.
429 | */
430 | const char * getName(ELF * elf, const ELF_Section section);
431 |
432 | /**
433 | * Returns the name of the given symbol.
434 | * @param elf The elf containing the symbol.
435 | * @param symtab The header for the symtab section containing the symbol.
436 | * @param sym The symbol for which the name is being retrieved.
437 | * @return a char pointer to somewhere in the elf's string table.
438 | */
439 | const char * getName(ELF * elf, ELF_SHeader symtab, const ELF_Sym sym);
440 |
441 | /**
442 | * Adds a new symbol with specified name to a symbol table section in an ELF.
443 | * The st_name value is automatically set.
444 | * @param elf
445 | * @param sec
446 | * @param symbol
447 | * @param name
448 | */
449 | void addSymbol(ELF * elf, const ELF_Section sec, ELF_Sym symbol, char * name);
450 |
451 | /**
452 | * Adds a new section with specified name to an ELF.
453 | * The sh_addr, sh_offset, and sh_name values are automatically set.
454 | * @param elf
455 | * @param shdr
456 | * @param name
457 | * @param data
458 | */
459 | void addSection(ELF * elf, ELF_SHeader shdr, char * name, char * data);
460 |
461 | /**
462 | * Adds an existing section to an existing segment in an ELF.
463 | * @param elf
464 | * @param section
465 | * @param segment
466 | */
467 | void addSectionToSegment(ELF * elf, const ELF_Section section, int segment);
468 |
469 | /**
470 | * Sets the sh_info attribute for an ELF section.
471 | * @param elf
472 | * @param section
473 | * @param info
474 | */
475 | void setSH_info(ELF * elf, const ELF_Section section, uint32_t info);
476 |
477 | /**
478 | * Sets the st_size attribute for an ELF symbol.
479 | * @param elf
480 | * @param symtab
481 | * @param index The index of the symbol inside its section
482 | * @param size
483 | */
484 | void setST_size(ELF * elf, const ELF_Section symtab, int index, uint64_t size);
485 |
486 | /**
487 | * Sets the st_value attribute for an ELF symbol.
488 | * @param elf
489 | * @param symtab
490 | * @param index The index of the symbol inside its section
491 | * @param value
492 | */
493 | void setST_value(ELF * elf, const ELF_Section symtab, int index, uint64_t value);
494 |
495 | /**
496 | * Frees memory in use by an #ELF struct.
497 | * @param elf
498 | */
499 | void cleanELF(ELF * elf);
500 |
501 | #endif
502 |
503 |
--------------------------------------------------------------------------------
/tools/src/decode_output.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "decode_common.hpp"
6 | #include "decode_output.hpp"
7 | #include "decode.hpp"
8 | using namespace std;
9 |
10 | #ifdef WINDOWS
11 | #include
12 | #define LLx "I64x"
13 | #else
14 | #define LLx "llx"
15 | #endif
16 |
17 | extern node * _blocks;
18 | extern char* _kernel_name;
19 | extern int _arch;
20 | extern int _cuobjdump_version;
21 | extern int _shared_memory;
22 | extern int _min_stack_size;
23 | extern int _frame_size;
24 | extern char** _function_names;
25 | extern int _num_functions;
26 | extern node * _functions;
27 | extern std::string _specialNames35[];
28 |
29 | void printfMetadata() {
30 | if(_kernel_name) {
31 | cout << "//Name: " << _kernel_name << "\n";
32 | }
33 | cout << "//Arch: sm_" << _arch << "\n";
34 | cout << "//cuobjdump: " << _cuobjdump_version << "\n";
35 | cout << "//Shared memory usage: " << _shared_memory << "\n";
36 | cout << "//Min Stack Size: " << _min_stack_size << "\n";
37 | cout << "//Frame Size: " << _frame_size << "\n";
38 | if(_function_names) {
39 | cout << "//Function count: " << _num_functions << "\n";
40 | for(int x = 0; x < _num_functions; x++) {
41 | cout << "//Function: " << _function_names[x] << "\n";
42 | }
43 | }
44 | }
45 |
46 | char * operandToString(instruction * inst, operand * op) {
47 | char * answer = (char*) malloc(sizeof(char) * 256);
48 | char * pointer = answer;
49 | bool mustShowHex = false; //used for mem operand
50 |
51 | int counter;//because g++ gets mad if anything is defined inside a case
52 |
53 | switch (op->type) {
54 | case type_register:
55 | if(op->properties & prop_not) {
56 | sprintf(pointer, "!");
57 | pointer++;
58 | } else if(op->properties & prop_minus) {
59 | sprintf(pointer, "-");
60 | pointer++;
61 | } else if(op->properties & prop_bitwise_complement) {
62 | sprintf(pointer, "~");
63 | pointer++;
64 | }
65 | if(op->properties & prop_absolute_value) {
66 | sprintf(pointer, "|");
67 | pointer++;
68 | }
69 |
70 | if(op->reg < 0) {
71 | sprintf(pointer, "RZ");
72 | pointer+=2;
73 | } else {
74 | sprintf(pointer, "R%d",(int) op->val1);
75 | pointer += strlen(pointer);
76 | }
77 |
78 | if(op->properties & prop_absolute_value) {
79 | sprintf(pointer, "|");
80 | pointer++;
81 | }
82 |
83 | break;
84 |
85 | case type_special_reg:
86 | if(op->properties & prop_not) {
87 | sprintf(pointer, "!");
88 | pointer++;
89 | } else if(op->properties & prop_minus) {
90 | sprintf(pointer, "-");
91 | pointer++;
92 | } else if(op->properties & prop_bitwise_complement) {
93 | sprintf(pointer, "~");
94 | pointer++;
95 | }
96 | if(op->properties & prop_absolute_value) {
97 | sprintf(pointer, "|");
98 | pointer++;
99 | }
100 |
101 | sprintf(pointer, "SR");
102 | pointer += 2;
103 | if(_arch >= 35 && _cuobjdump_version < 55) {
104 | sprintf(pointer, "%d", (int) op->val1);
105 | pointer += strlen(pointer);
106 | } else if(_arch < 35) {
107 | sprintf(pointer, "_");
108 | pointer++;
109 | for(int x = 0; x < _numSpecials20; x++) {
110 | if(_specialIDs20[x] == (int) op->val1) {
111 | const char * name = _specialNames20[x].c_str();
112 | sprintf(pointer, "%s", name);
113 | pointer += strlen(pointer);
114 | break;
115 | }
116 | }
117 | } else if(_arch <= 50) {
118 | if(_specialNames35[op->val1].length() > 0) {
119 | sprintf(pointer, "_");
120 | pointer++;
121 | sprintf(pointer, "%s", _specialNames35[op->val1].c_str());
122 | pointer += strlen(pointer);
123 | } else {
124 | sprintf(pointer, "%d", (int) op->val1);
125 | pointer += strlen(pointer);
126 | }
127 | } else {
128 | cerr << "WARNING o~120: unimplemented arch for special operands.\n";
129 | sprintf(pointer, "%d", (int) op->val1);
130 | pointer += strlen(pointer);
131 | }
132 |
133 | if(op->properties & prop_absolute_value) {
134 | sprintf(pointer, "|");
135 | pointer++;
136 | }
137 |
138 | break;
139 |
140 | case type_predicate:
141 | if(op->properties & prop_not) {
142 | sprintf(pointer, "!");
143 | pointer++;
144 | }
145 |
146 | if(op->val1 == 7) {
147 | if(_cuobjdump_version <= 50 && _arch < 35) {
148 | sprintf(pointer, "pt");
149 | pointer += 2;
150 | } else {
151 | sprintf(pointer, "PT");
152 | pointer += 2;
153 | }
154 | } else {
155 | sprintf(pointer, "P%d", (int) op->val1);
156 | pointer += strlen(pointer);
157 | }
158 |
159 | break;
160 |
161 | case type_label:
162 | sprintf(pointer, "label");
163 | pointer += 5;
164 | sprintf(pointer, "%s", op->strval);
165 | pointer += strlen(pointer);
166 | break;
167 |
168 | case type_sb:
169 | sprintf(pointer, "SB");
170 | pointer += 2;
171 | sprintf(pointer, "%d", (int) op->val1);
172 | pointer += strlen(pointer);
173 | break;
174 |
175 | case type_hex:
176 | if(op->properties & prop_not) {
177 | sprintf(pointer, "!");
178 | pointer++;
179 | } else if(op->properties & prop_minus && !(op->properties & prop_float) && !(op->properties & prop_double)) {
180 | sprintf(pointer, "-");
181 | pointer++;
182 | } else if(op->properties & prop_bitwise_complement) {
183 | sprintf(pointer, "~");
184 | pointer++;
185 | }
186 | if(op->properties & prop_absolute_value) {
187 | sprintf(pointer, "|");
188 | pointer++;
189 | }
190 |
191 | if((op->properties & prop_float) || (op->properties & prop_float32i)) {
192 | if(op->val1 == 0x7f80000000000000LL >> 44) {
193 | sprintf(pointer, "+INF ");
194 | pointer += 5;
195 | } else if(op->val1 == 0xff80000000000000LL >> 44) {
196 | sprintf(pointer, "-INF ");
197 | pointer += 5;
198 | } else {
199 | union {unsigned int i; float f;} u;
200 | u.i = (int)(op->val1 & 0xffffffff);
201 | if(!(op->properties & prop_float32i)) {
202 | u.i = u.i << 12;
203 | }
204 | if(op->properties & prop_exp) {
205 | sprintf(pointer, "%.*e", op->precision, (double)u.f);
206 | pointer += strlen(pointer);
207 | } else {
208 | sprintf(pointer, "%.*f", op->precision, u.f);
209 | pointer += strlen(pointer);
210 | }
211 | }
212 | }
213 | else if(op->properties & prop_double) {
214 | if(op->val1 == 0x7ff0000000000000LL >> 44) {
215 | sprintf(pointer, "+INF ");
216 | pointer += 5;
217 | } else if(op->val1 == 0xfff0000000000000LL >> 44) {
218 | sprintf(pointer, "-INF ");
219 | pointer += 5;
220 | } else {
221 | union {unsigned long long int i; double d;} u;
222 | u.i = op->val1 << 32;
223 | if(!(op->properties & prop_float32i)) {
224 | u.i = u.i << 12;
225 | }
226 | if(op->properties & prop_exp) {
227 | sprintf(pointer, "%.*e", op->precision, u.d);
228 | pointer += strlen(pointer);
229 | } else {
230 | sprintf(pointer, "%.*f", op->precision, u.d);
231 | pointer += strlen(pointer);
232 | }
233 | }
234 | } else {
235 | sprintf(pointer, "0x%" LLx, op->val1);
236 | pointer += strlen(pointer);
237 | }
238 |
239 | if(op->properties & prop_absolute_value) {
240 | sprintf(pointer, "|");
241 | pointer++;
242 | }
243 |
244 | break;
245 |
246 | case type_bit_list:
247 | sprintf(pointer, "{");
248 | pointer += 1;
249 |
250 | counter = 0;
251 | for(unsigned int x = 0; x < 10; x++) {
252 | if(op->val1 & (0x1 << x)) {
253 | counter++;
254 | }
255 | }
256 |
257 | for(unsigned int x = 9; x >= 0; x--) {
258 | if(op->val1 & (0x1 << x)) {
259 | counter--;
260 | sprintf(pointer, "%d", x);
261 | pointer += strlen(pointer);
262 | if(counter) {
263 | sprintf(pointer, ",");
264 | pointer++;
265 | } else {
266 | break;
267 | }
268 | }
269 | }
270 |
271 | sprintf(pointer, "}");
272 | pointer += 1;
273 |
274 | break;
275 |
276 | case type_const_mem:
277 | if(op->properties & prop_not) {
278 | sprintf(pointer, "!");
279 | pointer++;
280 | } else if(op->properties & prop_minus) {
281 | sprintf(pointer, "-");
282 | pointer++;
283 | } else if(op->properties & prop_bitwise_complement) {
284 | sprintf(pointer, "~");
285 | pointer++;
286 | }
287 | if(op->properties & prop_absolute_value) {
288 | sprintf(pointer, "|");
289 | pointer++;
290 | }
291 |
292 | sprintf(pointer, "c");
293 | pointer++;
294 |
295 | if(_cuobjdump_version <= 50) {
296 | if(op->properties & prop_absolute_value) {
297 | sprintf(pointer, "|");
298 | pointer++;
299 | }
300 | }
301 |
302 | if(_cuobjdump_version <= 50) {
303 | sprintf(pointer, " [0x%x] [", op->val2);
304 | pointer += strlen(pointer);
305 | }
306 | else {
307 | if((op->converted && inst->op != opcode_F2F) || (inst->op == opcode_XMAD && op != inst->operands[inst->num_operands - 1])) {
308 | sprintf(pointer, "[0x%x] [", op->val2);
309 | pointer += strlen(pointer);
310 | } else {
311 | sprintf(pointer, "[0x%x][", op->val2);
312 | pointer += strlen(pointer);
313 | }
314 | }
315 |
316 | if(op->reg >= 0) {
317 | sprintf(pointer, "R%d", (int) op->val1);
318 | pointer += strlen(pointer);
319 | if(op->val3) {
320 | sprintf(pointer, "+");
321 | pointer++;
322 | }
323 | }
324 | if(op->val3 || op->reg < 0) {
325 | if(op->val3 < 0) {
326 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val3)) + 1);
327 | pointer += strlen(pointer);
328 | } else {
329 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val3);
330 | pointer += strlen(pointer);
331 | }
332 | }
333 | sprintf(pointer, "]");
334 | pointer++;
335 |
336 | if(_cuobjdump_version >= 55) {
337 | if(op->properties & prop_absolute_value) {
338 | sprintf(pointer, "|");
339 | pointer++;
340 | }
341 | }
342 |
343 | break;
344 |
345 | case type_mem:
346 | if(inst->op == opcode_LD_LDU) {
347 | mustShowHex = true;
348 | }
349 |
350 | if(op->properties & prop_not) {
351 | sprintf(pointer, "!");
352 | pointer++;
353 | } else if(op->properties & prop_minus) {
354 | sprintf(pointer, "-");
355 | pointer++;
356 | } else if(op->properties & prop_bitwise_complement) {
357 | sprintf(pointer, "~");
358 | pointer++;
359 | } else if(op->properties & prop_absolute_value) {
360 | sprintf(pointer, "|");
361 | pointer++;
362 | }
363 |
364 | sprintf(pointer, "[");
365 | pointer++;
366 | if(op->reg >= 0) {
367 | sprintf(pointer, "R%d", (int) op->val1);
368 | pointer += strlen(pointer);
369 | if(op->val2 || mustShowHex) {
370 | sprintf(pointer, "+");
371 | pointer++;
372 | }
373 | }
374 | if(mustShowHex || op->val2 || op->reg < 0) {
375 | if(op->val2 == 0) {
376 | if(_cuobjdump_version <= 50) {
377 | sprintf(pointer, "0x0");
378 | pointer += 3;
379 | } else {
380 | sprintf(pointer, "RZ");
381 | pointer += 2;
382 | }
383 | } else if(op->val2 < 0) {
384 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val2)) + 1);
385 | pointer += strlen(pointer);
386 | } else {
387 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val2);
388 | pointer += strlen(pointer);
389 | }
390 | }
391 | sprintf(pointer, "]");
392 | pointer++;
393 |
394 | if(op->properties & prop_absolute_value) {
395 | sprintf(pointer, "|");
396 | pointer++;
397 | }
398 |
399 | break;
400 |
401 | case type_texture_operand:
402 | for(int x = 0; x < _numTextures; x++) {
403 | if(_textureIDs[x] == (int) op->val1) {
404 | if(_cuobjdump_version >= 55 && _textureIDs[x+1] == _textureIDs[x]) {
405 | sprintf(pointer, "%s", _textureNames[x+1]);
406 | pointer += strlen(pointer);
407 | } else {
408 | sprintf(pointer, "%s", _textureNames[x]);
409 | pointer += strlen(pointer);
410 | }
411 | break;
412 | }
413 | }
414 |
415 | break;
416 |
417 | case type_channel:
418 | for(int x = 0; x < _numChannels; x++) {
419 | if(_channelIDs[x] == (int) op->val1) {
420 | sprintf(pointer, "%s", _channelNames[x]);
421 | pointer += strlen(pointer);
422 | break;
423 | }
424 | }
425 |
426 | break;
427 |
428 | case type_other_operand:
429 | sprintf(pointer, "%s", _otherNames[op->val1]);
430 | pointer += strlen(pointer);
431 |
432 | break;
433 |
434 | default:
435 | cerr << "SANITY CHECK ERROR o~1871: operand is of non-operand type.\n";
436 | }
437 | return answer;
438 | }
439 |
440 | void printfOperand(instruction * inst, operand * op) {
441 | char * operand = operandToString(inst, op);
442 | printf(operand);
443 | free(operand);
444 | }
445 |
446 | void fprintfInstruction(ostream & out, instruction * inst) {
447 | //printf predicate guard if it exists
448 | if(inst->guard) {
449 | out << "@";
450 | char * operand = operandToString(inst, inst->guard);
451 | out << operand << " ";
452 | if(!(inst->guard->properties & prop_not)) {
453 | out << " ";
454 | }
455 | free(operand);
456 | } else {
457 | out << " ";
458 | }
459 |
460 | //printf opcode + mods
461 | out << opcodes[inst->op];
462 | node * iter3 = inst->mods;
463 | while(iter3 != 0) {
464 | out << "." << (char*)iter3->value;
465 | iter3 = iter3->next;
466 | }
467 |
468 | //printf operands + mods
469 | int x;
470 | for(x = 0; x < inst->num_operands; x++) {
471 | if(x > 0) {
472 | out << ",";
473 | }
474 | out << " ";
475 | char * operand = operandToString(inst, inst->operands[x]);
476 | out << operand;
477 | free(operand);
478 |
479 | iter3 = inst->operands[x]->mods;
480 | while(iter3 != 0) {
481 | out << "." << (char*)iter3->value;
482 | iter3 = iter3->next;
483 | }
484 | }
485 |
486 | out << ";";
487 | }
488 |
--------------------------------------------------------------------------------
/tools/src/decode_common.hpp:
--------------------------------------------------------------------------------
1 | #ifndef DECODE_COMMON_HPP
2 | #define DECODE_COMMON_HPP
3 | #include
4 | #include
5 |
6 | /**
7 | * @file decode_common.hpp
8 | * Defines enums, structs, and functions that are used in multiple parts of the program.
9 | */
10 |
11 | #define MAXREGISTERS 255
12 |
13 | /**
14 | * Identifies a token's 'type' when parsing.
15 | * Also used to keep track of operand types.
16 | */
17 | typedef enum {
18 | type_opcode,
19 | type_register,
20 | type_special_reg,
21 | type_predicate,
22 | type_guard,
23 | type_sb,
24 | type_hex,
25 | type_bit_list,
26 | type_mod,
27 | type_const_mem,
28 | type_mem,
29 | type_texture_operand,
30 | type_channel,
31 | type_other_operand,
32 | type_label,
33 | } token_type;
34 |
35 | /**
36 | * Used to build linked lists.
37 | */
38 | typedef struct node {
39 | void * value;
40 | struct node * next;
41 | } node;
42 |
43 | /**
44 | * Mostly used to hold data during parsing.
45 | */
46 | typedef struct {
47 | token_type type;
48 | char* lexeme;
49 | } token;
50 |
/**
 * Used to note attributes and unary operations of an operand.
 * Every enumerator is a distinct power of two, so several can be stored in one value.
 */
typedef enum {
	prop_minus = 1,
	prop_bitwise_complement = 2,
	prop_absolute_value = 4,
	prop_not = 8,

	prop_float = 128,//not a real property; used here for accurate asm->asm
	prop_double = 256,//not a real property; used here for accurate asm->asm
	prop_exp = 512,//not a real property; used here for accurate asm->asm
	prop_float32i = 1024,//not a real property; used here for accurate asm->asm
} operand_prop;

/**
 * Combines two property values with a bitwise OR.
 * @param a The first property set
 * @param b The second property set
 * @return a value with every bit that is set in either argument
 */
inline operand_prop operator|(operand_prop a, operand_prop b) {
	return static_cast<operand_prop>(static_cast<int>(a) | static_cast<int>(b));
}

/**
 * Subtracts the integer value of b from the integer value of a.
 * This is plain arithmetic subtraction, so it only clears b's bits
 * when all of them are set in a.
 * @param a The property set being reduced
 * @param b The property value to subtract
 * @return the difference, converted back to operand_prop
 */
inline operand_prop operator-(operand_prop a, operand_prop b) {
	return static_cast<operand_prop>(static_cast<int>(a) - static_cast<int>(b));
}
71 |
72 | /**
73 | * Represents a parsed operand.
74 | */
75 | typedef struct {
76 | /**
77 | * The list of mods applied to this operand, such as ".CC".
78 | */
79 | node * mods;
80 |
81 | /**
82 | * The operand's type.
83 | */
84 | token_type type;
85 |
86 | /**
87 | * Part of the operand's value.
88 | * Used for label name for labels.
89 | */
90 | char * strval;
91 |
92 | /**
93 | * Part of the operand's value.
94 | * This is the register ID in register, predicate, and memory operands.
95 | * This is an integer or bit-shifted float value for hex operands.
96 | */
97 | long long val1;
98 |
99 | /**
100 | * Part of the operand's value.
101 | * This is the offset for global/local/shared memory operands.
102 | * This is the memory bank value (the first hex value) for constant memory operands.
103 | */
104 | int val2;
105 |
106 | /**
107 | * Part of the operand's value.
108 | * This is the offset (the second hex value) for constant memory operands.
109 | */
110 | int val3;
111 |
112 | /**
113 | * The ID of the first register inside this operand, or -1.
114 | * The RZ register results in a value of -1 for this.
115 | */
116 | int reg;
117 |
118 | /**
119 | * The operand's properties, such as negative and/or absolute value.
120 | * There are some fake properties used for accurate output of assembly code.
121 | */
122 | operand_prop properties;
123 |
124 | /**
125 | * For base-10 operands (used in float/double instructions), this is the number of digits after the decimal point.
126 | * Used for accurate assembly output.
127 | */
128 | int precision;
129 |
130 | /**
131 | * Set to true for the last operand in I2F, F2I, I2I, or F2F.
132 | * Used for accurate assembly output.
133 | */
134 | bool converted;
135 |
136 | bool bad;
137 |
138 | bool decimal;
139 | } operand;
140 |
141 | /**
142 | * Unique ID for each opcode.
143 | */
144 | typedef enum {
145 | opcode_MOV = 0,
146 | opcode_MOV32I = 1,
147 | opcode_LD = 2,
148 | opcode_LDU = 3,
149 | opcode_LDL = 4,
150 | opcode_LDS = 5,
151 | opcode_LDC = 6,
152 | opcode_ST = 7,
153 | opcode_STL = 8,
154 | opcode_STS = 9,
155 | opcode_LDLK = 10,
156 | opcode_LDSLK = 11,
157 | opcode_STUL = 12,
158 | opcode_STSUL = 13,
159 | opcode_FADD = 14,
160 | opcode_FADD32I = 15,
161 | opcode_FMUL = 16,
162 | opcode_FMUL32I = 17,
163 | opcode_FFMA = 18,
164 | opcode_FSET = 19,
165 | opcode_FSETP = 20,
166 | opcode_DSETP = 21,
167 | opcode_FCMP = 22,
168 | opcode_MUFU = 23,
169 | opcode_DADD = 24,
170 | opcode_DMUL = 25,
171 | opcode_DFMA = 26,
172 | opcode_IADD = 27,
173 | opcode_IADD32I = 28,
174 | opcode_IMNMX = 29,
175 | opcode_IMUL = 30,
176 | opcode_IMUL32I = 31,
177 | opcode_IMAD = 32,
178 | opcode_ISCADD = 33,
179 | opcode_ISET = 34,
180 | opcode_ISETP = 35,
181 | opcode_ICMP = 36,
182 | opcode_I2F = 37,
183 | opcode_I2I = 38,
184 | opcode_F2I = 39,
185 | opcode_F2F = 40,
186 | opcode_LOP = 41,
187 | opcode_LOP32I = 42,
188 | opcode_SHL = 43,
189 | opcode_SHR = 44,
190 | opcode_BFE = 45,
191 | opcode_BFI = 46,
192 | opcode_SEL = 47,
193 | opcode_SCHI = 48,//this is asfermi's name for the fake instructions which contain scheduling information in sm_30 and up
194 | opcode_SSY = 49,
195 | opcode_BRA = 50,
196 | opcode_BRX = 51,
197 | opcode_PCNT = 52,
198 | opcode_CONT = 53,
199 | opcode_PBK = 54,
200 | opcode_BRK = 55,
201 | opcode_CAL = 56,
202 | opcode_RET = 57,
203 | opcode_EXIT = 58,
204 | opcode_NOP = 59,
205 | opcode_BAR = 60,
206 | opcode_BPT = 61,
207 | opcode_B2R = 62,
208 | opcode_S2R = 63,
209 | opcode_PSETP = 64,
210 | opcode_PSET = 65,
211 | opcode_FLO = 66,
212 | opcode_P2R = 67,
213 | opcode_R2P = 68,
214 | opcode_TEX = 69,
215 | opcode_TEXDEPBAR = 70,
216 | opcode_RRO = 71,
217 | opcode_PRMT = 72,
218 | opcode_VADD = 73,
219 | opcode_DMNMX = 74,
220 | opcode_FMNMX = 75,
221 | opcode_RED = 76,
222 | opcode_VOTE = 77,
223 | opcode_POPC = 78,
224 | opcode_MEMBAR = 79,
225 | opcode_STSCUL = 80,
226 | opcode_LEPC = 81,
227 | opcode_CSETP = 82,
228 | opcode_ISCADD32I = 83,
229 | opcode_VMNMX = 84,
230 | opcode_TLD = 85,
231 | opcode_SHF = 86,
232 | opcode_FCHK = 87,
233 | opcode_ISUB = 88,
234 | opcode_JCAL = 89, //calls extern functions like printf
235 | opcode_SHFL = 90,
236 | opcode_LDG = 91,
237 | opcode_LD_LDU = 92,
238 | opcode_ATOM = 93,
239 | opcode_CCTL = 94,
240 | opcode_XMAD = 95,
241 | opcode_SYNC = 96,
242 | opcode_STG = 97,
243 | opcode_IADD3 = 98,
244 | opcode_VABSDIFF = 99,
245 | opcode_DEPBAR = 100,
246 | opcode_LOP3 = 101,
247 | opcode_TLDS = 102,
248 | opcode_TEXS = 103,
249 | opcode_LEA = 104,
250 | opcode_DSET = 105,
251 | } opcode;
252 |
253 | /**
254 | * Represents an assembly instruction.
255 | */
256 | typedef struct {
257 | /**
258 | * A label representing this instruction's address, or 0.
259 | */
260 | char * label;
261 |
262 | /**
263 | * The predicate guard, or 0 if there isn't one.
264 | */
265 | operand * guard;
266 |
267 | /**
268 | * A unique ID representing the instruction's opcode.
269 | */
270 | opcode op;
271 |
272 | /**
273 | * A list of mods (such as ".LU" or ".128") attached to the opcode.
274 | */
275 | node * mods;
276 |
277 | /**
278 | * The operands for this instruction.
279 | */
280 | operand ** operands;
281 |
282 | /**
283 | * The number of operands for this instruction.
284 | */
285 | int num_operands;
286 |
287 | int address;
288 | } instruction;
289 |
290 | /**
291 | * Names of special registers.
292 | * Corresponds one-to-one with the specialIDs array.
293 | */
294 | extern std::string _specialNames20[];
295 |
296 | /**
297 | * Values of special registers.
298 | * Corresponds one-to-one with the specialNames array.
299 | */
300 | extern const int _specialIDs20[];
301 |
302 | /**
303 | * Number of recognized special registers.
304 | */
305 | extern const int _numSpecials20;
306 |
307 | /**
308 | * Names of texture operands.
309 | * Corresponds one-to-one with the textureIDs array.
310 | */
311 | extern const char * _textureNames[];
312 |
313 | /**
314 | * Values of texture operands.
315 | * Corresponds one-to-one with the textureNames array.
316 | */
317 | extern const int _textureIDs[];
318 |
319 | /**
320 | * Number of recognized texture operands.
321 | */
322 | extern const int _numTextures;
323 |
324 | /**
325 | * Names of channel operands.
326 | * Corresponds one-to-one with the _channelIDs array.
327 | */
328 | extern const char * _channelNames[];
329 |
330 | /**
331 | * Values of channel operands.
332 | * Corresponds one-to-one with the _channelNames array.
333 | */
334 | extern const int _channelIDs[];
335 |
336 | /**
337 | * Number of recognized channel operands.
338 | */
339 | extern const int _numChannels;
340 |
341 | /**
342 | * Names of miscellaneous operands.
343 | */
344 | extern const char * _otherNames[];
345 |
346 | /**
347 | * Number of items in otherNames array.
348 | */
349 | extern const int _numOthers;
350 |
351 | /**
352 | * List of opcode names.
353 | */
354 | extern const char * opcodes[];
355 |
356 | /**
357 | * Returns the value which represents a given special register.
358 | * @param lexeme The special register's name
359 | * @param format If true, change special register's name to match given lexeme
360 | * @param arch The architecture (e.g. 20 for sm_20)
361 | */
362 | int getSpecialID(const char * lexeme, bool format, int arch);
363 |
364 | /**
365 | * Returns a unique ID for certain miscellaneous operand types.
366 | * @param lexeme The operand's name
367 | */
368 | int getOtherID(const char * lexeme);
369 |
370 | /**
371 | * Returns the value which represents a given texture operand.
372 | * @param lexeme The texture operand's name
373 | */
374 | int getTextureID(const char * lexeme);
375 |
376 | /**
377 | * Returns the value which represents a given channel operand.
378 | * @param lexeme The channel operand's name
379 | */
380 | int getChannelID(const char * lexeme);
381 |
382 |
383 | /**
384 | * Create an instruction with given values.
385 | * Frees operand nodes after putting data into proper array.
386 | * @param op The opcode's ID
387 | * @param mods The list of mods for the opcode
388 | * @param operands The list of operands
389 | * @return an instruction
390 | */
391 | instruction* newInstruction(opcode op, node * mods, node * operands);
392 |
393 | /**
394 | * Create an instruction with no operands.
395 | * Frees operand nodes after putting data into proper array.
396 | * @param op The opcode's ID
397 | * @param mods The list of mods for the opcode
398 | * @return an instruction
399 | */
400 | instruction* newInstruction(opcode op, node * mods);
401 |
402 | /**
403 | * Create an instruction with one operand.
404 | * Frees operand nodes after putting data into proper array.
405 | * @param op The opcode's ID
406 | * @param mods The list of mods for the opcode
407 | * @param o1 The operand
408 | * @return an instruction
409 | */
410 | instruction* newInstruction(opcode op, node * mods, operand * o1);
411 |
412 | /**
413 | * Create an instruction with two operands.
414 | * Frees operand nodes after putting data into proper array.
415 | * @param op The opcode's ID
416 | * @param mods The list of mods for the opcode
417 | * @param o1 The first operand
418 | * @param o2 The second operand
419 | * @return an instruction
420 | */
421 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2);
422 |
423 | /**
424 | * Create an instruction with three operands.
425 | * Frees operand nodes after putting data into proper array.
426 | * @param op The opcode's ID
427 | * @param mods The list of mods for the opcode
428 | * @param o1 The first operand
429 | * @param o2 The second operand
430 | * @param o3 The third operand
431 | * @return an instruction
432 | */
433 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3);
434 |
435 | /**
436 | * Create an instruction with four operands.
437 | * Frees operand nodes after putting data into proper array.
438 | * @param op The opcode's ID
439 | * @param mods The list of mods for the opcode
440 | * @param o1 The first operand
441 | * @param o2 The second operand
442 | * @param o3 The third operand
443 | * @param o4 The fourth operand
444 | * @return an instruction
445 | */
446 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4);
447 |
448 | /**
449 | * Create an operand with given values.
450 | * Sets unspecified values to appropriate defaults.
451 | * @param type The operand type
452 | * @param val1 (defaults to 0)
453 | * @param val2 (defaults to 0)
454 | * @param val3 (defaults to 0)
455 | * @return the new operand
456 | */
457 | operand* newOperand(token_type type, long long val1 = 0, int val2 = 0, int val3 = 0);
458 |
459 | /**
460 | * Create an operand with given values.
461 | * Sets unspecified values to appropriate defaults.
462 | * @param type The operand type
463 | * @param props The operand's properties
464 | * @param val1
465 | * @param val2
466 | * @param val3
467 | * @return the new operand
468 | */
469 | operand* newOperand(token_type type, operand_prop props, long long val1, int val2, int val3);
470 |
471 | /**
472 | * Add a value to a sorted list of nodes, treated as a set.
473 | * Cannot add same value a second time (unless comparator erroneously varies).
474 | * @param first The address of the list
475 | * @param value The item to add to the list
476 | * @param comparator A comparison function, for sorting the list's items
477 | * @return the added node, or 0 if no change was made
478 | */
479 | node* addNode(node **first, void * value, long long (*comparator)(void*, void*));
480 |
481 | /**
482 | * Adds node to end of list, no questions asked.
483 | * @param first The address of the list
484 | * @param value The item to add to the list
485 | */
486 | void addLast(node ** first, void * value);
487 |
488 | /**
489 | * Adds node to start of list, no questions asked.
490 | * @param first The address of the list
491 | * @param value The item to add to the list
492 | */
493 | void addFirst(node ** first, void * value);
494 |
495 | /**
496 | * Adds value to list, immediately after specified predecessor.
497 | * If predecessor is 0, value is added to start of list instead.
498 | * @param first The address of the list
499 | * @param afterMe The node we're inserting things after
500 | * @param value The item to add to the list
501 | */
502 | void addAfter(node ** first, node * afterMe, void * value);
503 |
504 | /**
505 | * Adds value to list, immediately before specified node.
506 | * If node is 0, value is added to start of list.
507 | * @param first The address of the list
508 | * @param beforeMe The node we're inserting things before
509 | * @param value The item to add to the list
510 | */
511 | void addBefore(node **first, node * beforeMe, void * value);
512 |
513 | /**
514 | * Adds node to list, at specified zero-based index
515 | * @param first The address of the list
516 | * @param value The item to add to the list
517 | * @param index The location in the list to add the element to
518 | */
519 | void addAtIndex(node ** first, void * value, int index);
520 |
521 | /**
522 | * Removes a node from a list.
523 | * @param first The address of the list
524 | * @param value The item to remove from the list
525 | * @param comparator A comparison function, used to identify the correct item
526 | * @return the removed node on success, or 0 if the node does not exist
527 | */
528 | node * removeNode(node **first, void * value, long long (*comparator)(void*, void*));
529 |
530 | /**
531 | * Removes a node from a list and frees it.
532 | * @param first The address of the list
533 | * @param value The item to remove from the list
534 | * @param comparator A comparison function, to identify the correct item
535 | * @return true on success, false if node does not exist
536 | */
537 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*));
538 |
539 | /**
540 | * Removes a node from a list, and frees it and its contents.
541 | * @param first The address of the list
542 | * @param value The item to remove from the list
543 | * @param comparator A comparison function, to identify the correct item
544 | * @param valueCleaner The free function to use on the node's contents
545 | * @return true on success, false if node does not exist
546 | */
547 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *));
548 |
549 | /**
550 | * Removes a node from a list and frees it.
551 | * @param first The address of the list
552 | * @param del The node to delete
553 | * @return true on success, false if node is not in list
554 | */
555 | bool deleteNode(node **first, node * del);
556 |
557 | /**
558 | * Comparator for addNode; compares long integers by value.
559 | * @param a The first long being compared
560 | * @param b The second long being compared
561 | * @return negative number iff a
2 | #include
3 | #include
4 | #include "decode_common.hpp"
5 |
/**
 * Names of special registers for sm_2x.
 * Corresponds one-to-one with the _specialIDs20 array.
 * Not const: getSpecialID may rewrite an entry so it matches the spelling found in the input.
 */
std::string _specialNames20[] = {"laneid", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "prim_type", "invocation_id", "y_direction", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "tid", "tid.x", "tid.y", "tid.z", "ctaparam", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "ntid.x", "ntid.y", "ntid.z", "gridparam", "nctaid.x", "nctaid.y", "nctaid.z", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "clocklo", "clockhi"};

/**
 * Values of special registers for sm_2x.
 * Corresponds one-to-one with the _specialNames20 array.
 */
const int _specialIDs20[] = {0,2,3,4,5,6,7,8,9,10,11,16,17,18,24,25,26,27,28,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,80,81};

/**
 * Number of recognized special registers for sm_2x.
 * Derived from the ID table so it cannot drift from the arrays; a larger
 * hard-coded value makes lookups over the tables read past their end.
 */
const int _numSpecials20 = (int) (sizeof(_specialIDs20) / sizeof(_specialIDs20[0]));
22 |
23 | /**
24 | * Names of special registers for sm_3x through sm_6x
25 | * //TODO deal with SM_SHADER_TYPE, which does not start with SR_
26 | */
27 | std::string _specialNames35[] = {"laneid", "clock", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "", "", "", "", "prim_type", "invocation_id", "y_direction", "thread_kill", "shader_type", "directcbewriteaddresslow", "directcbewriteaddresshigh", "directcbewriteenabled", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "invocation_info", "wscalefactor_xy", "wscalefactor_z", "tid", "tid.x", "tid.y", "tid.z", "cta_param", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "cirqueueincrminusone", "nlatc", "", "", "", "", "", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "regalloc", "ctxaddr", "", "globalerrorstatus", "", "warperrorstatus", "warperrorstatusclear", "", "", "", "", "pm_hi0", "pm_hi1", "pm_hi2", "pm_hi3", "pm_hi4", "pm_hi5", "pm_hi6", "pm_hi7", "clocklo", "clockhi", "globaltimerlo", "globaltimerhi", "", "", "", "", "", "", "", "", "", "", "", "", "hwtaskid", "circularqueueentryindex", "circularqueueentryaddresslow", "circularqueueentryaddresshigh"};
28 |
29 | /**
30 | * Names of texture operands.
31 | * Corresponds one-to-one with the textureIDs array.
32 | */
33 | const char * _textureNames[] = {"1D", "ARRAY_1D", "RECT", "2D", "ARRAY_2D", "3D", "CUBE", "ARRAY_CUBE"};
34 |
35 | /**
36 | * Values of texture operands.
37 | * Corresponds one-to-one with the textureNames array.
38 | */
39 | const int _textureIDs[] = {0,1,2,2,3,4,6,7};
40 |
41 | /**
42 | * Number of recognized texture operands.
43 | */
44 | const int _numTextures = 8;
45 |
46 | /**
47 | * Names of channel operands.
48 | * Corresponds one-to-one with the _channelIDs array.
49 | */
50 | const char * _channelNames[] = {"R", "G", "B", "A", "RG", "RA", "GA", "BA", "RGB", "RGA", "RBA", "GBA", "RGBA", "INVALID5", "INVALID6", "INVALID7", };
51 |
52 | /**
53 | * Values of channel operands.
54 | * Corresponds one-to-one with the _channelNames array.
55 | */
56 | const int _channelIDs[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
57 |
58 | /**
59 | * Number of recognized channel operands.
60 | */
61 | const int _numChannels = 16;
62 |
63 | /**
64 | * Names of miscellaneous operands.
65 | */
66 | const char * _otherNames[] = {"CC", "PR"};
67 |
68 | /**
69 | * Number of items in otherNames array.
70 | */
71 | const int _numOthers = 2;
72 |
73 | /**
74 | * List of opcode names.
75 | * An opcode's index in this array should be the same as the the corresponding 'opcode' enum value.
76 | */
77 | const char * opcodes[] = {"MOV", "MOV32I", "LD", "LDU", "LDL", "LDS", "LDC", "ST", "STL", "STS", "LDLK", "LDSLK", "STUL", "STSUL", "FADD", "FADD32I", "FMUL", "FMUL32I", "FFMA", "FSET", "FSETP", "DSETP", "FCMP", "MUFU", "DADD", "DMUL", "DFMA", "IADD", "IADD32I", "IMNMX", "IMUL", "IMUL32I", "IMAD", "ISCADD", "ISET", "ISETP", "ICMP", "I2F", "I2I", "F2I", "F2F", "LOP", "LOP32I", "SHL", "SHR", "BFE", "BFI", "SEL", "SCHI", "SSY", "BRA", "BRX", "PCNT", "CONT", "PBK", "BRK", "CAL", "RET", "EXIT", "NOP", "BAR", "BPT", "B2R", "S2R", "PSETP", "PSET", "FLO", "P2R", "R2P", "TEX", "TEXDEPBAR", "RRO", "PRMT", "VADD", "DMNMX", "FMNMX", "RED", "VOTE", "POPC", "MEMBAR", "STSCUL", "LEPC", "CSETP", "ISCADD32I", "VMNMX", "TLD", "SHF", "FCHK", "ISUB", "JCAL", "SHFL", "LDG", "LD_LDU", "ATOM", "CCTL", "XMAD", "SYNC", "STG", "IADD3", "VABSDIFF", "DEPBAR", "LOP3", "TLDS", "TEXS", "LEA", "DSET", 0};
78 |
79 | int getSpecialID(const char * lexeme, bool format, int arch) {
80 | if(arch <= 30) {
81 | for(int x = 0; x < _numSpecials20; x++) {
82 | for(int y = 0; y >= 0; y++) {
83 | if(lexeme[y] == _specialNames20[x][y]) {
84 | if(lexeme[y] == 0) {
85 | //Make them match perfectly for printing out later:
86 | if(format) {
87 | for(int z = 0; z < y; z++) {
88 | _specialNames20[x][z] = lexeme[z];
89 | }
90 | }
91 |
92 | //Return value:
93 | return _specialIDs20[x];
94 | }
95 | } else if(lexeme[y] == '_' && _specialNames20[x][y] == '.') {
96 | //interchangeable; format varies depending on CUDA version
97 | } else if(lexeme[y] == '.' && _specialNames20[x][y] == '_') {
98 | //interchangeable; format varies depending on CUDA version
99 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames20[x][y] - ('a' - 'A')) {
100 | //interchangeable; format varies depending on CUDA version
101 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames20[x][y] - ('A' - 'a')) {
102 | //interchangeable; format varies depending on CUDA version
103 | } else {
104 | break;
105 | }
106 | }
107 | }
108 | } else if(arch < 70) {
109 | int numSpecials = sizeof(_specialNames35) / sizeof(std::string);
110 | for(int x = 0; x < numSpecials; x++) {
111 | for(int y = 0; y >= 0; y++) {
112 | if(lexeme[y] == _specialNames35[x][y]) {
113 | if(lexeme[y] == 0) {
114 | //Make them match perfectly for printing out later:
115 | if(format) {
116 | for(int z = 0; z < y; z++) {
117 | _specialNames35[x][z] = lexeme[z];
118 | }
119 | }
120 |
121 | //Return value:
122 | return x;
123 | }
124 | } else if(lexeme[y] == '_' && _specialNames35[x][y] == '.') {
125 | //interchangeable; format varies depending on CUDA version
126 | } else if(lexeme[y] == '.' && _specialNames35[x][y] == '_') {
127 | //interchangeable; format varies depending on CUDA version
128 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames35[x][y] - ('a' - 'A')) {
129 | //interchangeable; format varies depending on CUDA version
130 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames35[x][y] - ('A' - 'a')) {
131 | //interchangeable; format varies depending on CUDA version
132 | } else {
133 | break;
134 | }
135 | }
136 | }
137 | } else {
138 | fprintf(stderr, "ERROR c~98: Unimplemented.\n");
139 | }
140 |
141 | fprintf(stderr,"WARNING: Found unrecognized special register \"%s\".\n",lexeme);
142 | return -1;
143 | }
144 |
145 | int getOtherID(const char * lexeme) {
146 | for(int x = 0; x < _numOthers; x++) {
147 | if(!strcmp(lexeme, _otherNames[x])) {
148 | return x;
149 | }
150 | }
151 |
152 | fprintf(stderr,"\n*SANITY CHECK ERROR ~22* This message is unreachable! \"%s\".\n\n",lexeme);
153 | return -1;
154 | }
155 |
156 | int getTextureID(const char * lexeme) {
157 | for(int x = 0; x < _numTextures; x++) {
158 | if(!strcmp(lexeme, _textureNames[x])) {
159 | return _textureIDs[x];
160 | }
161 | }
162 |
163 | fprintf(stderr,"\n*SANITY CHECK ERROR ~33* Found unrecognized texture operand \"%s\".\n\n",lexeme);
164 | return -1;
165 | }
166 |
167 | int getChannelID(const char * lexeme) {
168 | for(int x = 0; x < _numChannels; x++) {
169 | if(!strcmp(lexeme, _channelNames[x])) {
170 | return _channelIDs[x];
171 | }
172 | }
173 |
174 | fprintf(stderr,"\n*SANITY CHECK ERROR c~162: Found unrecognized channel operand \"%s\".\n\n",lexeme);
175 | return -1;
176 | }
177 |
178 | instruction* newInstruction(opcode op, node * mods, node * operands) {
179 | instruction * inst = (instruction*) malloc(sizeof(instruction));
180 | inst->label = 0;
181 | inst->op = op;
182 | inst->mods = mods;
183 | inst->guard = 0;
184 | inst->num_operands = 0;
185 | inst->operands = 0;
186 |
187 | if(operands) {
188 | inst->num_operands = listSize(operands);
189 | inst->operands = (operand**) malloc(inst->num_operands*sizeof(operand*));
190 | node *o = operands;
191 | int x = 0;
192 | while(o) {
193 | inst->operands[x] = (operand*) o->value;
194 | o = o->next;
195 | x++;
196 | }
197 | cleanNodes(operands);
198 | }
199 |
200 | return inst;
201 | }
202 |
203 | instruction* newInstruction(opcode op, node * mods) {
204 | return newInstruction(op, mods, (node*)0);
205 | }
206 |
207 | instruction* newInstruction(opcode op, node * mods, operand * o1) {
208 | instruction * inst = newInstruction(op, mods);
209 | inst->num_operands = 1;
210 | inst->operands = (operand**) malloc(1*sizeof(operand*));
211 | inst->operands[0] = o1;
212 |
213 | return inst;
214 | }
215 |
216 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2) {
217 | instruction * inst = newInstruction(op, mods);
218 | inst->num_operands = 2;
219 | inst->operands = (operand**) malloc(2*sizeof(operand*));
220 | inst->operands[0] = o1;
221 | inst->operands[1] = o2;
222 |
223 | return inst;
224 | }
225 |
226 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3) {
227 | instruction * inst = newInstruction(op, mods);
228 | inst->num_operands = 3;
229 | inst->operands = (operand**) malloc(3*sizeof(operand*));
230 | inst->operands[0] = o1;
231 | inst->operands[1] = o2;
232 | inst->operands[2] = o3;
233 |
234 | return inst;
235 | }
236 |
237 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4) {
238 | instruction * inst = newInstruction(op, mods);
239 | inst->num_operands = 4;
240 | inst->operands = (operand**) malloc(4*sizeof(operand*));
241 | inst->operands[0] = o1;
242 | inst->operands[1] = o2;
243 | inst->operands[2] = o3;
244 | inst->operands[3] = o4;
245 | return inst;
246 | }
247 |
248 | operand* newOperand(token_type type, long long val1, int val2, int val3) {
249 | operand * op = (operand*) malloc(sizeof(operand));
250 | op->type = type;
251 | op->val1 = val1;
252 | op->val2 = val2;
253 | op->val3 = val3;
254 | op->reg = -1;
255 | op->mods = 0;
256 | op->properties = (operand_prop) 0;
257 | op->converted = false;
258 | op->bad = false;
259 | op->decimal = false;
260 |
261 | if(type == type_register || type == type_const_mem || type == type_mem) {
262 | if(val1 >= 0) {
263 | op->reg = val1;
264 | }
265 | }
266 |
267 | return op;
268 | }
269 |
270 | operand* newOperand(token_type type, operand_prop props, long long val1, int val2, int val3) {
271 | operand * op = newOperand(type, val1, val2, val3);
272 | op->properties = props;
273 | return op;
274 | }
275 |
276 | node *addNode(node **first, void * value, long (*comparator)(void*, void*)) {
277 | //check if new node replaces first
278 | if(!*first || comparator((*first)->value,value) > 0) {
279 | node * newNode = (node*) malloc(sizeof(node));
280 | newNode->value = value;
281 | newNode->next = *first;
282 | *first = newNode;
283 | return newNode;
284 | }
285 |
286 | if(!comparator((*first)->value,value)) {
287 | return 0;
288 | }
289 |
290 | //find location to place new node
291 | node* iterator = *first;
292 | while(iterator->next && comparator(iterator->next->value,value) < 0) {
293 | iterator = iterator->next;
294 | }
295 |
296 | //if already in list, return
297 | if(iterator->next && !comparator(iterator->next->value,value)) {
298 | return 0;
299 | }
300 |
301 | //add new node to list
302 | node * newNode = (node*) malloc(sizeof(node));
303 | newNode->value = value;
304 | newNode->next = iterator->next;
305 | iterator->next = newNode;
306 | return newNode;
307 | }
308 |
309 | void addLast(node ** first, void * value) {
310 | //check if new node replaces first
311 | if(!*first) {
312 | node * newNode = (node*) malloc(sizeof(node));
313 | newNode->value = value;
314 | newNode->next = *first;
315 | *first = newNode;
316 | return;
317 | }
318 |
319 | //find last
320 | node* iterator = *first;
321 | while(iterator->next != 0) {
322 | iterator = iterator->next;
323 | }
324 |
325 | //add new node to list
326 | node * newNode = (node*) malloc(sizeof(node));
327 | newNode->value = value;
328 | newNode->next = iterator->next;
329 | iterator->next = newNode;
330 | return;
331 | }
332 |
333 | void addFirst(node ** first, void * value) {
334 | node * newNode = (node*) malloc(sizeof(node));
335 | newNode->value = value;
336 | newNode->next = *first;
337 | *first = newNode;
338 | }
339 |
340 | void addAfter(node **first, node * afterMe, void * value) {
341 | node * newNode = (node*) malloc(sizeof(node));
342 | newNode->value = value;
343 | if(afterMe != 0) {
344 | newNode->next = afterMe->next;
345 | afterMe->next = newNode;
346 | }
347 | else {
348 | newNode->next = *first;
349 | *first = newNode;
350 | }
351 | }
352 |
353 | void addBefore(node **first, node * beforeMe, void * value) {
354 | if(!*first || (*first) == beforeMe) {
355 | node * newNode = (node*) malloc(sizeof(node));
356 | newNode->value = value;
357 | newNode->next = *first;
358 | *first = newNode;
359 | return;
360 | }
361 |
362 | //find location to place new node
363 | node* iterator = *first;
364 | while(iterator->next != beforeMe) {
365 | iterator = iterator->next;
366 | }
367 |
368 | //add new node to list
369 | node * newNode = (node*) malloc(sizeof(node));
370 | newNode->value = value;
371 | newNode->next = iterator->next;
372 | iterator->next = newNode;
373 | }
374 |
375 | void addAtIndex(node ** first, void * value, int index) {
376 | if(index < 1) {
377 | addFirst(first, value);
378 | return;
379 | }
380 |
381 | //Find place to add to:
382 | node* iterator = *first;
383 | while(iterator->next != 0 && index > 1) {
384 | iterator = iterator->next;
385 | index--;
386 | }
387 |
388 | //add new node to list
389 | node * newNode = (node*) malloc(sizeof(node));
390 | newNode->value = value;
391 | newNode->next = iterator->next;
392 | iterator->next = newNode;
393 | return;
394 | }
395 |
396 | node * removeNode(node **first, void * value, long (*comparator)(void*, void*)) {
397 | //make sure list is nonempty
398 | if(!*first) {
399 | return 0;
400 | }
401 |
402 | //check if first node is deleted
403 | if(!comparator((*first)->value,value)) {
404 | node * n = *first;
405 | *first = (*first)->next;
406 | return n;
407 | }
408 |
409 | //find location of node to delete
410 | node* iterator = *first;
411 | while(iterator->next != 0 && comparator(iterator->next->value,value)) {
412 | iterator = iterator->next;
413 | }
414 |
415 | //if node not found, return 0
416 | if(iterator->next == 0) {
417 | return 0;
418 | }
419 |
420 | //delete node
421 | node * n = iterator->next;
422 | iterator->next = iterator->next->next;
423 | n->next = 0;
424 | return n;
425 | }
426 |
427 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*)) {
428 | //make sure list is nonempty
429 | if(!*first) {
430 | return false;
431 | }
432 |
433 | //check if first node is deleted
434 | if(!comparator((*first)->value,value)) {
435 | node * n = *first;
436 | *first = (*first)->next;
437 | free(n);
438 | return true;
439 | }
440 |
441 | //find location of node to delete
442 | node* iterator = *first;
443 | while(iterator->next && comparator(iterator->next->value,value)) {
444 | iterator = iterator->next;
445 | }
446 |
447 | //if node not found, return 0
448 | if(!iterator->next) {
449 | return false;
450 | }
451 |
452 | //delete node
453 | node * n = iterator->next;
454 | iterator->next = iterator->next->next;
455 | free(n);
456 | return true;
457 | }
458 |
459 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *)) {
460 | //make sure list is nonempty
461 | if(!*first) {
462 | return false;
463 | }
464 |
465 | //check if first node is deleted
466 | if(!comparator((*first)->value,value)) {
467 | node * n = *first;
468 | *first = (*first)->next;
469 | valueCleaner(n->value);
470 | free(n);
471 | return true;
472 | }
473 |
474 | //find location of node to delete
475 | node* iterator = *first;
476 | while(iterator->next && comparator(iterator->next->value,value)) {
477 | iterator = iterator->next;
478 | }
479 |
480 | //if node not found, return 0
481 | if(!iterator->next) {
482 | return false;
483 | }
484 |
485 | //delete node
486 | node * n = iterator->next;
487 | iterator->next = iterator->next->next;
488 | valueCleaner(n->value);
489 | free(n);
490 | return true;
491 | }
492 |
493 | bool deleteNode(node **first, node * del) {
494 | //make sure list is nonempty
495 | if(!*first) {
496 | return false;
497 | }
498 |
499 | //check if first node is deleted
500 | if(*first == del) {
501 | node * n = *first;
502 | *first = (*first)->next;
503 | free(n);
504 | return true;
505 | }
506 |
507 | //find location of node to delete
508 | node* iterator = *first;
509 | while(iterator->next && iterator->next != del) {
510 | iterator = iterator->next;
511 | }
512 |
513 | //if node not found, return 0
514 | if(!iterator->next) {
515 | return false;
516 | }
517 |
518 | //delete node
519 | node * n = iterator->next;
520 | iterator->next = iterator->next->next;
521 | free(n);
522 | return true;
523 | }
524 |
/**
 * Comparator that treats both pointers as integers.
 * @param a First value, an integer stored in a pointer
 * @param b Second value, an integer stored in a pointer
 * @return a minus b
 */
long long intorder(void * a, void * b) {
	const long long left = (long long) a;
	const long long right = (long long) b;
	const long long difference = left - right;
	return difference;
}
530 |
/**
 * Comparator that treats both pointers as C strings and compares them with strcmp.
 * @param a First string
 * @param b Second string
 * @return the strcmp result: negative, zero or positive
 */
long long charstarorder(void * a, void * b) {
	const long ordering = (long)strcmp((char*) a, (char*) b);
	return ordering;
}
536 |
/**
 * Comparator that orders two C strings with strcmp.
 * @param a First string
 * @param b Second string
 * @return the strcmp result: negative, zero or positive
 */
long long lexicographicorder(void * a, void * b) {
	const char * left = (char*) a;
	const char * right = (char*) b;
	return strcmp(left, right);
}
543 |
544 | int listSize(node *first) {
545 | int count = 0;
546 |
547 | while(first) {
548 | count++;
549 | first = first->next;
550 | }
551 |
552 | return count;
553 | }
554 |
555 | node * containsValue(node *first, void * val) {
556 | while(first && first->value != val) {
557 | first = first->next;
558 | }
559 |
560 | return first;
561 | }
562 |
563 | void cleanNodes(node *first) {
564 | node * next;
565 | while(first) {
566 | next = first->next;
567 | free(first);
568 | first = next;
569 | }
570 | }
571 |
572 | void cleanNodesFully(node *first, void (*valueCleaner)(void *)) {
573 | node * next;
574 | while(first) {
575 | next = first->next;
576 | valueCleaner(first->value);
577 | free(first);
578 | first = next;
579 | }
580 | }
581 |
582 | void cleanOperand(operand * o) {
583 | cleanNodesFully(o->mods,&free);
584 | free(o);
585 | }
586 |
587 | void cleanInstruction(instruction* i) {
588 | if(i->guard) {
589 | free(i->guard);
590 | }
591 | cleanNodesFully(i->mods, &free);
592 | if(i->num_operands > 0) {
593 | int x;
594 | for(x = 0; x < i->num_operands; x++) {
595 | cleanOperand(i->operands[x]);
596 | }
597 | free(i->operands);
598 | }
599 |
600 | free(i);
601 | }
602 |
603 |
--------------------------------------------------------------------------------
/tools/src/output.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "binary.hpp"
6 | #include "cfghelpers.hpp"
7 | #include "common.hpp"
8 | #include "cudacommon.hpp"
9 | #include "output.hpp"
10 | #include "asm2bin.hpp"
11 | using namespace std;
12 |
13 | #ifdef WINDOWS
14 | #include
15 | #define LLx "I64x"
16 | #else
17 | #define LLx "llx"
18 | #endif
19 |
20 | extern node * _blocks;
21 | extern char* _kernel_name;
22 | extern int _arch;
23 | extern int _cuobjdump_version;
24 | extern int _shared_memory;
25 | extern int _min_stack_size;
26 | extern int _max_stack_size;
27 | extern int _frame_size;
28 | extern char** _function_names;
29 | extern int _num_functions;
30 | extern node * _functions;
31 | extern int _tag;
32 | extern std::string _specialNames35[];
33 |
34 | void printfMetadata() {
35 | if(_kernel_name) {
36 | cout << "//Name: " << _kernel_name << "\n";
37 | }
38 | cout << "//Arch: sm_" << _arch << "\n";
39 | cout << "//cuobjdump: " << _cuobjdump_version << "\n";
40 | cout << "//Shared memory usage: " << _shared_memory << "\n";
41 | cout << "//Min Stack Size: " << _min_stack_size << "\n";
42 | cout << "//Max Stack Size: " << _max_stack_size << "\n";
43 | cout << "//Frame Size: " << _frame_size << "\n";
44 | if(_function_names) {
45 | cout << "//Function count: " << _num_functions << "\n";
46 | for(int x = 0; x < _num_functions; x++) {
47 | cout << "//Function: " << _function_names[x] << "\n";
48 | }
49 | }
50 | }
51 |
52 | char * operandToString(instruction * inst, operand * op) {
53 | char * answer = (char*) malloc(sizeof(char) * 256);
54 | char * pointer = answer;
55 | bool mustShowHex = false; //used for mem operand
56 |
57 | int counter;//because g++ gets mad if anything is defined inside a case
58 |
59 | switch (op->type) {
60 | case type_register:
61 | if(op->properties & prop_not) {
62 | sprintf(pointer, "!");
63 | pointer++;
64 | } else if(op->properties & prop_minus) {
65 | sprintf(pointer, "-");
66 | pointer++;
67 | } else if(op->properties & prop_bitwise_complement) {
68 | sprintf(pointer, "~");
69 | pointer++;
70 | }
71 | if(op->properties & prop_absolute_value) {
72 | sprintf(pointer, "|");
73 | pointer++;
74 | }
75 |
76 | if(op->reg < 0) {
77 | sprintf(pointer, "RZ");
78 | pointer+=2;
79 | } else {
80 | sprintf(pointer, "R%d",(int) op->val1);
81 | pointer += strlen(pointer);
82 | }
83 |
84 | if(op->properties & prop_absolute_value) {
85 | sprintf(pointer, "|");
86 | pointer++;
87 | }
88 |
89 | break;
90 |
91 | case type_special_reg:
92 | if(op->properties & prop_not) {
93 | sprintf(pointer, "!");
94 | pointer++;
95 | } else if(op->properties & prop_minus) {
96 | sprintf(pointer, "-");
97 | pointer++;
98 | } else if(op->properties & prop_bitwise_complement) {
99 | sprintf(pointer, "~");
100 | pointer++;
101 | }
102 | if(op->properties & prop_absolute_value) {
103 | sprintf(pointer, "|");
104 | pointer++;
105 | }
106 |
107 | sprintf(pointer, "SR");
108 | pointer += 2;
109 | if(_arch >= 35 && _cuobjdump_version < 55) {
110 | sprintf(pointer, "%d", (int) op->val1);
111 | pointer += strlen(pointer);
112 | } else if(_arch < 35) {
113 | sprintf(pointer, "_");
114 | pointer++;
115 | for(int x = 0; x < _numSpecials20; x++) {
116 | if(_specialIDs20[x] == (int) op->val1) {
117 | const char * name = _specialNames20[x].c_str();
118 | sprintf(pointer, "%s", name);
119 | pointer += strlen(pointer);
120 | break;
121 | }
122 | }
123 | } else if(_arch < 70) {
124 | if(_specialNames35[op->val1].length() > 0) {
125 | sprintf(pointer, "_");
126 | pointer++;
127 | sprintf(pointer, "%s", _specialNames35[op->val1].c_str());
128 | pointer += strlen(pointer);
129 | } else {
130 | sprintf(pointer, "%d", (int) op->val1);
131 | pointer += strlen(pointer);
132 | }
133 | } else {
134 | cerr << "WARNINg o~120: unimplemented arch for special operands.\n";
135 | sprintf(pointer, "%d", (int) op->val1);
136 | pointer += strlen(pointer);
137 | }
138 |
139 | if(op->properties & prop_absolute_value) {
140 | sprintf(pointer, "|");
141 | pointer++;
142 | }
143 |
144 | break;
145 |
146 | case type_predicate:
147 | if(op->properties & prop_not) {
148 | sprintf(pointer, "!");
149 | pointer++;
150 | }
151 |
152 | if(op->val1 == 7) {
153 | if(_cuobjdump_version <= 50 && _arch < 35) {
154 | sprintf(pointer, "pt");
155 | pointer += 2;
156 | } else {
157 | sprintf(pointer, "PT");
158 | pointer += 2;
159 | }
160 | } else {
161 | sprintf(pointer, "P%d", (int) op->val1);
162 | pointer += strlen(pointer);
163 | }
164 |
165 | break;
166 |
167 | case type_label:
168 | sprintf(pointer, "label");
169 | pointer += 5;
170 | sprintf(pointer, "%s", op->strval);
171 | pointer += strlen(pointer);
172 | break;
173 |
174 | case type_sb:
175 | sprintf(pointer, "SB");
176 | pointer += 2;
177 | sprintf(pointer, "%d", (int) op->val1);
178 | pointer += strlen(pointer);
179 | break;
180 |
181 | case type_hex:
182 | if(op->properties & prop_not) {
183 | sprintf(pointer, "!");
184 | pointer++;
185 | } else if(op->properties & prop_minus && !(op->properties & prop_float) && !(op->properties & prop_double)) {
186 | sprintf(pointer, "-");
187 | pointer++;
188 | } else if(op->properties & prop_bitwise_complement) {
189 | sprintf(pointer, "~");
190 | pointer++;
191 | }
192 | if(op->properties & prop_absolute_value) {
193 | sprintf(pointer, "|");
194 | pointer++;
195 | }
196 |
197 | if((op->properties & prop_float) || (op->properties & prop_float32i)) {
198 | if(op->val1 == 0x7f80000000000000LL >> 44) {
199 | sprintf(pointer, "+INF ");
200 | pointer += 5;
201 | } else if(op->val1 == 0xff80000000000000LL >> 44) {
202 | sprintf(pointer, "-INF ");
203 | pointer += 5;
204 | } else {
205 | union {unsigned int i; float f;} u;
206 | u.i = (int)(op->val1 & 0xffffffff);
207 | if(!(op->properties & prop_float32i)) {
208 | u.i = u.i << 12;
209 | }
210 | if(op->properties & prop_exp) {
211 | sprintf(pointer, "%.*e", op->precision, (double)u.f);
212 | pointer += strlen(pointer);
213 | } else {
214 | sprintf(pointer, "%.*f", op->precision, u.f);
215 | pointer += strlen(pointer);
216 | }
217 | }
218 | }
219 | else if(op->properties & prop_double) {
220 | if(op->val1 == 0x7ff0000000000000LL >> 44) {
221 | sprintf(pointer, "+INF ");
222 | pointer += 5;
223 | } else if(op->val1 == 0xfff0000000000000LL >> 44) {
224 | sprintf(pointer, "-INF ");
225 | pointer += 5;
226 | } else {
227 | union {unsigned long long int i; double d;} u;
228 | u.i = op->val1 << 32;
229 | if(!(op->properties & prop_float32i)) {
230 | u.i = u.i << 12;
231 | }
232 | if(op->properties & prop_exp) {
233 | sprintf(pointer, "%.*e", op->precision, u.d);
234 | pointer += strlen(pointer);
235 | } else {
236 | sprintf(pointer, "%.*f", op->precision, u.d);
237 | pointer += strlen(pointer);
238 | }
239 | }
240 | } else {
241 | sprintf(pointer, "0x%" LLx, op->val1);
242 | pointer += strlen(pointer);
243 | }
244 |
245 | if(op->properties & prop_absolute_value) {
246 | sprintf(pointer, "|");
247 | pointer++;
248 | }
249 |
250 | break;
251 |
252 | case type_bit_list:
253 | sprintf(pointer, "{");
254 | pointer += 1;
255 |
256 | counter = 0;
257 | for(unsigned int x = 0; x < 10; x++) {
258 | if(op->val1 & (0x1 << x)) {
259 | counter++;
260 | }
261 | }
262 |
263 | for(unsigned int x = 9; x >= 0; x--) {
264 | if(op->val1 & (0x1 << x)) {
265 | counter--;
266 | sprintf(pointer, "%d", x);
267 | pointer += strlen(pointer);
268 | if(counter) {
269 | sprintf(pointer, ",");
270 | pointer++;
271 | } else {
272 | break;
273 | }
274 | }
275 | }
276 |
277 | sprintf(pointer, "}");
278 | pointer += 1;
279 |
280 | break;
281 |
282 | case type_const_mem:
283 | if(op->properties & prop_not) {
284 | sprintf(pointer, "!");
285 | pointer++;
286 | } else if(op->properties & prop_minus) {
287 | sprintf(pointer, "-");
288 | pointer++;
289 | } else if(op->properties & prop_bitwise_complement) {
290 | sprintf(pointer, "~");
291 | pointer++;
292 | }
293 | if(op->properties & prop_absolute_value) {
294 | sprintf(pointer, "|");
295 | pointer++;
296 | }
297 |
298 | sprintf(pointer, "c");
299 | pointer++;
300 |
301 | if(_cuobjdump_version <= 50) {
302 | if(op->properties & prop_absolute_value) {
303 | sprintf(pointer, "|");
304 | pointer++;
305 | }
306 | }
307 |
308 | if(_cuobjdump_version <= 50) {
309 | sprintf(pointer, " [0x%x] [", op->val2);
310 | pointer += strlen(pointer);
311 | }
312 | else {
313 | if((op->converted && inst->op != opcode_F2F) || (inst->op == opcode_XMAD && op != inst->operands[inst->num_operands - 1])) {
314 | sprintf(pointer, "[0x%x] [", op->val2);
315 | pointer += strlen(pointer);
316 | } else {
317 | sprintf(pointer, "[0x%x][", op->val2);
318 | pointer += strlen(pointer);
319 | }
320 | }
321 |
322 | if(op->reg >= 0) {
323 | sprintf(pointer, "R%d", (int) op->val1);
324 | pointer += strlen(pointer);
325 | if(op->val3) {
326 | sprintf(pointer, "+");
327 | pointer++;
328 | }
329 | }
330 | if(op->val3 || op->reg < 0) {
331 | if(op->val3 < 0) {
332 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val3)) + 1);
333 | pointer += strlen(pointer);
334 | } else {
335 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val3);
336 | pointer += strlen(pointer);
337 | }
338 | }
339 | sprintf(pointer, "]");
340 | pointer++;
341 |
342 | if(_cuobjdump_version >= 55) {
343 | if(op->properties & prop_absolute_value) {
344 | sprintf(pointer, "|");
345 | pointer++;
346 | }
347 | }
348 |
349 | break;
350 |
351 | case type_mem:
352 | if(inst->op == opcode_LD_LDU) {
353 | mustShowHex = true;
354 | }
355 |
356 | if(op->properties & prop_not) {
357 | sprintf(pointer, "!");
358 | pointer++;
359 | } else if(op->properties & prop_minus) {
360 | sprintf(pointer, "-");
361 | pointer++;
362 | } else if(op->properties & prop_bitwise_complement) {
363 | sprintf(pointer, "~");
364 | pointer++;
365 | } else if(op->properties & prop_absolute_value) {
366 | sprintf(pointer, "|");
367 | pointer++;
368 | }
369 |
370 | sprintf(pointer, "[");
371 | pointer++;
372 | if(op->reg >= 0) {
373 | sprintf(pointer, "R%d", (int) op->val1);
374 | pointer += strlen(pointer);
375 | if(op->val2 || mustShowHex) {
376 | sprintf(pointer, "+");
377 | pointer++;
378 | }
379 | }
380 | if(mustShowHex || op->val2 || op->reg < 0) {
381 | if(op->val2 == 0) {
382 | if(_cuobjdump_version <= 50) {
383 | sprintf(pointer, "0x0");
384 | pointer += 3;
385 | } else {
386 | sprintf(pointer, "RZ");
387 | pointer += 2;
388 | }
389 | } else if(op->val2 < 0) {
390 | sprintf(pointer, "-0x%" LLx, (~((unsigned long long) op->val2)) + 1);
391 | pointer += strlen(pointer);
392 | } else {
393 | sprintf(pointer, "0x%" LLx, (unsigned long long) op->val2);
394 | pointer += strlen(pointer);
395 | }
396 | }
397 | sprintf(pointer, "]");
398 | pointer++;
399 |
400 | if(op->properties & prop_absolute_value) {
401 | sprintf(pointer, "|");
402 | pointer++;
403 | }
404 |
405 | break;
406 |
407 | case type_texture_operand:
408 | for(int x = 0; x < _numTextures; x++) {
409 | if(_textureIDs[x] == (int) op->val1) {
410 | if(_cuobjdump_version >= 55 && _textureIDs[x+1] == _textureIDs[x]) {
411 | sprintf(pointer, "%s", _textureNames[x+1]);
412 | pointer += strlen(pointer);
413 | } else {
414 | sprintf(pointer, "%s", _textureNames[x]);
415 | pointer += strlen(pointer);
416 | }
417 | break;
418 | }
419 | }
420 |
421 | break;
422 |
423 | case type_channel:
424 | for(int x = 0; x < _numChannels; x++) {
425 | if(_channelIDs[x] == (int) op->val1) {
426 | sprintf(pointer, "%s", _channelNames[x]);
427 | pointer += strlen(pointer);
428 | break;
429 | }
430 | }
431 |
432 | break;
433 |
434 | case type_other_operand:
435 | sprintf(pointer, "%s", _otherNames[op->val1]);
436 | pointer += strlen(pointer);
437 |
438 | break;
439 |
440 | default:
441 | cerr << "SANITY CHECK ERROR ~1871: operand is of non-operand type.\n";
442 | }
443 | return answer;
444 | }
445 |
446 | void fprintfInstruction(ostream & out, instruction * inst) {
447 | //printf predicate guard if it exists
448 | if(inst->guard) {
449 | out << "@";
450 | char * operand = operandToString(inst, inst->guard);
451 | out << operand << " ";
452 | if(!(inst->guard->properties & prop_not)) {
453 | out << " ";
454 | }
455 | free(operand);
456 | }
457 | else {
458 | out << " ";
459 | }
460 |
461 | //printf opcode + mods
462 | out << opcodes[inst->op];
463 | node * iter3 = inst->mods;
464 | while(iter3 != 0) {
465 | out << "." << (char*)iter3->value;
466 | iter3 = iter3->next;
467 | }
468 |
469 | //printf operands + mods
470 | int x;
471 | for(x = 0; x < inst->num_operands; x++) {
472 | if(x > 0) {
473 | out << ",";
474 | }
475 | out << " ";
476 | char * operand = operandToString(inst, inst->operands[x]);
477 | out << operand;
478 | free(operand);
479 |
480 | iter3 = inst->operands[x]->mods;
481 | while(iter3 != 0) {
482 | out << "." << (char*)iter3->value;
483 | iter3 = iter3->next;
484 | }
485 | }
486 |
487 | out << ";";
488 |
489 | //print SCHI value:
490 | if(_arch < 30) {
491 | //do nothing
492 | } else if(_arch < 50) {
493 | out << " //SCHI: 0x" << std::hex << inst->SCHIVal << std::dec;
494 | } else if(_arch >= 50 && _arch < 70) {
495 | out << " SCHI50: 0x" << std::hex << inst->SCHIVal << ", " << std::dec << inst->depBarrierW << ", " << inst->depBarrierR << ", ";
496 | for(int x = 0; x < 6; x++) {
497 | if(inst->depBarrierMask & (1 << x)) {
498 | out << "1";
499 | } else {
500 | out << "0";
501 | }
502 | }
503 | } else {
504 | cerr << "ERROR UNIMPLEMENTED o~507: don't know how to output scheduling metadata\n";
505 | }
506 | }
507 |
508 | void printfBlocks(bool metadata) {
509 | int maxLength = 40;
510 |
511 | if(metadata) {
512 | printfMetadata();
513 | }
514 |
515 | node * iter = _blocks;
516 | node * iter2;
517 | while(iter) {
518 | blockNode * currentBlock = (blockNode*) iter->value;
519 |
520 | //Print label for block:
521 | printf("label");
522 | printf("%s", ((instruction*)currentBlock->instructions->value)->label);
523 | printf(":");
524 |
525 | //if(currentBlock->canLoop) {
526 | // printf("\tCAN LOOP");
527 | //}
528 |
529 | //printf instructions inside the basic block:
530 | iter2 = currentBlock->instructions;
531 | node * iter3;
532 | while(iter2) {
533 | printf("\n\t");
534 | instruction* inst = (instruction*) iter2->value;
535 |
536 | //printf predicate guard if it exists
537 | if(inst->guard) {
538 | printf("@");
539 | char * operand = operandToString(inst, inst->guard);
540 | printf("%s ", operand);
541 | if(!(inst->guard->properties & prop_not)) {
542 | printf(" ");
543 | }
544 | free(operand);
545 | }
546 | else {
547 | printf(" ");
548 | }
549 |
550 | int length = 0;
551 |
552 | //printf opcode + mods
553 | printf("%s", opcodes[inst->op]);
554 | length += strlen(opcodes[inst->op]);
555 | iter3 = inst->mods;
556 | while(iter3 != 0) {
557 | printf(".%s", (char*)iter3->value);
558 | length += 1 + strlen((char*)iter3->value);
559 | iter3 = iter3->next;
560 | }
561 |
562 | //printf operands + mods
563 | int x;
564 | for(x = 0; x < inst->num_operands; x++) {
565 | if(x > 0) {
566 | printf(",");
567 | length++;
568 | }
569 | printf(" ");
570 | length++;
571 | char * operand = operandToString(inst, inst->operands[x]);
572 | printf("%s", operand);
573 | length += strlen(operand);
574 | free(operand);
575 |
576 | iter3 = inst->operands[x]->mods;
577 | while(iter3 != 0) {
578 | //if(strcmp((char*)iter3->value, "reuse")) {
579 | printf(".%s", (char*)iter3->value);
580 | length += 1 + strlen((char*)iter3->value);
581 | //}
582 | iter3 = iter3->next;
583 | }
584 | }
585 |
586 |
587 | printf(";");
588 | length++;
589 |
590 | if(length > maxLength) {
591 | maxLength = length;
592 | }
593 |
594 | //print SCHI value:
595 | if(_arch < 30) {
596 | //do nothing
597 | } else if(_arch < 50) {
598 | for(int x = length; x < maxLength; x++) {
599 | printf(" ");
600 | }
601 | printf("//SCHI: 0x%" LLx, (unsigned long long) inst->SCHIVal);
602 | } else if(_arch < 70) {
603 | for(int x = length; x < maxLength; x++) {
604 | printf(" ");
605 | }
606 | printf(" SCHI50: 0x%x, %d, %d, " , inst->SCHIVal, inst->depBarrierW, inst->depBarrierR);
607 | for(int x = 0; x < 6; x++) {
608 | if(inst->depBarrierMask & (1 << x)) {
609 | printf("1");
610 | } else {
611 | printf("0");
612 | }
613 | }
614 | } else {
615 | cerr << "ERROR UNIMPLEMENTED o~798: don't know how to handle scheduling metadata on this architecture\n";
616 | }
617 |
618 | iter2 = iter2->next;
619 | }
620 | printf("\n\n");
621 |
622 | iter = iter->next;
623 | }
624 | }
625 |
626 | void printfHex() {
627 | node * iter = _blocks;
628 | node * iter2;
629 | while(iter) {
630 | blockNode* currentBlock = (blockNode*) iter->value;
631 | //printf instructions inside the basic block:
632 | iter2 = currentBlock->instructions;
633 | node * iter3;
634 | while(iter2) {
635 | instruction* inst = (instruction*) iter2->value;
636 |
637 | char * hex = instructionToHexString(inst, _arch, _cuobjdump_version);
638 | printf("%s", hex);
639 | free(hex);
640 |
641 | printf(" // ");
642 |
643 | //printf predicate guard if it exists
644 | if(inst->guard) {
645 | printf("@");
646 | printf("%s", operandToString(inst, inst->guard));
647 | printf(" ");
648 | }
649 |
650 | //printf opcode + mods
651 | printf("%s", opcodes[inst->op]);
652 | iter3 = inst->mods;
653 | while(iter3) {
654 | printf(".%s", (char*)iter3->value);
655 | iter3 = iter3->next;
656 | }
657 |
658 | //printf operands + mods
659 | int x;
660 | for(x = 0; x < inst->num_operands; x++) {
661 | if(x > 0 && inst->op != opcode_BRX) {
662 | printf(",");
663 | }
664 | printf(" ");
665 | printf("%s", operandToString(inst, inst->operands[x]));
666 |
667 | iter3 = inst->operands[x]->mods;
668 | while(iter3 != 0) {
669 | printf(".%s", (char*)iter3->value);
670 | iter3 = iter3->next;
671 | }
672 | }
673 |
674 | printf("\n");
675 | iter2 = iter2->next;
676 | }
677 |
678 | iter = iter->next;
679 | }
680 | }
681 |
682 | void printfAssembly(bool metadata) {
683 | if(metadata) {
684 | printfMetadata();
685 | }
686 |
687 | node * iter = _blocks;
688 | node * iter2;
689 | while(iter) {
690 | blockNode* currentBlock = (blockNode*) iter->value;
691 | int line = currentBlock->first;
692 |
693 | //printf instructions inside the basic block:
694 | iter2 = currentBlock->instructions;
695 | node * iter3;
696 | while(iter2) {
697 | instruction* inst = (instruction*) iter2->value;
698 |
699 | //printf predicate guard if it exists
700 | if(inst->guard) {
701 | printf("@");
702 | printf("%s", operandToString(inst, inst->guard));
703 | printf(" ");
704 | }
705 |
706 | //printf opcode + mods
707 | printf("%s", opcodes[inst->op]);
708 | iter3 = inst->mods;
709 | while(iter3) {
710 | printf(".%s", (char*)iter3->value);
711 | iter3 = iter3->next;
712 | }
713 |
714 | //printf operands + mods
715 | int x;
716 | for(x = 0; x < inst->num_operands; x++) {
717 | if(x > 0 && inst->op != opcode_BRX) {
718 | printf(",");
719 | }
720 | printf(" ");
721 |
722 | printf("%s", operandToString(inst, inst->operands[x]));
723 |
724 | iter3 = inst->operands[x]->mods;
725 | while(iter3 != 0) {
726 | printf(".%s", (char*)iter3->value);
727 | iter3 = iter3->next;
728 | }
729 | }
730 |
731 | if(inst->num_operands == 0) {
732 | if(_cuobjdump_version > 55 && (inst->op == opcode_EXIT || inst->op == opcode_RET || inst->op == opcode_BRK || inst->op == opcode_CONT || inst->op == opcode_SYNC)) {
733 | /* empty */
734 | } else if(_cuobjdump_version > 50 && !inst->mods && inst->op != opcode_NOP) {
735 | printf(" ");
736 | }
737 | }
738 |
739 | printf(";");
740 |
741 | cout << "\n";
742 | line++;
743 | iter2 = iter2->next;
744 | }
745 |
746 | iter = iter->next;
747 | }
748 | }
749 |
--------------------------------------------------------------------------------
/tools/src/common.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include "common.hpp"
5 |
6 | /**
7 | * Names of special registers for sm_2x.
8 | * Corresponds one-to-one with the specialIDs array.
9 | */
10 | std::string _specialNames20[] = {"laneid", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "prim_type", "invocation_id", "y_direction", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "tid", "tid.x", "tid.y", "tid.z", "ctaparam", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "ntid.x", "ntid.y", "ntid.z", "gridparam", "nctaid.x", "nctaid.y", "nctaid.z", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "clocklo", "clockhi"};
11 |
12 | /**
13 | * Values of special registers for sm_2x.
14 | * Corresponds one-to-one with the specialNames array.
15 | */
16 | const int _specialIDs20[] = {0,2,3,4,5,6,7,8,9,10,11,16,17,18,24,25,26,27,28,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,80,81};
17 |
18 | /**
19 | * Number of recognized special registers for sm_2x.
20 | */
21 | const int _numSpecials20 = 62;
22 |
/**
 * Names of special registers for sm_3x through sm_6x.
 * There is no separate ID table: getSpecialID() returns the array index of the
 * matching name. Empty strings presumably mark IDs that have no known name
 * (NOTE(review): confirm).
 * Not const: getSpecialID() may overwrite an entry's characters with the spelling
 * it was asked to look up.
 * //TODO deal with SM_SHADER_TYPE, which does not start with SR_
 */
std::string _specialNames35[] = {"laneid", "clock", "virtcfg", "virtid", "pm0", "pm1", "pm2", "pm3", "pm4", "pm5", "pm6", "pm7", "", "", "", "", "prim_type", "invocation_id", "y_direction", "thread_kill", "shader_type", "directcbewriteaddresslow", "directcbewriteaddresshigh", "directcbewriteenabled", "machine_id_0", "machine_id_1", "machine_id_2", "machine_id_3", "affinity", "invocation_info", "wscalefactor_xy", "wscalefactor_z", "tid", "tid.x", "tid.y", "tid.z", "cta_param", "ctaid.x", "ctaid.y", "ctaid.z", "ntid", "cirqueueincrminusone", "nlatc", "", "", "", "", "", "swinlo", "swinsz", "smemsz", "smembanks", "lwinlo", "lwinsz", "lmemlosz", "lmemhioff", "eqmask", "ltmask", "lemask", "gtmask", "gemask", "regalloc", "ctxaddr", "", "globalerrorstatus", "", "warperrorstatus", "warperrorstatusclear", "", "", "", "", "pm_hi0", "pm_hi1", "pm_hi2", "pm_hi3", "pm_hi4", "pm_hi5", "pm_hi6", "pm_hi7", "clocklo", "clockhi", "globaltimerlo", "globaltimerhi", "", "", "", "", "", "", "", "", "", "", "", "", "hwtaskid", "circularqueueentryindex", "circularqueueentryaddresslow", "circularqueueentryaddresshigh"};
28 |
/**
 * Names of texture operands.
 * Corresponds one-to-one with the _textureIDs array.
 */
const char * _textureNames[] = {"1D", "ARRAY_1D", "RECT", "2D", "ARRAY_2D", "3D", "CUBE", "ARRAY_CUBE"};

/**
 * Values of texture operands.
 * Corresponds one-to-one with the _textureNames array.
 * NOTE(review): "RECT" and "2D" both map to 2, and no name maps to 5 - confirm this is intended.
 */
const int _textureIDs[] = {0,1,2,2,3,4,6,7};

/**
 * Number of recognized texture operands (entries in _textureNames / _textureIDs).
 */
const int _numTextures = 8;
45 |
/**
 * Names of channel operands.
 * Corresponds one-to-one with the _channelIDs array.
 */
const char * _channelNames[] = {"R", "G", "B", "A", "RG", "RA", "GA", "BA", "RGB", "RGA", "RBA", "GBA", "RGBA", "INVALID5", "INVALID6", "INVALID7", };

/**
 * Values of channel operands.
 * Corresponds one-to-one with the _channelNames array.
 */
const int _channelIDs[] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

/**
 * Number of recognized channel operands (entries in _channelNames / _channelIDs).
 */
const int _numChannels = 16;
62 |
/**
 * Names of miscellaneous operands.
 * getOtherID() returns the index of a name within this array.
 */
const char * _otherNames[] = {"CC", "PR"};

/**
 * Number of items in the _otherNames array.
 */
const int _numOthers = 2;
72 |
/**
 * List of opcode names.
 * An opcode's index in this array should be the same as the corresponding 'opcode' enum value;
 * the assembly printer indexes this array directly with an instruction's op field.
 */
const char * opcodes[] = {"MOV", "MOV32I", "LD", "LDU", "LDL", "LDS", "LDC", "ST", "STL", "STS", "LDLK", "LDSLK", "STUL", "STSUL", "FADD", "FADD32I", "FMUL", "FMUL32I", "FFMA", "FSET", "FSETP", "DSETP", "FCMP", "MUFU", "DADD", "DMUL", "DFMA", "IADD", "IADD32I", "IMNMX", "IMUL", "IMUL32I", "IMAD", "ISCADD", "ISET", "ISETP", "ICMP", "I2F", "I2I", "F2I", "F2F", "LOP", "LOP32I", "SHL", "SHR", "BFE", "BFI", "SEL", "SCHI", "SSY", "BRA", "BRX", "PCNT", "CONT", "PBK", "BRK", "CAL", "RET", "EXIT", "NOP", "BAR", "BPT", "B2R", "S2R", "PSETP", "PSET", "FLO", "P2R", "R2P", "TEX", "TEXDEPBAR", "RRO", "PRMT", "VADD", "DMNMX", "FMNMX", "RED", "VOTE", "POPC", "MEMBAR", "STSCUL", "LEPC", "CSETP", "ISCADD32I", "VMNMX", "TLD", "SHF", "FCHK", "ISUB", "JCAL", "SHFL", "LDG", "LD_LDU", "ATOM", "CCTL", "XMAD", "SYNC", "STG", "IADD3", "VABSDIFF", "DEPBAR", "LOP3", "TLDS", "TEXS", "LEA", "DSET", "PHI", "BINCODE"};
78 |
79 | int getSpecialID(const char * lexeme, bool format, int arch) {
80 | if(arch <= 30) {
81 | for(int x = 0; x < _numSpecials20; x++) {
82 | for(int y = 0; y >= 0; y++) {
83 | if(lexeme[y] == _specialNames20[x][y]) {
84 | if(lexeme[y] == 0) {
85 | //Make them match perfectly for printing out later:
86 | if(format) {
87 | for(int z = 0; z < y; z++) {
88 | _specialNames20[x][z] = lexeme[z];
89 | }
90 | }
91 |
92 | //Return value:
93 | return _specialIDs20[x];
94 | }
95 | } else if(lexeme[y] == '_' && _specialNames20[x][y] == '.') {
96 | //interchangeable; format varies depending on CUDA version
97 | } else if(lexeme[y] == '.' && _specialNames20[x][y] == '_') {
98 | //interchangeable; format varies depending on CUDA version
99 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames20[x][y] - ('a' - 'A')) {
100 | //interchangeable; format varies depending on CUDA version
101 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames20[x][y] - ('A' - 'a')) {
102 | //interchangeable; format varies depending on CUDA version
103 | } else {
104 | break;
105 | }
106 | }
107 | }
108 | } else if(arch < 70) {
109 | int numSpecials = sizeof(_specialNames35) / sizeof(std::string);
110 | for(int x = 0; x < numSpecials; x++) {
111 | for(int y = 0; y >= 0; y++) {
112 | if(lexeme[y] == _specialNames35[x][y]) {
113 | if(lexeme[y] == 0) {
114 | //Make them match perfectly for printing out later:
115 | if(format) {
116 | for(int z = 0; z < y; z++) {
117 | _specialNames35[x][z] = lexeme[z];
118 | }
119 | }
120 |
121 | //Return value:
122 | return x;
123 | }
124 | } else if(lexeme[y] == '_' && _specialNames35[x][y] == '.') {
125 | //interchangeable; format varies depending on CUDA version
126 | } else if(lexeme[y] == '.' && _specialNames35[x][y] == '_') {
127 | //interchangeable; format varies depending on CUDA version
128 | } else if(lexeme[y] >= 'A' && lexeme[y] <= 'Z' && lexeme[y] == _specialNames35[x][y] - ('a' - 'A')) {
129 | //interchangeable; format varies depending on CUDA version
130 | } else if(lexeme[y] >= 'a' && lexeme[y] <= 'z' && lexeme[y] == _specialNames35[x][y] - ('A' - 'a')) {
131 | //interchangeable; format varies depending on CUDA version
132 | } else {
133 | break;
134 | }
135 | }
136 | }
137 | } else {
138 | fprintf(stderr, "ERROR c~98: Unimplemented.\n");
139 | }
140 |
141 | fprintf(stderr,"WARNING: Found unrecognized special register \"%s\".\n",lexeme);
142 | return -1;
143 | }
144 |
145 | int getOtherID(const char * lexeme) {
146 | for(int x = 0; x < _numOthers; x++) {
147 | if(!strcmp(lexeme, _otherNames[x])) {
148 | return x;
149 | }
150 | }
151 |
152 | fprintf(stderr,"\n*SANITY CHECK ERROR ~22* This message is unreachable! \"%s\".\n\n",lexeme);
153 | return -1;
154 | }
155 |
156 | int getTextureID(const char * lexeme) {
157 | for(int x = 0; x < _numTextures; x++) {
158 | if(!strcmp(lexeme, _textureNames[x])) {
159 | return _textureIDs[x];
160 | }
161 | }
162 |
163 | fprintf(stderr,"\n*SANITY CHECK ERROR ~33* Found unrecognized texture operand \"%s\".\n\n",lexeme);
164 | return -1;
165 | }
166 |
167 | int getChannelID(const char * lexeme) {
168 | for(int x = 0; x < _numChannels; x++) {
169 | if(!strcmp(lexeme, _channelNames[x])) {
170 | return _channelIDs[x];
171 | }
172 | }
173 |
174 | fprintf(stderr,"\n*SANITY CHECK ERROR c~162: Found unrecognized channel operand \"%s\".\n\n",lexeme);
175 | return -1;
176 | }
177 |
178 | void initBlock(blockNode** block) {
179 | *block = (blockNode*) malloc(sizeof(blockNode));
180 | (*block)->forward = 0;
181 | (*block)->branches = 0;
182 | (*block)->predecessors = 0;
183 | (*block)->instructions = 0;
184 | (*block)->last = 65535;
185 | (*block)->size = 0;
186 | (*block)->end = end_generic;
187 | (*block)->guarded = false;
188 | (*block)->force_guard = false;
189 | (*block)->tag = 0;
190 | (*block)->target = 0;
191 | (*block)->isBranchTarget = false;
192 | }
193 |
194 | instruction* newInstruction(opcode op, node * mods, node * operands) {
195 | instruction * inst = (instruction*) malloc(sizeof(instruction));
196 | inst->label = 0;
197 | inst->op = op;
198 | inst->mods = mods;
199 | inst->guard = 0;
200 | inst->num_operands = 0;
201 | inst->operands = 0;
202 | inst->ptr = 0;
203 | //inst->bar = 0;
204 | inst->SCHIVal = 0;
205 | inst->depBarrierW = 7;
206 | inst->depBarrierR = 7;
207 | inst->depBarrierMask = 0;
208 | inst->size = 8;
209 | inst->marked = 0;
210 |
211 | if(operands) {
212 | inst->num_operands = listSize(operands);
213 | inst->operands = (operand**) malloc(inst->num_operands*sizeof(operand*));
214 | node *o = operands;
215 | int x = 0;
216 | while(o) {
217 | inst->operands[x] = (operand*) o->value;
218 | o = o->next;
219 | x++;
220 | }
221 | cleanNodes(operands);
222 | }
223 |
224 | return inst;
225 | }
226 |
227 | instruction* newInstruction(opcode op, node * mods) {
228 | return newInstruction(op, mods, (node*)0);
229 | }
230 |
231 | instruction* newInstruction(opcode op, node * mods, operand * o1) {
232 | instruction * inst = newInstruction(op, mods);
233 | inst->num_operands = 1;
234 | inst->operands = (operand**) malloc(1*sizeof(operand*));
235 | inst->operands[0] = o1;
236 |
237 | return inst;
238 | }
239 |
240 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2) {
241 | instruction * inst = newInstruction(op, mods);
242 | inst->num_operands = 2;
243 | inst->operands = (operand**) malloc(2*sizeof(operand*));
244 | inst->operands[0] = o1;
245 | inst->operands[1] = o2;
246 |
247 | return inst;
248 | }
249 |
250 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3) {
251 | instruction * inst = newInstruction(op, mods);
252 | inst->num_operands = 3;
253 | inst->operands = (operand**) malloc(3*sizeof(operand*));
254 | inst->operands[0] = o1;
255 | inst->operands[1] = o2;
256 | inst->operands[2] = o3;
257 |
258 | return inst;
259 | }
260 |
261 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4) {
262 | instruction * inst = newInstruction(op, mods);
263 | inst->num_operands = 4;
264 | inst->operands = (operand**) malloc(4*sizeof(operand*));
265 | inst->operands[0] = o1;
266 | inst->operands[1] = o2;
267 | inst->operands[2] = o3;
268 | inst->operands[3] = o4;
269 | return inst;
270 | }
271 |
272 | instruction* newInstruction(opcode op, node * mods, operand * o1, operand * o2, operand * o3, operand * o4, operand * o5) {
273 | instruction * inst = newInstruction(op, mods);
274 | inst->num_operands = 5;
275 | inst->operands = (operand**) malloc(5 * sizeof(operand*));
276 | inst->operands[0] = o1;
277 | inst->operands[1] = o2;
278 | inst->operands[2] = o3;
279 | inst->operands[3] = o4;
280 | inst->operands[4] = o5;
281 | return inst;
282 | }
283 |
284 | operand* newOperand(token_type type, long long val1, int val2, int val3) {
285 | operand * op = (operand*) malloc(sizeof(operand));
286 | op->type = type;
287 | op->strval = 0;
288 | op->val1 = val1;
289 | op->val2 = val2;
290 | op->val3 = val3;
291 | op->reg = -1;
292 | op->dest = 0;
293 | op->mods = 0;
294 | op->width = 1;
295 | op->properties = (operand_prop) 0;
296 | op->converted = false;
297 |
298 | if(type == type_register || type == type_const_mem || type == type_mem) {
299 | if(val1 >= 0) {
300 | op->reg = val1;
301 | }
302 | }
303 |
304 | return op;
305 | }
306 |
307 | operand * newOperand(token_type type, operand_prop props, long long val1, int val2, int val3) {
308 | operand * op = newOperand(type, val1, val2, val3);
309 | op->properties = props;
310 | return op;
311 | }
312 |
313 | node * stringsToList(const char * str1, const char * str2, const char * str3) {
314 | node * list = 0;
315 |
316 | if(!str1) {
317 | return list;
318 | }
319 | char * str = (char *) malloc(strlen(str1) + 1);
320 | memcpy(str, str1, strlen(str1) + 1);
321 | addLast(&list, str);
322 |
323 | if(!str2) {
324 | return list;
325 | }
326 | str = (char *) malloc(strlen(str2) + 1);
327 | memcpy(str, str2, strlen(str2) + 1);
328 | addLast(&list, str);
329 |
330 | if(!str3) {
331 | return list;
332 | }
333 | str = (char *) malloc(strlen(str3) + 1);
334 | memcpy(str, str3, strlen(str3) + 1);
335 | addLast(&list, str);
336 |
337 | return list;
338 | }
339 |
340 | node * addNode(node **first, void * value, long long (*comparator)(void*, void*)) {
341 | //check if new node replaces first
342 | if(!*first || comparator((*first)->value,value) > 0) {
343 | node * newNode = (node*) malloc(sizeof(node));
344 | newNode->value = value;
345 | newNode->next = *first;
346 | *first = newNode;
347 | return newNode;
348 | }
349 |
350 | if(!comparator((*first)->value,value)) {
351 | return 0;
352 | }
353 |
354 | //find location to place new node
355 | node* iterator = *first;
356 | while(iterator->next && comparator(iterator->next->value,value) < 0) {
357 | iterator = iterator->next;
358 | }
359 |
360 | //if already in list, return
361 | if(iterator->next && !comparator(iterator->next->value,value)) {
362 | return 0;
363 | }
364 |
365 | //add new node to list
366 | node * newNode = (node*) malloc(sizeof(node));
367 | newNode->value = value;
368 | newNode->next = iterator->next;
369 | iterator->next = newNode;
370 | return newNode;
371 | }
372 |
373 | void addLast(node ** first, void * value) {
374 | //check if new node replaces first
375 | if(!*first) {
376 | node * newNode = (node*) malloc(sizeof(node));
377 | newNode->value = value;
378 | newNode->next = *first;
379 | *first = newNode;
380 | return;
381 | }
382 |
383 | //find last
384 | node* iterator = *first;
385 | while(iterator->next != 0) {
386 | iterator = iterator->next;
387 | }
388 |
389 | //add new node to list
390 | node * newNode = (node*) malloc(sizeof(node));
391 | newNode->value = value;
392 | newNode->next = iterator->next;
393 | iterator->next = newNode;
394 | return;
395 | }
396 |
397 | void addFirst(node ** first, void * value) {
398 | node * newNode = (node*) malloc(sizeof(node));
399 | newNode->value = value;
400 | newNode->next = *first;
401 | *first = newNode;
402 | }
403 |
404 | void addAfter(node **first, node * afterMe, void * value) {
405 | node * newNode = (node*) malloc(sizeof(node));
406 | newNode->value = value;
407 | if(afterMe != 0) {
408 | newNode->next = afterMe->next;
409 | afterMe->next = newNode;
410 | }
411 | else {
412 | newNode->next = *first;
413 | *first = newNode;
414 | }
415 | }
416 |
417 | void addBefore(node **first, node * beforeMe, void * value) {
418 | if(!*first || (*first) == beforeMe) {
419 | node * newNode = (node*) malloc(sizeof(node));
420 | newNode->value = value;
421 | newNode->next = *first;
422 | *first = newNode;
423 | return;
424 | }
425 |
426 | //find location to place new node
427 | node* iterator = *first;
428 | while(iterator->next != beforeMe) {
429 | iterator = iterator->next;
430 | }
431 |
432 | //add new node to list
433 | node * newNode = (node*) malloc(sizeof(node));
434 | newNode->value = value;
435 | newNode->next = iterator->next;
436 | iterator->next = newNode;
437 | }
438 |
439 | void addAtIndex(node ** first, void * value, int index) {
440 | if(index < 1) {
441 | addFirst(first, value);
442 | return;
443 | }
444 |
445 | //Find place to add to:
446 | node* iterator = *first;
447 | while(iterator->next != 0 && index > 1) {
448 | iterator = iterator->next;
449 | index--;
450 | }
451 |
452 | //add new node to list
453 | node * newNode = (node*) malloc(sizeof(node));
454 | newNode->value = value;
455 | newNode->next = iterator->next;
456 | iterator->next = newNode;
457 | return;
458 | }
459 |
460 | node * removeNode(node **first, void * value, long long (*comparator)(void*, void*)) {
461 | //make sure list is nonempty
462 | if(!*first) {
463 | return 0;
464 | }
465 |
466 | //check if first node is deleted
467 | if(!comparator((*first)->value,value)) {
468 | node * n = *first;
469 | *first = (*first)->next;
470 | return n;
471 | }
472 |
473 | //find location of node to delete
474 | node* iterator = *first;
475 | while(iterator->next != 0 && comparator(iterator->next->value,value)) {
476 | iterator = iterator->next;
477 | }
478 |
479 | //if node not found, return 0
480 | if(iterator->next == 0) {
481 | return 0;
482 | }
483 |
484 | //delete node
485 | node * n = iterator->next;
486 | iterator->next = iterator->next->next;
487 | n->next = 0;
488 | return n;
489 | }
490 |
491 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*)) {
492 | //make sure list is nonempty
493 | if(!*first) {
494 | return false;
495 | }
496 |
497 | //check if first node is deleted
498 | if(!comparator((*first)->value,value)) {
499 | node * n = *first;
500 | *first = (*first)->next;
501 | free(n);
502 | return true;
503 | }
504 |
505 | //find location of node to delete
506 | node* iterator = *first;
507 | while(iterator->next && comparator(iterator->next->value,value)) {
508 | iterator = iterator->next;
509 | }
510 |
511 | //if node not found, return 0
512 | if(!iterator->next) {
513 | return false;
514 | }
515 |
516 | //delete node
517 | node * n = iterator->next;
518 | iterator->next = iterator->next->next;
519 | free(n);
520 | return true;
521 | }
522 |
523 | bool deleteNode(node **first, void * value, long long (*comparator)(void*, void*), void (*valueCleaner)(void *)) {
524 | //make sure list is nonempty
525 | if(!*first) {
526 | return false;
527 | }
528 |
529 | //check if first node is deleted
530 | if(!comparator((*first)->value,value)) {
531 | node * n = *first;
532 | *first = (*first)->next;
533 | valueCleaner(n->value);
534 | free(n);
535 | return true;
536 | }
537 |
538 | //find location of node to delete
539 | node* iterator = *first;
540 | while(iterator->next && comparator(iterator->next->value,value)) {
541 | iterator = iterator->next;
542 | }
543 |
544 | //if node not found, return 0
545 | if(!iterator->next) {
546 | return false;
547 | }
548 |
549 | //delete node
550 | node * n = iterator->next;
551 | iterator->next = iterator->next->next;
552 | valueCleaner(n->value);
553 | free(n);
554 | return true;
555 | }
556 |
557 | bool deleteNode(node **first, node * del) {
558 | //make sure list is nonempty
559 | if(!*first) {
560 | return false;
561 | }
562 |
563 | //check if first node is deleted
564 | if(*first == del) {
565 | node * n = *first;
566 | *first = (*first)->next;
567 | free(n);
568 | return true;
569 | }
570 |
571 | //find location of node to delete
572 | node* iterator = *first;
573 | while(iterator->next && iterator->next != del) {
574 | iterator = iterator->next;
575 | }
576 |
577 | //if node not found, return 0
578 | if(!iterator->next) {
579 | return false;
580 | }
581 |
582 | //delete node
583 | node * n = iterator->next;
584 | iterator->next = iterator->next->next;
585 | free(n);
586 | return true;
587 | }
588 |
589 | long long instructionorder(void * a, void * b) {
590 | instruction * s = (instruction*) a;
591 | instruction * t = (instruction*) b;
592 | return s->line - t->line;
593 | }
594 |
595 | long long blockorder(void * a, void * b) {
596 | blockNode * s = (blockNode*) a;
597 | blockNode * t = (blockNode*) b;
598 | return s->first - t->first;
599 | }
600 |
/**
 * List comparator that treats the two pointer values themselves as integers.
 * @param a First value, stored directly in the pointer
 * @param b Second value, stored directly in the pointer
 * @return a minus b as integers (negative, zero or positive).
 */
long long intorder(void * a, void * b) {
	const long long lhs = (long long) a;
	const long long rhs = (long long) b;
	return lhs - rhs;
}
606 |
/**
 * List comparator for C strings, using strcmp.
 * @param a Pointer to a NUL-terminated string
 * @param b Pointer to a NUL-terminated string
 * @return The result of strcmp(a, b) widened to long long.
 */
long long charstarorder(void * a, void * b) {
	const int verdict = strcmp((char*) a, (char*) b);
	return (long long) verdict;
}
612 |
/**
 * List comparator ordering C strings lexicographically, using strcmp.
 * @param a Pointer to a NUL-terminated string
 * @param b Pointer to a NUL-terminated string
 * @return The result of strcmp(a, b).
 */
long long lexicographicorder(void * a, void * b) {
	const char * lhs = (char*) a;
	const char * rhs = (char*) b;
	const long long verdict = strcmp(lhs, rhs);
	return verdict;
}
619 |
620 | int listSize(node *first) {
621 | int count = 0;
622 |
623 | while(first) {
624 | count++;
625 | first = first->next;
626 | }
627 |
628 | return count;
629 | }
630 |
631 | node * containsValue(node *first, void * val) {
632 | while(first && first->value != val) {
633 | first = first->next;
634 | }
635 |
636 | return first;
637 | }
638 |
639 | void cleanNodes(node *first) {
640 | node * next;
641 | while(first) {
642 | next = first->next;
643 | free(first);
644 | first = next;
645 | }
646 | }
647 |
648 | void cleanNodesFully(node *first, void (*valueCleaner)(void *)) {
649 | node * next;
650 | while(first) {
651 | next = first->next;
652 | valueCleaner(first->value);
653 | free(first);
654 | first = next;
655 | }
656 | }
657 |
658 | void cleanOperand(operand * o) {
659 | if(o->strval) {
660 | free(o->strval);
661 | }
662 | cleanNodesFully(o->mods,&free);
663 | free(o);
664 | }
665 |
666 | void cleanInstruction(instruction* i) {
667 | if(i->label) {
668 | free(i->label);
669 | }
670 | if(i->guard) {
671 | free(i->guard);
672 | }
673 | cleanNodesFully(i->mods, &free);
674 | if(i->num_operands > 0) {
675 | int x;
676 | for(x = 0; x < i->num_operands; x++) {
677 | cleanOperand(i->operands[x]);
678 | }
679 | }
680 | free(i->operands);
681 |
682 | free(i);
683 | }
684 |
685 | void cleanBlock(blockNode* block) {
686 | node * n = block->instructions;
687 | node * next;
688 | while(n != 0) {
689 | next = n->next;
690 | instruction * i = (instruction*) n->value;
691 | cleanInstruction(i);
692 | free(n);
693 | n = next;
694 | }
695 | cleanNodes(block->forward);
696 | cleanNodes(block->branches);
697 | cleanNodes(block->predecessors);
698 |
699 | free(block);
700 | }
701 |
--------------------------------------------------------------------------------
/tools/src/cfghelpers.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include "cfghelpers.hpp"
8 | #include "common.hpp"
9 | #include "cudacommon.hpp"
10 | #include "output.hpp"
11 | using namespace std;
12 |
13 | extern node * _blocks;
14 | extern char* _kernel_name;
15 | extern int _arch;
16 | extern int _shared_memory;
17 | extern char** _function_names;
18 | extern int _num_functions;
19 | extern node * _functions;
20 | extern int _tag;
21 | extern int _num_registers;
22 | extern int _counter;
23 | extern bool hasMod(instruction* inst, char* mod);
24 |
/**
 * Records a branch edge from block 'a' to the block that starts with the given
 * instruction. If the instruction lies in the interior of an existing block,
 * that block is first split so the instruction begins a new block.
 * Only the first block in _blocks that starts with or contains the instruction is processed.
 * @param a The source block of the branch; may be null, in which case no edge is
 *          added (a split can still happen)
 * @param line The target instruction
 * @return true if a new edge was added to a->branches or a block was split;
 *         false otherwise (including when the target already starts a block and 'a' is null).
 */
bool addBranchToLine(blockNode * a, instruction * line) {
	blockNode * b;
	node * j = _blocks;
	bool added = false;
	while(j) {
		b = (blockNode*) j->value;
		//NOTE(review): assumes every block has at least one instruction - confirm.
		if(line == b->instructions->value) {
			//Target already starts block b: just connect a -> b.
			if(a) {
				//addNode returns 0 if the edge already exists, leaving 'added' false.
				added = added || addNode(&a->branches, (void*)b, &blockorder);
				addNode(&b->predecessors, (void*)a, &blockorder);
			} else {
				return false;
			}
			break;
		} else {
			//Look for the node whose successor holds the target instruction.
			node * insts = b->instructions;
			while(insts->next) {
				if(insts->next->value == line) {
					break;
				}
				insts = insts->next;
			}

			//Non-null here means the target is inside b (and is not b's first instruction).
			if(insts->next) {
				added = true;

				//split block
				//The new block takes over b's tail: its outgoing edges, last line,
				//guard flag, target and end type. b becomes a plain fall-through block.
				blockNode * newBlock;
				initBlock(&newBlock);
				newBlock->first = line->line;
				newBlock->forward = b->forward;
				newBlock->branches = b->branches;
				newBlock->last = b->last;
				newBlock->guarded = b->guarded;
				newBlock->target = b->target;
				newBlock->end = b->end;
				b->end = end_generic;
				b->last = line->line - 1;
				b->branches = 0;
				b->forward = 0;

				//split instruction lists
				newBlock->instructions = insts->next;
				insts->next = 0;

				//Fix addresses:
				newBlock->address = ((instruction*)newBlock->instructions->value)->address;
				int orgSize = b->size;
				b->size = newBlock->address - b->address;
				newBlock->size = orgSize - b->size;

				//Add appropriate successors:
				addNode(&b->forward, (void*)newBlock, &blockorder);
				addNode(&newBlock->predecessors, (void*)b, &blockorder);
				if(a) {
					addNode(&a->branches, (void*)newBlock, &blockorder);
					addNode(&newBlock->predecessors, (void*)a, &blockorder);
				}

				//Change predecessors for other blocks appropriately
				//Every block that listed b as a predecessor now also lists newBlock;
				//b is dropped from that list unless newBlock is guarded.
				//newBlock is not in _blocks yet, so its own predecessor list is untouched here.
				node * blocks = _blocks;
				while(blocks) {
					blockNode * block = (blockNode*) blocks->value;
					if(containsValue(block->predecessors, (int*)b)) {
						if(!newBlock->guarded) {
							deleteNode(&block->predecessors, b, &blockorder);
						}
						addNode(&block->predecessors, newBlock, &blockorder);
					}
					blocks = blocks->next;
				}

				//Add newBlock to blocks list:
				addNode(&_blocks, (void*)newBlock, &blockorder);
				break;
			}
		}
		j = j->next;
	}

	return added;
}
107 |
/**
 * Records a branch edge from block 'a' to the block that starts at the given
 * line number. If the line lies in the interior of an existing block, that
 * block is first split so the line begins a new block.
 * Only the first block in _blocks that starts at or contains the line is processed.
 * @param a The source block of the branch; may be null, in which case no edge is
 *          added (a split can still happen)
 * @param line The target line number
 * @return true if a new edge was added to a->branches or a block was split; false otherwise.
 */
bool addBranchToLine(blockNode * a, int line) {
	blockNode * b;
	node * k;
	node * j = _blocks;
	bool added = false;
	while(j) {
		b = (blockNode*) j->value;
		if(line == b->first && a) {
			//Target already starts block b: just connect a -> b.
			//addNode returns 0 if the edge already exists, leaving 'added' false.
			added = added || addNode(&a->branches, (void*)b, &blockorder);

			addNode(&b->predecessors, (void*)a, &blockorder);
			break;
		} else if(line > b->first && line <= b->last) {
			//Target is inside b (and is not b's first line).
			added = true;

			//split block
			//The new block takes over b's tail: its outgoing edges, last line,
			//guard flag, target and end type. b becomes a plain fall-through block.
			blockNode * newBlock;
			initBlock(&newBlock);
			newBlock->first = line;
			newBlock->forward = b->forward;
			newBlock->branches = b->branches;
			newBlock->last = b->last;
			newBlock->guarded = b->guarded;
			newBlock->target = b->target;
			newBlock->end = b->end;
			b->end = end_generic;
			b->last = line - 1;
			b->branches = 0;
			b->forward = 0;

			//split instruction lists
			//Advance one node per line number until k is the node just before the target.
			//NOTE(review): this presumably relies on the instructions of a block
			//having consecutive line numbers starting at b->first - confirm.
			k = b->instructions;
			int kline = b->first + 1;
			while(kline < line) {
				k = k->next;
				kline++;
			}
			newBlock->instructions = k->next;
			k->next = 0;

			//Fix addresses:
			newBlock->address = ((instruction*)newBlock->instructions->value)->address;
			int orgSize = b->size;
			b->size = newBlock->address - b->address;
			newBlock->size = orgSize - b->size;

			//Add appropriate successors:
			addNode(&b->forward, (void*)newBlock, &blockorder);
			addNode(&newBlock->predecessors, (void*)b, &blockorder);
			if(a) {
				addNode(&a->branches, (void*)newBlock, &blockorder);
				addNode(&newBlock->predecessors, (void*)a, &blockorder);
			}

			//Change predecessors for other blocks appropriately
			//Every block that listed b as a predecessor now also lists newBlock;
			//b is dropped from that list unless newBlock is guarded.
			//newBlock is not in _blocks yet, so its own predecessor list is untouched here.
			node * blocks = _blocks;
			while(blocks) {
				blockNode * block = (blockNode*) blocks->value;
				if(containsValue(block->predecessors, (int*)b)) {
					if(!newBlock->guarded) {
						deleteNode(&block->predecessors, b, &blockorder);
					}
					addNode(&block->predecessors, newBlock, &blockorder);
				}
				blocks = blocks->next;
			}

			//Add newBlock to blocks list:
			addNode(&_blocks, (void*)newBlock, &blockorder);
			break;
		}
		j = j->next;
	}

	return added;
}
184 |
185 | bool addBranchToAddress(blockNode * a, int address) {
186 | blockNode * b;
187 | node * j = _blocks;
188 | while(j) {
189 | b = (blockNode*) j->value;
190 | if(address >= b->address && address < b->address + b->size) {
191 | node * insts = b->instructions;
192 | while(insts) {
193 | instruction * inst = (instruction*) insts->value;
194 | if(inst->address == address) {
195 | int lineNumber = inst->line;
196 | return addBranchToLine(a, lineNumber);
197 | }
198 |
199 | insts = insts->next;
200 | }
201 | }
202 |
203 | j = j->next;
204 | }
205 |
206 | cerr << "ERROR ch~132: can't find branch target at address 0x" << std::hex << address << ".\n";
207 | cerr << "\tThis error may mean that a branch jumps to a nonexistent instruction.\n";
208 | cerr << "\tSince we use pointers in our IR, this will cause problems.\n";
209 | return false;
210 | }
211 |
212 | /**
213 | * Gets the address corresponding to a given label.
214 | * //TODO: this would be better with some preprocessing
215 | * @param label The label to look for
216 | * @return The address of an instruction with matching label, or -1.
217 | */
218 | int labelToAddress(char * label) {
219 | node * blocks = _blocks;
220 | while(blocks) {
221 | blockNode * block = (blockNode*) blocks->value;
222 | node * instructions = block->instructions;
223 | while(instructions) {
224 | instruction * inst = (instruction*) instructions->value;
225 | if(inst->label) {
226 | if(!strcmp(inst->label, label)) {
227 | return inst->address;
228 | }
229 | }
230 | instructions = instructions->next;
231 | }
232 | blocks = blocks->next;
233 | }
234 | return -1;
235 | }
236 |
237 | void labelsToAddresses() {
238 | node * blocks = _blocks;
239 | while(blocks) {
240 | blockNode * block = (blockNode*) blocks->value;
241 | node * instructions = block->instructions;
242 | while(instructions) {
243 | instruction * inst = (instruction*) instructions->value;
244 | for(int x = 0; x < inst->num_operands; x++) {
245 | if(inst->operands[x]->type == type_label) {
246 | inst->operands[x]->val1 = labelToAddress(inst->operands[x]->strval);
247 | if(inst->operands[x]->val1 >= 0) {
248 | inst->operands[x]->type = type_hex;
249 | if(!instructions->next) {
250 | if(block->end == end_call || block->end == end_branch) {
251 | block->target = inst->operands[x]->val1;
252 | }
253 | }
254 | } else {
255 | cerr << "ERROR ch~261: can't find label " << inst->operands[x]->strval << ".\n";
256 | cerr << "\tThis error may mean that a branch jumps to a nonexistent instruction.\n";
257 | inst->operands[x]->val1 = 0x0;
258 | inst->operands[x]->type = type_hex;
259 | block->target = 0x0;
260 | }
261 | }
262 | }
263 | instructions = instructions->next;
264 | }
265 | blocks = blocks->next;
266 | }
267 | }
268 |
269 | void addressesToLabels() {
270 | //Apply labels to blocks, overwriting existing labels:
271 | int counter = 0;
272 | node * blocks = _blocks;
273 | while(blocks) {
274 | blockNode * block = (blockNode*) blocks->value;
275 | node * instructions = block->instructions;
276 | instruction * firstInst = (instruction*) instructions->value;
277 | if(firstInst->label) {
278 | free(firstInst->label);
279 | }
280 | firstInst->label = (char*) malloc(sizeof(char) * sizeof(int) * 4);
281 | sprintf(firstInst->label, "%d", counter);
282 | blocks = blocks->next;
283 | counter++;
284 | }
285 |
286 | //Convert addresses to labels for branches, synchronizations instructions, etcetera:
287 | blocks = _blocks;
288 | while(blocks) {
289 | blockNode * block = (blockNode*) blocks->value;
290 | node * instructions = block->instructions;
291 | while(instructions) {
292 | instruction * inst = (instruction*) instructions->value;
293 | if(inst->ptr) {
294 | for(int x = 0; x < inst->num_operands; x++) {
295 | if(inst->operands[x]->type == type_hex && ((blockNode*)inst->ptr)->address == (long long)inst->operands[x]->val1) {
296 | blockNode * ptr = ((blockNode*)inst->ptr);
297 | instruction * target = (instruction*) ptr->instructions->value;
298 | inst->operands[x]->type = type_label;
299 | if(inst->operands[x]->strval) {
300 | free(inst->operands[x]->strval);
301 | }
302 | inst->operands[x]->strval = (char*) malloc(sizeof(char) * (strlen(target->label) + 1));
303 | strcpy(inst->operands[x]->strval, target->label);
304 | }
305 | }
306 | }
307 | instructions = instructions->next;
308 | }
309 | blocks = blocks->next;
310 | }
311 | }
312 |
313 | //Helper for combine blocks
314 | void combineBlocksHelper(node * blocks, blockNode * deleted, blockNode * combined) {
315 | while(blocks) {
316 | blockNode * block = (blockNode *) blocks->value;
317 | deleteNode(&block->forward, deleted, &intorder);
318 | deleteNode(&block->branches, deleted, &intorder);
319 | deleteNode(&block->predecessors, deleted, &intorder);
320 | blocks = blocks->next;
321 | }
322 | }
323 |
324 | //This function combines adjacent blocks that have different predicate guards.
325 | //In other words, this redefines basic blocks in terms of control flow, instead of execution.
326 | void combineBlocks() {
327 | node * blocks = _blocks;
328 | while(blocks->next) {
329 | bool combined = false;
330 | blockNode * block1 = (blockNode*) blocks->value;
331 | blockNode * block2 = (blockNode*) blocks->next->value;
332 |
333 | if(block1->end == end_generic && !block2->isBranchTarget) {
334 | combined = true;
335 | while(block2->instructions) {
336 | addLast(&block1->instructions, block2->instructions->value);
337 | node * temp = block2->instructions;
338 | block2->instructions = block2->instructions->next;
339 | free(temp);
340 | }
341 |
342 | block1->end = block2->end;
343 | cleanNodes(block1->forward);
344 | block1->forward = block2->forward;
345 | block2->forward = 0;
346 | cleanNodes(block1->branches);
347 | block1->branches = block2->branches;
348 | block2->branches = 0;
349 |
350 | combineBlocksHelper(_blocks, block2, block1);
351 |
352 | node * sucs = block1->forward;
353 | while(sucs) {
354 | blockNode * suc = (blockNode *) sucs->value;
355 | addNode(&suc->predecessors, block1, &blockorder);
356 | sucs = sucs->next;
357 | }
358 | sucs = block1->branches;
359 | while(sucs) {
360 | blockNode * suc = (blockNode *) sucs->value;
361 | addNode(&suc->predecessors, block1, &blockorder);
362 | sucs = sucs->next;
363 | }
364 |
365 | cleanBlock(block2);
366 | node * temp = blocks->next;
367 | blocks->next = blocks->next->next;
368 | free(temp);
369 | }
370 |
371 | if(!combined) {
372 | blocks = blocks->next;
373 | }
374 | }
375 |
376 | blocks = _blocks;
377 | while(blocks->next) {
378 | blockNode * block = (blockNode *) blocks->value;
379 | if(!block->forward) {
380 | addFirst(&block->forward, blocks->next->value);
381 | }
382 | if(block->forward->value != blocks->next->value) {
383 | cerr << "SANITY CHECK ERROR ch~294.\n";
384 | cerr << "\tThis error absolutely should not ever happen.\n";
385 | cerr << "\tThis error means the assembly code is now non-linear, even without branches.\n";
386 | }
387 | blocks = blocks->next;
388 | }
389 | }
390 |
391 | void changePtrs(blockNode* oldVal, blockNode* newVal) {
392 | blockNode * a;
393 | instruction * inst;
394 | node * i = _blocks;
395 | while(i) {
396 | a = (blockNode*) i->value;
397 | node * j = a->instructions;
398 | while(j) {
399 | inst = (instruction*) j->value;
400 |
401 | if(inst->op == opcode_SSY ||
402 | inst->op == opcode_BRA ||
403 | inst->op == opcode_CAL ||
404 | inst->op == opcode_PBK ||
405 | inst->op == opcode_PCNT ||
406 | inst->op == opcode_BRX) {//found relevant instruction
407 | if(inst->ptr == oldVal) {
408 | inst->ptr = newVal;
409 | }
410 | }
411 |
412 | j = j->next;
413 | }
414 |
415 | i = i->next;
416 | }
417 | }
418 |
419 | void deleteBlock(blockNode* deleted) {
420 | //Fix up successors:
421 | node * iter = _blocks;
422 | while(iter) {
423 | blockNode* block = (blockNode*) iter->value;
424 |
425 | if(block == deleted) {
426 | if(iter->next) {
427 | //Change target block for appropriate SSYS, BRAs, CALs, etcetera
428 | changePtrs(deleted, (blockNode*) iter->next->value);
429 |
430 | //If deleted block is start of function, fix function list:
431 | node * iter2 = _functions;
432 | while(iter2) {
433 | blockNode* func = (blockNode*) iter2->value;
434 | if(func == deleted) {
435 | if(func->end == end_return) {
436 | cerr << "SANITY CHECK ERROR ~5432 deleted subroutine\n";
437 | cerr << "\tThis error suggests we (accidentally?) deleted a subroutine in the assembly.\n";
438 | }
439 | else if(func->end == end_generic || func->guarded) {
440 | iter2->value = iter->next->value;
441 | }
442 | }
443 |
444 | iter2 = iter2->next;
445 | }
446 | }
447 | }
448 | else {
449 | //If deleted block is a successor, adds its successor(s?) to this block's lists:
450 | if((block->end == end_call || block->end == end_branch)) {
451 | node * i = block->branches;
452 | while(i != 0) {
453 | if(i->value == deleted) {
454 | node * j = deleted->forward;
455 | if(listSize(deleted->forward)) {
456 | while(j) {
457 | addNode(&block->branches, j->value, &blockorder);
458 | addNode(&((blockNode*)j->value)->predecessors, block, &blockorder);
459 | j = j->next;
460 | }
461 | }
462 | else {
463 | node * temp = iter;
464 | blockNode * b;
465 | do {
466 | b = (blockNode*) temp->value;
467 | addNode(&block->branches, b, &blockorder);
468 | addNode(&b->predecessors, block, &blockorder);
469 | temp = temp->next;
470 | } while(b->guarded);
471 | }
472 | }
473 | i = i->next;
474 | }
475 | }
476 | if(block->end == end_generic || block->end == end_call) {
477 | node * i = block->forward;
478 | while(i != 0) {
479 | if(i->value == deleted) {
480 | node * j = deleted->forward;
481 | while(j) {
482 | addNode(&block->forward, j->value, &blockorder);
483 | addNode(&((blockNode*)j->value)->predecessors, block, &blockorder);
484 | j = j->next;
485 | }
486 | }
487 | i = i->next;
488 | }
489 | }
490 |
491 | //Remove deleted block from this block's successor & predecessor lists:
492 | deleteNode(&block->forward, deleted, &intorder);
493 | deleteNode(&block->branches, deleted, &intorder);
494 | deleteNode(&block->predecessors, deleted, &intorder);
495 | //deleteNode(&block->pseudoSuccessors, deleted, &intorder);
496 | }
497 |
498 | iter = iter->next;
499 | }
500 |
501 | //Remove deleted block from block list:
502 | deleteNode(&_blocks, deleted, &intorder);
503 |
504 | //Cleanup:
505 | cleanBlock(deleted);
506 | }
507 |
508 | void fixAssembly() {
509 | //Delete empty blocks:
510 | node * iter = _blocks;
511 | while(iter) {
512 | blockNode* block = (blockNode*) iter->value;
513 | if(block->instructions == 0) {
514 | deleteBlock(block);
515 | }
516 | else {
517 | break;
518 | }
519 | iter = _blocks;
520 | }
521 | iter = _blocks;
522 | while(iter && iter->next) {
523 | blockNode* block = (blockNode*) iter->next->value;
524 | if(block->instructions == 0) {
525 | deleteBlock(block);
526 | iter = _blocks;
527 | continue;
528 | }
529 | iter = iter->next;
530 | }
531 |
532 | //Fix line numbers and addresses:
533 | iter = _blocks;
534 | int trueLine = 0;
535 | int blockid = -1;
536 | int addr = 0;
537 | _num_registers = 0;
538 | while(iter) {
539 | blockNode* block = (blockNode*) iter->value;
540 | block->isBranchTarget = false;
541 | block->address = addr;
542 | block->first = trueLine;
543 | block->id = ++blockid;
544 | block->size = 0;
545 |
546 | if(!block->instructions) {
547 | cerr << "SANITY CHECK ERROR ~2951 empty block\n";
548 | cerr << "\tThis error means the assembly has a basic block with no instructions.\n";
549 | }
550 |
551 | node * iter2 = block->instructions;
552 | while(iter2) {
553 | instruction * inst = (instruction*) iter2->value;
554 |
555 | inst->address = addr;
556 | addr += inst->size;
557 | block->size += inst->size;
558 |
559 | inst->line = trueLine++;
560 | inst->blockID = blockid;
561 | iter2 = iter2->next;
562 |
563 | if(inst->size != 8) {
564 | cerr << "WARNING: non-8byte instruction code for: " << opcodes[inst->op] << "\n";
565 | cerr << "\t(Currently, this tool is not equipped to handle instructions of other sizes.)\n";
566 | }
567 |
568 | for(int x = 0; x < inst->num_operands; x++) {
569 | int reg = inst->operands[x]->reg;
570 | if(reg >= 0) {
571 | inst->operands[x]->reg = inst->operands[x]->val1;
572 | reg = inst->operands[x]->reg;
573 | int align = inst->operands[x]->width;
574 | if(align == 3) {
575 | align = 4;
576 | }
577 | int tempreg = reg - (reg % align);
578 | if(tempreg != reg) {
579 | cerr << "WARNING ch~481: a register has incorrect alignment: R" << reg << " in operand[" << x << "] of ";
580 | fprintfInstruction(cerr, inst);
581 | cerr << ".\n";
582 | static bool seenError = false;
583 | if(!seenError) {
584 | cerr << "\tNote that instructions with 64-bit or larger operands expect registers to be aligned to locations divisible by 2 or 4.\n";
585 | seenError = true;
586 | }
587 | }
588 | if(tempreg + inst->operands[x]->width > _num_registers) {
589 | _num_registers = tempreg + inst->operands[x]->width;
590 | }
591 | }
592 | }
593 | }
594 |
595 | block->last = trueLine - 1;
596 | iter = iter->next;
597 | }
598 |
599 | //Fix operands for BRA, CAL, SSY, etcetera
600 | iter = _blocks;
601 | while(iter) {
602 | blockNode* block = (blockNode*) iter->value;
603 |
604 | node * iter2 = block->instructions;
605 | while(iter2) {
606 | instruction* inst = (instruction*) iter2->value;
607 | if(inst->ptr) {
608 | if(inst->op != opcode_BRX) {
609 | blockNode* targetBlock = (blockNode*) inst->ptr;
610 | for(int x = 0; x < inst->num_operands; x++) {
611 | if(inst->operands[x]->type == type_hex) {
612 | inst->operands[x]->val1 = targetBlock->address;
613 | }
614 | }
615 | targetBlock->isBranchTarget = true;
616 | }
617 | else {
618 | static bool seenError = false;
619 | if(!seenError) {
620 | cerr << "ERROR ch~620: This tool is not able to handle addressing for the BRX instruction.\n";
621 | seenError = true;
622 | }
623 | }
624 | }
625 |
626 | iter2 = iter2->next;
627 | }
628 |
629 | _counter = block->last;
630 | iter = iter->next;
631 | }
632 |
633 | //Fix function IDs:
634 | int funcID = 0;
635 | blockNode * previousBlock = 0;
636 | iter = _blocks;
637 | while(iter) {
638 | blockNode* block = (blockNode*) iter->value;
639 | if(previousBlock) {
640 | if(previousBlock->end != end_generic) {
641 | node * preds = block->predecessors;
642 | while(preds) {
643 | blockNode * pred = (blockNode*) preds->value;
644 | if(pred->end == end_call) {
645 | if(!containsValue(pred->forward, block)) {
646 | funcID++;
647 | break;
648 | }
649 | }
650 | preds = preds->next;
651 | }
652 | }
653 | }
654 |
655 | block->funcID = funcID;
656 |
657 | node * iter2 = block->instructions;
658 | while(iter2) {
659 | instruction* inst = (instruction*) iter2->value;
660 | inst->funcID = funcID;
661 | iter2 = iter2->next;
662 | }
663 |
664 | previousBlock = block;
665 | iter = iter->next;
666 | }
667 | }
668 |
669 | void propogateBranches(blockEnd branch_type) {
670 | blockNode * a;
671 | node * i = _blocks;
672 | while(i) {
673 | a = (blockNode*) i->value;
674 | if(a->end == branch_type && a->target) {
675 | addBranchToAddress(a, a->target);
676 | }
677 | i = i->next;
678 | }
679 | }
680 |
681 | bool propogateReturns(blockNode * search, int target, int tag) {
682 | search->tag = tag;
683 | bool changed = false;
684 |
685 | if(search->guarded) {
686 | blockNode * next = getBlock(search->last + 1);
687 | if(next->tag != tag) {
688 | changed = changed || propogateReturns(next, target, tag);
689 | }
690 | }
691 |
692 | //Check if this block ends in a return:
693 | if(search->end == end_return) {
694 | changed = changed || addBranchToAddress(search, target);
695 | if(!search->guarded) {//no guard; end of path
696 | return changed;
697 | }
698 | }
699 |
700 | node * n;
701 |
702 | //If appropriate, search following blocks for returns:
703 | if(search->end == end_generic || search->end == end_call) {
704 | n = search->forward;
705 | while(n) {
706 | if(((blockNode*)n->value)->tag != tag) {
707 | changed = changed || propogateReturns((blockNode*)n->value, target, tag);
708 | }
709 | n = n->next;
710 | }
711 | }
712 |
713 | //Search branching successors for returns:
714 | if(search->end == end_branch) {
715 | n = search->branches;
716 | while(n) {
717 | if(((blockNode*)n->value)->tag != tag) {
718 | changed = changed || propogateReturns((blockNode*)n->value, target, tag);
719 | }
720 | n = n->next;
721 | }
722 | }
723 |
724 | return changed;
725 | }
726 |
727 | bool propogateCallReturns() {
728 | blockNode * a;
729 | blockNode * b;
730 | node * j;
731 | node * i = _blocks;
732 | bool changed = false;
733 | while(i) {
734 | a = (blockNode*) i->value;//current block being checked
735 |
736 | if(a->end == end_call) {//block ends with a call
737 | //Search successors for returns:
738 | j = a->branches;
739 | while(j) {
740 | b = (blockNode*) j->value;
741 | if(a->target == b->address) {//b is target of call
742 | node * insts = a->instructions;
743 | while(insts->next) insts = insts->next;
744 | instruction * inst = (instruction*) insts->value;
745 | changed = changed || propogateReturns(b, inst->address + inst->size, ++_tag);
746 | break;
747 | }
748 | j = j->next;
749 | }
750 |
751 | if(!j) {//call's target is not among the successors
752 | printf("\nSANITY-CHECK FAILED ch~493: cannot find called address\n");
753 | }
754 | }
755 | i = i->next;
756 | }
757 |
758 | return changed;
759 | }
760 |
761 | bool propogatePointersHelper(blockNode * search, stack > ptrs, int tag, bool firstCall) {
762 | if(search->tag == tag) {
763 | return false;
764 | }
765 | if(!firstCall) {
766 | search->tag = tag;
767 | }
768 |
769 | bool changed = false;
770 | if(search->guarded) {
771 | blockNode * next = getBlock(search->last + 1);
772 | changed = changed || propogatePointersHelper(next, ptrs, tag, false);
773 | }
774 |
775 | int addedPtrs = 0;
776 |
777 | node * iter = search->instructions;
778 | while(iter) {
779 | instruction * inst = (instruction*) iter->value;//current instruction being checked
780 | if(inst->op == opcode_SSY || inst->op == opcode_PBK || inst->op == opcode_PCNT) {
781 | long long target = inst->operands[0]->val1;
782 | ptrs.push(make_pair(inst->op, target));
783 | addedPtrs++;
784 | } else if(!ptrs.empty()) {
785 | opcode mustMatch = opcode_NOP;
786 | if(hasMod(inst, "S") || inst->op == opcode_SYNC) {//mod S is used prior to arch 50
787 | mustMatch = opcode_SSY;
788 | }
789 | else if(inst->op == opcode_CONT) {
790 | mustMatch = opcode_PCNT;
791 | }
792 | else if(inst->op == opcode_BRK) {
793 | mustMatch = opcode_PBK;
794 | }
795 | if(mustMatch != opcode_NOP) {
796 | //look for desired instruction in stack:
797 | pair targetPair = ptrs.top();
798 | ptrs.pop();
799 | while(targetPair.first != mustMatch && !ptrs.empty()) {
800 | targetPair = ptrs.top();
801 | ptrs.pop();
802 | }
803 | //if found desired instruction, match it to this jump
804 | if(targetPair.first == mustMatch) {
805 | long long target = targetPair.second;
806 | if(addedPtrs) {
807 | addedPtrs--;
808 | }
809 | changed = changed || addBranchToAddress(search, target);
810 | }
811 | }
812 | }
813 | iter = iter->next;
814 | }
815 |
816 | if(!ptrs.empty()) {
817 | node * children = 0;
818 | if(search->end == end_generic || search->end == end_call) {
819 | children = search->forward;
820 | } else if(search->end == end_branch) {
821 | children = search->branches;
822 | } else if(search->end == end_return) {
823 | static bool subroutineWarning = false;
824 | if(!subroutineWarning) {
825 | cerr << "Warning ch~976; subroutine may modify the pointer stack\n";
826 | subroutineWarning = true;
827 | }
828 | }
829 | while(children) {
830 | changed = changed || propogatePointersHelper((blockNode*)children->value, ptrs, tag, false);
831 | children = children->next;
832 | }
833 | }
834 |
835 | //Garbage removal:
836 | while(addedPtrs) {
837 | addedPtrs--;
838 | ptrs.pop();
839 | }
840 |
841 | return changed;
842 | }
843 |
844 | bool propogatePointers() {
845 | node * blocks = _blocks;
846 | bool changed = false;
847 | stack > ptrs;
848 | while(blocks) {
849 | blockNode * block = (blockNode*) blocks->value;//current block being checked
850 | changed = changed || propogatePointersHelper(block, ptrs, ++_tag, true);
851 | blocks = blocks->next;
852 | }
853 |
854 | return changed;
855 | }
856 |
857 | void setPtr() {
858 | _functions = 0;
859 | blockNode * a;
860 | instruction * inst;
861 | node * i = _blocks;
862 | while(i) {
863 | a = (blockNode*) i->value;
864 | node * j = a->instructions;
865 | while(j) {
866 | inst = (instruction*) j->value;
867 |
868 | if(inst->op == opcode_SSY ||
869 | inst->op == opcode_BRA ||
870 | inst->op == opcode_CAL ||
871 | inst->op == opcode_PBK ||
872 | inst->op == opcode_PCNT) {//found relevant instruction
873 | int target = -1;
874 | for(int x = 0; x < inst->num_operands; x++) {
875 | if(inst->operands[x]->type == type_hex) {
876 | target = inst->operands[x]->val1;
877 | break;
878 | }
879 | }
880 |
881 | //Search for target at start of each block
882 | node * k = _blocks;
883 | while(k) {
884 | a = (blockNode*) k->value;
885 | if(a->address == target) {
886 | inst->ptr = a;
887 | if(inst->op == opcode_CAL) {
888 | addNode(&_functions, a, &blockorder);
889 | }
890 | break;
891 | }
892 |
893 | k = k->next;
894 | }
895 |
896 | if(!k) {//target is not at the start of a block
897 | addBranchToAddress(NULL, target);
898 |
899 | //Search for target
900 | k = _blocks;
901 | while(k) {
902 | a = (blockNode*) k->value;
903 | if(a->address == target) {
904 | inst->ptr = a;
905 | if(inst->op == opcode_CAL) {
906 | addNode(&_functions, a, &blockorder);
907 | }
908 | break;
909 | }
910 |
911 | k = k->next;
912 | }
913 |
914 | //Sanity check:
915 | if(!k) {
916 | fprintf(stderr, "SANITY CHECK ERROR ch~1203; pointer to non-existant address 0x%x\n", target);
917 | }
918 | }
919 | }
920 | else if(inst->op == opcode_BRX) {
921 | //TODO handle BRX
922 | inst->ptr = 0;
923 | }
924 | else {
925 | inst->ptr = 0;
926 | }
927 |
928 | j = j->next;
929 | }
930 |
931 | i = i->next;
932 | }
933 |
934 | if(_num_functions && listSize(_functions) < _num_functions) {
935 | cerr << "SANITY CHECK FAILED ~1783; found " << listSize(_functions) << "/" << _num_functions << " functions.\n";
936 | cerr << "\tMismatch between actual and expected subroutine count.\n";
937 | }
938 | else if(_num_functions && listSize(_functions) > _num_functions) {
939 | cerr << "WARNING: found " << listSize(_functions) << "/" << _num_functions << " functions.\n";
940 | cerr << "\tMismatch between actual and expected subroutine count.\n";
941 | }
942 | }
943 |
--------------------------------------------------------------------------------