├── baker ├── README.md ├── src │ ├── utils │ │ ├── file.h │ │ └── file.cpp │ ├── symbols │ │ ├── import.h │ │ ├── import.cpp │ │ ├── symbols.h │ │ ├── block.h │ │ ├── block.cpp │ │ ├── symbols.cpp │ │ ├── instruction.h │ │ └── instruction.cpp │ ├── disassembler │ │ ├── analysis │ │ │ ├── code_tracer.h │ │ │ ├── code_tracer.cpp │ │ │ ├── data_analysis.cpp │ │ │ └── jump_table_analysis.cpp │ │ ├── verify.cpp │ │ ├── disassembler.h │ │ ├── parse_pe.cpp │ │ └── disassembler.cpp │ ├── assembler │ │ ├── assembler.h │ │ └── assembler.cpp │ ├── logging │ │ ├── logging.h │ │ └── logging.cpp │ ├── binary.h │ └── binary.cpp ├── baker.vcxproj.filters └── baker.vcxproj ├── .gitmodules ├── ExampleBakerUsage ├── main.cpp ├── examples │ ├── examples.h │ ├── rewrite_jumptable.cpp │ ├── create_beep_program.cpp │ └── rewrite_crackme.cpp ├── ExampleBakerUsage.vcxproj.filters └── ExampleBakerUsage.vcxproj ├── ExampleCrackMe ├── ExampleCrackMe.vcxproj.filters ├── example_crackme.c └── ExampleCrackMe.vcxproj ├── ExampleJumpTable ├── ExampleJumpTable.vcxproj.filters ├── example_jumptable.c └── ExampleJumpTable.vcxproj ├── README.md ├── baker.sln └── .gitignore /baker/README.md: -------------------------------------------------------------------------------- 1 | # baker 2 | 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "dependencies/zydis"] 2 | path = dependencies/zydis 3 | url = https://github.com/zyantific/zydis 4 | -------------------------------------------------------------------------------- /ExampleBakerUsage/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "examples/examples.h" 3 | 4 | int main() { 5 | rewrite_jumptable(); 6 | } -------------------------------------------------------------------------------- /baker/src/utils/file.h: 
// Reads the whole file at `path` into a byte vector.
//
// Returns an empty vector when the file cannot be opened or its size cannot
// be determined. If fewer bytes than expected could be read, the result is
// truncated to the bytes actually read.
std::vector<uint8_t> read_file_to_buffer(std::string path) {
    std::ifstream file(path, std::ios::binary);
    if (!file)
        return {};

    file.seekg(0, std::ios::end);
    const std::streamoff end_pos = file.tellg();

    // tellg() reports -1 on failure; using that as a size would request an
    // enormous allocation
    if (!file || end_pos < 0)
        return {};

    file.seekg(0, std::ios::beg);

    std::vector<uint8_t> buf(static_cast<size_t>(end_pos));
    if (!buf.empty()) {
        file.read(reinterpret_cast<char*>(buf.data()),
                  static_cast<std::streamsize>(buf.size()));
        buf.resize(static_cast<size_t>(file.gcount()));
    }

    return buf;
}

// Writes `size` bytes from `buffer` to the file at `path`, creating or
// truncating it.
//
// A failure to open or write the file is reported on stderr and the function
// returns; it never writes through an unopened handle.
void output_file(uint8_t* buffer, size_t size, std::string path) {
    std::ofstream file(path, std::ios::binary | std::ios::trunc);
    if (!file) {
        fprintf(stderr, "output_file: could not open '%s' for writing\n", path.c_str());
        return;
    }

    if (buffer != nullptr && size != 0) {
        file.write(reinterpret_cast<const char*>(buffer),
                   static_cast<std::streamsize>(size));
    }

    if (!file)
        fprintf(stderr, "output_file: failed while writing '%s'\n", path.c_str());
}
{4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /ExampleJumpTable/ExampleJumpTable.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | -------------------------------------------------------------------------------- /baker/src/symbols/import.cpp: -------------------------------------------------------------------------------- 1 | #include "../binary.h" 2 | 3 | 4 | IMPORT_MODULE* BINARY::import_module(std::string module_name) { 5 | IMPORT_MODULE* mod = NULL; 6 | for (int i = 0; i < import_modules.size(); i++) { 7 | mod = import_modules[i]; 8 | 9 | if (mod->name == module_name) { 10 | return mod; 11 | } 12 | } 13 | 14 | mod = new IMPORT_MODULE{}; 15 | mod->name = module_name; 16 | 17 | import_modules.push_back(mod); 18 | return mod; 19 | } 20 | 21 | 22 | IMPORT_ROUTINE* BINARY::import_routine(IMPORT_MODULE* mod, std::string routine_name) { 23 | for (IMPORT_ROUTINE* routine : mod->routines) { 24 | if (routine->name == routine_name) { 25 | return routine; 26 | } 27 | } 28 | 29 | SYMBOL* sym = new SYMBOL{}; 30 | sym->id = symbols.size(); 31 | symbols.push_back(sym); 32 | 33 | IMPORT_ROUTINE* routine = new 
// A named location in the binary: code, data, an import, or relative info.
// `type` tells which member of the anonymous union below is meaningful.
struct SYMBOL {
    // symbol id; symbols are stored so that their index in BINARY::symbols
    // equals this id
    sym_id_t id;
    // kind of symbol, selects the active union member
    SYMBOL_TYPE type;
    // optional name (imports are named "__imp_" + routine name)
    std::string name;

    union {
        // basic block (used when type is SYMBOL_TYPE_CODE)
        BASIC_BLOCK* bb;

        // data symbol (used when type is SYMBOL_TYPE_DATA)
        struct {
            // data block that holds the symbol, and the byte offset inside it
            DATA_BLOCK* db;
            uint32_t db_offset;

            // if this data symbol is a pointer that points to another symbol
            // which points to an absolute address, base relocation will be needed
            TARGET_TYPE target_type;
            // id of the symbol this data points at (nullsid when there is none)
            sym_id_t target_sym_id;
        };

        // import (used when type is SYMBOL_TYPE_IMPORT)
        IMPORT_ROUTINE* import_routine;
    };

    // relocation info
    // NOTE(review): presumably only meaningful for SYMBOL_TYPE_RELATIVE_INFO
    // symbols created via BINARY::rel_info — confirm
    uint32_t rel_offset;
};
/*
    Prompts for a key on stdin, strips the trailing newline and reports
    whether check_serial() accepts it.
*/
int main() {
    char serial[256] = { 0 };

    printf("enter the key: ");

    // on EOF or a read error treat the input as an empty key
    if (fgets(serial, sizeof(serial), stdin) == NULL) {
        serial[0] = '\0';
    }

    // remove the new line character
    // (len can be 0, so it must be checked before indexing len - 1)
    size_t len = strlen(serial);
    if (len > 0 && serial[len - 1] == '\n') {
        serial[len - 1] = '\0';
    }

    if (check_serial(serial)) {
        printf("access granted!\n");
    } else {
        printf("access denied\n");
    }

    system("pause");
    return 0;
}
uint32_t section_alignment; 38 | uint32_t file_alignment; 39 | 40 | // get pointer to where instructin is mapped in the filebuf 41 | uint8_t* get_instr(BASIC_BLOCK* bb, instr_t* instr); 42 | 43 | // get pointer to where data is mapped in the filebuf 44 | uint8_t* get_data(SYMBOL* data_sym); 45 | }; 46 | 47 | uint32_t align_up(uint32_t val, uint32_t alignment); 48 | ASSEMBLED_BINARY* build_pe(const BINARY* bin_); 49 | 50 | void assembled_binary_print(ASSEMBLED_BINARY* asm_bin); -------------------------------------------------------------------------------- /baker/baker.vcxproj.filters: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /ExampleBakerUsage/examples/create_beep_program.cpp: -------------------------------------------------------------------------------- 1 | #include "examples.h" 2 | 3 | void create_beep_program() { 4 | BINARY bin = {}; 5 | 6 | IMPORT_MODULE* mod_k32 = bin.import_module("kernel32.dll"); 7 | sym_id_t imp_Beep = bin.import_routine(mod_k32, "Beep")->id; 8 | 9 | DATA_BLOCK* rdata = bin.data_block(0, true, "rdata"); 10 | sym_id_t str_hello_world = rdata->push_str("Hello, World!\n")->id; 11 | 12 | DATA_BLOCK* data = bin.data_block(1000, false, "data"); 13 | 14 | BASIC_BLOCK* bb_main = bin.set_entry(bin.basic_block()); 15 | BASIC_BLOCK* bb_call_beep = bin.basic_block(); 16 | BASIC_BLOCK* bb_junk = bin.basic_block(); 17 | BASIC_BLOCK* bb_end = bin.basic_block(); 18 | 19 | bb_main->push({ 20 | Lea(rax_, RipRel(str_hello_world)), 21 | })->fall(bb_call_beep->id); 22 | 23 | // Beep(1000, 1000); 24 | 25 | bb_call_beep->push({ 26 | Mov(rax_, RipRel(imp_Beep)), 27 | Mov(rcx_, Imm(1000)), 28 | Mov(rdx_, Imm(1000)), 29 | Sub(rsp_, Imm(0x28)), 30 | Call(rax_), 31 | Add(rsp_, Imm(0x28)), 32 | 
})->fall(bb_end->id); 33 | 34 | bb_junk->push({ 35 | Nop(), 36 | Nop(), 37 | Nop(), 38 | }); 39 | 40 | bb_end->push({ 41 | Mov(rax_, Imm(1234)), 42 | Ret() 43 | }); 44 | 45 | binary_print(&bin); 46 | 47 | ASSEMBLED_BINARY asm_bin = build_pe(&bin); 48 | 49 | output_file(asm_bin.filebuf.data(), asm_bin.filebuf.size(), 50 | "C:\\Users\\li\\source\\repos\\baker\\x64\\release\\assemble_beep_test.exe"); 51 | 52 | assembled_binary_print(&asm_bin); 53 | } -------------------------------------------------------------------------------- /baker/src/logging/logging.h: -------------------------------------------------------------------------------- 1 | #include "../symbols/symbols.h" 2 | #include 3 | #include 4 | #include 5 | #pragma once 6 | 7 | std::string serialize_sym(SYMBOL* sym); 8 | std::string serialize_instr_ex(ZydisDecoder* decoder, ZydisFormatter* formatter, const instr_t* instr, void* user_data = nullptr); 9 | std::string serialize_instr(struct BINARY* bin, const instr_t* instr); 10 | 11 | #define DEBUG_LOGGING 12 | 13 | std::string fmtf(_Printf_format_string_ const char* format, ...); 14 | void printf_ex(int color, _Printf_format_string_ const char* fmt, ...); 15 | 16 | void logger_indent(); 17 | void logger_unindent(); 18 | void logger_reset_indentation(); 19 | 20 | void logger_log__(int action_color, std::string action, int msg_color, std::string msg); 21 | void logger_warn__(std::string msg); 22 | 23 | #ifdef DEBUG_LOGGING 24 | #define logger_log(action_color, action, msg_color, msg) logger_log__(action_color, action, msg_color, msg) 25 | #else 26 | #define logger_log(action_color, action, msg_color, msg) 27 | #endif 28 | 29 | #define logger_warn(msg) logger_warn__(msg); 30 | 31 | 32 | void print_bb(BASIC_BLOCK* bb, uint32_t rva = 0); 33 | 34 | 35 | enum COLORS { 36 | BLACK = 30, 37 | RED = 31, 38 | GREEN = 32, 39 | YELLOW = 33, 40 | BLUE = 34, 41 | MAGENTA = 35, 42 | CYAN = 36, 43 | WHITE = 37, 44 | BRIGHT_BLACK = 90, 45 | BRIGHT_RED = 91, 46 | BRIGHT_GREEN = 
// A straight-line run of instructions. The builder-style methods return a
// BASIC_BLOCK* so that calls can be chained (e.g. push({...})->fall(id)).
struct BASIC_BLOCK {
    // id of this block; also the index of its symbol in BINARY::symbols
    sym_id_t id;
    // instructions of the block, in order
    std::vector instrs;
    // id of the symbol execution falls through to after the last instruction
    // NOTE(review): presumably nullsid when there is no fallthrough — confirm
    sym_id_t fallthrough_sym_id;
    // binary that owns this block (gives access to its decoder and symbols)
    struct BINARY* bin_;

    // insert one instruction, or a list of instructions, at index `idx`
    struct BASIC_BLOCK* insert(int idx, instr_t instr);
    struct BASIC_BLOCK* insert(int idx, std::vector instructions_array);
    // append one instruction, or a list of instructions
    struct BASIC_BLOCK* push(instr_t instr);
    struct BASIC_BLOCK* push(std::vector instructions_array);
    // set the fallthrough target to symbol `id`
    struct BASIC_BLOCK* fall(sym_id_t id);
    // NOTE(review): presumably falls through to the block created after this
    // one — definition not visible here, confirm
    struct BASIC_BLOCK* fall_to_next();

    // size of the block
    // NOTE(review): presumably in bytes (sum of instruction lengths) — confirm
    size_t size();
    // offset of instruction `idx` from the start of the block
    uint32_t get_instr_offset(int idx);
};
"disassembler.h" 2 | 3 | void DISASSEMBLER::verify() { 4 | assert(sym_rva_map.size() == bin.symbols.size()); 5 | 6 | for (BASIC_BLOCK* bb : bin.basic_blocks) { 7 | // we should not have empty basic blocks 8 | assert(!bb->instrs.empty()); 9 | } 10 | 11 | for (int i = 0; i < bin.symbols.size(); i++) { 12 | SYMBOL* sym = bin.symbols[i]; 13 | assert(sym->id == i); 14 | } 15 | 16 | int bb_count = 0; 17 | int sym_count = 0; 18 | 19 | for (uint32_t rva = 0; rva < rva_map.size(); rva++) { 20 | auto& entry = rva_map[rva]; 21 | 22 | // there is nothing at this rva 23 | if (!entry.blink && entry.id == nullsid) 24 | continue; 25 | 26 | // this holds a non-root instruction 27 | // non root as in not the first instruction of the basic block 28 | if (entry.blink) { 29 | assert(!entry.id); 30 | 31 | int instr_count = 0; 32 | for (uint32_t curr_rva = rva; true;) { 33 | instr_count++; 34 | auto& curr_entry = rva_map[curr_rva]; 35 | 36 | // we reached the root of the basic block 37 | if (!curr_entry.blink) { 38 | assert(curr_entry.id != nullsid); 39 | 40 | SYMBOL* root = bin.symbols[curr_entry.id]; 41 | assert(root); 42 | 43 | assert(root->type = SYMBOL_TYPE_CODE); 44 | 45 | // check instructions to be properly linked 46 | assert(instr_count <= root->bb->instrs.size()); 47 | 48 | break; 49 | } 50 | 51 | curr_rva -= curr_entry.blink; 52 | } 53 | 54 | continue; 55 | } 56 | 57 | SYMBOL* sym = bin.symbols[entry.id]; 58 | assert(sym); 59 | 60 | assert(sym_rva_map[sym->id] == rva); 61 | 62 | sym_count++; 63 | 64 | if (sym->type == SYMBOL_TYPE_CODE) 65 | bb_count++; 66 | } 67 | 68 | // the - 1 is to exclude the null symbol 69 | assert(sym_count == bin.symbols.size() - 1); 70 | assert(bb_count == bin.basic_blocks.size()); 71 | } -------------------------------------------------------------------------------- /baker/src/binary.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 
// Intermediate representation of a program: basic blocks, data blocks,
// imports and the symbols that tie them together.
struct BINARY {
    // binary
    // blocks, data and import modules that make up the program
    std::vector basic_blocks;
    std::vector data_blocks;
    std::vector import_modules;

    // index of symbol = symbol id
    // (id 0 is nullsid, the null symbol)
    std::vector symbols;
    // block where execution starts, see set_entry()
    BASIC_BLOCK* entry_point;

    // section configs (optional)
    // std::vector extra_sects;

    // Zydis state used to decode and print the instructions of this binary
    ZydisFormatter formatter;
    ZydisDecoder decoder;
    BINARY();

    // imports
    // import_module: returns the module with this name, creating it if missing
    // import_routine: returns the routine with this name inside `mod`; when it
    // is missing, creates it together with a SYMBOL_TYPE_IMPORT symbol named
    // "__imp_" + routine_name
    IMPORT_MODULE* import_module(std::string module_name);
    IMPORT_ROUTINE* import_routine(IMPORT_MODULE* mod, std::string routine_name);

    // forward declaration
    // NOTE(review): presumably creates a symbol that a later
    // basic_block(label_id, ...) call attaches a block to — confirm
    SYMBOL* label();

    // code
    // basic_block: creates a basic block (optionally for an existing label id)
    // set_entry: makes `bb` the entry point; the block pointer is returned so
    // the call can be nested, e.g. set_entry(basic_block())
    BASIC_BLOCK* basic_block(std::string name = "");
    BASIC_BLOCK* basic_block(sym_id_t label_id, std::string name = "");
    BASIC_BLOCK* set_entry(BASIC_BLOCK* bb);

    // data
    // creates a data block of `size` bytes
    DATA_BLOCK* data_block(uint32_t size, BOOL read_only, std::string name = "");

    // relative info
    SYMBOL* rel_info(uint32_t rel_offset, std::string name = "");

    // getters
    // look up a symbol / data block by name
    // NOTE(review): result when the name is unknown is not visible here
    SYMBOL* get_symbol(std::string name);
    DATA_BLOCK* get_data_block(std::string name);

    // xrefs
    // references to `target_sym_id`, in the whole binary or within one block
    std::vector get_xrefs(sym_id_t target_sym_id);
    std::vector get_xrefs(BASIC_BLOCK* bb, sym_id_t target_sym_id);
};
-------------------------------------------------------------------------------- 1 | #include "code_tracer.h" 2 | #include 3 | #include 4 | 5 | INSTR_TRACE_FRAME init_instr_trace_frame(BASIC_BLOCK* bb, int idx) { 6 | INSTR_TRACE_FRAME frame = {}; 7 | frame.instr_idx = idx; 8 | frame.instr = &bb->instrs[idx]; 9 | const auto [dec_instr, dec_ops] = 10 | decode_full(&bb->bin_->decoder, frame.instr->bytes, frame.instr->len); 11 | frame.dec_instr = dec_instr; 12 | frame.dec_ops = dec_ops; 13 | return frame; 14 | } 15 | 16 | bool compare_operands(const ZydisDecodedOperand& op_1, const ZydisDecodedOperand& op_2) { 17 | if (op_1.type == op_2.type) { 18 | if (op_1.type == ZYDIS_OPERAND_TYPE_REGISTER 19 | && op_1.reg.value != op_2.reg.value) 20 | return false; 21 | 22 | if (op_1.type == ZYDIS_OPERAND_TYPE_MEMORY 23 | && (op_1.mem.base != op_2.mem.base 24 | || op_1.mem.index != op_2.mem.index 25 | || op_1.mem.scale != op_2.mem.scale 26 | || op_1.mem.disp.value != op_2.mem.disp.value)) 27 | return false; 28 | } 29 | return true; 30 | } 31 | 32 | /* 33 | only_target_reg: only go through instructions where the left reg is the target reg 34 | trace_comp_func: 35 | return false --> continue 36 | return true --> break 37 | */ 38 | void bb_backtrace( 39 | BASIC_BLOCK* bb, 40 | int begin_idx, 41 | ZydisDecodedOperand target_op, 42 | const backtrace_func_t comp 43 | ) { 44 | for (int i = begin_idx; i >= 0; i--) { 45 | INSTR_TRACE_FRAME frame = init_instr_trace_frame(bb, i); 46 | auto& dec_instr = frame.dec_instr; 47 | auto& dec_ops = frame.dec_ops; 48 | 49 | if (comp(target_op, frame) == true) 50 | break; 51 | 52 | /* 53 | add resilience towards these situations: 54 | -- scanning for `lea {jmp_reg}, sym_123` 55 | lea rcx, sym_123 56 | mov rax, rcx 57 | jmp rax 58 | ------------------ 59 | -- scanning for `lea {jmp_reg}, sym_123` 60 | lea rcx, sym_123 61 | mov [rsp+0x18], rcx 62 | mov rax, [rsp+0x18] 63 | jmp rax 64 | */ 65 | if (dec_instr.mnemonic == ZYDIS_MNEMONIC_MOV && 
compare_operands(dec_ops[0], target_op)) { 66 | // we will switch the target operand, to the operand moved in 67 | target_op = dec_ops[1]; 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /baker/src/disassembler/analysis/data_analysis.cpp: -------------------------------------------------------------------------------- 1 | #include "../disassembler.h" 2 | 3 | uint32_t DISASSEMBLER::calculate_potential_ptr(SYMBOL* sym) { 4 | assert(sym->type == SYMBOL_TYPE_DATA); 5 | assert(sym->db); 6 | 7 | // make sure there is enough space for a pointer to fit 8 | if (sym->db_offset + 8 > sym->db->bytes.size()) { 9 | return 0; 10 | } 11 | 12 | // read data symbol points to 13 | const uint64_t val = *reinterpret_cast(&sym->db->bytes[sym->db_offset]); 14 | 15 | const uint64_t image_base = nthdrs->OptionalHeader.ImageBase; 16 | const uint32_t image_size = nthdrs->OptionalHeader.SizeOfImage; 17 | 18 | // make sure rva is valid 19 | if (val < image_base || val >= image_base + image_size) 20 | return 0; 21 | 22 | return val - image_base; 23 | } 24 | 25 | // check data symbol for further memory reference 26 | // symbol is updated to target the reference 27 | // reference rva is returned if found 28 | // 0 is returned if reference already found 29 | uint32_t DISASSEMBLER::analyze_data_symbol(SYMBOL* sym) { 30 | assert(sym->type == SYMBOL_TYPE_DATA); 31 | assert(sym->db); 32 | 33 | uint32_t ptr_rva = calculate_potential_ptr(sym); 34 | if (ptr_rva) { 35 | sym->target_type = TARGET_TYPE_POINTER; 36 | auto& rva_entry = rva_map[ptr_rva]; 37 | 38 | // target already discovered 39 | if (rva_entry.id) { 40 | sym->target_sym_id = rva_entry.id; 41 | return 0; 42 | } 43 | 44 | if (rva_in_exec_sect(ptr_rva)) { 45 | // middle of a basic block 46 | if (rva_entry.blink) { 47 | sym->target_sym_id = split_bb(ptr_rva).id; 48 | return ptr_rva; 49 | } 50 | 51 | // undiscovered code 52 | else { 53 | sym->target_sym_id = queue_rva(ptr_rva).id; 54 | return 
ptr_rva; 55 | } 56 | } else { 57 | // data pointer 58 | uint32_t db_offset = 0; 59 | DATA_BLOCK* db = rva_to_containing_db(ptr_rva, &db_offset); 60 | 61 | if (!db) 62 | return 0; 63 | 64 | SYMBOL* new_sym = db->data_sym(db_offset); 65 | rva_entry = { new_sym->id, 0 }; 66 | sym_rva_map_append(ptr_rva); 67 | 68 | sym->target_sym_id = new_sym->id; 69 | return ptr_rva; 70 | } 71 | } 72 | } 73 | 74 | void DISASSEMBLER::fully_analyze_data_symbol(SYMBOL* sym) { 75 | while (true) { 76 | uint32_t target_rva = analyze_data_symbol(sym); 77 | if (!target_rva) 78 | break; 79 | 80 | sym = bin.symbols[rva_map[target_rva].id]; 81 | if (sym->type != SYMBOL_TYPE_DATA) 82 | break; 83 | } 84 | } -------------------------------------------------------------------------------- /baker/src/disassembler/disassembler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "../binary.h" 3 | #include 4 | #include 5 | 6 | struct RVA_DB_ENTRY { 7 | // rva to data block 8 | uint32_t rva; 9 | 10 | // the data block 11 | DATA_BLOCK* db; 12 | }; 13 | 14 | struct RVA_MAP_ENTRY { 15 | // if blink is 0, this is the symbol the rva lands in 16 | sym_id_t id; 17 | 18 | // if not 0, this is the number of bytes to previous rva entry 19 | // code --> goes to last line of assembly in the same basic block 20 | // jpt entry --> goes ot last jump table entry 21 | uint32_t blink; 22 | }; 23 | 24 | struct DISASSEMBLED_BINARY { 25 | BINARY bin; 26 | 27 | std::vector filebuf; 28 | IMAGE_DOS_HEADER* doshdr; 29 | IMAGE_NT_HEADERS* nthdrs; 30 | IMAGE_DATA_DIRECTORY* datadir; 31 | IMAGE_SECTION_HEADER* sects; 32 | uint64_t image_base; 33 | 34 | std::vector rva_map; 35 | std::vector rva_db_map; 36 | 37 | // [symbol id]: rva 38 | std::vector sym_rva_map; 39 | 40 | DATA_BLOCK* rva_to_db(uint32_t rva); 41 | DATA_BLOCK* rva_to_containing_db(uint32_t rva, uint32_t* db_offset = nullptr); 42 | 43 | BASIC_BLOCK* rva_to_bb(uint32_t rva); 44 | BASIC_BLOCK* 
rva_to_containing_bb(uint32_t rva, int* instr_idx = nullptr); 45 | 46 | IMAGE_SECTION_HEADER* rva_to_sect(uint32_t rva); 47 | bool rva_in_exec_sect(uint32_t rva); 48 | uint32_t rva_to_offset(uint32_t rva); 49 | template T rvacast(uint32_t rva) { return (T)&filebuf[rva_to_offset(rva)]; } 50 | }; 51 | 52 | struct DISASSEMBLER : DISASSEMBLED_BINARY { 53 | // pe parsing 54 | void create_section_dbs(); 55 | void parse_imports(); 56 | void parse_exceptions(); 57 | void parse_relocations(); 58 | 59 | // main functionalities 60 | std::queue disasm_queue; 61 | void sym_rva_map_append(uint32_t rva); 62 | RVA_MAP_ENTRY& queue_rva(uint32_t rva, sym_id_t label_id); 63 | RVA_MAP_ENTRY& queue_rva(uint32_t rva, std::string name = ""); 64 | RVA_MAP_ENTRY& split_bb(uint32_t rva, std::string name = ""); 65 | 66 | // data symbol analysis 67 | uint32_t calculate_potential_ptr(SYMBOL* sym); 68 | uint32_t analyze_data_symbol(SYMBOL* sym); 69 | void fully_analyze_data_symbol(SYMBOL* sym); 70 | 71 | // main recursive disassembler function 72 | void disassemble(); 73 | void sort_basic_blocks(); 74 | 75 | // jump table analysis 76 | std::map bb_explored_map; 77 | std::set jpt_rva_list; 78 | void collect_jump_tables(); 79 | void resolve_jpt_entries(); 80 | 81 | // verification 82 | void verify(); 83 | }; 84 | 85 | DISASSEMBLER* disassemble_pe(std::vector filebuf); -------------------------------------------------------------------------------- /baker/src/logging/logging.cpp: -------------------------------------------------------------------------------- 1 | #include "../binary.h" 2 | 3 | 4 | int indentation = 0; 5 | 6 | void logger_indent() { 7 | indentation++; 8 | } 9 | 10 | void logger_unindent() { 11 | indentation--; 12 | } 13 | 14 | void logger_reset_indentation() { 15 | indentation = 0; 16 | } 17 | 18 | std::string fmtf(_Printf_format_string_ const char* format, ...) 
{ 19 | va_list args; 20 | va_start(args, format); 21 | int size = vsnprintf(nullptr, 0, format, args) + 1; 22 | 23 | std::vector buffer(size); 24 | vsnprintf( 25 | buffer.data(), 26 | size, 27 | format, 28 | args); 29 | 30 | va_end(args); 31 | 32 | return std::string(buffer.data()); 33 | } 34 | 35 | std::string serialize_sym(SYMBOL* sym) { 36 | std::string result = ""; 37 | result.append(fmtf("{ id: %d", sym->id)); 38 | 39 | if (!sym->name.empty()) 40 | result.append(fmtf(", %s", sym->name.c_str())); 41 | 42 | if (sym->type == SYMBOL_TYPE_CODE) 43 | result.append(fmtf(", bb: 0x%p", sym->bb)); 44 | 45 | if (sym->type == SYMBOL_TYPE_DATA) { 46 | if (!sym->db->name.empty()) { 47 | result.append(fmtf(", %s", sym->db->name.c_str())); 48 | } 49 | 50 | result.append(fmtf(", db: 0x%p", sym->db)); 51 | 52 | if (sym->db_offset) 53 | result.append(fmtf(" + %d", sym->db_offset)); 54 | 55 | if (sym->target_type == TARGET_TYPE_RVA) 56 | result.append(fmtf(", (rva target: %d)", sym->target_sym_id)); 57 | } 58 | 59 | result.append(" }"); 60 | return result; 61 | } 62 | 63 | std::string serialize_instr_ex(ZydisDecoder* decoder, ZydisFormatter* formatter, const instr_t* instr, void* user_data) { 64 | char buffer[256]; 65 | 66 | ZydisDecodedInstruction decoded_instr = {}; 67 | ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT] = {}; 68 | ZydisDecoderDecodeFull(decoder, instr->bytes, instr->len, &decoded_instr, operands); 69 | 70 | ZydisFormatterFormatInstruction(formatter, &decoded_instr, operands, 71 | decoded_instr.operand_count_visible, 72 | buffer, sizeof(buffer), 0, user_data); 73 | 74 | return buffer; 75 | } 76 | 77 | std::string serialize_instr(BINARY* bin, const instr_t* instr) { 78 | return serialize_instr_ex(&bin->decoder, &bin->formatter, instr, &bin->symbols); 79 | } 80 | 81 | void printf_ex(int color, _Printf_format_string_ const char* fmt, ...) 
{ 82 | va_list args; 83 | va_start(args, fmt); 84 | 85 | printf("\033[%dm", color); 86 | vprintf(fmt, args); 87 | printf("\033[0m"); 88 | 89 | va_end(fmt); 90 | } 91 | 92 | void logger_log__(int action_color, std::string action, int msg_color, std::string msg) { 93 | for (int i = 0; i < indentation; i++) 94 | printf("\t"); 95 | 96 | if (!action.empty()) { 97 | printf_ex(action_color, "%-20s", ("[" + action + "]").c_str()); 98 | } 99 | 100 | if (!msg.empty()) { 101 | printf_ex(msg_color, msg.c_str()); 102 | } 103 | } 104 | 105 | void logger_warn__(std::string msg) { 106 | printf_ex(RED, "[!warning!]"); 107 | printf_ex(WHITE, msg.c_str()); 108 | } 109 | 110 | void print_bb(BASIC_BLOCK* bb, uint32_t rva) { 111 | BINARY* bin = bb->bin_; 112 | 113 | printf_ex(BRIGHT_BLUE, "[basic block]: %d, size: %d %s\n", bb->id, bb->size(), 114 | bin->symbols[bb->id]->name.c_str()); 115 | 116 | for (int bb_offset = 0; const instr_t& instr : bb->instrs) { 117 | if (rva) { 118 | printf("<+%0X>", rva + bb_offset); 119 | } 120 | 121 | printf("\t+%-4d: %s\n", bb_offset, serialize_instr(bin, &instr).c_str()); 122 | 123 | bb_offset += instr.len; 124 | } 125 | 126 | printf("\t--> %d\n", bb->fallthrough_sym_id); 127 | } -------------------------------------------------------------------------------- /ExampleJumpTable/example_jumptable.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // 5 | // this example is CRTless 6 | // 7 | 8 | typedef int(__CRTDECL *_vsnprintf_t)( 9 | _Out_writes_opt_(_BufferCount) _Post_maybez_ const char* buffer, 10 | _In_ const size_t bufsize, 11 | _In_z_ _Printf_format_string_ const char* format, 12 | va_list arglist 13 | ); 14 | 15 | _vsnprintf_t p_vsnprintf = NULL; 16 | 17 | void load_functions() { 18 | HMODULE ntdll = GetModuleHandleA("ntdll.dll"); 19 | p_vsnprintf = GetProcAddress(ntdll, "_vsnprintf"); 20 | } 21 | 22 | void crtless_printf(const char* format, ...) 
/*
    switch case based jump tables
*/

/*
    Returns the English name of a number in the range 0-9, or an error
    message for any other value. Deliberately written as a switch over
    consecutive cases, matching the "switch case based jump tables" purpose
    of this example.
*/
char* serialize_0_9(int number) {
    switch (number)
    {
    case 0: return "zero";
    case 1: return "one";
    case 2: return "two";
    case 3: return "three";
    case 4: return "four";
    case 5: return "five";
    case 6: return "six";
    case 7: return "seven";
    case 8: return "eight";
    case 9: return "nine";
    /* the message now names the range this function really accepts */
    default: return "invalid number (only numbers from 0-9 are allowed)";
    }
}

/*
    Returns the English name of a number in the range 10-19, or an error
    message for any other value.
*/
char* serialize_10_19(int number) {
    switch (number)
    {
    case 10: return "ten";
    case 11: return "eleven";
    case 12: return "twelve";
    case 13: return "thirteen";
    case 14: return "fourteen";
    case 15: return "fifteen";
    case 16: return "sixteen";
    case 17: return "seventeen";
    case 18: return "eighteen";
    case 19: return "nineteen";
    default: return "invalid number (only numbers from 10-19 are allowed)";
    }
}

/*
    pointer based jump table
*/

/* Signature shared by every entry of the function pointer table. */
typedef char* (*func_table_entry_t)();

/* One function per number 20-29; each returns that number's name. */
char* func_20() { return "twenty"; }
char* func_21() { return "twenty_one"; }
char* func_22() { return "twenty_two"; }
char* func_23() { return "twenty_three"; }
char* func_24() { return "twenty_four"; }
char* func_25() { return "twenty_five"; }
char* func_26() { return "twenty_six"; }
char* func_27() { return "twenty_seven"; }
char* func_28() { return "twenty_eight"; }
char* func_29() { return "twenty_nine"; }
func_table_entry_t func_table[] = { 97 | func_20, 98 | func_21, 99 | func_22, 100 | func_23, 101 | func_24, 102 | func_25, 103 | func_26, 104 | func_27, 105 | func_28, 106 | func_29 107 | }; 108 | 109 | char* serialize_20_29(int number) { 110 | if (number < 20 || number > 29) { 111 | return "invalid number (only numbers from 20-29 are allowed)"; 112 | } 113 | 114 | return func_table[number - 20](); 115 | } 116 | 117 | 118 | int jumptable_example_main() { 119 | load_functions(); 120 | 121 | crtless_printf("0 - 9\n"); 122 | for (int i = 0; i < 10; i++) { 123 | crtless_printf("%d --> %s\n", i, serialize_0_9(i)); 124 | } 125 | 126 | for (int i = 10; i < 20; i++) { 127 | crtless_printf("%d --> %s\n", i, serialize_10_19(i)); 128 | } 129 | 130 | for (int i = 20; i < 30; i++) { 131 | crtless_printf("%d --> %s\n", i, serialize_20_29(i)); 132 | } 133 | 134 | while (TRUE) { 135 | // so the console does not close 136 | Sleep(5000); 137 | } 138 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # binary rewriter/reassembler for x64 PE binaries [WIP] 2 | 3 | takes in a binary and lifts the assembly into a very basic intermediate representation. 4 | you can then modify this intermediate representation, and reassemble it into a new binary. 
5 | 6 | ## currently works on: 7 | - C binary with no CRT 8 | - C binary with CRT 9 | 10 | ## currently supports: 11 | - jump tables 12 | 13 | ## work in progress: 14 | - .pdata exception directory 15 | - partial binary rewriting option to eliminate certain "unsolvable" issues 16 | 17 | ## example usage 18 | 19 | this example rewrites a binary with basic control flow flattening (CFF) and nop insertion 20 | ![image](https://github.com/user-attachments/assets/ddc639b3-8101-4531-a850-b4f74ff2611f) 21 | 22 | ```cpp 23 | 24 | // disassemble exe 25 | auto filebuf = read_file_to_buffer("path_to_exe"); 26 | const DISASSEMBLED_BINARY disasm_bin = disassemble_pe(filebuf); 27 | BINARY bin = {}; 28 | 29 | binary_duplicate(&disasm_bin.bin, &bin); 30 | binary_print(&bin); 31 | 32 | // spam nops 33 | spam_nops_1337(&bin); 34 | 35 | // flatten control flow with dispatch block 36 | control_flow_flattening(&bin); 37 | 38 | ASSEMBLED_BINARY asm_bin = build_pe(&bin); 39 | assembled_binary_print(&asm_bin); 40 | 41 | output_file(asm_bin.filebuf.data(), asm_bin.filebuf.size(), 42 | "path_to_reassembled_exe"); 43 | 44 | ``` 45 | 46 | flatten control flow 47 | ```cpp 48 | void control_flow_flattening(BINARY* bin) { 49 | SYMBOL* sym_image_base = bin->rel_info(0, "__ImageBase"); 50 | DATA_BLOCK* rva_table = bin->data_block(0, false, "cff_data"); 51 | SYMBOL* sym_rva_table = rva_table->data_sym(0); 52 | rva_table->alignment = 4; 53 | 54 | BASIC_BLOCK* dispatch_block = bin->basic_block("dispatch_block"); 55 | dispatch_block->push({ 56 | // rax -> index 57 | Lea(rcx_, RipRel(sym_image_base->id)), 58 | Mov(eax_, MemIdx(rcx_, rax_, 4, sym_rva_table->id)), 59 | 60 | Add(ecx_, eax_), 61 | Jmp(rcx_) 62 | }); 63 | 64 | for (int i = bin->basic_blocks.size() - 1; i >= 0; i--) { 65 | BASIC_BLOCK* bb = bin->basic_blocks[i]; 66 | 67 | instr_t* end_instr = &bb->instrs[bb->instrs.size() - 1]; 68 | auto [end_dec_ctx, end_dec_instr] = 69 | decode_instr(&bin->decoder, end_instr->bytes, end_instr->len); 70 | 71 | if 
(end_dec_instr.meta.category != ZYDIS_CATEGORY_COND_BR) { 72 | continue; 73 | } 74 | 75 | sym_id_t target_sym_id = get_sym_id(end_instr, &end_dec_instr); 76 | 77 | BASIC_BLOCK* in_proxy_block = bin->basic_block(); 78 | BASIC_BLOCK* out_proxy_block = bin->basic_block(); 79 | 80 | // push a entry, which turns into the target rva 81 | int entry_idx = rva_table->bytes.size() / 4; 82 | SYMBOL* entry = rva_table->push_val(0, sizeof(uint32_t)); 83 | entry->target_type = TARGET_TYPE_RVA; 84 | entry->target_sym_id = out_proxy_block->id; 85 | 86 | instr_store_val(end_instr, in_proxy_block->id, 87 | &bin->decoder, &end_dec_instr, &end_dec_ctx); 88 | 89 | in_proxy_block->push({ 90 | Push(rax_), 91 | Push(rcx_), 92 | Mov(rax_, Imm(entry_idx)), 93 | Jmp(ImmRel(dispatch_block->id)) 94 | }); 95 | 96 | out_proxy_block->push({ 97 | Pop(rcx_), 98 | Pop(rax_), 99 | Jmp(ImmRel(target_sym_id)) 100 | }); 101 | } 102 | } 103 | 104 | ``` 105 | 106 | spam nops 107 | ```cpp 108 | void spam_nops_1337(BINARY* bin) { 109 | for (BASIC_BLOCK* bb : bin->basic_blocks) { 110 | for (int i = bb->instrs.size(); i > 0; i--) { 111 | bb->instrs.insert(begin(bb->instrs) + i - 1, Nop()); 112 | } 113 | } 114 | } 115 | ``` 116 | 117 | ## current issues: 118 | - differentiating code (callback functions never called within binary itself) and data within executable sections, if there are also data in there 119 | - exceptions 120 | 121 | ## inspirations from: 122 | https://github.com/jonomango/chum 123 | -------------------------------------------------------------------------------- /baker.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.10.34928.147 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "baker", "baker\baker.vcxproj", "{B561D151-3BD9-4634-83F1-21954247FC45}" 7 | EndProject 8 | 
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ExamplePrograms", "ExamplePrograms", "{EE2E34F2-3BCC-4D7B-9688-44C1880EF6AB}" 9 | EndProject 10 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ExampleCrackMe", "ExampleCrackMe\ExampleCrackMe.vcxproj", "{AEA8BE4F-D891-4559-B153-356596B50530}" 11 | EndProject 12 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ExampleJumpTable", "ExampleJumpTable\ExampleJumpTable.vcxproj", "{A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}" 13 | EndProject 14 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ExampleBakerUsage", "ExampleBakerUsage\ExampleBakerUsage.vcxproj", "{5A1F1249-24FE-49A3-BD7A-69715A5A14F4}" 15 | EndProject 16 | Global 17 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 18 | Debug|x64 = Debug|x64 19 | Debug|x86 = Debug|x86 20 | Release|x64 = Release|x64 21 | Release|x86 = Release|x86 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {B561D151-3BD9-4634-83F1-21954247FC45}.Debug|x64.ActiveCfg = Debug|x64 25 | {B561D151-3BD9-4634-83F1-21954247FC45}.Debug|x64.Build.0 = Debug|x64 26 | {B561D151-3BD9-4634-83F1-21954247FC45}.Debug|x86.ActiveCfg = Debug|Win32 27 | {B561D151-3BD9-4634-83F1-21954247FC45}.Debug|x86.Build.0 = Debug|Win32 28 | {B561D151-3BD9-4634-83F1-21954247FC45}.Release|x64.ActiveCfg = Release|x64 29 | {B561D151-3BD9-4634-83F1-21954247FC45}.Release|x64.Build.0 = Release|x64 30 | {B561D151-3BD9-4634-83F1-21954247FC45}.Release|x86.ActiveCfg = Release|Win32 31 | {B561D151-3BD9-4634-83F1-21954247FC45}.Release|x86.Build.0 = Release|Win32 32 | {AEA8BE4F-D891-4559-B153-356596B50530}.Debug|x64.ActiveCfg = Release|x64 33 | {AEA8BE4F-D891-4559-B153-356596B50530}.Debug|x64.Build.0 = Release|x64 34 | {AEA8BE4F-D891-4559-B153-356596B50530}.Debug|x86.ActiveCfg = Debug|Win32 35 | {AEA8BE4F-D891-4559-B153-356596B50530}.Debug|x86.Build.0 = Debug|Win32 36 | {AEA8BE4F-D891-4559-B153-356596B50530}.Release|x64.ActiveCfg = Release|x64 37 | 
{AEA8BE4F-D891-4559-B153-356596B50530}.Release|x64.Build.0 = Release|x64 38 | {AEA8BE4F-D891-4559-B153-356596B50530}.Release|x86.ActiveCfg = Release|Win32 39 | {AEA8BE4F-D891-4559-B153-356596B50530}.Release|x86.Build.0 = Release|Win32 40 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Debug|x64.ActiveCfg = Release|x64 41 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Debug|x64.Build.0 = Release|x64 42 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Debug|x86.ActiveCfg = Debug|Win32 43 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Debug|x86.Build.0 = Debug|Win32 44 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Release|x64.ActiveCfg = Release|x64 45 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Release|x64.Build.0 = Release|x64 46 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Release|x86.ActiveCfg = Release|Win32 47 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896}.Release|x86.Build.0 = Release|Win32 48 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Debug|x64.ActiveCfg = Debug|x64 49 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Debug|x64.Build.0 = Debug|x64 50 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Debug|x86.ActiveCfg = Debug|Win32 51 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Debug|x86.Build.0 = Debug|Win32 52 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Release|x64.ActiveCfg = Release|x64 53 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Release|x64.Build.0 = Release|x64 54 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Release|x86.ActiveCfg = Release|Win32 55 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4}.Release|x86.Build.0 = Release|Win32 56 | EndGlobalSection 57 | GlobalSection(SolutionProperties) = preSolution 58 | HideSolutionNode = FALSE 59 | EndGlobalSection 60 | GlobalSection(NestedProjects) = preSolution 61 | {AEA8BE4F-D891-4559-B153-356596B50530} = {EE2E34F2-3BCC-4D7B-9688-44C1880EF6AB} 62 | {A7A527F2-31BC-4C13-BC32-FCC3AC1A6896} = {EE2E34F2-3BCC-4D7B-9688-44C1880EF6AB} 63 | {5A1F1249-24FE-49A3-BD7A-69715A5A14F4} = {EE2E34F2-3BCC-4D7B-9688-44C1880EF6AB} 64 | EndGlobalSection 65 | GlobalSection(ExtensibilityGlobals) = postSolution 66 
| SolutionGuid = {502B6124-B30B-4415-A41D-43D78F1B37D8} 67 | EndGlobalSection 68 | EndGlobal 69 | -------------------------------------------------------------------------------- /baker/src/symbols/block.cpp: -------------------------------------------------------------------------------- 1 | #include "../binary.h" 2 | 3 | BASIC_BLOCK* BASIC_BLOCK::insert(int idx, instr_t instr) { 4 | SYMBOL* sym = bin_->symbols[this->id]; 5 | 6 | logger_log( 7 | WHITE, "", 8 | WHITE, sym->name.empty() 9 | ? fmtf("%-3d: +%-3d: %s\n", this->id, this->size(), serialize_instr(bin_, &instr).c_str()) 10 | : fmtf("%s, %-3d, +%-3d: %s\n", sym->name.c_str(), this->id, this->size(), serialize_instr(bin_, &instr).c_str()) 11 | ); 12 | 13 | instrs.insert(begin(instrs) + idx, instr); 14 | return this; 15 | } 16 | 17 | BASIC_BLOCK* BASIC_BLOCK::insert(int idx, std::vector instructions_array) { 18 | for (int i = idx; auto & instr : instructions_array) { 19 | this->insert(i++, instr); 20 | } 21 | return this; 22 | } 23 | 24 | 25 | BASIC_BLOCK* BASIC_BLOCK::push(instr_t instr) { 26 | this->insert(instrs.size(), instr); 27 | return this; 28 | } 29 | 30 | BASIC_BLOCK* BASIC_BLOCK::push(std::vector instructions_array) { 31 | this->insert(instrs.size(), instructions_array); 32 | return this; 33 | } 34 | 35 | BASIC_BLOCK* BASIC_BLOCK::fall(sym_id_t sym_id) { 36 | fallthrough_sym_id = sym_id; 37 | 38 | SYMBOL* sym = bin_->symbols[this->id]; 39 | logger_log( 40 | WHITE, "", 41 | WHITE, sym->name.empty() 42 | ? 
fmtf("%d --> %d\n", this->id, fallthrough_sym_id) 43 | : fmtf("%s, %d --> %d\n", sym->name.c_str(), this->id, fallthrough_sym_id)); 44 | return this; 45 | } 46 | 47 | BASIC_BLOCK* BASIC_BLOCK::fall_to_next() { 48 | fall(id + 1); 49 | return this; 50 | } 51 | 52 | size_t BASIC_BLOCK::size() { 53 | int size = 0; 54 | for (instr_t& ins : instrs) { 55 | size += ins.len; 56 | } 57 | return size; 58 | } 59 | 60 | uint32_t BASIC_BLOCK::get_instr_offset(int idx) { 61 | uint32_t offset = 0; 62 | for (int i = 0; i < idx; i++) { 63 | offset += instrs[i].len; 64 | } 65 | return offset; 66 | } 67 | 68 | /* 69 | BASIC_BLOCK* BASIC_BLOCK::map_to_sect(std::string sect_name) { 70 | this->parent_sect = sect_name; 71 | return this; 72 | } 73 | */ 74 | 75 | SYMBOL* DATA_BLOCK::data_sym(int db_offset, enum TARGET_TYPE target_type, sym_id_t target_id) { 76 | SYMBOL* sym = nullptr; 77 | bool reusing = false; 78 | 79 | if (dboffset_to_sym[db_offset]) { 80 | sym = dboffset_to_sym[db_offset]; 81 | reusing = true; 82 | } else { 83 | sym = new SYMBOL{}; 84 | sym->id = bin_->symbols.size(); 85 | bin_->symbols.push_back(sym); 86 | 87 | sym->type = SYMBOL_TYPE_DATA; 88 | sym->db = this; 89 | sym->db_offset = db_offset; 90 | dboffset_to_sym[db_offset] = sym; 91 | } 92 | 93 | if (target_type) { 94 | sym->target_type = target_type; 95 | sym->target_sym_id = target_id; 96 | } 97 | 98 | logger_log( 99 | WHITE, fmtf("%s %s", this->name.c_str(), reusing ? 
">ds" : "+ds"), 100 | WHITE, fmtf("%d: +%d\n", sym->id, sym->db_offset)); 101 | 102 | return sym; 103 | } 104 | 105 | SYMBOL* DATA_BLOCK::push_val(uint64_t val, int len, TARGET_TYPE target_type, sym_id_t target_id) { 106 | SYMBOL* sym = data_sym(bytes.size()); 107 | 108 | bytes.insert(end(bytes), len, 0); 109 | memcpy(bytes.data(), &val, len); 110 | 111 | if (target_type) { 112 | sym->target_type = target_type; 113 | sym->target_sym_id = target_id; 114 | } 115 | 116 | logger_log( 117 | WHITE, fmtf("%s +val %d", this->name.c_str(), len), 118 | WHITE, fmtf("%d\n", val)); 119 | 120 | return sym; 121 | } 122 | 123 | SYMBOL* DATA_BLOCK::push_buf(const void* buf, int len) { 124 | SYMBOL* sym = data_sym(bytes.size()); 125 | 126 | bytes.insert(end(bytes), 127 | (uint8_t*)buf, 128 | (uint8_t*)buf + len); 129 | 130 | #ifdef DEBUG_LOGGING 131 | std::string fmt = ""; 132 | for (int i = 0; i < len; i++) { 133 | fmt.append(fmtf("%02X, ", ((uint8_t*)buf)[i])); 134 | 135 | if (i > 10) { 136 | fmt.append("..."); 137 | break; 138 | } 139 | } 140 | logger_log( 141 | WHITE, fmtf("%s +buf", this->name.c_str()), 142 | WHITE, fmtf("%s\n", fmt.c_str())); 143 | #endif 144 | 145 | return sym; 146 | } 147 | 148 | SYMBOL* DATA_BLOCK::push_str(std::string str, bool nullterm) { 149 | SYMBOL* sym = data_sym(bytes.size()); 150 | 151 | bytes.insert(end(bytes), 152 | str.c_str(), 153 | str.c_str() + str.size()); 154 | 155 | if (nullterm) { 156 | bytes.push_back('\0'); 157 | } 158 | 159 | logger_log( 160 | WHITE, fmtf("%s +buf", this->name.c_str()), 161 | WHITE, fmtf("%s\n", str.c_str())); 162 | 163 | return sym; 164 | } 165 | 166 | DATA_BLOCK* DATA_BLOCK::map_to_sect(std::string sect_name) { 167 | this->parent_sect = sect_name; 168 | return this; 169 | } -------------------------------------------------------------------------------- /baker/src/symbols/symbols.cpp: -------------------------------------------------------------------------------- 1 | #include "../binary.h" 2 | 3 | // create a forward 
declaration 4 | SYMBOL* BINARY::label() { 5 | SYMBOL* sym = new SYMBOL{}; 6 | sym->id = symbols.size(); 7 | symbols.push_back(sym); 8 | 9 | logger_log( 10 | YELLOW, "+label", 11 | WHITE, fmtf("id: %d\n", sym->id)); 12 | return sym; 13 | } 14 | 15 | // 16 | // CODE 17 | // 18 | 19 | BASIC_BLOCK* BINARY::basic_block(std::string name) { 20 | SYMBOL* sym = new SYMBOL{}; 21 | sym->id = symbols.size(); 22 | symbols.push_back(sym); 23 | 24 | sym->type = SYMBOL_TYPE_CODE; 25 | sym->bb = basic_blocks.emplace_back(new BASIC_BLOCK{}); 26 | sym->bb->id = sym->id; 27 | sym->bb->bin_ = this; 28 | sym->name = name; 29 | 30 | logger_log( 31 | BRIGHT_CYAN, "+basic_block", 32 | WHITE, fmtf("%s\n", serialize_sym(sym).c_str())); 33 | 34 | return sym->bb; 35 | } 36 | 37 | BASIC_BLOCK* BINARY::basic_block(sym_id_t label_id, std::string name) { 38 | SYMBOL* sym = symbols[label_id]; 39 | sym->type = SYMBOL_TYPE_CODE; 40 | sym->bb = basic_blocks.emplace_back(new BASIC_BLOCK{}); 41 | sym->bb->id = sym->id; 42 | sym->bb->bin_ = this; 43 | sym->name = name; 44 | 45 | logger_log( 46 | WHITE, ">basic_block", 47 | WHITE, fmtf("%s\n", serialize_sym(sym).c_str())); 48 | return sym->bb; 49 | } 50 | 51 | BASIC_BLOCK* BINARY::set_entry(BASIC_BLOCK* bb) { 52 | logger_log( 53 | WHITE, "entry_point", 54 | WHITE, fmtf("set to %d\n", bb->id)); 55 | return entry_point = bb; 56 | } 57 | 58 | // 59 | // DATA 60 | // 61 | 62 | DATA_BLOCK* BINARY::data_block(uint32_t size, BOOL read_only, std::string name) { 63 | DATA_BLOCK* db = data_blocks.emplace_back(new DATA_BLOCK{}); 64 | db->name = name; 65 | db->bytes = std::vector(size, 0); 66 | db->read_only = read_only; 67 | db->bin_ = this; 68 | 69 | logger_log( 70 | BRIGHT_RED, "+data_block", 71 | WHITE, fmtf("name: %s, size: %d, read_only: %d\n", 72 | name.c_str(), 73 | size, 74 | read_only)); 75 | return db; 76 | } 77 | 78 | // 79 | // RELATIVE INFO 80 | // 81 | 82 | // to handle data references out of data blocks 83 | // we create relative data symbols 84 | // 
this should only work for addresses within the pe header 85 | SYMBOL* BINARY::rel_info(uint32_t rel_offset, std::string name) { 86 | SYMBOL* sym = new SYMBOL{}; 87 | sym->type = SYMBOL_TYPE_RELATIVE_INFO; 88 | sym->id = symbols.size(); 89 | sym->name = name; 90 | sym->rel_offset = rel_offset; 91 | symbols.push_back(sym); 92 | 93 | logger_log( 94 | BRIGHT_GREEN, "+rel_info", 95 | WHITE, fmtf("sym_%d, name: %s, rel_offset: 0x%X\n", 96 | sym->id, 97 | name.c_str(), 98 | rel_offset)); 99 | 100 | return sym; 101 | } 102 | 103 | // 104 | // GETTERS 105 | // 106 | 107 | SYMBOL* BINARY::get_symbol(std::string name) { 108 | auto it = std::find_if(begin(symbols), end(symbols), 109 | [&](const SYMBOL* sym) { 110 | return sym->name == name; 111 | } 112 | ); 113 | 114 | if (it == end(symbols)) 115 | return nullptr; 116 | 117 | return *it; 118 | } 119 | 120 | DATA_BLOCK* BINARY::get_data_block(std::string name) { 121 | auto it = std::find_if(begin(data_blocks), end(data_blocks), 122 | [&](const DATA_BLOCK* db) { 123 | return db->name == name; 124 | } 125 | ); 126 | 127 | if (it == end(data_blocks)) 128 | return nullptr; 129 | 130 | return *it; 131 | } 132 | 133 | std::vector BINARY::get_xrefs(sym_id_t target_sym_id) { 134 | std::vector xrefs = {}; 135 | for (SYMBOL* sym : symbols) { 136 | if (sym->type == SYMBOL_TYPE_CODE) { 137 | BASIC_BLOCK* bb = sym->bb; 138 | 139 | for (int i = 0; i < bb->instrs.size(); i++) { 140 | instr_t* instr = &bb->instrs[i]; 141 | auto [dec_ctx, dec_instr] = decode_instr(&decoder, instr->bytes, instr->len); 142 | 143 | sym_id_t sym_id = get_sym_id(instr, &dec_instr); 144 | 145 | if (sym_id == target_sym_id) { 146 | xrefs.push_back({ sym, i }); 147 | } 148 | } 149 | 150 | if (bb->fallthrough_sym_id == target_sym_id) { 151 | xrefs.push_back({ sym, -1 }); 152 | } 153 | } 154 | 155 | else if (sym->type == SYMBOL_TYPE_DATA) { 156 | if (sym->target_sym_id == target_sym_id) 157 | xrefs.push_back({ sym }); 158 | } 159 | } 160 | return xrefs; 161 | } 162 | 163 
| std::vector BINARY::get_xrefs(BASIC_BLOCK* bb, sym_id_t target_sym_id) { 164 | std::vector xrefs = {}; 165 | SYMBOL* sym = symbols[bb->id]; 166 | 167 | for (int i = 0; i < bb->instrs.size(); i++) { 168 | instr_t* instr = &bb->instrs[i]; 169 | auto [dec_ctx, dec_instr] = decode_instr(&decoder, instr->bytes, instr->len); 170 | 171 | sym_id_t sym_id = get_sym_id(instr, &dec_instr); 172 | 173 | if (sym_id == target_sym_id) { 174 | xrefs.push_back({ sym, i }); 175 | } 176 | } 177 | 178 | if (bb->fallthrough_sym_id == target_sym_id) { 179 | xrefs.push_back({ sym, -1 }); 180 | } 181 | 182 | return xrefs; 183 | } -------------------------------------------------------------------------------- /ExampleBakerUsage/examples/rewrite_crackme.cpp: -------------------------------------------------------------------------------- 1 | #include "examples.h" 2 | #include 3 | 4 | void opaque_predicates(BINARY* bin) { 5 | for (int bb_idx = bin->basic_blocks.size() - 1; bb_idx >= 0; bb_idx--) { 6 | BASIC_BLOCK* bb = bin->basic_blocks[bb_idx]; 7 | 8 | for (int instr_idx = 0; instr_idx < bb->instrs.size(); instr_idx++) { 9 | instr_t* instr = &bb->instrs[instr_idx]; 10 | auto [dec_instr, dec_ops] = 11 | decode_full(&bin->decoder, instr->bytes, instr->len); 12 | 13 | if (dec_instr.mnemonic != ZYDIS_MNEMONIC_MOV) 14 | continue; 15 | if (dec_ops[1].type != ZYDIS_OPERAND_TYPE_MEMORY) 16 | continue; 17 | 18 | // split basic block 19 | BASIC_BLOCK* new_bb = bin->basic_block(); 20 | new_bb->fall(bb->fallthrough_sym_id); 21 | 22 | new_bb->instrs.insert(begin(new_bb->instrs), 23 | begin(bb->instrs) + instr_idx + 1, 24 | end(bb->instrs)); 25 | 26 | bb->instrs.erase( 27 | begin(bb->instrs) + instr_idx + 1, 28 | end(bb->instrs)); 29 | 30 | // x = reg 31 | OPERAND reg = { 32 | .type = OP_REG, 33 | .reg = dec_ops[0].reg.value 34 | }; 35 | 36 | OPERAND root_reg = { 37 | .type = OP_REG, 38 | .reg = reg_root(dec_ops[0].reg.value) 39 | }; 40 | 41 | BASIC_BLOCK* even_branch = bin->basic_block(); 42 | 
BASIC_BLOCK* odd_branch = bin->basic_block(); 43 | 44 | bb->push({ 45 | Pushfq(), // save flags 46 | Push(root_reg), // save register 47 | Test(reg, Imm(1)), // check if even 48 | Jz(ImmRel(even_branch->id)) 49 | })->fall(odd_branch->id); 50 | 51 | // even branch 52 | // (x/2) * 2 == x 53 | even_branch->push({ 54 | Shr(reg, Imm(1)), // (x/2) 55 | Shl(reg, Imm(1)), // (x/2) * 2 56 | Cmp(reg, Mem(rsp_, 0)), // (x/2) * 2 == x 57 | Jz(ImmRel(new_bb->id)) 58 | }); 59 | 60 | // odd branch 61 | // (x*x) % 8 == 1 62 | odd_branch->push({ 63 | Imul(reg, reg), // x*x 64 | Test(reg, Imm(7)), // (x*x) % 8 65 | Jnz(ImmRel(new_bb->id)) // (x*x) % 8 == 1 66 | }); 67 | 68 | new_bb->insert(0, { Pop(root_reg), Popfq()}); 69 | } 70 | } 71 | } 72 | 73 | void control_flow_flattening(BINARY* bin) { 74 | SYMBOL* sym_image_base = bin->rel_info(0, "__ImageBase"); 75 | DATA_BLOCK* rva_table = bin->data_block(0, false, "cff_data"); 76 | SYMBOL* sym_rva_table = rva_table->data_sym(0); 77 | rva_table->alignment = 4; 78 | 79 | BASIC_BLOCK* dispatch_block = bin->basic_block("dispatch_block"); 80 | dispatch_block->push({ 81 | // rax -> index 82 | Lea(rcx_, RipRel(sym_image_base->id)), 83 | Mov(eax_, MemIdx(rcx_, rax_, 4, sym_rva_table->id)), 84 | 85 | Add(ecx_, eax_), 86 | Jmp(rcx_) 87 | }); 88 | 89 | for (int i = bin->basic_blocks.size() - 1; i >= 0; i--) { 90 | BASIC_BLOCK* bb = bin->basic_blocks[i]; 91 | 92 | instr_t* end_instr = &bb->instrs[bb->instrs.size() - 1]; 93 | auto [end_dec_ctx, end_dec_instr] = 94 | decode_instr(&bin->decoder, end_instr->bytes, end_instr->len); 95 | 96 | if (end_dec_instr.meta.category != ZYDIS_CATEGORY_COND_BR) { 97 | continue; 98 | } 99 | 100 | sym_id_t target_sym_id = get_sym_id(end_instr, &end_dec_instr); 101 | 102 | BASIC_BLOCK* in_proxy_block = bin->basic_block(); 103 | BASIC_BLOCK* out_proxy_block = bin->basic_block(); 104 | 105 | // push a entry, which turns into the target rva 106 | int entry_idx = rva_table->bytes.size() / 4; 107 | SYMBOL* entry = 
rva_table->push_val(0, sizeof(uint32_t)); 108 | entry->target_type = TARGET_TYPE_RVA; 109 | entry->target_sym_id = out_proxy_block->id; 110 | 111 | instr_store_val(end_instr, in_proxy_block->id, 112 | &bin->decoder, &end_dec_instr, &end_dec_ctx); 113 | 114 | in_proxy_block->push({ 115 | Push(rax_), 116 | Push(rcx_), 117 | Mov(rax_, Imm(entry_idx)), 118 | Jmp(ImmRel(dispatch_block->id)) 119 | }); 120 | 121 | out_proxy_block->push({ 122 | Pop(rcx_), 123 | Pop(rax_), 124 | Jmp(ImmRel(target_sym_id)) 125 | }); 126 | } 127 | } 128 | 129 | void spam_nops_1337(BINARY* bin) { 130 | for (BASIC_BLOCK* bb : bin->basic_blocks) { 131 | for (int i = bb->instrs.size(); i > 0; i--) { 132 | bb->instrs.insert(begin(bb->instrs) + i - 1, Nop()); 133 | } 134 | } 135 | } 136 | 137 | void rewrite_crackme() { 138 | // disassemble ExampleCrackMe.exe 139 | auto filebuf = read_file_to_buffer("C:\\Users\\li\\source\\repos\\baker\\x64\\Release\\ExampleCrackMe.exe"); 140 | const DISASSEMBLED_BINARY disasm_bin = disassemble_pe(filebuf); 141 | 142 | BINARY bin = {}; 143 | binary_duplicate(&disasm_bin.bin, &bin); 144 | 145 | spam_nops_1337(&bin); 146 | control_flow_flattening(&bin); 147 | 148 | ASSEMBLED_BINARY asm_bin = build_pe(&bin); 149 | assembled_binary_print(&asm_bin); 150 | 151 | output_file(asm_bin.filebuf.data(), asm_bin.filebuf.size(), 152 | "C:\\Users\\li\\source\\repos\\baker\\x64\\Release\\reassembled_crack_me.exe"); 153 | } 154 | -------------------------------------------------------------------------------- /ExampleCrackMe/ExampleCrackMe.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 17.0 23 | Win32Proj 24 | {aea8be4f-d891-4559-b153-356596b50530} 25 | ExampleCrackMe 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | 
true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | true 77 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Console 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | true 89 | true 90 | true 91 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 92 | true 93 | 94 | 95 | Console 96 | true 97 | true 98 | true 99 | 100 | 101 | 102 | 103 | Level3 104 | true 105 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 106 | true 107 | 108 | 109 | Console 110 | true 111 | 112 | 113 | 114 | 115 | Level3 116 | true 117 | true 118 | true 119 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 120 | true 121 | 122 | 123 | Console 124 | true 125 | true 126 | true 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /ExampleJumpTable/ExampleJumpTable.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 17.0 23 | Win32Proj 24 | {a7a527f2-31bc-4c13-bc32-fcc3ac1a6896} 25 | ExampleJumpTable 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | true 77 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Console 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | true 89 | true 90 | true 91 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 92 
| true 93 | 94 | 95 | Console 96 | true 97 | true 98 | true 99 | 100 | 101 | 102 | 103 | Level3 104 | true 105 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 106 | true 107 | 108 | 109 | Console 110 | true 111 | 112 | 113 | 114 | 115 | Level3 116 | true 117 | false 118 | true 119 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 120 | true 121 | false 122 | Disabled 123 | false 124 | %(AdditionalOptions) 125 | 126 | 127 | Console 128 | true 129 | true 130 | true 131 | jumptable_example_main 132 | true 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | -------------------------------------------------------------------------------- /baker/src/symbols/instruction.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | typedef uint32_t sym_id_t; 10 | 11 | #define INSTR_FLAG_MEM_IDX (1U << 0) 12 | 13 | struct instr_t { 14 | uint8_t bytes[15]; 15 | size_t len; 16 | uint32_t flags; 17 | }; 18 | 19 | enum INSTR_OP_TYPE { 20 | OP_NONE, 21 | OP_REG, 22 | 23 | OP_IMM, 24 | 25 | OP_IMMREL, 26 | OP_RIPREL, 27 | 28 | OP_MEM, 29 | OP_MEM_IDX 30 | }; 31 | 32 | struct OPERAND { 33 | INSTR_OP_TYPE type; 34 | 35 | union { 36 | int64_t imm; 37 | 38 | sym_id_t target_sym_id; 39 | 40 | ZydisRegister reg; 41 | 42 | struct { 43 | ZydisRegister base_reg; 44 | int disp; 45 | } mem; 46 | 47 | struct { 48 | ZydisRegister base_reg; 49 | ZydisRegister idx_reg; 50 | uint32_t scale; 51 | sym_id_t table_sym_id; 52 | } memidx; 53 | }; 54 | 55 | int len; 56 | }; 57 | 58 | 59 | OPERAND Reg(ZydisRegister reg); 60 | OPERAND Imm(int64_t immediate); 61 | 62 | OPERAND ImmRel(sym_id_t symbol_id); 63 | OPERAND RipRel(sym_id_t symbol_id, int len = 8); 64 | 65 | OPERAND Mem (OPERAND base_reg, int disp, int len = 8); 66 | OPERAND MemIdx(OPERAND base_reg, OPERAND idx_reg, int scale, sym_id_t table_sym_id = 0, int len = 8); 67 | 68 | instr_t Instr(ZydisMnemonic mnemonic, OPERAND op1, OPERAND 
op2); 69 | 70 | void instr_store_val(instr_t* instr, uint32_t val, ZydisDecoder* decoder, ZydisDecodedInstruction* dec_instr, ZydisDecoderContext* dec_ctx, uint64_t instr_va = NULL); 71 | sym_id_t get_sym_id(const instr_t* instr, const ZydisDecodedInstruction* dec_instr); 72 | 73 | // 74 | // zydis wrappers 75 | // 76 | 77 | void zy_expect_success(ZyanStatus zystatus); 78 | 79 | ZydisRegister reg_root(ZydisRegister reg); 80 | ZydisRegisterWidth reg_width(ZydisRegister reg); 81 | 82 | std::tuple< 83 | ZydisDecoderContext, 84 | ZydisDecodedInstruction> decode_instr( 85 | const ZydisDecoder* decoder, 86 | const uint8_t* raw_instr, 87 | const size_t len); 88 | 89 | std::vector decode_ops( 90 | const ZydisDecoder* decoder, 91 | const ZydisDecodedInstruction* dec_instr, 92 | const ZydisDecoderContext* dec_ctx); 93 | 94 | std::tuple< 95 | ZydisDecodedInstruction, 96 | std::vector> decode_full( 97 | const ZydisDecoder* decoder, 98 | const uint8_t* raw_instr, 99 | const size_t len); 100 | 101 | 102 | #define DECLARE_instr_t0(func_name, mnemonic) inline instr_t func_name() { return Instr(mnemonic, {}, {}); } 103 | #define DECLARE_instr_t1(func_name, mnemonic) inline instr_t func_name(OPERAND op1) { return Instr(mnemonic, op1, {}); } 104 | #define DECLARE_instr_t2(func_name, mnemonic) inline instr_t func_name(OPERAND op1, OPERAND op2) { return Instr(mnemonic, op1, op2); } 105 | 106 | // data movement 107 | DECLARE_instr_t2(Mov, ZYDIS_MNEMONIC_MOV); 108 | DECLARE_instr_t2(Lea, ZYDIS_MNEMONIC_LEA); 109 | DECLARE_instr_t1(Push, ZYDIS_MNEMONIC_PUSH); 110 | DECLARE_instr_t1(Pop, ZYDIS_MNEMONIC_POP); 111 | DECLARE_instr_t2(Xchg, ZYDIS_MNEMONIC_XCHG); 112 | DECLARE_instr_t2(Movsx, ZYDIS_MNEMONIC_MOVSX); 113 | DECLARE_instr_t2(Movzx, ZYDIS_MNEMONIC_MOVZX); 114 | DECLARE_instr_t2(Cmpxchg, ZYDIS_MNEMONIC_CMPXCHG); 115 | 116 | // control 117 | DECLARE_instr_t1(Call, ZYDIS_MNEMONIC_CALL); 118 | DECLARE_instr_t0(Ret, ZYDIS_MNEMONIC_RET); 119 | DECLARE_instr_t1(Jmp, ZYDIS_MNEMONIC_JMP); 
120 | 121 | // conditional 122 | DECLARE_instr_t2(Cmp, ZYDIS_MNEMONIC_CMP); 123 | DECLARE_instr_t2(Test, ZYDIS_MNEMONIC_TEST); 124 | DECLARE_instr_t0(Pushfq, ZYDIS_MNEMONIC_PUSHFQ); 125 | DECLARE_instr_t0(Popfq, ZYDIS_MNEMONIC_POPFQ); 126 | 127 | // control-flow 128 | DECLARE_instr_t1(Jz, ZYDIS_MNEMONIC_JZ); 129 | DECLARE_instr_t1(Jnz, ZYDIS_MNEMONIC_JNZ); 130 | DECLARE_instr_t1(Jl, ZYDIS_MNEMONIC_JL); 131 | DECLARE_instr_t1(Jle, ZYDIS_MNEMONIC_JLE); 132 | DECLARE_instr_t1(Jnl, ZYDIS_MNEMONIC_JNL); 133 | DECLARE_instr_t1(Jnle, ZYDIS_MNEMONIC_JNLE); 134 | 135 | // logical operation 136 | DECLARE_instr_t2(And, ZYDIS_MNEMONIC_AND); 137 | DECLARE_instr_t2(Xor, ZYDIS_MNEMONIC_XOR); 138 | DECLARE_instr_t2(Or, ZYDIS_MNEMONIC_OR); 139 | 140 | // arithmatic operation 141 | DECLARE_instr_t1(Inc, ZYDIS_MNEMONIC_INC); 142 | DECLARE_instr_t1(Dec, ZYDIS_MNEMONIC_DEC); 143 | DECLARE_instr_t2(Add, ZYDIS_MNEMONIC_ADD); 144 | DECLARE_instr_t2(Sub, ZYDIS_MNEMONIC_SUB); 145 | DECLARE_instr_t2(Imul, ZYDIS_MNEMONIC_IMUL); 146 | DECLARE_instr_t2(Idiv, ZYDIS_MNEMONIC_IDIV); 147 | DECLARE_instr_t2(Mul, ZYDIS_MNEMONIC_MUL); 148 | DECLARE_instr_t2(Div, ZYDIS_MNEMONIC_DIV); 149 | DECLARE_instr_t2(Shl, ZYDIS_MNEMONIC_SHL); 150 | DECLARE_instr_t2(Shr, ZYDIS_MNEMONIC_SHR); 151 | 152 | 153 | // other 154 | DECLARE_instr_t0(Nop, ZYDIS_MNEMONIC_NOP); 155 | 156 | 157 | #define DECLARE_REG(reg_name, mnemonic) extern const OPERAND reg_name##_; 158 | 159 | DECLARE_REG(rax, ZYDIS_REGISTER_RAX); 160 | DECLARE_REG(rbx, ZYDIS_REGISTER_RBX); 161 | DECLARE_REG(rcx, ZYDIS_REGISTER_RCX); 162 | DECLARE_REG(rdx, ZYDIS_REGISTER_RDX); 163 | DECLARE_REG(rsi, ZYDIS_REGISTER_RSI); 164 | DECLARE_REG(rdi, ZYDIS_REGISTER_RDI); 165 | DECLARE_REG(rbp, ZYDIS_REGISTER_RBP); 166 | DECLARE_REG(rsp, ZYDIS_REGISTER_RSP); 167 | DECLARE_REG(r8, ZYDIS_REGISTER_R8); 168 | DECLARE_REG(r9, ZYDIS_REGISTER_R9); 169 | DECLARE_REG(r10, ZYDIS_REGISTER_R10); 170 | DECLARE_REG(r11, ZYDIS_REGISTER_R11); 171 | DECLARE_REG(r15, 
ZYDIS_REGISTER_R15); 172 | DECLARE_REG(r12, ZYDIS_REGISTER_R12); 173 | DECLARE_REG(r13, ZYDIS_REGISTER_R13); 174 | DECLARE_REG(r14, ZYDIS_REGISTER_R14); 175 | 176 | DECLARE_REG(eax, ZYDIS_REGISTER_EAX); 177 | DECLARE_REG(ebx, ZYDIS_REGISTER_EBX); 178 | DECLARE_REG(ecx, ZYDIS_REGISTER_ECX); 179 | DECLARE_REG(edx, ZYDIS_REGISTER_EDX); 180 | DECLARE_REG(esi, ZYDIS_REGISTER_ESI); 181 | DECLARE_REG(edi, ZYDIS_REGISTER_EDI); 182 | DECLARE_REG(ebp, ZYDIS_REGISTER_EBP); 183 | DECLARE_REG(esp, ZYDIS_REGISTER_ESP); 184 | DECLARE_REG(r8d, ZYDIS_REGISTER_R8D); 185 | DECLARE_REG(r9d, ZYDIS_REGISTER_R9D); 186 | DECLARE_REG(r10d, ZYDIS_REGISTER_R10D); 187 | DECLARE_REG(r11d, ZYDIS_REGISTER_R11D); 188 | DECLARE_REG(r15d, ZYDIS_REGISTER_R15D); 189 | DECLARE_REG(r12d, ZYDIS_REGISTER_R12D); 190 | DECLARE_REG(r13d, ZYDIS_REGISTER_R13D); 191 | DECLARE_REG(r14d, ZYDIS_REGISTER_R14D); 192 | 193 | DECLARE_REG(ax, ZYDIS_REGISTER_AX); 194 | DECLARE_REG(bx, ZYDIS_REGISTER_BX); 195 | DECLARE_REG(cx, ZYDIS_REGISTER_CX); 196 | DECLARE_REG(dx, ZYDIS_REGISTER_DX); 197 | DECLARE_REG(si, ZYDIS_REGISTER_SI); 198 | DECLARE_REG(di, ZYDIS_REGISTER_DI); 199 | DECLARE_REG(bp, ZYDIS_REGISTER_BP); 200 | DECLARE_REG(sp, ZYDIS_REGISTER_SP); 201 | DECLARE_REG(r8w, ZYDIS_REGISTER_R8W); 202 | DECLARE_REG(r9w, ZYDIS_REGISTER_R9W); 203 | DECLARE_REG(r10w, ZYDIS_REGISTER_R10W); 204 | DECLARE_REG(r11w, ZYDIS_REGISTER_R11W); 205 | 206 | 207 | DECLARE_REG(al, ZYDIS_REGISTER_AL); 208 | DECLARE_REG(bl, ZYDIS_REGISTER_BL); 209 | DECLARE_REG(cl, ZYDIS_REGISTER_CL); 210 | DECLARE_REG(dl, ZYDIS_REGISTER_DL); 211 | DECLARE_REG(sil, ZYDIS_REGISTER_SIL); 212 | DECLARE_REG(dil, ZYDIS_REGISTER_DIL); 213 | DECLARE_REG(bpl, ZYDIS_REGISTER_BPL); 214 | DECLARE_REG(spl, ZYDIS_REGISTER_SPL); 215 | DECLARE_REG(r8b, ZYDIS_REGISTER_R8B); 216 | DECLARE_REG(r9b, ZYDIS_REGISTER_R9B); 217 | DECLARE_REG(r10b, ZYDIS_REGISTER_R10B); 218 | DECLARE_REG(r11b, ZYDIS_REGISTER_R11B); 
-------------------------------------------------------------------------------- /baker/src/disassembler/parse_pe.cpp: -------------------------------------------------------------------------------- 1 | #include "disassembler.h" 2 | 3 | void DISASSEMBLER::create_section_dbs() { 4 | const auto insert_into_rva_db_map = [&](uint32_t rva, DATA_BLOCK* db) { 5 | // insert into rva datablock map (while keeping map sorted) 6 | RVA_DB_ENTRY rva_db_entry = { rva, db }; 7 | auto it = std::upper_bound(begin(rva_db_map), end(rva_db_map), 8 | rva_db_entry, 9 | [](const RVA_DB_ENTRY& left, const RVA_DB_ENTRY& right) { 10 | return left.rva < right.rva; 11 | }); 12 | rva_db_map.insert(it, rva_db_entry); 13 | }; 14 | 15 | for (int i = 0; i < nthdrs->FileHeader.NumberOfSections; i++) { 16 | IMAGE_SECTION_HEADER* secthdr = §s[i]; 17 | 18 | // ignore executable sections 19 | if (secthdr->Characteristics & IMAGE_SCN_MEM_EXECUTE) 20 | continue; 21 | 22 | assert(secthdr->Characteristics & IMAGE_SCN_MEM_READ); 23 | 24 | std::string sect_name = (char*)secthdr->Name; 25 | bool read_only = !(secthdr->Characteristics & IMAGE_SCN_MEM_WRITE); 26 | uint32_t file_size = secthdr->SizeOfRawData; 27 | uint32_t virt_size = secthdr->Misc.VirtualSize; 28 | 29 | DATA_BLOCK* db = nullptr; 30 | DATA_BLOCK* uninit_db = nullptr; 31 | 32 | if (virt_size > file_size) { 33 | uint32_t uninit_size = virt_size - file_size; 34 | db = bin.data_block(file_size, read_only, sect_name); 35 | uninit_db = bin.data_block(uninit_size, read_only, sect_name + "_uninit"); 36 | uninit_db->uninitialized = true; 37 | 38 | memcpy(db->bytes.data(), &filebuf[secthdr->PointerToRawData], file_size); 39 | memcpy(db->bytes.data(), &filebuf[secthdr->PointerToRawData + file_size], uninit_size); 40 | 41 | insert_into_rva_db_map(secthdr->VirtualAddress, db); 42 | insert_into_rva_db_map(secthdr->VirtualAddress + file_size, uninit_db); 43 | } else { 44 | db = bin.data_block(virt_size, read_only, sect_name); 45 | 46 | memcpy(db->bytes.data(), 
&filebuf[secthdr->PointerToRawData], secthdr->Misc.VirtualSize); 47 | insert_into_rva_db_map(secthdr->VirtualAddress, db); 48 | } 49 | } 50 | 51 | // print section mappings 52 | for (int i = 0; i < nthdrs->FileHeader.NumberOfSections; i++) { 53 | IMAGE_SECTION_HEADER* secthdr = §s[i]; 54 | 55 | logger_log( 56 | WHITE, (char*)secthdr->Name, 57 | WHITE, fmtf("%X - %X\n", 58 | secthdr->VirtualAddress, 59 | secthdr->VirtualAddress + secthdr->Misc.VirtualSize)); 60 | } 61 | } 62 | 63 | void DISASSEMBLER::parse_imports() { 64 | IMAGE_DATA_DIRECTORY impdir = datadir[IMAGE_DIRECTORY_ENTRY_IMPORT]; 65 | if (impdir.Size) { 66 | auto desc = rvacast(impdir.VirtualAddress); 67 | 68 | for (; desc->Characteristics; desc++) { 69 | IMPORT_MODULE* mod = bin.import_module(rvacast(desc->Name)); 70 | 71 | uint32_t first_thunk_rva = desc->FirstThunk; 72 | auto orig_first_thunk = rvacast(desc->OriginalFirstThunk); 73 | 74 | while (orig_first_thunk->u1.AddressOfData) { 75 | const auto imp_by_name = rvacast(orig_first_thunk->u1.AddressOfData); 76 | 77 | // create import symbol 78 | IMPORT_ROUTINE* routine = bin.import_routine(mod, imp_by_name->Name); 79 | 80 | // point this rva to the import symbol 81 | assert(rva_map[first_thunk_rva].id == nullsid); 82 | rva_map[first_thunk_rva].id = routine->id; 83 | sym_rva_map_append(first_thunk_rva); 84 | 85 | first_thunk_rva += sizeof(IMAGE_THUNK_DATA); 86 | orig_first_thunk++; 87 | } 88 | } 89 | } 90 | } 91 | 92 | void DISASSEMBLER::parse_exceptions() { 93 | IMAGE_DATA_DIRECTORY pdata = datadir[IMAGE_DIRECTORY_ENTRY_EXCEPTION]; 94 | if (pdata.Size) { 95 | logger_log( 96 | WHITE, "", 97 | WHITE, fmtf("parsing .pdata exception table\n")); 98 | 99 | const auto runtime_funcs = rvacast(pdata.VirtualAddress); 100 | uint32_t runtime_func_count = pdata.Size / sizeof(RUNTIME_FUNCTION); 101 | 102 | for (int i = 0; i < runtime_func_count; i++) { 103 | auto& rtfunc = runtime_funcs[i]; 104 | 105 | // ignore addresses that do not land in a executable section 106 
| // an example of this occuring is in ntoskrnl.exe 107 | // at the start of the INITDATA section 108 | if (!rva_in_exec_sect(rtfunc.BeginAddress)) { 109 | continue; 110 | } 111 | 112 | // queue the runtime function 113 | if (rva_map[rtfunc.BeginAddress].id == nullsid) { 114 | queue_rva(rtfunc.BeginAddress); 115 | } 116 | } 117 | } 118 | } 119 | 120 | void DISASSEMBLER::parse_relocations() { 121 | IMAGE_DATA_DIRECTORY relocdir = datadir[IMAGE_DIRECTORY_ENTRY_BASERELOC]; 122 | if (relocdir.Size) { 123 | const auto starting_block_addr = rvacast(relocdir.VirtualAddress); 124 | 125 | for (uint8_t* block_addr = starting_block_addr; 126 | block_addr < starting_block_addr + relocdir.Size;) { 127 | 128 | const auto block = reinterpret_cast(block_addr); 129 | block_addr += block->SizeOfBlock; 130 | 131 | struct BASE_RELOC_ENTRY { 132 | uint16_t offset : 12; 133 | uint16_t type : 4; 134 | }; 135 | 136 | const int entry_count = (block->SizeOfBlock - sizeof(IMAGE_BASE_RELOCATION)) / 2; 137 | const auto entries = reinterpret_cast(block + 1); 138 | 139 | for (int i = 0; i < entry_count; i++) { 140 | BASE_RELOC_ENTRY& entry = entries[i]; 141 | 142 | // rva where relocation occurs 143 | uint32_t reloc_rva = block->VirtualAddress + entry.offset; 144 | 145 | // padding, can ignore 146 | if (entry.type == IMAGE_REL_BASED_ABSOLUTE) 147 | continue; 148 | 149 | assert(entry.type == IMAGE_REL_BASED_DIR64); 150 | 151 | auto& rva_entry = rva_map[reloc_rva]; 152 | 153 | // basic block creation should not have happened yet 154 | assert(rva_entry.blink == nullsid); 155 | 156 | // symbol has already been discovered 157 | if (rva_entry.id) 158 | continue; 159 | 160 | // todo: handle rvas within executable sections 161 | assert(!rva_in_exec_sect(reloc_rva)); 162 | 163 | uint32_t db_offset = 0; 164 | DATA_BLOCK* db = rva_to_containing_db(reloc_rva, &db_offset); 165 | 166 | SYMBOL* sym = nullptr; 167 | // this could happen if the data is within the pe header 168 | if (!db) 169 | sym = 
bin.rel_info(reloc_rva); 170 | else 171 | sym = db->data_sym(db_offset); 172 | 173 | rva_entry = { sym->id, 0 }; 174 | sym_rva_map_append(reloc_rva); 175 | 176 | fully_analyze_data_symbol(sym); 177 | } 178 | } 179 | } 180 | } -------------------------------------------------------------------------------- /ExampleBakerUsage/ExampleBakerUsage.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 17.0 23 | Win32Proj 24 | {5a1f1249-24fe-49a3-bd7a-69715a5a14f4} 25 | ExampleBakerUsage 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | true 40 | Unicode 41 | 42 | 43 | Application 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | true 77 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Console 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | true 89 | true 90 | true 91 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 92 | true 93 | 94 | 95 | Console 96 | true 97 | true 98 | true 99 | 100 | 101 | 102 | 103 | Level3 104 | true 105 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 106 | true 107 | $(SolutionDir);$(SolutionDir)\dependencies\zydis\include;$(SolutionDir)\dependencies\zydis\dependencies\zycore\include%(AdditionalIncludeDirectories) 108 | stdcpp20 109 | 110 | 111 | Console 112 | true 113 | $(SolutionDir)\dependencies\zydis\msvc\bin\DebugX64 114 | Zycore.lib;Zydis.lib;$(CoreLibraryDependencies);%(AdditionalDependencies) 115 | 116 | 117 | 118 | 119 | Level3 120 | true 121 | true 122 | true 123 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 124 | true 125 | 126 | 127 | Console 128 | true 129 | true 130 | true 131 | 
132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | {b561d151-3bd9-4634-83f1-21954247fc45} 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | x64/ 2 | 3 | ## Ignore Visual Studio temporary files, build results, and 4 | ## files generated by popular Visual Studio add-ons. 5 | ## 6 | ## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore 7 | 8 | # User-specific files 9 | *.rsuser 10 | *.suo 11 | *.user 12 | *.userosscache 13 | *.sln.docstates 14 | 15 | # User-specific files (MonoDevelop/Xamarin Studio) 16 | *.userprefs 17 | 18 | # Mono auto generated files 19 | mono_crash.* 20 | 21 | # Build results 22 | [Dd]ebug/ 23 | [Dd]ebugPublic/ 24 | [Rr]elease/ 25 | [Rr]eleases/ 26 | x64/ 27 | x86/ 28 | [Ww][Ii][Nn]32/ 29 | [Aa][Rr][Mm]/ 30 | [Aa][Rr][Mm]64/ 31 | bld/ 32 | [Bb]in/ 33 | [Oo]bj/ 34 | [Ll]og/ 35 | [Ll]ogs/ 36 | 37 | # Visual Studio 2015/2017 cache/options directory 38 | .vs/ 39 | # Uncomment if you have tasks that create the project's static files in wwwroot 40 | #wwwroot/ 41 | 42 | # Visual Studio 2017 auto generated files 43 | Generated\ Files/ 44 | 45 | # MSTest test Results 46 | [Tt]est[Rr]esult*/ 47 | [Bb]uild[Ll]og.* 48 | 49 | # NUnit 50 | *.VisualState.xml 51 | TestResult.xml 52 | nunit-*.xml 53 | 54 | # Build Results of an ATL Project 55 | [Dd]ebugPS/ 56 | [Rr]eleasePS/ 57 | dlldata.c 58 | 59 | # Benchmark Results 60 | BenchmarkDotNet.Artifacts/ 61 | 62 | # .NET Core 63 | project.lock.json 64 | project.fragment.lock.json 65 | artifacts/ 66 | 67 | # ASP.NET Scaffolding 68 | ScaffoldingReadMe.txt 69 | 70 | # StyleCop 71 | StyleCopReport.xml 72 | 73 | # Files built by Visual Studio 74 | *_i.c 75 | *_p.c 76 | *_h.h 77 | *.ilk 78 | *.meta 79 | *.obj 80 | *.iobj 81 | *.pch 82 | *.pdb 83 | *.ipdb 84 | *.pgc 85 | *.pgd 86 | *.rsp 87 | # but not 
Directory.Build.rsp, as it configures directory-level build defaults 88 | !Directory.Build.rsp 89 | *.sbr 90 | *.tlb 91 | *.tli 92 | *.tlh 93 | *.tmp 94 | *.tmp_proj 95 | *_wpftmp.csproj 96 | *.log 97 | *.tlog 98 | *.vspscc 99 | *.vssscc 100 | .builds 101 | *.pidb 102 | *.svclog 103 | *.scc 104 | 105 | # Chutzpah Test files 106 | _Chutzpah* 107 | 108 | # Visual C++ cache files 109 | ipch/ 110 | *.aps 111 | *.ncb 112 | *.opendb 113 | *.opensdf 114 | *.sdf 115 | *.cachefile 116 | *.VC.db 117 | *.VC.VC.opendb 118 | 119 | # Visual Studio profiler 120 | *.psess 121 | *.vsp 122 | *.vspx 123 | *.sap 124 | 125 | # Visual Studio Trace Files 126 | *.e2e 127 | 128 | # TFS 2012 Local Workspace 129 | $tf/ 130 | 131 | # Guidance Automation Toolkit 132 | *.gpState 133 | 134 | # ReSharper is a .NET coding add-in 135 | _ReSharper*/ 136 | *.[Rr]e[Ss]harper 137 | *.DotSettings.user 138 | 139 | # TeamCity is a build add-in 140 | _TeamCity* 141 | 142 | # DotCover is a Code Coverage Tool 143 | *.dotCover 144 | 145 | # AxoCover is a Code Coverage Tool 146 | .axoCover/* 147 | !.axoCover/settings.json 148 | 149 | # Coverlet is a free, cross platform Code Coverage Tool 150 | coverage*.json 151 | coverage*.xml 152 | coverage*.info 153 | 154 | # Visual Studio code coverage results 155 | *.coverage 156 | *.coveragexml 157 | 158 | # NCrunch 159 | _NCrunch_* 160 | .*crunch*.local.xml 161 | nCrunchTemp_* 162 | 163 | # MightyMoose 164 | *.mm.* 165 | AutoTest.Net/ 166 | 167 | # Web workbench (sass) 168 | .sass-cache/ 169 | 170 | # Installshield output folder 171 | [Ee]xpress/ 172 | 173 | # DocProject is a documentation generator add-in 174 | DocProject/buildhelp/ 175 | DocProject/Help/*.HxT 176 | DocProject/Help/*.HxC 177 | DocProject/Help/*.hhc 178 | DocProject/Help/*.hhk 179 | DocProject/Help/*.hhp 180 | DocProject/Help/Html2 181 | DocProject/Help/html 182 | 183 | # Click-Once directory 184 | publish/ 185 | 186 | # Publish Web Output 187 | *.[Pp]ublish.xml 188 | *.azurePubxml 189 | # Note: 
Comment the next line if you want to checkin your web deploy settings, 190 | # but database connection strings (with potential passwords) will be unencrypted 191 | *.pubxml 192 | *.publishproj 193 | 194 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 195 | # checkin your Azure Web App publish settings, but sensitive information contained 196 | # in these scripts will be unencrypted 197 | PublishScripts/ 198 | 199 | # NuGet Packages 200 | *.nupkg 201 | # NuGet Symbol Packages 202 | *.snupkg 203 | # The packages folder can be ignored because of Package Restore 204 | **/[Pp]ackages/* 205 | # except build/, which is used as an MSBuild target. 206 | !**/[Pp]ackages/build/ 207 | # Uncomment if necessary however generally it will be regenerated when needed 208 | #!**/[Pp]ackages/repositories.config 209 | # NuGet v3's project.json files produces more ignorable files 210 | *.nuget.props 211 | *.nuget.targets 212 | 213 | # Microsoft Azure Build Output 214 | csx/ 215 | *.build.csdef 216 | 217 | # Microsoft Azure Emulator 218 | ecf/ 219 | rcf/ 220 | 221 | # Windows Store app package directories and files 222 | AppPackages/ 223 | BundleArtifacts/ 224 | Package.StoreAssociation.xml 225 | _pkginfo.txt 226 | *.appx 227 | *.appxbundle 228 | *.appxupload 229 | 230 | # Visual Studio cache files 231 | # files ending in .cache can be ignored 232 | *.[Cc]ache 233 | # but keep track of directories ending in .cache 234 | !?*.[Cc]ache/ 235 | 236 | # Others 237 | ClientBin/ 238 | ~$* 239 | *~ 240 | *.dbmdl 241 | *.dbproj.schemaview 242 | *.jfm 243 | *.pfx 244 | *.publishsettings 245 | orleans.codegen.cs 246 | 247 | # Including strong name files can present a security risk 248 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 249 | #*.snk 250 | 251 | # Since there are multiple workflows, uncomment next line to ignore bower_components 252 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 253 | #bower_components/ 254 
| 255 | # RIA/Silverlight projects 256 | Generated_Code/ 257 | 258 | # Backup & report files from converting an old project file 259 | # to a newer Visual Studio version. Backup files are not needed, 260 | # because we have git ;-) 261 | _UpgradeReport_Files/ 262 | Backup*/ 263 | UpgradeLog*.XML 264 | UpgradeLog*.htm 265 | ServiceFabricBackup/ 266 | *.rptproj.bak 267 | 268 | # SQL Server files 269 | *.mdf 270 | *.ldf 271 | *.ndf 272 | 273 | # Business Intelligence projects 274 | *.rdl.data 275 | *.bim.layout 276 | *.bim_*.settings 277 | *.rptproj.rsuser 278 | *- [Bb]ackup.rdl 279 | *- [Bb]ackup ([0-9]).rdl 280 | *- [Bb]ackup ([0-9][0-9]).rdl 281 | 282 | # Microsoft Fakes 283 | FakesAssemblies/ 284 | 285 | # GhostDoc plugin setting file 286 | *.GhostDoc.xml 287 | 288 | # Node.js Tools for Visual Studio 289 | .ntvs_analysis.dat 290 | node_modules/ 291 | 292 | # Visual Studio 6 build log 293 | *.plg 294 | 295 | # Visual Studio 6 workspace options file 296 | *.opt 297 | 298 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 299 | *.vbw 300 | 301 | # Visual Studio 6 auto-generated project file (contains which files were open etc.) 
302 | *.vbp 303 | 304 | # Visual Studio 6 workspace and project file (working project files containing files to include in project) 305 | *.dsw 306 | *.dsp 307 | 308 | # Visual Studio 6 technical files 309 | *.ncb 310 | *.aps 311 | 312 | # Visual Studio LightSwitch build output 313 | **/*.HTMLClient/GeneratedArtifacts 314 | **/*.DesktopClient/GeneratedArtifacts 315 | **/*.DesktopClient/ModelManifest.xml 316 | **/*.Server/GeneratedArtifacts 317 | **/*.Server/ModelManifest.xml 318 | _Pvt_Extensions 319 | 320 | # Paket dependency manager 321 | .paket/paket.exe 322 | paket-files/ 323 | 324 | # FAKE - F# Make 325 | .fake/ 326 | 327 | # CodeRush personal settings 328 | .cr/personal 329 | 330 | # Python Tools for Visual Studio (PTVS) 331 | __pycache__/ 332 | *.pyc 333 | 334 | # Cake - Uncomment if you are using it 335 | # tools/** 336 | # !tools/packages.config 337 | 338 | # Tabs Studio 339 | *.tss 340 | 341 | # Telerik's JustMock configuration file 342 | *.jmconfig 343 | 344 | # BizTalk build output 345 | *.btp.cs 346 | *.btm.cs 347 | *.odx.cs 348 | *.xsd.cs 349 | 350 | # OpenCover UI analysis results 351 | OpenCover/ 352 | 353 | # Azure Stream Analytics local run output 354 | ASALocalRun/ 355 | 356 | # MSBuild Binary and Structured Log 357 | *.binlog 358 | 359 | # NVidia Nsight GPU debugger configuration file 360 | *.nvuser 361 | 362 | # MFractors (Xamarin productivity tool) working folder 363 | .mfractor/ 364 | 365 | # Local History for Visual Studio 366 | .localhistory/ 367 | 368 | # Visual Studio History (VSHistory) files 369 | .vshistory/ 370 | 371 | # BeatPulse healthcheck temp database 372 | healthchecksdb 373 | 374 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 375 | MigrationBackup/ 376 | 377 | # Ionide (cross platform F# VS Code tools) working folder 378 | .ionide/ 379 | 380 | # Fody - auto-generated XML schema 381 | FodyWeavers.xsd 382 | 383 | # VS Code files for those working on multiple tools 384 | .vscode/* 385 | 
!.vscode/settings.json 386 | !.vscode/tasks.json 387 | !.vscode/launch.json 388 | !.vscode/extensions.json 389 | *.code-workspace 390 | 391 | # Local History for Visual Studio Code 392 | .history/ 393 | 394 | # Windows Installer files from build outputs 395 | *.cab 396 | *.msi 397 | *.msix 398 | *.msm 399 | *.msp 400 | 401 | # JetBrains Rider 402 | *.sln.iml -------------------------------------------------------------------------------- /baker/src/disassembler/analysis/jump_table_analysis.cpp: -------------------------------------------------------------------------------- 1 | #include "../disassembler.h" 2 | #include "code_tracer.h" 3 | #include 4 | 5 | void DISASSEMBLER::collect_jump_tables() { 6 | for (BASIC_BLOCK* bb : bin.basic_blocks) { 7 | if (bb_explored_map[bb->id]) 8 | continue; 9 | bb_explored_map[bb->id] = true; 10 | 11 | instr_t* end_instr = 12 | &bb->instrs[bb->instrs.size() - 1]; 13 | 14 | auto [end_dec_ctx, end_dec_instr] = 15 | decode_instr(&bin.decoder, end_instr->bytes, end_instr->len); 16 | 17 | if (end_dec_instr.meta.category != ZYDIS_CATEGORY_COND_BR 18 | && end_dec_instr.meta.category != ZYDIS_CATEGORY_UNCOND_BR 19 | && end_dec_instr.meta.category != ZYDIS_CATEGORY_CALL) 20 | continue; 21 | 22 | auto end_dec_ops = decode_ops(&bin.decoder, &end_dec_instr, &end_dec_ctx); 23 | 24 | if (end_dec_ops[0].type != ZYDIS_OPERAND_TYPE_REGISTER) 25 | continue; 26 | 27 | // lea base_reg, jpt 28 | // lea base_reg, __ImageBase 29 | ZydisRegister base_reg = {}; 30 | INSTR_TRACE_FRAME load_base = {}; 31 | 32 | // mov dst_reg, [base_reg + idx_reg * (4 or 8) + jump_table_rva] 33 | // mov dst_reg, [base_reg + idx_reg * (4 or 8)] 34 | INSTR_TRACE_FRAME load_idx = {}; 35 | ZydisDecodedOperand* load_idx_mem_op = nullptr; 36 | uint32_t jpt_rva = 0; 37 | int jpt_alignment = 0; 38 | 39 | // mov dst_reg, [base_reg:image_base + idx_reg * 4 + jump_table_rva] 40 | // mov dst_reg, [base_reg:jpt_rva + idx_reg * 4] 41 | bb_backtrace(bb, bb->instrs.size() - 1, 
end_dec_ops[0], 42 | [&](ZydisDecodedOperand& target_op, INSTR_TRACE_FRAME f) -> bool { 43 | if (f.dec_instr.mnemonic == ZYDIS_MNEMONIC_MOV 44 | && f.dec_ops[0].type == ZYDIS_OPERAND_TYPE_REGISTER 45 | && reg_root(f.dec_ops[0].reg.value) == reg_root(target_op.reg.value) 46 | && f.dec_ops[1].type == ZYDIS_OPERAND_TYPE_MEMORY 47 | && f.dec_ops[1].mem.base 48 | && f.dec_ops[1].mem.index 49 | && f.dec_ops[1].mem.scale) { 50 | load_idx = f; 51 | base_reg = load_idx.dec_ops[1].mem.base; 52 | jpt_alignment = load_idx.dec_ops[1].mem.scale; 53 | load_idx_mem_op = &load_idx.dec_ops[1]; 54 | return true; 55 | } 56 | }); 57 | 58 | if (!load_idx.instr) 59 | continue; 60 | 61 | // todo: handle other alignments later 62 | // but make sure they arent pointers, by comparing to the relocation table 63 | assert(load_idx.dec_ops[1].mem.scale == 4); 64 | 65 | // now we will trace for the instruction loading the base 66 | ZydisDecodedOperand base_reg_op = { 67 | .type = ZYDIS_OPERAND_TYPE_REGISTER, 68 | .reg = { .value = base_reg } 69 | }; 70 | 71 | bb_backtrace(bb, load_idx.instr_idx - 1, base_reg_op, 72 | [&](ZydisDecodedOperand& target_op, INSTR_TRACE_FRAME f) -> bool { 73 | if (f.dec_instr.mnemonic == ZYDIS_MNEMONIC_LEA 74 | && f.dec_ops[0].type == ZYDIS_OPERAND_TYPE_REGISTER 75 | && reg_root(f.dec_ops[0].reg.value) == reg_root(target_op.reg.value)) { 76 | load_base = f; 77 | return true; // break out of the trace 78 | } 79 | }); 80 | 81 | if (!load_base.instr) 82 | continue; 83 | 84 | sym_id_t base_sym_id = nullsid; 85 | assert(load_base.dec_instr.raw.disp.size == 32); 86 | memcpy(&base_sym_id, load_base.instr->bytes + load_base.dec_instr.raw.disp.offset, 4); 87 | 88 | SYMBOL* sym = bin.symbols[base_sym_id]; 89 | 90 | // if there was a displacement within the index load 91 | // then the base should be the image base 92 | if (load_idx_mem_op->mem.disp.value) { 93 | if (sym->type != SYMBOL_TYPE_RELATIVE_INFO && sym->rel_offset != 0) { 94 | logger_warn("load_idx had displacement, but 
base was not the image base\n"); 95 | continue; 96 | } 97 | jpt_rva = load_idx_mem_op->mem.disp.value; 98 | 99 | } else { 100 | if (sym->type == SYMBOL_TYPE_RELATIVE_INFO && sym->rel_offset == 0) { 101 | logger_warn("load_idx did not have displacement, but base was not the the jump table\n"); 102 | continue; 103 | } 104 | jpt_rva = sym_rva_map[sym->id]; 105 | } 106 | 107 | logger_log(WHITE, "", WHITE, 108 | fmtf(" found jump table at sym_%d -> 0x%X\n", bb->id, jpt_rva)); 109 | 110 | auto& rva_entry = rva_map[jpt_rva]; 111 | SYMBOL* jpt_sym = nullptr; 112 | 113 | if (!rva_entry.id) { 114 | if (rva_in_exec_sect(jpt_rva)) { 115 | jpt_sym = new SYMBOL{}; 116 | jpt_sym->type = SYMBOL_TYPE_RELATIVE_INFO; 117 | jpt_sym->rel_offset = jpt_rva; 118 | jpt_sym->id = rva_entry.id = bin.symbols.size(); 119 | bin.symbols.push_back(jpt_sym); 120 | sym_rva_map_append(jpt_rva); 121 | } else { 122 | uint32_t db_offset = 0; 123 | DATA_BLOCK* jpt_db = rva_to_containing_db(jpt_rva, &db_offset); 124 | jpt_sym = jpt_db->data_sym(db_offset); 125 | } 126 | } else { 127 | jpt_sym = bin.symbols[rva_entry.id]; 128 | } 129 | 130 | if (load_idx_mem_op->mem.disp.value) { 131 | // replace the displacement with the jump table symbol id 132 | assert(load_idx.dec_instr.raw.disp.size == 32); 133 | memcpy(load_idx.instr->bytes + load_idx.dec_instr.raw.disp.offset, 134 | &jpt_sym->id, 4); 135 | 136 | logger_log(WHITE, "", WHITE, 137 | fmtf("%s \n", serialize_instr(&bin, load_idx.instr).c_str())); 138 | 139 | load_idx.instr->flags |= INSTR_FLAG_MEM_IDX; 140 | } else { 141 | assert(sym->id == jpt_sym->id); 142 | 143 | logger_log(WHITE, "", WHITE, 144 | fmtf("sym_%d converted to first data symbol of the jump table\n", sym->id)); 145 | } 146 | 147 | jpt_rva_list.insert(jpt_rva); 148 | } 149 | } 150 | 151 | void DISASSEMBLER::resolve_jpt_entries() { 152 | for (uint32_t jpt_rva : jpt_rva_list) { 153 | auto& jpt_rva_entry = rva_map[jpt_rva]; 154 | assert(jpt_rva_entry.id); 155 | 156 | SYMBOL* jpt_sym = 
bin.symbols[jpt_rva_entry.id]; 157 | uint32_t db_offset = 0; 158 | DATA_BLOCK* db = nullptr; 159 | 160 | if (rva_in_exec_sect(jpt_rva)) { 161 | db = bin.data_block(0, true, fmtf("code_data_%x", jpt_rva)); 162 | } else { 163 | db = rva_to_containing_db(jpt_rva, &db_offset); 164 | } 165 | 166 | for (int i = 0; true; i++) { 167 | uint32_t offset = i * 4; 168 | uint32_t entry_rva = jpt_rva + offset; 169 | uint32_t target_rva = *rvacast(entry_rva); 170 | 171 | // if we ran into another jump table 172 | if (offset && jpt_rva_list.find(entry_rva) != jpt_rva_list.end()) { 173 | logger_log(WHITE, "", WHITE, 174 | fmtf("jump table runs into another at idx: %d\n", i)); 175 | break; 176 | } 177 | 178 | // if this is not a valid rva towards code 179 | if (!rva_in_exec_sect(target_rva)) { 180 | logger_log(WHITE, "", WHITE, 181 | fmtf("jump table invalid target rva at idx: %d\n", i)); 182 | break; 183 | } 184 | 185 | sym_id_t target_sym_id = queue_rva(target_rva).id; 186 | auto& rva_entry = rva_map[entry_rva]; 187 | SYMBOL* sym = nullptr; 188 | if (rva_entry.id) { 189 | sym = bin.symbols[rva_entry.id]; 190 | if (rva_in_exec_sect(entry_rva)) { 191 | assert(sym->type == SYMBOL_TYPE_RELATIVE_INFO && sym->rel_offset == entry_rva); 192 | sym->type = SYMBOL_TYPE_DATA; 193 | sym->db = db; 194 | sym->db_offset = offset; 195 | } 196 | } else { 197 | sym = db->data_sym(offset); 198 | rva_map[entry_rva].id = sym->id; 199 | sym_rva_map_append(entry_rva); 200 | } 201 | 202 | db->bytes.insert(end(db->bytes), 4, 0); 203 | 204 | sym->target_type = TARGET_TYPE_RVA; 205 | sym->target_sym_id = target_sym_id; 206 | } 207 | } 208 | } -------------------------------------------------------------------------------- /baker/baker.vcxproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Release 10 | Win32 11 | 12 | 13 | Debug 14 | x64 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | 17.0 23 | Win32Proj 24 | 
{b561d151-3bd9-4634-83f1-21954247fc45} 25 | baker 26 | 10.0 27 | 28 | 29 | 30 | Application 31 | true 32 | v143 33 | Unicode 34 | 35 | 36 | Application 37 | false 38 | v143 39 | true 40 | Unicode 41 | 42 | 43 | StaticLibrary 44 | true 45 | v143 46 | Unicode 47 | 48 | 49 | Application 50 | false 51 | v143 52 | true 53 | Unicode 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | Level3 76 | true 77 | WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) 78 | true 79 | 80 | 81 | Console 82 | true 83 | 84 | 85 | 86 | 87 | Level3 88 | true 89 | true 90 | true 91 | WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 92 | true 93 | 94 | 95 | Console 96 | true 97 | true 98 | true 99 | 100 | 101 | 102 | 103 | Level3 104 | true 105 | _DEBUG;_CONSOLE;%(PreprocessorDefinitions) 106 | true 107 | stdcpp20 108 | $(ProjectDir)..\dependencies\zydis\include;$(ProjectDir)..\dependencies\zydis\dependencies\zycore\include;%(AdditionalIncludeDirectories) 109 | 110 | 111 | Console 112 | true 113 | $(SolutionDir)\dependencies\zydis\msvc\bin\DebugX64;%(AdditionalLibraryDirectories) 114 | $(CoreLibraryDependencies);%(AdditionalDependencies) 115 | 116 | 117 | Zydis.lib;Zycore.lib 118 | 119 | 120 | $(ProjectDir)..\dependencies\zydis\msvc\bin\DebugX64 121 | 122 | 123 | 124 | 125 | Level3 126 | true 127 | true 128 | true 129 | NDEBUG;_CONSOLE;%(PreprocessorDefinitions) 130 | true 131 | stdc17 132 | true 133 | stdcpp20 134 | $(ProjectDir)ext\;%(AdditionalIncludeDirectories) 135 | 136 | 137 | Console 138 | true 139 | true 140 | true 141 | 142 | 143 | %(AdditionalDependencies) 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /baker/src/symbols/instruction.cpp: 
-------------------------------------------------------------------------------- 1 | #include "../binary.h" 2 | 3 | OPERAND Reg(ZydisRegister reg) { 4 | OPERAND op = {}; 5 | op.type = OP_REG; 6 | op.reg = reg; 7 | return op; 8 | } 9 | 10 | OPERAND Imm(int64_t immediate) { 11 | OPERAND op = {}; 12 | op.type = OP_IMM; 13 | op.imm = immediate; 14 | return op; 15 | } 16 | 17 | OPERAND ImmRel(sym_id_t symbol_id) { 18 | OPERAND op = {}; 19 | op.type = OP_IMMREL; 20 | op.target_sym_id = symbol_id; 21 | return op; 22 | } 23 | 24 | OPERAND RipRel(sym_id_t symbol_id, int len) { 25 | OPERAND op = {}; 26 | op.type = OP_RIPREL; 27 | op.target_sym_id = symbol_id; 28 | op.len = len; 29 | return op; 30 | } 31 | 32 | OPERAND Mem(OPERAND base_reg, int disp, int len) { 33 | OPERAND op = {}; 34 | op.type = OP_MEM; 35 | op.mem.base_reg = base_reg.reg; 36 | op.mem.disp = disp; 37 | op.len = len; 38 | return op; 39 | } 40 | 41 | OPERAND MemIdx(OPERAND base_reg, OPERAND idx_reg, int scale, sym_id_t table_sym_id, int len) { 42 | OPERAND op = {}; 43 | op.type = OP_MEM_IDX; 44 | op.memidx.base_reg = base_reg.reg; 45 | op.memidx.idx_reg = idx_reg.reg; 46 | op.memidx.scale = scale; 47 | op.memidx.table_sym_id = table_sym_id; 48 | op.len = len; 49 | return op; 50 | } 51 | 52 | instr_t Instr(ZydisMnemonic mnemonic, OPERAND op1, OPERAND op2) { 53 | instr_t instr = { 54 | .len = sizeof(instr.bytes) 55 | }; 56 | 57 | ZydisEncoderRequest request = { 58 | .machine_mode = ZYDIS_MACHINE_MODE_LONG_64, 59 | .mnemonic = mnemonic, 60 | }; 61 | 62 | OPERAND* ops[2] = { &op1, &op2 }; 63 | 64 | for (int i = 0; OPERAND* op : ops) { 65 | ZydisEncoderOperand* req_op = &request.operands[i]; 66 | OPERAND* other_op = i ? 
&op1 : &op2; 67 | 68 | if (op->type != OP_NONE) { 69 | if (op->type == OP_RIPREL || op->type == OP_MEM || op->type == OP_MEM_IDX) { 70 | if (other_op->type == OP_REG) 71 | op->len = reg_width(other_op->reg) / 8; 72 | if (other_op->type == OP_IMM) 73 | assert(op->len); 74 | } 75 | 76 | switch (op->type) { 77 | case OP_REG: 78 | req_op->type = ZYDIS_OPERAND_TYPE_REGISTER; 79 | req_op->reg.value = op->reg; 80 | break; 81 | 82 | case OP_IMM: 83 | req_op->type = ZYDIS_OPERAND_TYPE_IMMEDIATE; 84 | req_op->imm.s = op->imm; 85 | break; 86 | 87 | case OP_IMMREL: 88 | req_op->type = ZYDIS_OPERAND_TYPE_IMMEDIATE; 89 | req_op->imm.s = op->target_sym_id; 90 | break; 91 | 92 | case OP_RIPREL: 93 | req_op->type = ZYDIS_OPERAND_TYPE_MEMORY; 94 | req_op->mem.base = ZYDIS_REGISTER_RIP; 95 | req_op->mem.displacement = op->target_sym_id; 96 | req_op->mem.size = op->len; 97 | break; 98 | 99 | case OP_MEM: 100 | req_op->type = ZYDIS_OPERAND_TYPE_MEMORY; 101 | req_op->mem.base = op->mem.base_reg; 102 | req_op->mem.size = op->len; 103 | req_op->mem.displacement = op->mem.disp; 104 | break; 105 | 106 | case OP_MEM_IDX: 107 | req_op->type = ZYDIS_OPERAND_TYPE_MEMORY; 108 | req_op->mem.base = op->memidx.base_reg; 109 | req_op->mem.index = op->memidx.idx_reg; 110 | req_op->mem.scale = op->memidx.scale; 111 | req_op->mem.size = op->len; 112 | if (op->memidx.table_sym_id) { 113 | req_op->mem.displacement = op->memidx.table_sym_id; 114 | instr.flags |= INSTR_FLAG_MEM_IDX; 115 | } 116 | break; 117 | } 118 | 119 | request.operand_count++; 120 | } 121 | i++; 122 | } 123 | 124 | zy_expect_success(ZydisEncoderEncodeInstruction(&request, instr.bytes, &instr.len)); 125 | return instr; 126 | } 127 | 128 | 129 | void instr_store_val(instr_t* instr, uint32_t val, ZydisDecoder* decoder, ZydisDecodedInstruction* dec_instr, ZydisDecoderContext* dec_ctx, uint64_t instr_va) { 130 | assert(dec_instr->attributes & ZYDIS_ATTRIB_IS_RELATIVE); 131 | 132 | if (dec_instr->raw.imm[0].is_relative) { 133 | // store 
target symbol id 134 | if (val < (1ull << dec_instr->raw.imm[0].size)) { 135 | // can fit 136 | memcpy(instr->bytes + dec_instr->raw.imm[0].offset, &val, 137 | dec_instr->raw.imm[0].size / 8); 138 | } else { 139 | // cannot fit, re-encode with rel32 branch size 140 | // and then write value into instruction 141 | 142 | ZydisDecodedOperand dec_ops[10] = {}; 143 | zy_expect_success( 144 | ZydisDecoderDecodeOperands(decoder, 145 | dec_ctx, dec_instr, dec_ops, dec_instr->operand_count_visible) 146 | ); 147 | 148 | ZydisEncoderRequest req = {}; 149 | zy_expect_success( 150 | ZydisEncoderDecodedInstructionToEncoderRequest(dec_instr, 151 | dec_ops, dec_instr->operand_count_visible, &req) 152 | ); 153 | 154 | req.branch_type = ZYDIS_BRANCH_TYPE_NONE; 155 | req.branch_width = ZYDIS_BRANCH_WIDTH_NONE; 156 | req.operands[0].imm.u = instr_va + 0x12345678; 157 | 158 | instr->len = sizeof(instr->bytes); 159 | zy_expect_success( 160 | ZydisEncoderEncodeInstructionAbsolute(&req, instr->bytes, &instr->len, instr_va) 161 | ); 162 | 163 | assert(sizeof(val) == 4); 164 | memcpy(instr->bytes + instr->len - 4, &val, 4); 165 | } 166 | } else if (dec_instr->raw.disp.offset != 0 167 | && dec_instr->raw.modrm.mod == 0 168 | && dec_instr->raw.modrm.rm == 5) { 169 | 170 | // in x86-64, the offset in rip relative instructions should be 4 bytes (i think) 171 | assert(dec_instr->raw.disp.size == 32); 172 | 173 | memcpy(instr->bytes + dec_instr->raw.disp.offset, &val, 4); 174 | } 175 | } 176 | 177 | sym_id_t get_sym_id(const instr_t* instr, const ZydisDecodedInstruction* dec_instr) { 178 | sym_id_t sym_id = nullsid; 179 | if (dec_instr->raw.imm[0].is_relative) { 180 | memcpy(&sym_id, 181 | instr->bytes + dec_instr->raw.imm[0].offset, 182 | dec_instr->raw.imm[0].size / 8); 183 | } else if (dec_instr->raw.disp.size == 32) { 184 | memcpy(&sym_id, 185 | instr->bytes + dec_instr->raw.disp.offset, 186 | 4); 187 | } 188 | return sym_id; 189 | } 190 | 191 | 192 | void zy_expect_success(ZyanStatus 
zystatus) { 193 | assert(ZYAN_SUCCESS(zystatus)); 194 | } 195 | 196 | ZydisRegister reg_root(ZydisRegister reg) { 197 | return ZydisRegisterGetLargestEnclosing(ZYDIS_MACHINE_MODE_LONG_64, reg); 198 | } 199 | 200 | ZydisRegisterWidth reg_width(ZydisRegister reg) { 201 | return ZydisRegisterGetWidth(ZYDIS_MACHINE_MODE_LONG_64, reg); 202 | } 203 | 204 | std::tuple< 205 | ZydisDecoderContext, 206 | ZydisDecodedInstruction 207 | > decode_instr(const ZydisDecoder* decoder, const uint8_t* raw_instr, const size_t len) { 208 | ZydisDecoderContext dec_ctx = {}; 209 | ZydisDecodedInstruction dec_instr = {}; 210 | 211 | zy_expect_success( 212 | ZydisDecoderDecodeInstruction(decoder, 213 | &dec_ctx, raw_instr, len, &dec_instr) 214 | ); 215 | 216 | return { dec_ctx, dec_instr }; 217 | } 218 | 219 | std::vector decode_ops(const ZydisDecoder* decoder, 220 | const ZydisDecodedInstruction* dec_instr, const ZydisDecoderContext* dec_ctx) { 221 | ZydisDecodedOperand dec_ops[10] = {}; 222 | 223 | zy_expect_success( 224 | ZydisDecoderDecodeOperands(decoder, 225 | dec_ctx, dec_instr, dec_ops, dec_instr->operand_count_visible) 226 | ); 227 | 228 | return std::vector(dec_ops, dec_ops + dec_instr->operand_count_visible); 229 | } 230 | 231 | std::tuple< 232 | ZydisDecodedInstruction, 233 | std::vector 234 | > decode_full(const ZydisDecoder* decoder, const uint8_t* raw_instr, const size_t len) { 235 | ZyanStatus zystatus = 0; 236 | ZydisDecodedInstruction dec_instr = {}; 237 | ZydisDecodedOperand dec_ops[10] = {}; 238 | 239 | zy_expect_success( 240 | ZydisDecoderDecodeFull(decoder, raw_instr, len, &dec_instr, dec_ops) 241 | ); 242 | 243 | return { 244 | dec_instr, 245 | std::vector(dec_ops, dec_ops + dec_instr.operand_count_visible) 246 | }; 247 | } 248 | 249 | #define DEFINE_REG(reg_name, mnemonic) const OPERAND reg_name##_ = { OP_REG, mnemonic, }; 250 | 251 | DEFINE_REG(rax, ZYDIS_REGISTER_RAX); 252 | DEFINE_REG(rbx, ZYDIS_REGISTER_RBX); 253 | DEFINE_REG(rcx, ZYDIS_REGISTER_RCX); 254 | 
DEFINE_REG(rdx, ZYDIS_REGISTER_RDX); 255 | DEFINE_REG(rsi, ZYDIS_REGISTER_RSI); 256 | DEFINE_REG(rdi, ZYDIS_REGISTER_RDI); 257 | DEFINE_REG(rbp, ZYDIS_REGISTER_RBP); 258 | DEFINE_REG(rsp, ZYDIS_REGISTER_RSP); 259 | DEFINE_REG(r8, ZYDIS_REGISTER_R8); 260 | DEFINE_REG(r9, ZYDIS_REGISTER_R9); 261 | DEFINE_REG(r10, ZYDIS_REGISTER_R10); 262 | DEFINE_REG(r11, ZYDIS_REGISTER_R11); 263 | DEFINE_REG(r15, ZYDIS_REGISTER_R15); 264 | DEFINE_REG(r12, ZYDIS_REGISTER_R12); 265 | DEFINE_REG(r13, ZYDIS_REGISTER_R13); 266 | DEFINE_REG(r14, ZYDIS_REGISTER_R14); 267 | 268 | DEFINE_REG(eax, ZYDIS_REGISTER_EAX); 269 | DEFINE_REG(ebx, ZYDIS_REGISTER_EBX); 270 | DEFINE_REG(ecx, ZYDIS_REGISTER_ECX); 271 | DEFINE_REG(edx, ZYDIS_REGISTER_EDX); 272 | DEFINE_REG(esi, ZYDIS_REGISTER_ESI); 273 | DEFINE_REG(edi, ZYDIS_REGISTER_EDI); 274 | DEFINE_REG(ebp, ZYDIS_REGISTER_EBP); 275 | DEFINE_REG(esp, ZYDIS_REGISTER_ESP); 276 | DEFINE_REG(r8d, ZYDIS_REGISTER_R8D); 277 | DEFINE_REG(r9d, ZYDIS_REGISTER_R9D); 278 | DEFINE_REG(r10d, ZYDIS_REGISTER_R10D); 279 | DEFINE_REG(r11d, ZYDIS_REGISTER_R11D); 280 | DEFINE_REG(r15d, ZYDIS_REGISTER_R15D); 281 | DEFINE_REG(r12d, ZYDIS_REGISTER_R12D); 282 | DEFINE_REG(r13d, ZYDIS_REGISTER_R13D); 283 | DEFINE_REG(r14d, ZYDIS_REGISTER_R14D); 284 | 285 | DEFINE_REG(ax, ZYDIS_REGISTER_AX); 286 | DEFINE_REG(bx, ZYDIS_REGISTER_BX); 287 | DEFINE_REG(cx, ZYDIS_REGISTER_CX); 288 | DEFINE_REG(dx, ZYDIS_REGISTER_DX); 289 | DEFINE_REG(si, ZYDIS_REGISTER_SI); 290 | DEFINE_REG(di, ZYDIS_REGISTER_DI); 291 | DEFINE_REG(bp, ZYDIS_REGISTER_BP); 292 | DEFINE_REG(sp, ZYDIS_REGISTER_SP); 293 | DEFINE_REG(r8w, ZYDIS_REGISTER_R8W); 294 | DEFINE_REG(r9w, ZYDIS_REGISTER_R9W); 295 | DEFINE_REG(r10w, ZYDIS_REGISTER_R10W); 296 | DEFINE_REG(r11w, ZYDIS_REGISTER_R11W); 297 | 298 | DEFINE_REG(al, ZYDIS_REGISTER_AL); 299 | DEFINE_REG(bl, ZYDIS_REGISTER_BL); 300 | DEFINE_REG(cl, ZYDIS_REGISTER_CL); 301 | DEFINE_REG(dl, ZYDIS_REGISTER_DL); 302 | DEFINE_REG(sil, ZYDIS_REGISTER_SIL); 303 | DEFINE_REG(dil, 
ZYDIS_REGISTER_DIL); 304 | DEFINE_REG(bpl, ZYDIS_REGISTER_BPL); 305 | DEFINE_REG(spl, ZYDIS_REGISTER_SPL); 306 | DEFINE_REG(r8b, ZYDIS_REGISTER_R8B); 307 | DEFINE_REG(r9b, ZYDIS_REGISTER_R9B); 308 | DEFINE_REG(r10b, ZYDIS_REGISTER_R10B); 309 | DEFINE_REG(r11b, ZYDIS_REGISTER_R11B); -------------------------------------------------------------------------------- /baker/src/binary.cpp: -------------------------------------------------------------------------------- 1 | #include "binary.h" 2 | 3 | ZyanStatus hook_zydis_format_operand_mem(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer, ZydisFormatterContext* context); 4 | ZyanStatus hook_zydis_format_operand_imm(const ZydisFormatter* formatter, ZydisFormatterBuffer* buffer, ZydisFormatterContext* context); 5 | 6 | ZydisFormatterFunc orig_zydis_format_operand_mem = hook_zydis_format_operand_mem; // gets set to the original after the hook function 7 | ZydisFormatterFunc orig_zydis_format_operand_imm = hook_zydis_format_operand_imm; 8 | 9 | BINARY::BINARY() { 10 | zy_expect_success(ZydisDecoderInit(&decoder, 11 | ZYDIS_MACHINE_MODE_LONG_64, ZYDIS_STACK_WIDTH_64)); 12 | 13 | zy_expect_success(ZydisFormatterInit(&formatter, 14 | ZYDIS_FORMATTER_STYLE_INTEL)); 15 | 16 | /* 17 | makes relative instructions show as [rip+disp] 18 | instead of the absoulute address 19 | */ 20 | 21 | zy_expect_success(ZydisFormatterSetProperty(&formatter, 22 | ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, true)); 23 | 24 | zy_expect_success(ZydisFormatterSetProperty(&formatter, 25 | ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_RIPREL, true)); 26 | 27 | /* 28 | sets the hooks to print symbols in the instructions 29 | */ 30 | 31 | orig_zydis_format_operand_mem = hook_zydis_format_operand_mem; 32 | orig_zydis_format_operand_imm = hook_zydis_format_operand_imm; 33 | 34 | zy_expect_success( 35 | ZydisFormatterSetHook(&formatter, ZYDIS_FORMATTER_FUNC_FORMAT_OPERAND_MEM, 36 | (const void**)(&orig_zydis_format_operand_mem)) 37 | ); 38 | 39 | 
zy_expect_success( 40 | ZydisFormatterSetHook(&formatter, ZYDIS_FORMATTER_FUNC_FORMAT_OPERAND_IMM, 41 | (const void**)(&orig_zydis_format_operand_imm)) 42 | ); 43 | 44 | /* 45 | create a null symbol, so no symbols will hold the id 0 46 | */ 47 | 48 | this->label(); 49 | } 50 | 51 | 52 | ZyanStatus hook_zydis_format_operand_mem( 53 | const ZydisFormatter* formatter, 54 | ZydisFormatterBuffer* buffer, 55 | ZydisFormatterContext* context) { 56 | 57 | if (context->operand->mem.base != ZYDIS_REGISTER_RIP) { 58 | return orig_zydis_format_operand_mem(formatter, buffer, context); 59 | } 60 | 61 | uint64_t mask = (1ull << context->instruction->raw.disp.size) - 1; 62 | auto& sym_table = *reinterpret_cast*>(context->user_data); 63 | sym_id_t sym_id = (context->operand->mem.disp.value) & mask; 64 | SYMBOL* sym = sym_table[sym_id]; 65 | 66 | ZyanString* str = nullptr; 67 | ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL); 68 | ZydisFormatterBufferGetString(buffer, &str); 69 | 70 | COLORS color = WHITE; 71 | 72 | if (sym->type == SYMBOL_TYPE_CODE) 73 | color = BRIGHT_BLUE; 74 | 75 | else if (sym->type == SYMBOL_TYPE_DATA) 76 | color = BRIGHT_RED; 77 | 78 | else if (sym->type == SYMBOL_TYPE_IMPORT) 79 | color = BRIGHT_MAGENTA; 80 | 81 | else if (sym->type == SYMBOL_TYPE_RELATIVE_INFO) 82 | color = BRIGHT_GREEN; 83 | 84 | if (!sym->name.empty()) { 85 | ZyanStringAppendFormat(str, "[\033[%dmsym_%u/%s\033[0m]", color, sym->id, sym->name.c_str()); 86 | } else { 87 | ZyanStringAppendFormat(str, "[\033[%dmsym_%u\033[0m]", color, sym->id); 88 | } 89 | 90 | return ZYAN_STATUS_SUCCESS; 91 | } 92 | 93 | ZyanStatus hook_zydis_format_operand_imm( 94 | const ZydisFormatter* formatter, 95 | ZydisFormatterBuffer* buffer, 96 | ZydisFormatterContext* context) { 97 | 98 | if (!context->operand->imm.is_relative) { 99 | return orig_zydis_format_operand_imm(formatter, buffer, context); 100 | } 101 | 102 | uint64_t mask = (1ull << context->operand->size) - 1; 103 | auto& sym_table = 
*reinterpret_cast*>(context->user_data); 104 | sym_id_t sym_id = context->operand->imm.value.u & mask; 105 | SYMBOL* sym = sym_table[sym_id]; 106 | 107 | ZyanString* str = nullptr; 108 | ZydisFormatterBufferAppend(buffer, ZYDIS_TOKEN_SYMBOL); 109 | ZydisFormatterBufferGetString(buffer, &str); 110 | 111 | COLORS color = WHITE; 112 | 113 | if (sym->type == SYMBOL_TYPE_CODE) 114 | color = BRIGHT_BLUE; 115 | 116 | else if (sym->type == SYMBOL_TYPE_DATA) 117 | color = BRIGHT_RED; 118 | 119 | else if (sym->type == SYMBOL_TYPE_IMPORT) 120 | color = BRIGHT_MAGENTA; 121 | 122 | else if (sym->type == SYMBOL_TYPE_RELATIVE_INFO) 123 | color = BRIGHT_GREEN; 124 | 125 | if (!sym->name.empty()) { 126 | ZyanStringAppendFormat(str, "\033[%dmsym_%u/%s\033[0m", color, sym->id, sym->name.c_str()); 127 | } else { 128 | ZyanStringAppendFormat(str, "\033[%dmsym_%u\033[0m", color, sym->id); 129 | } 130 | 131 | return ZYAN_STATUS_SUCCESS; 132 | } 133 | 134 | 135 | void binary_duplicate(const BINARY* bin, BINARY* new_bin) { 136 | std::map old_to_new_routine; 137 | std::map old_to_new_db; 138 | std::map old_to_new_bb; 139 | 140 | for (IMPORT_MODULE* old_mod : bin->import_modules) { 141 | IMPORT_MODULE* mod = new IMPORT_MODULE{}; 142 | mod->name = old_mod->name; 143 | mod->routines = {}; 144 | 145 | for (IMPORT_ROUTINE* old_routine : old_mod->routines) { 146 | IMPORT_ROUTINE* routine = new IMPORT_ROUTINE{}; 147 | routine->id = old_routine->id; 148 | routine->name = old_routine->name; 149 | 150 | mod->routines.push_back(routine); 151 | old_to_new_routine[old_routine] = routine; 152 | } 153 | 154 | new_bin->import_modules.push_back(mod); 155 | } 156 | 157 | for (DATA_BLOCK* old_db : bin->data_blocks) { 158 | DATA_BLOCK* db = new_bin->data_blocks.emplace_back(new DATA_BLOCK{}); 159 | db->name = old_db->name; 160 | db->bytes = old_db->bytes; 161 | db->read_only = old_db->read_only; 162 | db->bin_ = new_bin; 163 | old_to_new_db[old_db] = db; 164 | } 165 | 166 | for (BASIC_BLOCK* old_bb : 
bin->basic_blocks) { 167 | BASIC_BLOCK* bb = new_bin->basic_blocks.emplace_back(new BASIC_BLOCK{}); 168 | bb->id = old_bb->id; 169 | bb->instrs = old_bb->instrs; 170 | bb->fallthrough_sym_id = old_bb->fallthrough_sym_id; 171 | bb->bin_ = new_bin; 172 | old_to_new_bb[old_bb] = bb; 173 | } 174 | 175 | new_bin->symbols = bin->symbols; 176 | 177 | for (int i = 0; i < bin->symbols.size(); i++) { 178 | SYMBOL* old_sym = bin->symbols[i]; 179 | SYMBOL* sym = new SYMBOL{}; 180 | new_bin->symbols[i] = sym; 181 | 182 | sym->id = i; 183 | sym->type = old_sym->type; 184 | sym->name = old_sym->name; 185 | 186 | if (sym->type == SYMBOL_TYPE_CODE) { 187 | BASIC_BLOCK* old_bb = old_sym->bb; 188 | BASIC_BLOCK* bb = old_to_new_bb[old_bb]; 189 | assert(bb->id == sym->id); 190 | sym->bb = bb; 191 | } 192 | 193 | else if (sym->type == SYMBOL_TYPE_DATA) { 194 | DATA_BLOCK* db = old_to_new_db[old_sym->db]; 195 | sym->db = db; 196 | sym->db_offset = old_sym->db_offset; 197 | db->dboffset_to_sym[sym->db_offset] = sym; 198 | 199 | sym->target_type = old_sym->target_type; 200 | sym->target_sym_id = old_sym->target_sym_id; 201 | } 202 | 203 | else if (sym->type == SYMBOL_TYPE_RELATIVE_INFO) { 204 | sym->rel_offset = old_sym->rel_offset; 205 | } 206 | 207 | else if (sym->type == SYMBOL_TYPE_IMPORT) { 208 | IMPORT_ROUTINE* old_routine = old_sym->import_routine; 209 | IMPORT_ROUTINE* routine = old_to_new_routine[old_routine]; 210 | sym->import_routine = routine; 211 | } 212 | 213 | else if (sym->type == SYMBOL_TYPE_NONE) { 214 | // null symbol 215 | assert(i == 0); 216 | } 217 | } 218 | 219 | new_bin->formatter = bin->formatter; 220 | new_bin->decoder = bin->decoder; 221 | new_bin->entry_point = bin->entry_point; 222 | } 223 | 224 | void binary_free(BINARY* bin) { 225 | for (IMPORT_MODULE* mod : bin->import_modules) { 226 | for (IMPORT_ROUTINE* routine : mod->routines) { 227 | free(routine); 228 | } 229 | free(mod); 230 | } 231 | 232 | for (DATA_BLOCK* db : bin->data_blocks) 233 | free(db); 234 | 
235 | for (BASIC_BLOCK* bb : bin->basic_blocks) 236 | free(bb); 237 | 238 | for (SYMBOL* sym : bin->symbols) 239 | free(sym); 240 | } 241 | 242 | 243 | void binary_print(BINARY* bin) { 244 | printf("--------------------------------------------\n"); 245 | printf("----------------BINARY----------------------\n"); 246 | printf("--------------------------------------------\n"); 247 | 248 | printf_ex(BRIGHT_BLUE, "--------------CODE--------------------------\n"); 249 | 250 | for (BASIC_BLOCK* bb : bin->basic_blocks) { 251 | printf_ex(BRIGHT_BLUE, "[basic block]: %d, size: %d %s\n", bb->id, bb->size(), 252 | bin->symbols[bb->id]->name.c_str()); 253 | 254 | for (int bb_offset = 0; instr_t instr : bb->instrs) { 255 | printf("\t+%-4d: %s\n", bb_offset, serialize_instr(bin, &instr).c_str()); 256 | 257 | bb_offset += instr.len; 258 | } 259 | 260 | printf("\t--> %d\n", bb->fallthrough_sym_id); 261 | } 262 | 263 | for (int sect_idx = 0; sect_idx < 2; sect_idx++) { 264 | // 0: .rdata 265 | // 1: .data 266 | 267 | if (sect_idx == 0) 268 | printf_ex(BRIGHT_RED, "----READ-ONLY-DATA--------------------------\n"); 269 | 270 | if (sect_idx == 1) 271 | printf_ex(BRIGHT_MAGENTA, "--------------DATA--------------------------\n"); 272 | 273 | for (DATA_BLOCK* db : bin->data_blocks) { 274 | if (sect_idx == 0 && db->read_only == false) 275 | continue; 276 | 277 | if (sect_idx == 1 && db->read_only == true) 278 | continue; 279 | 280 | printf_ex(sect_idx ? 
BRIGHT_MAGENTA : BRIGHT_RED, 281 | "[data block]: %s, size: %d\n", db->name.c_str(), db->bytes.size()); 282 | 283 | struct LINE_INFO { 284 | SYMBOL* sym; 285 | std::vector bytes; 286 | }; 287 | 288 | std::vector lines = {}; 289 | 290 | LINE_INFO line = {}; 291 | for (int db_offset = 0; db_offset < db->bytes.size(); db_offset++) { 292 | uint8_t byte = db->bytes[db_offset]; 293 | 294 | if (db->dboffset_to_sym[db_offset]) { 295 | if (db_offset) { 296 | lines.push_back(line); 297 | line = {}; 298 | } 299 | line.sym = db->dboffset_to_sym[db_offset]; 300 | } 301 | 302 | int line_max_len = 16; 303 | if (line.sym) { 304 | if (line.sym->target_type == TARGET_TYPE_RVA) { 305 | line_max_len = 4; 306 | } else if (line.sym->target_type == TARGET_TYPE_POINTER) { 307 | line_max_len = 8; 308 | } 309 | } 310 | 311 | if (db_offset == db->bytes.size() - 1) { 312 | line.bytes.push_back(byte); 313 | lines.push_back(line); 314 | line = {}; 315 | } else { 316 | if (line.bytes.size() == line_max_len) { 317 | lines.push_back(line); 318 | line = {}; 319 | } 320 | 321 | line.bytes.push_back(byte); 322 | } 323 | } 324 | 325 | for (LINE_INFO& line : lines) { 326 | /* 327 | tabing it out 328 | */ 329 | if (line.sym) { 330 | SYMBOL* sym = line.sym; 331 | if (sym->target_type == TARGET_TYPE_RVA) 332 | printf("%-25s |", fmtf("[%-4d --rva-> %-4d]", sym->id, sym->target_sym_id).c_str()); 333 | if (sym->target_type == TARGET_TYPE_POINTER) 334 | printf("%-25s |", fmtf("[%-4d --ptr-> %-4d]", sym->id, sym->target_sym_id).c_str()); 335 | if (sym->target_type == TARGET_TYPE_NONE) 336 | printf("%-25s |", fmtf("[%d]", sym->id).c_str()); 337 | } else { 338 | printf("%-25s |", ""); 339 | } 340 | 341 | /* 342 | | 00 00 00 |text 343 | */ 344 | std::string fmt = ""; 345 | 346 | for (int j = 0; j < line.bytes.size(); j++) 347 | fmt += fmtf("%02X ", line.bytes[j]); 348 | printf("%-48s|", fmt.c_str()); 349 | 350 | for (int j = 0; j < line.bytes.size(); j++) 351 | printf("%c", line.bytes[j]); 352 | 353 | 
printf("\n"); 354 | } 355 | 356 | printf("\n"); 357 | } 358 | } 359 | 360 | printf("--------------------------------------------\n"); 361 | printf("--------------------------------------------\n"); 362 | } -------------------------------------------------------------------------------- /baker/src/disassembler/disassembler.cpp: -------------------------------------------------------------------------------- 1 | #include "disassembler.h" 2 | #include 3 | #include 4 | 5 | 6 | 7 | // spot bugs of rva_map not being synced with the symbols faster 8 | void DISASSEMBLER::sym_rva_map_append(uint32_t rva) { 9 | if (bin.symbols.size()) 10 | assert(sym_rva_map.size() == bin.symbols.size() - 1); 11 | 12 | sym_rva_map.push_back(rva); 13 | } 14 | 15 | 16 | RVA_MAP_ENTRY& DISASSEMBLER::queue_rva(uint32_t rva, sym_id_t label_id) { 17 | assert(rva_in_exec_sect(rva)); 18 | assert(bin.symbols[label_id]->type == SYMBOL_TYPE_CODE); 19 | 20 | disasm_queue.push(rva); 21 | bin.basic_block(label_id); 22 | sym_rva_map_append(rva); 23 | 24 | return rva_map[rva] = { label_id, 0 }; 25 | } 26 | 27 | RVA_MAP_ENTRY& DISASSEMBLER::queue_rva(uint32_t rva, std::string name) { 28 | assert(rva_in_exec_sect(rva)); 29 | 30 | if (rva_map[rva].id) { 31 | // check if a symbol already exists for this rva 32 | auto& entry = queue_rva(rva, rva_map[rva].id); 33 | 34 | SYMBOL* sym = bin.symbols[entry.id]; 35 | sym->name = name; 36 | 37 | return entry; 38 | } else { 39 | // create a new one 40 | disasm_queue.push(rva); 41 | BASIC_BLOCK* sym = bin.basic_block(name); 42 | sym_rva_map_append(rva); 43 | 44 | return rva_map[rva] = { sym->id, 0 }; 45 | } 46 | } 47 | 48 | RVA_MAP_ENTRY& DISASSEMBLER::split_bb(uint32_t rva, std::string name) { 49 | int instr_idx = 0; 50 | BASIC_BLOCK* origbb = nullptr; 51 | 52 | // find the index to split at, by following the list backward 53 | for (int curr_rva = rva; true; instr_idx++) { 54 | auto& entry = rva_map[curr_rva]; 55 | 56 | if (entry.blink) { 57 | curr_rva -= entry.blink; 58 
| continue; 59 | } 60 | 61 | assert(entry.id); 62 | origbb = bin.symbols[entry.id]->bb; 63 | break; 64 | } 65 | 66 | logger_log( 67 | CYAN, "split_bb", 68 | WHITE, fmtf("<+%X> sym_%d[%d -- %d] --> sym_%d\n", 69 | rva, origbb->id, 70 | instr_idx, origbb->instrs.size(), bin.symbols.size())); 71 | 72 | BASIC_BLOCK* newbb = bin.basic_block(name); 73 | sym_rva_map_append(rva); 74 | 75 | // change fallthrough 76 | newbb ->fall(origbb->fallthrough_sym_id); 77 | origbb->fall(newbb->id); 78 | 79 | // cut block 80 | newbb->instrs.insert(begin(newbb->instrs), 81 | begin(origbb->instrs) + instr_idx, 82 | end(origbb->instrs)); 83 | 84 | origbb->instrs.erase( 85 | begin(origbb->instrs) + instr_idx, 86 | end(origbb->instrs)); 87 | 88 | return rva_map[rva] = { newbb->id, 0 }; 89 | } 90 | 91 | // main recursive disassembler function 92 | void DISASSEMBLER::disassemble() { 93 | while (!disasm_queue.empty()) { 94 | uint32_t rva_start = disasm_queue.front(); 95 | uint32_t file_start = rva_to_offset(rva_start); 96 | assert(file_start); 97 | disasm_queue.pop(); 98 | 99 | IMAGE_SECTION_HEADER* section = rva_to_sect(rva_start); 100 | uint32_t file_end = file_start + section->SizeOfRawData; 101 | assert(bin.symbols[rva_map[rva_start].id]->type == SYMBOL_TYPE_CODE); 102 | 103 | BASIC_BLOCK* currbb = bin.symbols[rva_map[rva_start].id]->bb; 104 | 105 | // instr_offset is offset of the instruction from file_start 106 | for (uint32_t instr_offset = 0; instr_offset < file_end;) { 107 | uint8_t* instr_ptr = &filebuf[file_start + instr_offset]; 108 | uint64_t curr_instr_va = image_base + rva_start + instr_offset; 109 | int remaining_sect_len = file_end - instr_offset; 110 | 111 | auto [dec_ctx, dec_instr] = 112 | decode_instr(&bin.decoder, instr_ptr, remaining_sect_len); 113 | 114 | instr_t instr = {}; 115 | instr.len = dec_instr.length; 116 | memcpy(instr.bytes, instr_ptr, instr.len); 117 | 118 | // if this is referencing something, we will store the target's symbol id into the instruction 119 | 
if (dec_instr.attributes & ZYDIS_ATTRIB_IS_RELATIVE) { 120 | // immediate access (etc: jmp XX; call XX; ...), (code access) 121 | if (dec_instr.raw.imm[0].is_relative) { 122 | assert(dec_instr.raw.imm[0].is_signed); 123 | 124 | uint32_t target_rva = 125 | rva_start + instr_offset 126 | + dec_instr.length + dec_instr.raw.imm[0].value.s; 127 | 128 | auto& target_rva_entry = rva_map[target_rva]; 129 | assert(rva_in_exec_sect(target_rva)); 130 | 131 | // if the target is already in a discovered basic block 132 | if (target_rva_entry.blink) { 133 | target_rva_entry = split_bb(target_rva); 134 | 135 | // if the target is in the same block, before this instruction 136 | // we will add the instruction to the new basic block split off instead 137 | if (rva_start <= target_rva && target_rva < rva_start + instr_offset) 138 | currbb = bin.symbols[target_rva_entry.id]->bb; 139 | } else if (!target_rva_entry.id) { 140 | target_rva_entry = queue_rva(target_rva); 141 | } 142 | 143 | assert(!target_rva_entry.blink); 144 | assert(target_rva_entry.id); 145 | 146 | instr_store_val(&instr, target_rva_entry.id, 147 | &bin.decoder, &dec_instr, &dec_ctx, curr_instr_va); 148 | } 149 | 150 | // rip relative reference (etc: mov rax, [rip+XX]; ....) 
(code/data access) 151 | if (dec_instr.raw.disp.offset != 0 152 | && dec_instr.raw.modrm.mod == 0 153 | && dec_instr.raw.modrm.rm == 5) { 154 | // in x86-64, the offset in rip relative instructions should be 4 bytes (i think) 155 | assert(dec_instr.raw.disp.size == 32); 156 | 157 | uint32_t target_rva = 158 | rva_start + instr_offset 159 | + dec_instr.length + dec_instr.raw.disp.value; 160 | 161 | auto& target_rva_entry = rva_map[target_rva]; 162 | 163 | if (rva_in_exec_sect(target_rva)) { 164 | if (target_rva_entry.blink) { 165 | target_rva_entry = split_bb(target_rva); 166 | 167 | // if we have splitted our own basic block 168 | // then we will update currbb 169 | if (rva_start <= target_rva && target_rva < rva_start + instr_offset) 170 | currbb = bin.symbols[target_rva_entry.id]->bb; 171 | } 172 | 173 | if (dec_instr.mnemonic == ZYDIS_MNEMONIC_LEA && target_rva_entry.id == nullsid) 174 | target_rva_entry = queue_rva(target_rva); 175 | } else if (target_rva_entry.id == nullsid) { 176 | assert(!target_rva_entry.blink); 177 | 178 | uint32_t db_offset = 0; 179 | DATA_BLOCK* db = rva_to_containing_db(target_rva, &db_offset); 180 | SYMBOL* sym = nullptr; 181 | 182 | if (db) { 183 | sym = db->data_sym(db_offset); 184 | sym_rva_map_append(target_rva); 185 | 186 | // todo: should we analyze this data symbol? 
187 | } 188 | 189 | // addresses that does not land in a data block 190 | // are marked as relative data symbols 191 | // this should be a address within the PE header 192 | else { 193 | sym = bin.rel_info(target_rva); 194 | sym_rva_map_append(target_rva); 195 | } 196 | 197 | target_rva_entry = rva_map[target_rva] = { sym->id, 0 }; 198 | } 199 | 200 | assert(sizeof(target_rva_entry.id) == 4); 201 | assert(target_rva_entry.id); 202 | instr_store_val(&instr, target_rva_entry.id, 203 | &bin.decoder, &dec_instr, &dec_ctx, curr_instr_va); 204 | } 205 | } 206 | 207 | logger_log(WHITE, "", WHITE, fmtf("<+%04X>\t", rva_start + instr_offset)); 208 | currbb->push(instr); 209 | 210 | // if this is a terminating instruction, end this block 211 | if (dec_instr.meta.category == ZYDIS_CATEGORY_RET || 212 | dec_instr.meta.category == ZYDIS_CATEGORY_COND_BR || 213 | dec_instr.meta.category == ZYDIS_CATEGORY_UNCOND_BR || 214 | (dec_instr.meta.category == ZYDIS_CATEGORY_INTERRUPT && dec_instr.raw.imm[0].value.s == 0x29)) { 215 | 216 | // if this is a conditional branch 217 | // we will need to make this fall to the next block 218 | if (dec_instr.meta.category == ZYDIS_CATEGORY_COND_BR) { 219 | uint32_t fallthrough_rva = rva_start + instr_offset + dec_instr.length; 220 | 221 | auto& rva_entry = rva_map[fallthrough_rva]; 222 | if (rva_entry.id) { 223 | 224 | if (rva_entry.blink) 225 | rva_entry = split_bb(fallthrough_rva); 226 | 227 | currbb->fall(rva_entry.id); 228 | } else { 229 | currbb->fall(queue_rva(fallthrough_rva).id); 230 | } 231 | } 232 | 233 | break; 234 | } 235 | 236 | instr_offset += dec_instr.length; 237 | 238 | // if we have entered into another discovered basic block 239 | // end the current block 240 | auto& rva_entry = rva_map[rva_start + instr_offset]; 241 | if (rva_entry.id) { 242 | SYMBOL* sym = bin.symbols[rva_entry.id]; 243 | 244 | // TODO: it *might* be possible to fall into a jump table 245 | // (which would be marked as data, not code) 246 | assert(sym->type 
== SYMBOL_TYPE_CODE); 247 | currbb->fall(rva_entry.id); 248 | break; 249 | } 250 | 251 | // create rva entry for next instruction 252 | rva_entry = { 0, dec_instr.length }; 253 | } 254 | } 255 | } 256 | 257 | 258 | void DISASSEMBLER::sort_basic_blocks() { 259 | // sort basic blocks 260 | // to be in order of rva 261 | auto& blocks = bin.basic_blocks; 262 | std::sort(begin(blocks), end(blocks), 263 | [&](BASIC_BLOCK* left, BASIC_BLOCK* right) { 264 | return (sym_rva_map[left->id] < sym_rva_map[right->id]); 265 | }); 266 | } 267 | 268 | 269 | DISASSEMBLER* disassemble_pe(std::vector filebuf) { 270 | DISASSEMBLER* s = new DISASSEMBLER{}; 271 | s->filebuf = filebuf; 272 | s->doshdr = reinterpret_cast(&s->filebuf[0]); 273 | s->nthdrs = reinterpret_cast(&s->filebuf[s->doshdr->e_lfanew]); 274 | s->datadir = reinterpret_cast(s->nthdrs->OptionalHeader.DataDirectory); 275 | s->sects = IMAGE_FIRST_SECTION(s->nthdrs); 276 | s->image_base = s->nthdrs->OptionalHeader.ImageBase; 277 | 278 | // initialize rva map 279 | s->rva_map = std::vector( 280 | s->nthdrs->OptionalHeader.SizeOfImage, RVA_MAP_ENTRY{}); 281 | 282 | // sync sym_rva_map with the null symbol to placehold for symbol id 0 283 | s->sym_rva_map.push_back(0); 284 | 285 | s->create_section_dbs(); 286 | s->parse_imports(); 287 | s->parse_exceptions(); 288 | s->parse_relocations(); 289 | 290 | // disassemble starting from entry point 291 | uint32_t entry_rva = s->nthdrs->OptionalHeader.AddressOfEntryPoint; 292 | auto& entry_point = s->rva_map[entry_rva]; 293 | if (entry_point.id == nullsid) 294 | entry_point = s->queue_rva(entry_rva, "entrypoint"); 295 | 296 | s->bin.set_entry(s->bin.symbols[entry_point.id]->bb); 297 | s->disassemble(); 298 | s->sort_basic_blocks(); 299 | 300 | s->collect_jump_tables(); 301 | s->resolve_jpt_entries(); 302 | s->disassemble(); 303 | 304 | s->sort_basic_blocks(); 305 | s->verify(); 306 | return s; 307 | } 308 | 309 | 310 | DATA_BLOCK* DISASSEMBLED_BINARY::rva_to_db(uint32_t rva) { 311 | 
return rva_db_map[rva].db; 312 | } 313 | 314 | DATA_BLOCK* DISASSEMBLED_BINARY::rva_to_containing_db(uint32_t rva, uint32_t* db_offset) { 315 | auto it = std::lower_bound(begin(rva_db_map), end(rva_db_map), 316 | RVA_DB_ENTRY{ rva, nullptr }, 317 | [](const RVA_DB_ENTRY& left, const RVA_DB_ENTRY& right) { 318 | return (left.rva + left.db->bytes.size()) < right.rva; 319 | }); 320 | 321 | if (it == end(rva_db_map)) 322 | return nullptr; 323 | 324 | if (rva < it->rva || rva > it->rva + it->db->bytes.size()) 325 | return nullptr; 326 | 327 | // offset from start of data block 328 | if (db_offset) 329 | *db_offset = rva - it->rva; 330 | 331 | return it->db; 332 | } 333 | 334 | BASIC_BLOCK* DISASSEMBLED_BINARY::rva_to_bb(uint32_t rva) { 335 | return bin.symbols[rva_map[rva].id]->bb; 336 | } 337 | 338 | BASIC_BLOCK* DISASSEMBLED_BINARY::rva_to_containing_bb(uint32_t rva, int* instr_idx) { 339 | assert(rva <= rva_map.size()); 340 | auto& entry = rva_map[rva]; 341 | 342 | // this rva holds the root of a basic block 343 | if (!entry.blink) { 344 | assert(entry.id); 345 | if (instr_idx) 346 | *instr_idx = 0; 347 | 348 | return bin.symbols[entry.id]->bb; 349 | } 350 | 351 | int count = 0; 352 | for (int curr_rva = rva; true;) { 353 | auto& node = rva_map[curr_rva]; 354 | if (!node.blink) { 355 | assert(node.id); 356 | SYMBOL* sym = bin.symbols[node.id]; 357 | assert(sym->type == SYMBOL_TYPE_CODE); 358 | 359 | if (instr_idx) 360 | *instr_idx = count; 361 | 362 | return sym->bb; 363 | } 364 | 365 | curr_rva -= node.blink; 366 | count++; 367 | } 368 | 369 | return nullptr; 370 | } 371 | 372 | 373 | IMAGE_SECTION_HEADER* DISASSEMBLED_BINARY::rva_to_sect(uint32_t rva) { 374 | for (int i = 0; i < nthdrs->FileHeader.NumberOfSections; i++) { 375 | IMAGE_SECTION_HEADER* sect = §s[i]; 376 | 377 | if (sect->VirtualAddress <= rva && rva < sect->VirtualAddress + sect->Misc.VirtualSize) { 378 | return sect; 379 | } 380 | } 381 | return nullptr; 382 | } 383 | 384 | bool 
// Rounds `val` up to the next multiple of `alignment`.
// A value that is already aligned is returned unchanged. An alignment of 0
// means "no alignment" and returns `val` as is instead of dividing by zero.
uint32_t align_up(uint32_t val, uint32_t alignment) {
    if (alignment == 0)
        return val;

    const uint32_t remainder = val % alignment;
    if (!remainder)
        return val;

    return val + (alignment - remainder);
}
40 | } 41 | } 42 | 43 | uint32_t calculate_pehdr_size(ASSEMBLED_BINARY* s) { 44 | return sizeof(IMAGE_DOS_HEADER) + sizeof(IMAGE_NT_HEADERS) + (s->sections.size() * sizeof(IMAGE_SECTION_HEADER)); 45 | } 46 | 47 | uint32_t calculate_binary_file_size(ASSEMBLED_BINARY* s) { 48 | size_t filesize = 0x400; 49 | for (SECTION* sect : s->sections) { 50 | if (sect->bytes.empty()) 51 | continue; 52 | 53 | filesize += sect->hdr.SizeOfRawData; 54 | } 55 | return filesize; 56 | } 57 | 58 | uint32_t calculate_binary_virtual_size(ASSEMBLED_BINARY* s) { 59 | size_t filesize = 0x1000; 60 | for (SECTION* sect : s->sections) { 61 | if (empty(sect->bytes)) 62 | continue; 63 | 64 | filesize += align_up(sect->hdr.SizeOfRawData, s->section_alignment); 65 | } 66 | return filesize; 67 | } 68 | 69 | ASSEMBLED_BINARY* build_pe(const BINARY* bin_) { 70 | ASSEMBLED_BINARY* s = new ASSEMBLED_BINARY{}; 71 | binary_duplicate(bin_, &s->bin); 72 | s->entry_point = 0; 73 | s->image_base = 0x140000000; 74 | s->section_alignment = 0x1000; 75 | s->file_alignment = 0x200; 76 | 77 | auto& bin = s->bin; 78 | auto& import_modules = bin.import_modules; 79 | auto& basic_blocks = bin.basic_blocks; 80 | auto& data_blocks = bin.data_blocks; 81 | auto& symbols = bin.symbols; 82 | 83 | logger_reset_indentation(); 84 | logger_log(WHITE, "", WHITE, "--------------------------------------------\n"); 85 | logger_log(WHITE, "", WHITE, "-----------ASSEMBLING-PE--------------------\n"); 86 | logger_log(WHITE, "", WHITE, "--------------------------------------------\n"); 87 | 88 | 89 | logger_log(WHITE, "", WHITE, "applying fixes to moved away basic blocks..\n"); 90 | 91 | // apply fixes to basic blocks 92 | // with fallthrough blocks that is not right after it 93 | for (int i = 0; i < basic_blocks.size(); i++) { 94 | BASIC_BLOCK* bb = basic_blocks[i]; 95 | 96 | if (!bb->fallthrough_sym_id) 97 | continue; 98 | 99 | if (i < (basic_blocks.size() - 1) && basic_blocks[i + 1]->id == bb->fallthrough_sym_id) 100 | continue; 101 
| 102 | bb->push(Jmp(ImmRel(bb->fallthrough_sym_id))); 103 | } 104 | 105 | // 106 | // generate IAT 107 | // 108 | 109 | sym_id_t iat_sym_id = nullsid; 110 | size_t iat_size = 0; 111 | 112 | if (!import_modules.empty()) { 113 | logger_reset_indentation(); 114 | logger_log( 115 | WHITE, "BUILDING", 116 | WHITE, "IMPORT TABLE\n"); 117 | 118 | // import descriptors block 119 | DATA_BLOCK* descs = bin.data_block( 120 | (import_modules.size() + 1) * sizeof(IMAGE_IMPORT_DESCRIPTOR), 121 | true, "mod_descriptors"); 122 | 123 | // module names block 124 | DATA_BLOCK* modnames = bin.data_block(0, true, "mod_names"); 125 | 126 | for (int i = 0; i < import_modules.size(); i++) { 127 | IMPORT_MODULE* mod = import_modules[i]; 128 | 129 | logger_reset_indentation(); 130 | logger_log( 131 | WHITE, "", 132 | WHITE, fmtf("import module: %s [\n", mod->name.c_str())); 133 | logger_indent(); 134 | 135 | // descriptor struct 136 | auto desc = STRUCT_SCHEMA(IMAGE_IMPORT_DESCRIPTOR, i * sizeof(IMAGE_IMPORT_DESCRIPTOR)); 137 | 138 | // module name 139 | SYMBOL* str_name = modnames->push_str(mod->name); 140 | descs->data_sym(SS_MEMBER(desc, Name), TARGET_TYPE_RVA, str_name->id); 141 | // thunk tables 142 | 143 | DATA_BLOCK* name_thunks = bin.data_block((mod->routines.size() + 1) * 8, true, "name_thunks"); 144 | DATA_BLOCK* routine_names = bin.data_block(0, true, "routine_names"); 145 | DATA_BLOCK* thunks = bin.data_block((mod->routines.size() + 1) * 8, true, "thunks"); 146 | 147 | // convert import symbols to data symbols 148 | for (int j = 0; j < mod->routines.size(); j++) { 149 | SYMBOL* sym = bin.symbols[mod->routines[j]->id]; 150 | assert(sym != nullptr && sym->type == SYMBOL_TYPE_IMPORT); 151 | sym->type = SYMBOL_TYPE_DATA; 152 | sym->db = thunks; 153 | sym->db_offset = j * 8; 154 | sym->db->dboffset_to_sym[sym->db_offset] = sym; 155 | 156 | logger_log( 157 | WHITE, "converted", 158 | WHITE, fmtf("import sym: { %d, %s }, to data symbol\n", sym->id, sym->name.c_str())); 159 | } 160 | 
161 | // routine 162 | for (int j = 0; j < mod->routines.size(); j++) { 163 | const auto& name = mod->routines[j]->name; 164 | 165 | logger_log( 166 | WHITE, "", 167 | WHITE, fmtf("--> %s[\n", name.c_str())); 168 | logger_indent(); 169 | 170 | SYMBOL* imp_by_name = routine_names->push_val(0, 2); 171 | routine_names->push_str(name); 172 | 173 | name_thunks->data_sym(j * 8, TARGET_TYPE_RVA, imp_by_name->id); 174 | thunks ->data_sym(j * 8, TARGET_TYPE_RVA, imp_by_name->id); 175 | 176 | logger_unindent(); 177 | logger_log( 178 | WHITE, "", 179 | WHITE, "]\n"); 180 | } 181 | 182 | descs->data_sym(SS_MEMBER(desc, OriginalFirstThunk), TARGET_TYPE_RVA, name_thunks->data_sym(0)->id); 183 | descs->data_sym(SS_MEMBER(desc, FirstThunk), TARGET_TYPE_RVA, thunks->data_sym(0)->id); 184 | 185 | iat_size += name_thunks->bytes.size(); 186 | iat_size += routine_names->bytes.size(); 187 | iat_size += thunks->bytes.size(); 188 | 189 | logger_unindent(); 190 | logger_log( 191 | WHITE, "", 192 | WHITE, "]\n"); 193 | } 194 | 195 | iat_size += descs->bytes.size(); 196 | iat_size += modnames->bytes.size(); 197 | 198 | iat_sym_id = descs->data_sym(0)->id; 199 | } 200 | 201 | // 202 | // generate sections 203 | // 204 | 205 | s->text_sect = new_sect(s, ".text", IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_EXECUTE); 206 | s->rdata_sect = new_sect(s, ".rdata", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ); 207 | s->data_sect = new_sect(s, ".data", IMAGE_SCN_CNT_INITIALIZED_DATA | IMAGE_SCN_MEM_READ | IMAGE_SCN_MEM_WRITE); 208 | update_sections(s); 209 | 210 | // insert and map code 211 | // .text section 212 | // re-encode all immediates to use largest branch size (rel32) 213 | logger_log(WHITE, "", WHITE, "mapping and writing code to .text\n"); 214 | for (BASIC_BLOCK* bb : basic_blocks) { 215 | SECTION* sect = s->text_sect; 216 | uint32_t file_start = sect->hdr.PointerToRawData + sect->bytes.size(); 217 | uint32_t rva_start = sect->hdr.VirtualAddress + sect->bytes.size(); 218 
| 219 | s->sym_to_offset[bb->id] = file_start; 220 | s->sym_to_rva [bb->id] = rva_start; 221 | 222 | logger_log( 223 | WHITE, "", 224 | BRIGHT_BLUE, fmtf("basic block: %d %s\n", 225 | bb->id, bin.symbols[bb->id]->name.c_str())); 226 | 227 | for (uint32_t instr_offset = 0; instr_t& instr : bb->instrs) { 228 | uint32_t instr_rva = rva_start + instr_offset; 229 | uint64_t curr_instr_va = s->image_base + instr_rva; 230 | 231 | // decode 232 | auto [dec_ctx, dec_instr] = decode_instr(&s->bin.decoder, instr.bytes, instr.len); 233 | 234 | if (dec_instr.raw.imm[0].is_relative) { 235 | assert(dec_instr.operand_count_visible == 1); 236 | 237 | logger_log( 238 | WHITE, fmtf("<+%04X>", 239 | rva_start + instr_offset), 240 | WHITE, fmtf("bb + %-4d : %s\t \n", 241 | instr_offset, serialize_instr(&s->bin, &instr).c_str())); 242 | 243 | // have to retrieve it like this to avoid weird sign bugs with zydis 244 | sym_id_t target_sym_id = nullsid; 245 | memcpy(&target_sym_id, instr.bytes + dec_instr.raw.imm[0].offset, 246 | dec_instr.raw.imm[0].size / 8); 247 | 248 | assert(target_sym_id < bin.symbols.size()); 249 | 250 | // force extend to 32 bit branch width 251 | instr_store_val(&instr, 0x12345678, 252 | &s->bin.decoder, &dec_instr, &dec_ctx, curr_instr_va); 253 | 254 | // copy in symbol id 255 | memcpy(instr.bytes + instr.len - 4, &target_sym_id, 4); 256 | } else { 257 | logger_log( 258 | WHITE, fmtf("<+%04X>", 259 | rva_start + instr_offset), 260 | WHITE, fmtf("bb + %-4d : %s\n", 261 | instr_offset, serialize_instr(&s->bin, &instr).c_str())); 262 | } 263 | 264 | sect->bytes.insert(end(sect->bytes), 265 | instr.bytes, 266 | instr.bytes + instr.len); 267 | 268 | instr_offset += instr.len; 269 | } 270 | } 271 | update_sections(s); 272 | 273 | // insert and map data 274 | // .rdata & .data sections 275 | std::sort(begin(data_blocks), end(data_blocks), 276 | [&](const DATA_BLOCK* a, const DATA_BLOCK* b) { 277 | // read only before readwrite 278 | if (a->read_only != b->read_only) 279 
| return a->read_only > b->read_only; // true > false 280 | 281 | // initialized before uninitialized 282 | if (a->uninitialized != b->uninitialized) 283 | return a->uninitialized < b->uninitialized; // false < true 284 | 285 | return false; 286 | }); 287 | 288 | for (DATA_BLOCK* db : bin.data_blocks) { 289 | SECTION* sect = db->read_only ? s->rdata_sect : s->data_sect; 290 | uint32_t file_start = sect->hdr.PointerToRawData + sect->bytes.size(); 291 | uint32_t rva_start = sect->hdr.VirtualAddress + sect->bytes.size(); 292 | 293 | s->db_to_sect[db] = sect; 294 | s->db_to_offset[db] = file_start; 295 | s->db_to_rva[db] = rva_start; 296 | 297 | logger_log( 298 | WHITE, "", 299 | BRIGHT_RED, fmtf("filebuf[0x%04X] <+0x%04X> %s, size: 0x%X\n", 300 | file_start, rva_start, db->name.c_str(), db->bytes.size())); 301 | 302 | if (db->uninitialized) { 303 | sect->uninitialized_size += db->bytes.size(); 304 | } else { 305 | sect->bytes.insert(end(sect->bytes), 306 | db->bytes.begin(), 307 | db->bytes.end()); 308 | } 309 | 310 | for (auto& [db_offset, sym] : db->dboffset_to_sym) { 311 | s->sym_to_offset[sym->id] = file_start + db_offset; 312 | s->sym_to_rva[sym->id] = rva_start + db_offset; 313 | 314 | logger_log( 315 | WHITE, fmtf("<+%04X>", rva_start + db_offset), 316 | WHITE, fmtf("%s + %04X, db+%04X, sym_%d\n", 317 | sect->hdr.Name, s->sym_to_rva[sym->id] - sect->hdr.VirtualAddress, 318 | db_offset, sym->id)); 319 | } 320 | update_sections(s); 321 | } 322 | 323 | // allocate file buffer 324 | s->filebuf.insert(end(s->filebuf), calculate_binary_file_size(s), 0); 325 | 326 | 327 | // create a unhooked formatter 328 | // since we are not printing instructions with symbol ids anymore 329 | ZydisFormatter unhooked_formatter = {}; 330 | zy_expect_success( 331 | ZydisFormatterInit(&unhooked_formatter, ZYDIS_FORMATTER_STYLE_INTEL)); 332 | 333 | zy_expect_success( 334 | ZydisFormatterSetProperty(&unhooked_formatter, 335 | ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_BRANCHES, true) 336 | ); 
337 | 338 | zy_expect_success( 339 | ZydisFormatterSetProperty(&unhooked_formatter, 340 | ZYDIS_FORMATTER_PROP_FORCE_RELATIVE_RIPREL, true) 341 | ); 342 | 343 | // resolve relative data symbols 344 | for (SYMBOL* sym : bin.symbols) { 345 | if (sym->type == SYMBOL_TYPE_RELATIVE_INFO) { 346 | s->sym_to_offset [sym->id] = sym->rel_offset; 347 | s->sym_to_rva [sym->id] = sym->rel_offset; 348 | } 349 | } 350 | 351 | // resolve & write assembly to file buffer 352 | // instruction holding: symbol id --> rva delta 353 | logger_log(WHITE, "", WHITE, "resolving assembly and writing to file buffer\n"); 354 | 355 | for (const BASIC_BLOCK* bb : basic_blocks) { 356 | uint32_t file_start = s->sym_to_offset[bb->id]; 357 | uint32_t rva_start = s->sym_to_rva[bb->id]; 358 | 359 | logger_log( 360 | WHITE, "", 361 | BRIGHT_BLUE, fmtf("basic block: %d %s\n", 362 | bb->id, bin.symbols[bb->id]->name.c_str())); 363 | 364 | for (uint32_t instr_offset = 0; const instr_t& instr : bb->instrs) { 365 | instr_t resolved_instr = instr; 366 | 367 | // decode 368 | auto [dec_ctx, dec_instr] = decode_instr(&s->bin.decoder, instr.bytes, instr.len); 369 | 370 | // resolve symbol id into rva delta 371 | sym_id_t target_sym_id = nullsid; 372 | 373 | if (dec_instr.attributes & ZYDIS_ATTRIB_IS_RELATIVE) { 374 | if (dec_instr.raw.imm[0].is_relative) { 375 | assert(dec_instr.raw.imm[0].size == 32); 376 | target_sym_id = get_sym_id(&instr, &dec_instr); 377 | 378 | uint32_t target_rva = s->sym_to_rva[target_sym_id]; 379 | uint32_t delta = target_rva - (rva_start + instr_offset + instr.len); 380 | 381 | memcpy(resolved_instr.bytes + instr.len - 4, &delta, 4); 382 | } 383 | 384 | else if (dec_instr.raw.disp.offset != 0 385 | && dec_instr.raw.modrm.mod == 0 386 | && dec_instr.raw.modrm.rm == 5) { 387 | target_sym_id = get_sym_id(&instr, &dec_instr); 388 | 389 | uint32_t target_rva = s->sym_to_rva[target_sym_id]; 390 | uint32_t delta = target_rva - (rva_start + instr_offset + instr.len); 391 | 392 | 
memcpy(resolved_instr.bytes + dec_instr.raw.disp.offset, &delta, 4); 393 | } 394 | } 395 | 396 | if (instr.flags & INSTR_FLAG_MEM_IDX) { 397 | assert(!(dec_instr.attributes & ZYDIS_ATTRIB_IS_RELATIVE)); 398 | 399 | target_sym_id = get_sym_id(&instr, &dec_instr); 400 | uint32_t target_rva = s->sym_to_rva[target_sym_id]; 401 | memcpy(resolved_instr.bytes + dec_instr.raw.disp.offset, &target_rva, 4); 402 | } 403 | 404 | memcpy(&s->filebuf[file_start + instr_offset], resolved_instr.bytes, resolved_instr.len); 405 | 406 | logger_log( 407 | WHITE, fmtf("<+%04X>", rva_start + instr_offset), 408 | WHITE, fmtf("bb + %-4d : %-30s %s\n", 409 | instr_offset, 410 | serialize_instr_ex( 411 | &s->bin.decoder, 412 | &unhooked_formatter, 413 | &resolved_instr).c_str(), 414 | target_sym_id ? std::to_string(target_sym_id).c_str() : "" 415 | ) 416 | ); 417 | 418 | instr_offset += instr.len; 419 | } 420 | 421 | } 422 | 423 | // write data blocks to file buffer 424 | uint32_t last_file_start = 0; 425 | uint32_t last_file_end = 0; 426 | 427 | for (DATA_BLOCK* db : data_blocks) { 428 | uint32_t file_offset = s->db_to_offset[db]; 429 | uint32_t rva = s->db_to_rva[db]; 430 | memcpy(&s->filebuf[file_offset], db->bytes.data(), db->bytes.size()); 431 | 432 | // spot overlapping errors 433 | assert(file_offset + db->bytes.size() <= last_file_start 434 | || file_offset >= last_file_end); 435 | 436 | logger_log( 437 | WHITE, fmtf("%s", db->name.c_str(), s->db_to_sect[db]->hdr.Name), 438 | WHITE, fmtf("inserted db: { %s, size: 0x%X, file offset: 0x%04X, rva: 0x%04X }\n", 439 | db->name.c_str(), db->bytes.size(), file_offset, rva)); 440 | 441 | last_file_start = file_offset; 442 | last_file_end = file_offset + db->bytes.size(); 443 | } 444 | 445 | 446 | // resolve data symbols 447 | // (resolving pointers or rvas to other symbols) 448 | 449 | // key: page frame number 450 | // val: block (set of information) 451 | std::map> reloc_blocks = {}; 452 | 453 | for (DATA_BLOCK* db : data_blocks) { 454 | 
for (auto& [db_offset, sym] : db->dboffset_to_sym) { 455 | if (!sym->target_sym_id) 456 | continue; 457 | 458 | uint32_t file_offset = s->sym_to_offset[sym->id]; 459 | uint32_t rva = s->sym_to_rva[sym->id]; 460 | 461 | if (sym->target_type == TARGET_TYPE_POINTER) { 462 | assert(!db->uninitialized); 463 | assert(db_offset <= sym->db->bytes.size() - 8); 464 | 465 | uint64_t target_va = s->image_base + s->sym_to_rva[sym->target_sym_id]; 466 | memcpy(&s->filebuf[file_offset], &target_va, 8); 467 | 468 | uint32_t pfn = rva >> 12; 469 | 470 | // get current block 471 | auto& block = reloc_blocks[pfn]; 472 | block.emplace(rva & 0xFFF); 473 | 474 | logger_log( 475 | WHITE, "pointer target", 476 | WHITE, fmtf("(<%s+%d> --> 0x%X) marked for relocation\n", 477 | db->name.c_str(), db_offset, sym->target_sym_id)); 478 | } 479 | 480 | if (sym->target_type == TARGET_TYPE_RVA) { 481 | assert(!db->uninitialized); 482 | assert(db_offset <= sym->db->bytes.size() - 4); 483 | uint32_t target_rva = s->sym_to_rva[sym->target_sym_id]; 484 | memcpy(&s->filebuf[file_offset], &target_rva, 4); 485 | 486 | logger_log( 487 | WHITE, "rva target", 488 | WHITE, fmtf("(%d --> %d) = 0x%X\n", 489 | sym->id, sym->target_sym_id, target_rva)); 490 | } 491 | } 492 | } 493 | 494 | // 495 | // todo: generate exception table 496 | // 497 | 498 | 499 | // 500 | // generate the base relocation table 501 | // 502 | 503 | struct BASERELOC_ENTRY { 504 | uint16_t offset : 12; 505 | uint16_t type : 4; 506 | }; 507 | 508 | SECTION* reloc_sect = nullptr; 509 | if (!reloc_blocks.empty()) { 510 | reloc_sect = new_sect(s, ".reloc", IMAGE_SCN_MEM_READ); 511 | auto& reloc_data = reloc_sect->bytes; 512 | 513 | for (const auto& [pfn, block] : reloc_blocks) { 514 | uint32_t block_offset = reloc_data.size(); 515 | uint32_t padding = (block.size() % 2) * sizeof(BASERELOC_ENTRY); 516 | uint32_t block_size = sizeof(IMAGE_BASE_RELOCATION) 517 | + (sizeof(BASERELOC_ENTRY) * block.size()) 518 | + padding; 519 | 520 | 
reloc_data.insert(end(reloc_data), block_size, 0); 521 | auto* hdr = reinterpret_cast (&reloc_data[block_offset]); 522 | auto* entries = reinterpret_cast (&reloc_data[block_offset + sizeof(IMAGE_BASE_RELOCATION)]); 523 | 524 | hdr->VirtualAddress = pfn << 12; 525 | hdr->SizeOfBlock = reloc_data.size() - block_offset; 526 | 527 | for (int i = 0; auto offset : block) { 528 | entries[i++] = { offset, IMAGE_REL_BASED_DIR64 }; 529 | } 530 | } 531 | 532 | update_sections(s); 533 | 534 | uint32_t file_offset = s->filebuf.size(); 535 | s->filebuf.insert(end(s->filebuf), reloc_sect->hdr.SizeOfRawData, 0); 536 | memcpy(&s->filebuf[file_offset], reloc_data.data(), reloc_data.size()); 537 | } 538 | 539 | // set entry point 540 | if (bin_->entry_point) { 541 | s->entry_point = s->sym_to_rva[bin_->entry_point->id]; 542 | } else { 543 | s->entry_point = s->text_sect->hdr.VirtualAddress; 544 | } 545 | 546 | // generate pe headers 547 | auto doshdr = reinterpret_cast(&s->filebuf[0]); 548 | *doshdr = { 549 | .e_magic = IMAGE_DOS_SIGNATURE, 550 | .e_lfanew = sizeof(IMAGE_DOS_HEADER), 551 | }; 552 | 553 | auto nthdrs = reinterpret_cast(&s->filebuf[doshdr->e_lfanew]); 554 | *nthdrs = { 555 | .Signature = IMAGE_NT_SIGNATURE, 556 | .FileHeader = { 557 | .Machine = IMAGE_FILE_MACHINE_AMD64, 558 | .NumberOfSections = (WORD)s->sections.size(), 559 | .SizeOfOptionalHeader = sizeof(IMAGE_OPTIONAL_HEADER), 560 | .Characteristics = IMAGE_FILE_EXECUTABLE_IMAGE, 561 | }, 562 | 563 | .OptionalHeader = { 564 | .Magic = IMAGE_NT_OPTIONAL_HDR64_MAGIC, 565 | .AddressOfEntryPoint = s->entry_point, 566 | .BaseOfCode = s->text_sect->hdr.VirtualAddress, 567 | .ImageBase = s->image_base, 568 | .SectionAlignment = s->section_alignment, 569 | .FileAlignment = s->file_alignment, 570 | 571 | .MajorOperatingSystemVersion = 6, 572 | .MinorOperatingSystemVersion = 0, 573 | .MajorSubsystemVersion = 6, 574 | .MinorSubsystemVersion = 0, 575 | 576 | .SizeOfImage = calculate_binary_virtual_size(s), 577 | 
.SizeOfHeaders = calculate_pehdr_size(s), 578 | 579 | .Subsystem = IMAGE_SUBSYSTEM_WINDOWS_CUI, 580 | .DllCharacteristics = IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE | 581 | IMAGE_DLLCHARACTERISTICS_NX_COMPAT | IMAGE_DLLCHARACTERISTICS_NO_SEH, 582 | 583 | .SizeOfStackReserve = 0x10000l, 584 | .SizeOfStackCommit = 0x1000, 585 | .SizeOfHeapReserve = 0x10000, 586 | .SizeOfHeapCommit = 0x1000, 587 | 588 | .NumberOfRvaAndSizes = 16 589 | }, 590 | }; 591 | 592 | 593 | // set data directories 594 | if (!import_modules.empty()) { 595 | auto& dir = nthdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]; 596 | dir.VirtualAddress = s->sym_to_rva[iat_sym_id]; 597 | dir.Size = iat_size; 598 | } 599 | 600 | if (!reloc_blocks.empty() && reloc_sect != NULL) { 601 | auto& dir = nthdrs->OptionalHeader.DataDirectory[IMAGE_DIRECTORY_ENTRY_BASERELOC]; 602 | dir.VirtualAddress = reloc_sect->hdr.VirtualAddress; 603 | dir.Size = reloc_sect->bytes.size(); 604 | } 605 | 606 | // set section headers 607 | for (int i = 0; i < s->sections.size(); i++) { 608 | SECTION* sect = s->sections[i]; 609 | 610 | if (sect->bytes.empty()) 611 | continue; 612 | 613 | uint32_t hdr_offset = sizeof(IMAGE_DOS_HEADER) + sizeof(IMAGE_NT_HEADERS) + (i * sizeof(IMAGE_SECTION_HEADER)); 614 | memcpy(&s->filebuf[hdr_offset], §->hdr, sizeof(IMAGE_SECTION_HEADER)); 615 | } 616 | 617 | return s; 618 | } 619 | 620 | 621 | 622 | uint8_t* ASSEMBLED_BINARY::get_instr(BASIC_BLOCK* bb, instr_t* instr) { 623 | for (int bb_offset = 0; instr_t& curr_instr : bb->instrs) { 624 | if (&curr_instr == instr) { 625 | return &filebuf[sym_to_offset[bb->id] + bb_offset]; 626 | } 627 | 628 | bb_offset += curr_instr.len; 629 | } 630 | return nullptr; 631 | } 632 | 633 | uint8_t* ASSEMBLED_BINARY::get_data(SYMBOL* data_sym) { 634 | return &filebuf[sym_to_offset[data_sym->id]]; 635 | } 636 | 637 | 638 | 639 | void assembled_binary_print(ASSEMBLED_BINARY* asm_bin) { 640 | BINARY* bin = &asm_bin->bin; 641 | 642 | 
printf("--------------------------------------------\n"); 643 | printf("--------ASSEMBLED-BINARY--------------------\n"); 644 | printf("--------------------------------------------\n"); 645 | 646 | printf_ex(BRIGHT_BLUE, "--------------CODE--------------------------\n"); 647 | 648 | for (BASIC_BLOCK* bb : bin->basic_blocks) { 649 | print_bb(bb, asm_bin->sym_to_rva[bb->id]); 650 | } 651 | 652 | for (int sect_idx = 0; sect_idx < 2; sect_idx++) { 653 | // 0: .rdata 654 | // 1: .data 655 | 656 | if (sect_idx == 0) 657 | printf_ex(BRIGHT_RED, "----READ-ONLY-DATA--------------------------\n"); 658 | 659 | if (sect_idx == 1) 660 | printf_ex(BRIGHT_MAGENTA, "--------------DATA--------------------------\n"); 661 | 662 | for (DATA_BLOCK* db : bin->data_blocks) { 663 | if (sect_idx == 0 && db->read_only == false) 664 | continue; 665 | 666 | if (sect_idx == 1 && db->read_only == true) 667 | continue; 668 | 669 | printf_ex(sect_idx ? BRIGHT_MAGENTA : BRIGHT_RED, 670 | "[data block]: %s, size: %d\n", db->name.c_str(), db->bytes.size()); 671 | 672 | struct LINE_INFO { 673 | SYMBOL* sym; 674 | int rva; 675 | std::vector bytes; 676 | }; 677 | 678 | std::vector lines = {}; 679 | 680 | LINE_INFO line = {}; 681 | for (int db_offset = 0; db_offset < db->bytes.size(); db_offset++) { 682 | uint8_t byte = asm_bin->filebuf[asm_bin->db_to_offset[db] + db_offset]; 683 | uint64_t rva = asm_bin->db_to_rva[db] + db_offset; 684 | 685 | if (db_offset == 0) { 686 | line.rva = rva; 687 | } 688 | 689 | if (db->dboffset_to_sym[db_offset]) { 690 | if (db_offset) { 691 | lines.push_back(line); 692 | line = {}; 693 | line.rva = rva; 694 | } 695 | line.sym = db->dboffset_to_sym[db_offset]; 696 | } 697 | 698 | int line_max_len = 16; 699 | if (line.sym) { 700 | if (line.sym->target_type == TARGET_TYPE_RVA) { 701 | line_max_len = 4; 702 | } else if (line.sym->target_type == TARGET_TYPE_POINTER) { 703 | line_max_len = 8; 704 | } 705 | } 706 | 707 | if (db_offset == db->bytes.size() - 1) { 708 | 
line.bytes.push_back(byte); 709 | lines.push_back(line); 710 | line = {}; 711 | line.rva = rva; 712 | } else { 713 | if (line.bytes.size() == line_max_len) { 714 | lines.push_back(line); 715 | line = {}; 716 | line.rva = rva; 717 | } 718 | 719 | line.bytes.push_back(byte); 720 | } 721 | } 722 | 723 | for (LINE_INFO& line : lines) { 724 | /* 725 | tabing it out 726 | */ 727 | printf("<+%0X>", line.rva); 728 | 729 | if (line.sym) { 730 | SYMBOL* sym = line.sym; 731 | if (sym->target_type == TARGET_TYPE_RVA) 732 | printf("%-25s |", fmtf("[%-4d --rva-> %-4d]", sym->id, sym->target_sym_id).c_str()); 733 | 734 | if (sym->target_type == TARGET_TYPE_POINTER) 735 | printf("%-25s |", fmtf("[%-4d --ptr-> %-4d]", sym->id, sym->target_sym_id).c_str()); 736 | 737 | if (sym->target_type == TARGET_TYPE_NONE) 738 | printf("%-25s |", fmtf("[%d]", sym->id).c_str()); 739 | } else { 740 | printf("%-25s |", ""); 741 | } 742 | 743 | /* 744 | | 00 00 00 00 |text 745 | */ 746 | std::string fmt = ""; 747 | 748 | for (int j = 0; j < line.bytes.size(); j++) 749 | fmt += fmtf("%02X ", line.bytes[j]); 750 | printf("%-48s|", fmt.c_str()); 751 | 752 | for (int j = 0; j < line.bytes.size(); j++) 753 | printf("%c", line.bytes[j]); 754 | 755 | printf("\n"); 756 | } 757 | 758 | printf("\n"); 759 | } 760 | } 761 | 762 | printf("--------------------------------------------\n"); 763 | printf("--------------------------------------------\n"); 764 | } --------------------------------------------------------------------------------