├── .gitignore ├── vmRef.yara ├── Makefile ├── vmStub.yara ├── VmReg.h ├── VmJmp.h ├── VmJmp.cpp ├── VmReg.cpp ├── x86devirt-LICENSE.txt ├── VmInfo.h ├── instructions.yara ├── README.md ├── VmInfo.cpp ├── notes.txt ├── x86devirt_jmp.py ├── x86devirt.py └── main.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.exe 3 | *.o 4 | *.bin -------------------------------------------------------------------------------- /vmRef.yara: -------------------------------------------------------------------------------- 1 | rule JumpToVmStub 2 | { 3 | strings: 4 | $hex_string = { E8 00 00 00 00 9C 81 6C ?? ?? ?? ?? ?? ?? 9D E8 00 00 00 00 9C 81 6C ?? ?? ?? ?? ?? ?? 9D } 5 | condition: 6 | $hex_string 7 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-m32 3 | 4 | all: x86virt-disasm 5 | 6 | %.o: %.cpp %.h 7 | $(CC) $(CFLAGS) -ggdb -c $< -o $@ 8 | 9 | x86virt-disasm: main.o VmInfo.o VmReg.o VmJmp.o 10 | $(CC) -ggdb -m32 -o x86virt-disasm main.o VmInfo.o VmReg.o VmJmp.o -ludis86 -------------------------------------------------------------------------------- /vmStub.yara: -------------------------------------------------------------------------------- 1 | rule VmStub 2 | { 3 | strings: 4 | $hex_string = { 60 9C 9C 59 ?? ?? 8B 5C 24 24 8B 54 24 28 E8 00 00 00 00 5C 8B A4 24 E1 FE FF FF 55 ?? ?? 83 EC 2C 89 44 24 28 89 0C 24 ?? ?? 8D 7C 24 04 ?? ?? 46 8A 02 32 42 01 ?? ?? ?? 50 56 57 E8 D8 05 00 00 } 5 | condition: 6 | $hex_string 7 | } -------------------------------------------------------------------------------- /VmReg.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | enum DecodedRegister_t { 6 | EFLAGS = 0, 7 | EDI, 8 | ESI, 9 | EBP, 10 | ESP, 11 | EBX, 12 | EDX, 13 | ECX, 14 | EAX, 15 | REG_UNKNOWN, 16 | }; 17 | 18 | const char* getRegisterName(DecodedRegister_t reg); 19 | 20 | DecodedRegister_t decodeVmRegisterReference(const uint8_t registerEncoded); -------------------------------------------------------------------------------- /VmJmp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | enum DecodedJump_t { 5 | JGE = 0, 6 | JL, 7 | JLE, 8 | JZ, 9 | JO, 10 | JBE, 11 | JNZ, 12 | JNO, 13 | JS, 14 | JP, 15 | JB, 16 | JG, 17 | JA, 18 | JNP, 19 | JNS, 20 | JNB, 21 | JMP_UNKNOWN, 22 | }; 23 | 24 | const char* getJumpName(DecodedJump_t jmp); 25 | DecodedJump_t decodeVmJump(const uint8_t jumpEncoded); -------------------------------------------------------------------------------- /VmJmp.cpp: -------------------------------------------------------------------------------- 1 | #include "VmJmp.h" 2 | 3 | const char* getJumpName(DecodedJump_t jmp) { 4 | static const char* jmpIndex[] = { 5 | "jge", 6 | "jl", 7 | "jle", 8 | "jz", 9 | "jo", 10 | "jbe", 11 | "jnz", 12 | "jno", 13 | "js", 14 | "jp", 15 | "jb", 16 | "jg", 17 | "ja", 18 | "jnp", 19 | "jns", 20 | "jnb", 21 | }; 22 | 23 | if(jmp < 0 || jmp > DecodedJump_t::JNB) 24 | return "???"; 25 | else 26 | return jmpIndex[jmp]; 27 | } 28 | 29 | DecodedJump_t decodeVmJump(const uint8_t jumpEncoded) { 30 | if(jumpEncoded < 0 || jumpEncoded > DecodedJump_t::JNB) 31 | return DecodedJump_t::JMP_UNKNOWN; 32 | 33 | return (DecodedJump_t)jumpEncoded; 34 | } -------------------------------------------------------------------------------- /VmReg.cpp: -------------------------------------------------------------------------------- 1 | #include "VmReg.h" 2 | 3 | using namespace std; 4 | 5 | const char* getRegisterName(DecodedRegister_t reg) { 6 | 7 | static const char* registerIndex[] = { 8 | "eflags", 9 | "edi", 10 | "esi", 11 | "ebp", 12 | "esp", 13 | "ebx", 14 | "edx", 15 | "ecx", 16 | "eax" 17 | }; 18 | 19 | if(reg < 0 || reg > DecodedRegister_t::EAX) 20 | return "???"; 21 | else 22 | return registerIndex[reg]; 23 | } 24 | 25 | DecodedRegister_t decodeVmRegisterReference(const uint8_t registerEncoded) { 26 | unsigned long reference = registerEncoded; 27 | reference = reference << 2; 28 | reference -= 0x20; 29 | reference ^= 0xFFFFFFFF; 30 | reference += 1; 31 | reference /= 4; 32 | 33 | if(reference < 0 || reference > DecodedRegister_t::EAX) 34 | return DecodedRegister_t::REG_UNKNOWN; 35 | 36 | return (DecodedRegister_t)reference; 37 | } -------------------------------------------------------------------------------- /x86devirt-LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2018 Jeremy Wildsmith 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /VmInfo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | 5 | typedef __attribute__((stdcall)) void (*decryptInstruction_t)(void* pInstrBufferOffset1, uint32_t instrLengthMinusOne, uint32_t relativeOffset); 6 | 7 | class VmInfo { 8 | 9 | std::unique_ptr jmpMap; 10 | std::unique_ptr opcodeMap; 11 | decryptInstruction_t func_decryptInstruction; 12 | uint32_t dumpBase; 13 | uint32_t imageBase; 14 | 15 | std::unique_ptr dump; 16 | size_t dumpSize; 17 | private: 18 | std::unique_ptr func_decryptInstructionBuffer; 19 | std::unique_ptr readFile(const std::string& fileName, size_t* pSize = 0); 20 | 21 | public: 22 | VmInfo(std::string& dumpSource, uint32_t dumpBase, std::string& jmpMapSource, std::string& opcodeMapSource, std::string& decryptInstructionSource, uint32_t imageBase); 23 | 24 | std::unique_ptr readMemory(uint32_t address, uint32_t size); 25 | void decryptMemory(void* pInstrBufferOffset1, uint32_t instrLengthMinusOne, uint32_t relativeOffset); 26 | 27 | uint8_t getOpcodeMapping(uint8_t opcode); 28 | uint8_t getJmpMapping(uint8_t jmp); 29 | 30 | uint32_t getBaseAddress(); 31 | uint32_t getImageBase(); 32 | 33 | uint32_t getDumpSize(); 34 | }; -------------------------------------------------------------------------------- /instructions.yara: -------------------------------------------------------------------------------- 1 | rule VmInstructions 2 | { 3 | strings: 4 | $i0 = {8A 47 03 8A 67 04 C0 E4 03 80 CC 05 66 89 07 FF 74 24 2C 8F 47 02 B8 06 00 00 00 51 C6 04 38 68 ?? ?? ?? ?? ?? 59 81 C1 B0 FA FF FF 89 4C ?? 01 C6 44 38 05 C3 59 5A 58} 5 | $i1 = {8B 57 03 52 ?? ?? ?? ?? ?? 5A 8B 92 12 FD FF FF 01 14 24 ?? ?? ?? ?? ?? 6A 00 6A 00 60 9C 9C 58 50 ?? ?? ?? ?? ?? 58 8B 80 ED FC FF FF 8D 40 50 89 20 83 00 04 58 ?? ?? ?? ?? ?? 5C 8B A4 24 D8 FC FF FF 89 44 24 28 61} 6 | $i2 = {0F B6 47 04 50 8D 57 05 52 0F B6 57 03 57 E8 52 00 00 00 50 ?? ?? ?? ?? ?? ?? ?? 5A 8B 92 2D F9 FF FF ?? ?? ?? 58 EB} 7 | $i3 = {0F B6 47 03 8B 54 24 30 C1 E0 02 83 E8 20 F7 D8 8B 14 10 83 F8 10 75 03 83 C2 08 01 54 24 2C E9} 8 | $i4 = {5A 58 8B 44 24 28 0F B7 57 03 50 ?? ?? ?? ?? ?? 58 89 ?? 31} 9 | $i5 = {8B 47 03 01 44 24 2C E9} 10 | $i6 = {8B C1 0F B6 4F 03 D3 64 24 2C ?? ?? E9} 11 | $i7 = {0F B6 57 03 FF 74 24 08 9D E8 ?? ?? ?? ?? ?? ?? 74 ?? 5A} 12 | $i8 = {8B 47 03 8B 54 24 2C 9C FF 77 FC 9D 39 02 9C 8F 47 FC 9D E9} 13 | $i9 = {5A 03 57 03 03 4F 03 58 E9} 14 | $i10 = {8B 54 24 2C C6 07 68 8B 12 89 57 01 B8 05 00 00 00 EB} 15 | $i11 = {8B 47 03 89 44 24 2C E9} 16 | $i12 = {8B 47 03 8B 54 24 2C 9C FF 77 FC 9D 89 02 9C 8F 47 FC 9D E9} 17 | $i13 = {8B 57 03 C6 07 68 ?? ?? ?? ?? ?? 58 8B 80 6E F9 FF FF ?? ?? 89 57 01 B8 05 00 00 00 EB} 18 | $i14 = {8B 54 24 2C 66 C7 07 8F 05 89 57 02 B8 06 00 00 00 EB} 19 | $i15 = {8B 47 03 8B 54 24 2C 9C FF 77 FC 9D 29 02 9C 8F 47 FC 9D E9} 20 | $i16 = {5A 58 8B 44 24 28 C9 50 ?? ?? ?? ?? ?? 58 89 A0 4D FC FF FF 83 80 4D FC FF FF 04 58 ?? ?? 89 5C 24 24 9D 61 C2 04 00} 21 | $i17 = {0F B6 47 03 8B 54 24 30 C1 E0 02 83 E8 20 F7 D8 ?? ?? ?? 83 F8 10 75 03 83 C2 08 89 54 24 2C E9} 22 | 23 | condition: 24 | $i0 at 0 or 25 | $i1 at 0 or 26 | $i2 at 0 or 27 | $i3 at 0 or 28 | $i4 at 0 or 29 | $i5 at 0 or 30 | $i6 at 0 or 31 | $i7 at 0 or 32 | $i8 at 0 or 33 | $i9 at 0 or 34 | $i10 at 0 or 35 | $i11 at 0 or 36 | $i12 at 0 or 37 | $i13 at 0 or 38 | $i14 at 0 or 39 | $i15 at 0 or 40 | $i16 at 0 or 41 | $i17 at 0 42 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # x86devirt 2 | This is a project that aims to devirtualize & disassemble applications that 3 | have been virtualized using x86virt. 4 | 5 | It is a reverse engineering project and all implementation seen here was 6 | reverse engineered out of a protected binary file / crackme hosted on 7 | tuts4you.com at the following address: https://tuts4you.com/download/1850/ 8 | 9 | x86 Virtualizer is an open source project located at the following URL: 10 | https://github.com/rwfpl/rewolf-x86-virtualizer 11 | 12 | Since this project aims to build practical experience reverse engineering 13 | applications protected using a VM, the open source project was never 14 | referenced during the development of this project and all implementation is 15 | based entirely on what has been reverse engineered out of protected executables. 16 | 17 | # Credits / Dependencies 18 | The devirtualizer on this repository was developed independently by myself (Jeremy Wildsmith) with the help of the following tools / libraries: 19 | 20 | - The virtualizer / packer that this tool aims to counter was written by ReWolf. (https://github.com/rwfpl/rewolf-x86-virtualizer) 21 | - Angr is used to symbolically execute the jump decoder and extract the jump mappings. (http://angr.io/) 22 | - Distorm3 is used throughout (https://github.com/gdabah/distorm) 23 | - udis86 is used in the disassembler engine to disassemble instructions that decode into an x86 form (https://github.com/vmt/udis86) 24 | - x64dbg Debugger is used as the debugging engine to analyze the target. (https://github.com/x64dbg/x64dbg) 25 | - VirusTotal/yara-python is used to match signatures (for instruction handlers, the VM stub etc...) (https://github.com/gdabah/distorm) 26 | - nasm assembler is used to assemble the x86 output from the x86virt-disassembler utility 27 | 28 | # Documentation / Feedback 29 | 30 | # License 31 | This project and all of its' source files are licensed under the MIT license. NASM is licensed under a seperate license, mentioned under NASM-LICENSE in the distribution. 32 | 33 | # How to Use 34 | You must have the following installed to use the x86virt unpacker/devirtualizer: 35 | 1. Python 2.7 36 | 2. x64dbg 37 | 3. x64dbgpy 38 | 4. Python dependency angr (pip install angr) 39 | 5. Python dependency distorm3 (Download the installer from Distorm 3 releases page: https://github.com/gdabah/distorm/releases ) 40 | 41 | Once the above items have been installed and correctly configured: 42 | 1. Download x86devirt release from the releases page ( https://github.com/JeremyWildsmith/x86devirt/releases ) 43 | 2. Open the packed / virtualized target in x64dbg 44 | 3. Select the Plugins -> x64dbgpy -> Open Script menu item, and browse to the downloaded x86devirt.py script 45 | 46 | # How to Build 47 | The only portion that you are required to build is the disassembler engine, which is written in C++. On Windows you can build this using cygwin and the make command. 48 | -------------------------------------------------------------------------------- /VmInfo.cpp: -------------------------------------------------------------------------------- 1 | #include "VmInfo.h" 2 | #include "VmJmp.h" 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | VmInfo::VmInfo(std::string& dumpSource, uint32_t dumpBase, std::string& jmpMapSource, std::string& opcodeMapSource, std::string& decryptInstructionSource, uint32_t imageBase) { 14 | dump = readFile(dumpSource, &this->dumpSize); 15 | 16 | size_t readSize = 0; 17 | jmpMap = readFile(jmpMapSource, &readSize); 18 | 19 | if(readSize < JMP_UNKNOWN) { 20 | stringstream err("Jump map is too small. Must be at least "); 21 | err << static_cast(JMP_UNKNOWN) << " bytes in size."; 22 | throw runtime_error(err.str()); 23 | } 24 | 25 | opcodeMap = readFile(opcodeMapSource, &readSize); 26 | 27 | if(readSize < 0x100) 28 | throw runtime_error("Opcode map is too small. Must be at least 0x100 bytes in size"); 29 | 30 | func_decryptInstructionBuffer = readFile(decryptInstructionSource); 31 | 32 | this->func_decryptInstruction = (decryptInstruction_t)(func_decryptInstructionBuffer.get()); 33 | 34 | this->dumpBase = dumpBase; 35 | this->imageBase = imageBase; 36 | } 37 | 38 | 39 | std::unique_ptr VmInfo::readFile(const string& fileName, size_t* pSize) { 40 | ifstream fin(fileName, ios::in | ios::binary ); 41 | 42 | if(!fin) 43 | throw runtime_error(string("Error opening file for reading: ") + fileName); 44 | 45 | auto startPos = fin.tellg(); 46 | fin.seekg( 0, ios::end ); 47 | size_t fsize = fin.tellg() - startPos; 48 | fin.seekg(0, std::ios::beg); 49 | 50 | unique_ptr vmMemory(new uint8_t[fsize]); 51 | if(!fin.read((char*)vmMemory.get(), fsize)) 52 | throw runtime_error("Error reading file..."); 53 | 54 | fin.close(); 55 | 56 | if(pSize) 57 | *pSize = fsize; 58 | 59 | return vmMemory; 60 | } 61 | 62 | std::unique_ptr VmInfo::readMemory(uint32_t address, uint32_t size) { 63 | if(address < this->dumpBase || address + size > this->dumpBase + this->dumpSize) 64 | throw runtime_error("Reading out of memory dump"); 65 | 66 | uint32_t relativeAddress = address - this->dumpBase; 67 | 68 | unique_ptr readBuffer(new uint8_t[size]); 69 | memcpy(readBuffer.get(), dump.get() + relativeAddress, size); 70 | 71 | return readBuffer; 72 | } 73 | 74 | void VmInfo::decryptMemory(void* pInstrBufferOffset1, uint32_t instrLengthMinusOne, uint32_t relativeOffset) { 75 | this->func_decryptInstruction(pInstrBufferOffset1, instrLengthMinusOne, relativeOffset); 76 | } 77 | 78 | uint8_t VmInfo::getOpcodeMapping(uint8_t opcode) { 79 | return this->opcodeMap[opcode]; 80 | } 81 | 82 | uint8_t VmInfo::getJmpMapping(uint8_t jmp) { 83 | if(jmp >= JMP_UNKNOWN) 84 | throw runtime_error("Jump index outside of allowed range."); 85 | 86 | return this->jmpMap[jmp]; 87 | } 88 | 89 | uint32_t VmInfo::getBaseAddress() { 90 | return this->dumpBase; 91 | } 92 | 93 | uint32_t VmInfo::getImageBase() { 94 | return this->imageBase; 95 | } 96 | 97 | uint32_t VmInfo::getDumpSize() { 98 | return this->dumpSize; 99 | } -------------------------------------------------------------------------------- /notes.txt: -------------------------------------------------------------------------------- 1 | 004131AD - Enters Virtual Machine at this address 2 | 3 | 0012FBC8 004016F5 vm_test_vmed_01.004016F5 4 | 0012FBCC 00412D5C vm_test_vmed_01.00412D5C 5 | 6 | 7 | 8 | VM Stub starts at 004112FC 9 | - First argument is where routine used to be in the user .text code section 10 | - Second argument is where the encrypted bytecode to replace the functionality is 11 | On the stack 12 | First Arg 13 | Second Arg 14 | Return Address 15 | Parameters to the function call 16 | EDX Appears to be setup as the current instruction pointer of the VM 17 | 18 | 19 | ECX Is relative Instruction pointer 20 | 21 | Appears to make a call, pushes VM IP, Instr Lenght - 1, Buffer 22 | 00411338 | E8 D8050000 | call vm_test_vmed_01.411915 | 23 | 24 | Instruction Structure 25 | [Instruction Length (xored with next byte)][Remaining of instruciton] 26 | 27 | Inside an instruction handler: 28 | [ESP+30] resolves to pointer of registers in the following order 29 | EFLAGS 30 | EDI 31 | ESI 32 | EBP 33 | ESP 34 | EBX 35 | EDX 36 | ECX 37 | EAX 38 | 39 | EAX, ECX, EDX, EBX, original ESP, EBP, ESI, and EDI 40 | ========================== 41 | OPCODE 86, Handler at 0041160E 42 | OC OPERAND1REG[BYTE] 43 | 44 | if(OPERAND1REG == ESP) 45 | VMR = OPERAND1REG + 8; 46 | else 47 | VMR = OPERAND1REG 48 | 49 | ========================== 50 | OPCODE B0, Handler at 00411632 51 | OC OPERAND1_CONSTANT_DWORD 52 | 53 | VMR += OPERAND1_CONSTANT[BYTE] 54 | 55 | 56 | ========================== 57 | OPCODE 4, Handler at 004118D8 58 | X86 instruction with VMR 59 | 60 | ========================== 61 | OPCODE 0x73 Handler at 004118B0 62 | 63 | OC OP1[byte] OP2[byte] OP3[Data...] 64 | OP3 is OP2 bytes in Length 65 | 66 | 1. Copy OP2 Bytes from OP3 over start of instruction 67 | 2. Add module base address (0x400000) to instructionat OP1 bytes in 68 | 3. copy 0x68 into instruction at offset OP2 69 | 70 | 71 | ========================== 72 | OPCODE 0xE3 Handler at 00411803 73 | OPC DWORD PTR 74 | CMP [VMR], OPC 75 | 76 | 77 | ========================== 78 | OPCODE 0xD Handler at 004114B1 79 | OPC OP_JMP_TYPE[byte], RelativeOffset [dword] 80 | 81 | X = 82 | 83 | OP1 | Value of X 84 | 0 | JGE, JNL 85 | 1 | JL, JNGE 86 | 2 | JLE, JNG 87 | 3 | JZ, JE 88 | 4 | JO 89 | 5 | JBE, JNA 90 | 6 | JNZ, JNG 91 | 7 | JNO 92 | 8 | JS 93 | 9 | JP, JPE 94 | 10 | JB, JNAE, JC 95 | 11 | JG, JNLE 96 | 12 | JA, JNBE 97 | 13 | JNP, JPO 98 | 14 | JNS 99 | 15 | JNB, JAE, JNC 100 | 101 | 102 | 103 | For 1 104 | R | SF | OF | 105 | 0 | 0 | 0 | 106 | 1 | 0 | 1 | 107 | 1 | 1 | 0 | 108 | 0 | 1 | 1 | 109 | 110 | ========================== 111 | OPCODE 0x91 Handler at 004115BD 112 | ret [word] 113 | 114 | ========================== 115 | OPCODE 0x19 Handler at 004114DD 116 | OPC OP1[DWORD] 117 | 118 | 119 | ========================== 120 | OPCODE 0x64 Handler at 004114B1 121 | OPC OP1[DWORD] 122 | 123 | ========================== 124 | OPCODE 0x9B Handler at 004114D0 125 | OPC OP1[DWORD] 126 | jmp 127 | 128 | 129 | ========================== 130 | OPCODE 0xC6 Handler at 0041186C 131 | OPC OP1[DWORD] 132 | push 0x400000 133 | 134 | ========================== 135 | Opcode 4D 136 | OPC OP1[BYTE} 137 | SHL VMR, OP1 138 | 139 | ========================== 140 | Opcode 93 handler 00411602 141 | OPC OP1[dword} 142 | mov VMR, OP1 143 | 144 | ========================== 145 | Opcode C0 handler 0041181B 146 | mov [VMR], OP1[DWORD] 147 | 148 | Opcode D6 0041163E 149 | OPC OP1[BYTE] 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /x86devirt_jmp.py: -------------------------------------------------------------------------------- 1 | import angr 2 | 3 | possibleJmps = [ 4 | { 5 | "name": "jz", 6 | "must": [0x40], 7 | "not": [0x1, 0], 8 | "priority": 1 9 | }, 10 | { 11 | "name": "jo", 12 | "must": [0x800], 13 | "not": [0], 14 | "priority": 1 15 | }, 16 | { 17 | "name": "jno", 18 | "must": [0x40], 19 | "not": [0x800], 20 | "priority": 1 21 | }, 22 | { 23 | "name": "jp", 24 | "must": [0x4], 25 | "not": [0], 26 | "priority": 1 27 | }, 28 | { 29 | "name": "jnp", 30 | "must": [0], 31 | "not": [0x4], 32 | "priority": 1 33 | }, 34 | { 35 | "name": "jb", 36 | "must": [0x1], 37 | "not": [0], 38 | "priority": 1 39 | }, 40 | { 41 | "name": "jnb", 42 | "must": [0], 43 | "not": [0x1], 44 | "priority": 1 45 | }, 46 | { 47 | "name": "ja", 48 | "must": [0], 49 | "not": [0x40, 0x1, 0x41], 50 | "priority": 2 51 | }, 52 | { 53 | "name": "jl", 54 | "must": [0x800, 0x80], 55 | "not": [0x880, 0], 56 | "priority": 2 57 | }, 58 | { 59 | "name": "jge", 60 | "must": [0x880, 0], 61 | "not": [0x800, 0x80], 62 | "priority": 2 63 | }, 64 | { 65 | "name": "jg", 66 | "must": [0x880, 0], 67 | "not": [0x8C0, 0x800, 0x80], 68 | "priority": 3 69 | }, 70 | { 71 | "name": "jnz", 72 | "must": [0x1, 0], 73 | "not": [0x40], 74 | "priority": 1 75 | }, 76 | { 77 | "name": "jbe", 78 | "must": [0x41, 0x40, 0x1], 79 | "not": [0], 80 | "priority": 2 81 | }, 82 | { 83 | "name": "jle", 84 | "must": [0x40, 0xC0, 0x840, 0x80, 0x800], 85 | "not": [0x880, 0], 86 | "priority": 3 87 | }, 88 | { 89 | "name": "js", 90 | "must": [0x80], 91 | "not": [0], 92 | "priority": 1 93 | }, 94 | { 95 | "name": "jns", 96 | "must": [0], 97 | "not": [0x80], 98 | "priority": 1 99 | }, 100 | ] 101 | 102 | controlFlowBits = 0x8C5 103 | 104 | 105 | def getJmpStatesMap(proj): 106 | statesMap = {} 107 | 108 | state = proj.factory.blank_state(addr=0x0) 109 | state.add_constraints(state.regs.edx >= 0) 110 | state.add_constraints(state.regs.edx <= 15) 111 | simgr = proj.factory.simulation_manager(state) 112 | r = simgr.explore(find=0xDA, avoid=0xDE, num_find=100) 113 | 114 | for state in r.found: 115 | val = state.solver.eval(state.regs.edx) 116 | val = val - 0xD 117 | val = val / 2 118 | 119 | if(not statesMap.has_key(val)): 120 | statesMap[val] = {"must": [], "not": []} 121 | 122 | statesMap[val]["must"].append(state) 123 | 124 | state = proj.factory.blank_state(addr=0x0) 125 | state.add_constraints(state.regs.edx >= 0) 126 | state.add_constraints(state.regs.edx <= 15) 127 | simgr = proj.factory.simulation_manager(state) 128 | r = simgr.explore(find=0xDE, avoid=0xDA, num_find=100) 129 | 130 | for state in r.found: 131 | val = state.solver.eval(state.regs.edx) 132 | val = val - 0xD 133 | val = val / 2 134 | 135 | statesMap[val]["not"].append(state) 136 | 137 | return statesMap 138 | 139 | 140 | def decodeJumps(inputFile): 141 | proj = angr.Project(inputFile, main_opts={'backend': 'blob', 'custom_arch': 'i386'}, auto_load_libs=False) 142 | 143 | stateMap = getJmpStatesMap(proj) 144 | jumpMappings = {} 145 | for key, val in stateMap.iteritems(): 146 | 147 | for jmp in possibleJmps: 148 | satisfiedMustsRemaining = len(jmp["must"]) 149 | satisfiedNotsRemaining = len(jmp["not"]) 150 | 151 | for state in val["must"]: 152 | for con in jmp["must"]: 153 | if (state.solver.satisfiable( 154 | extra_constraints=[state.regs.eax & controlFlowBits == con & controlFlowBits])): 155 | satisfiedMustsRemaining -= 1; 156 | 157 | for state in val["not"]: 158 | for con in jmp["not"]: 159 | if (state.solver.satisfiable( 160 | extra_constraints=[state.regs.eax & controlFlowBits == con & controlFlowBits])): 161 | satisfiedNotsRemaining -= 1; 162 | 163 | if(satisfiedMustsRemaining <= 0 and satisfiedNotsRemaining <= 0): 164 | if(not jumpMappings.has_key(key)): 165 | jumpMappings[key] = [] 166 | 167 | jumpMappings[key].append(jmp) 168 | 169 | finalMap = {} 170 | for key, val in jumpMappings.iteritems(): 171 | maxPriority = 0; 172 | jmpName = "NOE FOUND" 173 | for j in val: 174 | if(j["priority"] > maxPriority): 175 | maxPriority = j["priority"] 176 | jmpName = j["name"] 177 | finalMap[jmpName] = key 178 | print("Mapped " + str(key) + " to " + jmpName) 179 | 180 | proj.terminate_execution() 181 | return finalMap 182 | 183 | 184 | -------------------------------------------------------------------------------- /x86devirt.py: -------------------------------------------------------------------------------- 1 | from x64dbgpy.pluginsdk import * 2 | import x64dbgpy 3 | import os 4 | import subprocess 5 | import yara 6 | import distorm3 7 | from time import sleep 8 | import struct 9 | import sys 10 | 11 | sys.path.insert(0, os.path.dirname(os.path.realpath(__file__))) 12 | 13 | from x86devirt_jmp import decodeJumps 14 | 15 | 16 | def findLabelLocation(labels, searchLabel): 17 | for l in labels: 18 | if(l["name"] == searchLabel): 19 | return l["address"] 20 | 21 | return None 22 | 23 | def devirt(source, destination, size, maxDestSize, mappingsLocation, decryptSubroutineDumpLocation, jmpMappings): 24 | global maxInstructions 25 | 26 | outAsmName = "out_" + hex(destination) + ".asm"; 27 | 28 | x64dbg._plugin_logputs("Dumping bytecode... ") 29 | sourceBuffer = Read(source, size) 30 | 31 | file = open("buffer.bin", "wb") 32 | file.write(sourceBuffer) 33 | file.close() 34 | 35 | CREATE_NO_WINDOW = 0x08000000 36 | x64dbg._plugin_logputs("Invoking disassembler: x86virt-disasm.exe buffer.bin " + hex(destination) + " " + hex(destination) + " " + mappingsLocation + " " + decryptSubroutineDumpLocation + " " + jmpMappings) 37 | disassembledOutput = subprocess.check_output(["x86virt-disasm.exe", "buffer.bin", hex(destination), hex(destination), mappingsLocation, decryptSubroutineDumpLocation, jmpMappings], creationflags=CREATE_NO_WINDOW) 38 | 39 | #Write disassembly to file 40 | file = open(outAsmName, "wb") 41 | file.write(disassembledOutput) 42 | file.close() 43 | 44 | x64dbg._plugin_logputs("Invoking nasm: nasm.exe -f bin " + outAsmName) 45 | disassembledOutput = subprocess.check_output(["nasm.exe", "-f", "bin", outAsmName], creationflags=CREATE_NO_WINDOW) 46 | 47 | #Reading assembled bytes into buffer... 48 | file = open(os.path.splitext(outAsmName)[0], "rb") 49 | assembledCode = file.read() 50 | file.close() 51 | 52 | if(len(assembledCode) > maxDestSize): 53 | x64dbg._plugin_logputs("Error, destination of " + str(maxDestSize) + " is too small for " + str(len(assembledCode))) 54 | return 0 55 | 56 | x64dbg.Memory_Write(destination, assembledCode, len(assembledCode)) 57 | x64dbg._plugin_logputs("It fits! Decrypted into " + hex(destination)) 58 | #Get nasm to assemble it... 59 | return len(assembledCode) 60 | 61 | def findVmStubs(rule): 62 | buffer = GetMainModuleSectionList() 63 | 64 | stubs = [] 65 | for val in buffer: 66 | x64dbg._plugin_logputs("Scanning section: " + val.name) 67 | scanBuffer = Read(val.addr, val.size) 68 | matches = rule.match(data=scanBuffer) 69 | 70 | if(len(matches) <= 0): 71 | continue 72 | 73 | for m in matches: 74 | for vmStubMatch in m.strings: 75 | stubs.append(vmStubMatch[0] + val.addr) 76 | 77 | return stubs 78 | 79 | def findVmStubCrossReferences(vmStub, rule): 80 | 81 | #x64dbg has not provided an interface to their cross-reference functionality yet... 82 | #So... We're going to have to do this with signatures 83 | references = [] 84 | signatureSize = 30 85 | buffer = GetMainModuleSectionList() 86 | for val in buffer: 87 | x64dbg._plugin_logputs("Scanning section: " + val.name) 88 | scanBuffer = Read(val.addr, val.size) 89 | matches = rule.match(data=scanBuffer) 90 | 91 | for m in matches: 92 | matchedStrings = m.strings 93 | for referenceMatch in matchedStrings: 94 | instructionLocation = referenceMatch[0] + signatureSize; 95 | 96 | lastInstructionBuffer = scanBuffer[instructionLocation : instructionLocation + 10] 97 | 98 | decomposedInstructions = distorm3.Decompose(instructionLocation + val.addr, lastInstructionBuffer) 99 | vmReferenceInstruction = decomposedInstructions[0] 100 | 101 | if (vmReferenceInstruction.flowControl == "FC_UNC_BRANCH" and vmReferenceInstruction.operands[0].value == vmStub): 102 | references.append({"start": referenceMatch[0] + val.addr, "jump": instructionLocation + val.addr, "section": val}) 103 | 104 | return references 105 | 106 | 107 | def emulateAndFind(startStub, jumpAddress): 108 | oldEip = Register.EIP 109 | SetEIP(startStub) 110 | SetBreakpoint(jumpAddress) 111 | debug.Run() 112 | DeleteBreakpoint(jumpAddress) 113 | original = struct.unpack(" 0): 336 | x64dbg._plugin_logputs("Devirtualized VM, now checking for additional layers.") 337 | return tryDevirtAll(yaraRules, ignore) 338 | else: 339 | x64dbg._plugin_logputs("No more VM layers to devirtualize. Completed") 340 | 341 | 342 | return True; 343 | 344 | def dumpJumpMap(jumpDecoder): 345 | outDecoderFile = "jmpDecoder_" + hex(jumpDecoder) + ".bin" 346 | outMapFile = "jmpMap.bin" 347 | 348 | jumpDecCalc = jumpDecoder + 0x4 349 | instructions = Read(jumpDecCalc, 0x100) 350 | x64dbg._plugin_logputs("Dump jump decoder for angr simulation...") 351 | 352 | file = open(outDecoderFile, "wb") 353 | file.write(instructions); 354 | file.close() 355 | 356 | jmpMap = decodeJumps(outDecoderFile) 357 | 358 | #the order expected by x86devirt-disassembler 359 | mappings = bytearray(16) 360 | jmpOrder = ["jge", "jl", "jle", "jz", "jo", "jbe", "jnz", "jno", "js", "jp", "jb", "jg", "ja", "jnp", "jns", "jnb"] 361 | 362 | for key, val in jmpMap.iteritems(): 363 | mappings[val] = jmpOrder.index(key); 364 | 365 | file = open(outMapFile, "wb") 366 | file.write(mappings) 367 | file.close() 368 | 369 | return outMapFile 370 | 371 | def main(): 372 | Message("This python script is an x86virt devirtualizer written by Jeremy Wildsmith. It has been published on the github page https://github.com/JeremyWildsmith/x86devirt") 373 | result = MessageYesNo("This script should be run when EIP Matches the entrypoint (not OEP, just the correct entrypoint). Is EIP at OP? Press No to cancel.") 374 | 375 | if(result == False): 376 | return False 377 | 378 | Message("Now attempting to locate all present VM stubs and decrypt / devirtualize respective functions.") 379 | 380 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 381 | yaraRules = { 382 | "instructions": yara.compile(filepath='instructions.yara'), 383 | "vmStub": yara.compile(filepath='vmStub.yara'), 384 | "vmRef": yara.compile(filepath='vmRef.yara') 385 | } 386 | 387 | tryDevirtAll(yaraRules, []) 388 | 389 | Message("Application has been devirtualized, refer to log for more details...") 390 | 391 | main() 392 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include "VmInfo.h" 15 | #include "VmJmp.h" 16 | #include "VmReg.h" 17 | 18 | using namespace std; 19 | 20 | #define MAX_INSTRUCTION_LENGTH 100 21 | #define MAX_DISASSEMBLED_SIZE 100 22 | 23 | const char* vmrSub = "VMR"; 24 | 25 | enum DecodedInstructionType_t { 26 | INSTR_UNKNOWN, 27 | INSTR_RETN, 28 | INSTR_JUMP, 29 | INSTR_CONDITIONAL_JUMP, 30 | INSTR_STOP, 31 | INSTR_COMMENT, 32 | INSTR_MISC, 33 | }; 34 | 35 | struct DecodedVmInstruction { 36 | DecodedInstructionType_t type; 37 | bool isDecoded; 38 | char disassembled[MAX_DISASSEMBLED_SIZE]; 39 | uint32_t address; 40 | uint8_t bytes[MAX_INSTRUCTION_LENGTH]; 41 | uint8_t size; 42 | uint32_t controlDestination; 43 | 44 | DecodedVmInstruction() { 45 | isDecoded = false; 46 | strcpy(disassembled, ";Empty Instruction..."); 47 | address = 0; 48 | size = 0; 49 | controlDestination = 0; 50 | type = INSTR_COMMENT; 51 | } 52 | }; 53 | 54 | struct DisassembledRegion { 55 | uint32_t min; 56 | uint32_t max; 57 | }; 58 | 59 | ud_t ud_obj; 60 | 61 | 62 | vector disassemble86Instruction(const uint8_t* instrBuffer, uint32_t instrLength, const uint32_t instrAddress) { 63 | DecodedVmInstruction result; 64 | result.isDecoded = false; 65 | result.address = instrAddress; 66 | result.controlDestination = 0; 67 | result.size = instrLength; 68 | 69 | memcpy(result.bytes, instrBuffer, instrLength); 70 | 71 | ud_set_input_buffer(&ud_obj, instrBuffer, instrLength); 72 | ud_set_pc(&ud_obj, instrAddress); 73 | unsigned int ret = ud_disassemble(&ud_obj); 74 | strcpy(result.disassembled, ud_insn_asm(&ud_obj)); 75 | 76 | if(ret == 0) 77 | result.type = DecodedInstructionType_t::INSTR_UNKNOWN; 78 | else 79 | result.type = (!strncmp(result.disassembled, "ret", 3) ? DecodedInstructionType_t::INSTR_RETN : DecodedInstructionType_t::INSTR_MISC); 80 | 81 | vector resultSet; 82 | resultSet.push_back(result); 83 | 84 | return resultSet; 85 | } 86 | 87 | vector disassembleVmInstruction(const uint8_t* instrBuffer, uint32_t instrLength, uint32_t vmRelativeIp, VmInfo& vmInfo) { 88 | vector resultSet; 89 | 90 | DecodedVmInstruction baseInstr; 91 | baseInstr.isDecoded = true; 92 | baseInstr.address = vmRelativeIp + vmInfo.getBaseAddress(); 93 | baseInstr.controlDestination = 0; 94 | baseInstr.size = instrLength; 95 | baseInstr.type = DecodedInstructionType_t::INSTR_MISC; 96 | 97 | memcpy(baseInstr.bytes, instrBuffer, instrLength); 98 | 99 | switch(instrBuffer[0]) { 100 | case 0: 101 | { 102 | /* 103 | * Here we need to decode the VM instruction into a valid x86 instruction 104 | */ 105 | uint32_t x86InstructionLength = instrLength + 3; 106 | 107 | uint8_t x86Buffer[x86InstructionLength]; 108 | memcpy(x86Buffer, instrBuffer, instrLength); 109 | 110 | //89 05 04 89 111 | uint16_t operandsBuffer = *((uint16_t*)(&x86Buffer[1])); 112 | uint8_t* low = (uint8_t*)&operandsBuffer; 113 | uint8_t* high = ((uint8_t*)&operandsBuffer) + 1; 114 | 115 | *high = *high << 3; 116 | *high |= 5; 117 | 118 | //Here we copy the decoded instruction into the x86 buffer 119 | memcpy(x86Buffer, &operandsBuffer, 2); 120 | 121 | //We use a dummy pointer here (0xFFFFFFFF.) x86virt uses a pointer containing the value of VMR 122 | //So we will feed 0xFFFFFFFF into the disassembler and then replace it with VMR afterwards, since 123 | //the disassembler has no concept of the VMR 124 | uint32_t* ptr = (uint32_t*)(&x86Buffer[2]); 125 | *ptr = 0xFFFFFFFF; 126 | 127 | //here we disassemble the decided x86 instruction into the buffer 128 | char disassembledBuffer[100]; 129 | ud_set_input_buffer(&ud_obj, x86Buffer, x86InstructionLength); 130 | ud_disassemble(&ud_obj); 131 | sprintf(disassembledBuffer, "%s", ud_insn_asm(&ud_obj)); 132 | 133 | //And we replace the fake pointer 134 | const char* fakePointer = "0xffffffff"; 135 | char* ptrLocation = strstr(disassembledBuffer, fakePointer); 136 | 137 | char replacedPointerBuffer[100]; 138 | unsigned long firstLength = (ptrLocation - disassembledBuffer); 139 | 140 | memcpy(replacedPointerBuffer, disassembledBuffer, firstLength); 141 | replacedPointerBuffer[firstLength] = 0; 142 | strcat(replacedPointerBuffer, vmrSub); 143 | strcat(replacedPointerBuffer, ptrLocation + strlen(fakePointer)); 144 | strcpy(baseInstr.disassembled, replacedPointerBuffer); 145 | resultSet.push_back(baseInstr); 146 | 147 | break; 148 | } 149 | case 1: 150 | { 151 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 152 | sprintf(baseInstr.disassembled, "call 0x%08X", operand1 + vmInfo.getImageBase()); 153 | resultSet.push_back(baseInstr); 154 | break; 155 | } 156 | case 2: 157 | {/* 158 | OPCODE 0x73 Handler at 004118B0 159 | 160 | OC OP1[byte] OP2[byte] OP3[Data...] 161 | OP3 is OP2 bytes in Length 162 | 163 | 1. Copy OP2 Bytes from OP3 over start of instruction 164 | 2. Add module base address (0x400000) to instructionat OP1 bytes in 165 | 3. copy 0x68 into instruction at offset OP2 166 | 167 | */ 168 | /* 169 | * Here we need to decode the VM instruction into a valid x86 instruction 170 | */ 171 | uint32_t x86InstructionLength = instrLength + 2; 172 | 173 | uint8_t x86Buffer[x86InstructionLength]; 174 | memcpy(x86Buffer, instrBuffer, instrLength); 175 | 176 | //Get operands needed for calculation 177 | const uint8_t op1 = x86Buffer[1]; 178 | const uint8_t op2 = x86Buffer[2]; 179 | 180 | uint8_t upperBuffer[op2]; 181 | memcpy(upperBuffer, &x86Buffer[3], op2); 182 | memcpy(x86Buffer, upperBuffer, op2); 183 | *((unsigned long*)(&x86Buffer[op1])) += 0x400000; 184 | 185 | ud_set_input_buffer(&ud_obj, x86Buffer, x86InstructionLength); 186 | ud_disassemble(&ud_obj); 187 | strcpy(baseInstr.disassembled, ud_insn_asm(&ud_obj)); 188 | resultSet.push_back(baseInstr); 189 | break; 190 | } 191 | case 3: 192 | { 193 | DecodedRegister_t operandA = decodeVmRegisterReference(instrBuffer[1]); 194 | sprintf(baseInstr.disassembled, "add %s, %s", vmrSub, getRegisterName(operandA)); 195 | resultSet.push_back(baseInstr); 196 | 197 | break; 198 | } 199 | case 4: 200 | { 201 | uint16_t operand1 = *((uint16_t*)(&instrBuffer[1])); 202 | sprintf(baseInstr.disassembled, "ret 0x%X", (uint32_t)operand1); 203 | baseInstr.type = DecodedInstructionType_t::INSTR_RETN; 204 | resultSet.push_back(baseInstr); 205 | break; 206 | } 207 | case 5: 208 | { 209 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 210 | sprintf(baseInstr.disassembled, "add %s, 0x%X", vmrSub, operand1); 211 | resultSet.push_back(baseInstr); 212 | break; 213 | } 214 | case 6: 215 | { 216 | uint8_t operand1 = *((uint8_t*)(&instrBuffer[1])); 217 | sprintf(baseInstr.disassembled, "shl %s, 0x%X", vmrSub, operand1); 218 | resultSet.push_back(baseInstr); 219 | break; 220 | } 221 | case 7: 222 | { 223 | uint8_t operand1 = vmInfo.getJmpMapping(instrBuffer[1]); 224 | uint32_t operand2 = *((uint32_t*)(&instrBuffer[2])); 225 | 226 | sprintf(baseInstr.disassembled, "%s lbl_0x%08X", getJumpName(decodeVmJump(operand1)), vmRelativeIp + operand2 + vmInfo.getBaseAddress()); 227 | baseInstr.type = DecodedInstructionType_t::INSTR_CONDITIONAL_JUMP; 228 | baseInstr.controlDestination = vmRelativeIp + operand2 + vmInfo.getBaseAddress(); 229 | resultSet.push_back(baseInstr); 230 | break; 231 | } 232 | case 8: 233 | { 234 | const uint32_t operand = *((uint32_t*)(&instrBuffer[1])); 235 | sprintf(baseInstr.disassembled, "cmp dword [%s], 0x%X", vmrSub, operand); 236 | resultSet.push_back(baseInstr); 237 | break; 238 | } 239 | case 9: 240 | { 241 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 242 | 243 | sprintf(baseInstr.disassembled, "jmp lbl_0x%08X", vmRelativeIp + operand1 + vmInfo.getBaseAddress()); 244 | baseInstr.controlDestination = vmRelativeIp + operand1 + vmInfo.getBaseAddress(); 245 | baseInstr.type = DecodedInstructionType_t::INSTR_JUMP; 246 | resultSet.push_back(baseInstr); 247 | break; 248 | } 249 | case 10: 250 | { 251 | sprintf(baseInstr.disassembled, "push dword [%s]", vmrSub); 252 | resultSet.push_back(baseInstr); 253 | break; 254 | } 255 | case 11: 256 | { 257 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 258 | sprintf(baseInstr.disassembled, "mov %s, 0x%X", vmrSub, operand1); 259 | resultSet.push_back(baseInstr); 260 | break; 261 | } 262 | case 12: 263 | { 264 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 265 | sprintf(baseInstr.disassembled, "mov dword [%s], 0x%X", vmrSub, operand1); 266 | resultSet.push_back(baseInstr); 267 | break; 268 | } 269 | case 13: 270 | { 271 | uint32_t operand1 = *((uint32_t*)(&instrBuffer[1])); 272 | sprintf(baseInstr.disassembled, "push 0x%08X", operand1 + vmInfo.getImageBase()); 273 | resultSet.push_back(baseInstr); 274 | break; 275 | } 276 | case 14: 277 | { 278 | sprintf(baseInstr.disassembled, "pop dword [VMR]"); 279 | resultSet.push_back(baseInstr); 280 | break; 281 | } 282 | case 15: 283 | { 284 | const uint32_t operand = *((uint32_t*)(&instrBuffer[1])); 285 | sprintf(baseInstr.disassembled, "sub dword [%s], 0x%X", vmrSub, operand); 286 | resultSet.push_back(baseInstr); 287 | break; 288 | } 289 | case 16: 290 | { 291 | sprintf(baseInstr.disassembled, "STOP"); 292 | baseInstr.type = DecodedInstructionType_t::INSTR_STOP; 293 | resultSet.push_back(baseInstr); 294 | break; 295 | } 296 | case 17: 297 | { 298 | DecodedRegister_t operandA = decodeVmRegisterReference(instrBuffer[1]); 299 | sprintf(baseInstr.disassembled, "mov %s, %s", vmrSub, getRegisterName(operandA)); 300 | resultSet.push_back(baseInstr); 301 | 302 | break; 303 | } 304 | default: 305 | { 306 | baseInstr.type = DecodedInstructionType_t::INSTR_UNKNOWN; 307 | resultSet.push_back(baseInstr); 308 | } 309 | } 310 | 311 | return resultSet; 312 | } 313 | 314 | uint32_t getInstructionLength(uint32_t address, VmInfo& vmInfo) { 315 | auto readBuffer = vmInfo.readMemory(address, 2); 316 | return readBuffer[0] ^ readBuffer[1]; 317 | } 318 | 319 | unsigned int decodeVmInstruction(vector& decodedBuffer, uint32_t vmRelativeIp, VmInfo& vmInfo) { 320 | 321 | uint32_t instrLength = getInstructionLength(vmInfo.getBaseAddress() + vmRelativeIp, vmInfo); 322 | 323 | //Read with offset 1, to trim off instr length byte 324 | unique_ptr instrBuffer = vmInfo.readMemory(vmInfo.getBaseAddress() + vmRelativeIp + 1, instrLength); 325 | vmInfo.decryptMemory(instrBuffer.get(), instrLength, vmRelativeIp); 326 | 327 | DecodedInstructionType_t instrType = DecodedInstructionType_t::INSTR_UNKNOWN; 328 | 329 | if(*reinterpret_cast(instrBuffer.get()) == 0xFFFF) { 330 | //Offset by 2 which removes the 0xFFFF part of the instruction. 331 | 332 | //Map instructions correctly 333 | instrBuffer[2] = vmInfo.getOpcodeMapping(instrBuffer[2]); 334 | decodedBuffer = disassembleVmInstruction(instrBuffer.get() + 2, instrLength - 2, vmRelativeIp, vmInfo); 335 | } else { 336 | decodedBuffer = disassemble86Instruction(instrBuffer.get(), instrLength, vmInfo.getBaseAddress() + vmRelativeIp); 337 | } 338 | 339 | return instrLength + 1; 340 | } 341 | 342 | void formatInstructionInfo(const DecodedVmInstruction& decodedInstruction) { 343 | bool isComment = decodedInstruction.type == DecodedInstructionType_t::INSTR_COMMENT; 344 | 345 | printf("%s_0x%08X: ", isComment ? ";cm" : "lbl", decodedInstruction.address); 346 | 347 | if(decodedInstruction.type == DecodedInstructionType_t::INSTR_UNKNOWN) 348 | printf("%-30s ;", "Failed to disassemble"); 349 | else 350 | printf("%-30s ;", decodedInstruction.disassembled); 351 | 352 | for(unsigned int i = 0; i < decodedInstruction.size; i++) { 353 | printf("%02X ", decodedInstruction.bytes[i] & 0xFF); 354 | } 355 | 356 | printf("\n"); 357 | } 358 | 359 | bool isInRegions(const std::vector& regions, uint32_t ip, uint32_t max = 0xFFFFFFFF) { 360 | for(auto& region : regions) { 361 | if(ip >= region.min && ip < region.max && !(region.max == max && region.min == ip)) 362 | return true; 363 | } 364 | 365 | return false; 366 | } 367 | 368 | vector getDisassembleRegions(const uint32_t initialIp, VmInfo& vmInfo) { 369 | vector disassembledStubs; 370 | queue stubsToDisassemble; 371 | stubsToDisassemble.push(initialIp); 372 | 373 | while(!stubsToDisassemble.empty()) { 374 | uint32_t vmRelativeIp = stubsToDisassemble.front() - vmInfo.getBaseAddress(); 375 | stubsToDisassemble.pop(); 376 | 377 | if(isInRegions(disassembledStubs, vmRelativeIp)) 378 | continue; 379 | 380 | DisassembledRegion current; 381 | current.min = vmRelativeIp; 382 | 383 | bool continueDisassembling = true; 384 | while(vmRelativeIp <= vmInfo.getDumpSize() && continueDisassembling) { 385 | 386 | vector instrSet; 387 | 388 | vmRelativeIp += decodeVmInstruction(instrSet, vmRelativeIp, vmInfo); 389 | 390 | for(auto& instr : instrSet) { 391 | if(instr.type == DecodedInstructionType_t::INSTR_UNKNOWN) { 392 | stringstream msg; 393 | msg << "Unknown instruction encountered: 0x" << hex << ((unsigned long)instr.bytes[0]); 394 | throw runtime_error(msg.str()); 395 | } 396 | 397 | if(instr.type == DecodedInstructionType_t::INSTR_JUMP || instr.type == DecodedInstructionType_t::INSTR_CONDITIONAL_JUMP) 398 | stubsToDisassemble.push(instr.controlDestination); 399 | 400 | if(instr.type == DecodedInstructionType_t::INSTR_STOP || instr.type == DecodedInstructionType_t::INSTR_RETN || instr.type == DecodedInstructionType_t::INSTR_JUMP) 401 | continueDisassembling = false; 402 | } 403 | } 404 | 405 | current.max = vmRelativeIp; 406 | disassembledStubs.push_back(current); 407 | } 408 | 409 | //Now we must resolve all overlapping stubs 410 | for(auto it = disassembledStubs.begin(); it != disassembledStubs.end();) { 411 | if(isInRegions(disassembledStubs, it->min, it->max)) 412 | disassembledStubs.erase(it++); 413 | else 414 | it++; 415 | } 416 | 417 | return disassembledStubs; 418 | } 419 | 420 | DecodedVmInstruction eliminateVmrFromSubset(vector::iterator start, vector::iterator end) { 421 | bool baseReg2Used = false; 422 | bool baseReg1Used = false; 423 | char baseReg1Buffer[10]; 424 | char baseReg2Buffer[10]; 425 | uint32_t multiplierReg1 = 1; 426 | uint32_t multiplierReg2 = 1; 427 | 428 | uint32_t offset = 0; 429 | 430 | for(auto it = start; it != end; it++) { 431 | char* dereferencePointer = 0; 432 | 433 | if(!strncmp(it->disassembled, "mov VMR, 0x", 11)) { 434 | offset = strtoul(&it->disassembled[11], NULL, 16); 435 | baseReg1Used = false; 436 | baseReg2Used = false; 437 | multiplierReg1 = multiplierReg2 = 1; 438 | } else if(!strncmp(it->disassembled, "mov VMR, ", 9)) { 439 | baseReg1Used = true; 440 | baseReg2Used = false; 441 | multiplierReg1 = multiplierReg2 = 1; 442 | offset = 0; 443 | strcpy(baseReg1Buffer, &it->disassembled[9]); 444 | } else if(!strncmp(it->disassembled, "add VMR, 0x", 11)) { 445 | offset += strtoul(&it->disassembled[11], NULL, 16); 446 | } else if(!strncmp(it->disassembled, "add VMR, ", 9)) { 447 | if(baseReg1Used) { 448 | baseReg2Used = true; 449 | strcpy(baseReg2Buffer, &it->disassembled[9]); 450 | } else { 451 | baseReg1Used = true; 452 | strcpy(baseReg1Buffer, &it->disassembled[9]); 453 | } 454 | } else if(!strncmp(it->disassembled, "shl VMR, 0x", 11)) { 455 | uint32_t shift = strtoul(&it->disassembled[11], NULL, 16); 456 | offset = offset << shift; 457 | if(baseReg1Used) { 458 | multiplierReg1 = multiplierReg1 << shift; 459 | } 460 | if(baseReg2Used) { 461 | multiplierReg2 = multiplierReg2 << shift; 462 | } 463 | } 464 | } 465 | 466 | auto lastInstruction = end - 1; 467 | string reconstructInstr(lastInstruction->disassembled); 468 | stringstream reconstructed; 469 | 470 | reconstructed << "["; 471 | 472 | if(baseReg1Used) { 473 | if(multiplierReg1 != 1) 474 | reconstructed << "0x" << hex << multiplierReg1 << " * "; 475 | 476 | reconstructed << baseReg1Buffer; 477 | } 478 | 479 | if(baseReg2Used) { 480 | reconstructed << " + "; 481 | if(multiplierReg2 != 1) 482 | reconstructed << "0x" << hex << multiplierReg2 << " * "; 483 | 484 | reconstructed << baseReg2Buffer; 485 | } 486 | 487 | if(offset != 0 || !(baseReg1Used)) 488 | reconstructed << " + 0x" << hex << offset; 489 | 490 | reconstructed << "]"; 491 | 492 | reconstructInstr.replace(reconstructInstr.find("[VMR]"), 5, reconstructed.str()); 493 | 494 | DecodedVmInstruction result; 495 | 496 | result.isDecoded = true; 497 | result.address = start->address; 498 | result.size = 0; 499 | result.type = lastInstruction->type; 500 | strcpy(result.disassembled, reconstructInstr.c_str()); 501 | 502 | return result; 503 | } 504 | 505 | vector eliminateVmr(vector& instructions) { 506 | auto itVmrStart = instructions.end(); 507 | vector compactInstructionlist; 508 | 509 | for(auto it = instructions.begin(); it != instructions.end(); it++) { 510 | if(!strncmp("mov VMR,", it->disassembled, 8) && itVmrStart == instructions.end()) { 511 | itVmrStart = it; 512 | }else if(itVmrStart != instructions.end() && strstr(it->disassembled, "[VMR]") != 0) 513 | { 514 | for(auto listing = itVmrStart; listing != it+1; listing++) { 515 | DecodedVmInstruction comment = *listing; 516 | comment.type = INSTR_COMMENT; 517 | compactInstructionlist.push_back(comment); 518 | } 519 | compactInstructionlist.push_back(eliminateVmrFromSubset(itVmrStart, it + 1)); 520 | itVmrStart = instructions.end(); 521 | } else if (itVmrStart == instructions.end()) { 522 | compactInstructionlist.push_back(*it); 523 | } 524 | } 525 | 526 | return compactInstructionlist; 527 | } 528 | 529 | bool sortRegionsAscending (DisassembledRegion& a, DisassembledRegion& b) { 530 | return a.min < b.min; 531 | } 532 | 533 | bool disassembleStub(const uint32_t initialIp, VmInfo& vmInfo) { 534 | 535 | vector stubs = getDisassembleRegions(initialIp, vmInfo); 536 | 537 | //Needs to be sorted, otherwise (due to jump sizes) may not fit into original location 538 | //Sorting should match it with the way it was implemented. 539 | sort(stubs.begin(), stubs.end(), sortRegionsAscending); 540 | 541 | if(stubs.empty()) { 542 | printf(";No stubs detected to disassemble.. %d", stubs.size()); 543 | return true; 544 | } 545 | 546 | vector instructions; 547 | for(auto& stub : stubs) { 548 | 549 | bool continueDisassembling = true; 550 | DecodedVmInstruction blockMarker; 551 | blockMarker.type = DecodedInstructionType_t::INSTR_COMMENT; 552 | strcpy(blockMarker.disassembled, "BLOCK"); 553 | instructions.push_back(blockMarker); 554 | for(uint32_t vmRelativeIp = stub.min; continueDisassembling && vmRelativeIp < stub.max;) { 555 | 556 | vector instrSet; 557 | 558 | vmRelativeIp += decodeVmInstruction(instrSet, vmRelativeIp, vmInfo); 559 | 560 | for(auto& instr : instrSet) { 561 | if(instr.type == DecodedInstructionType_t::INSTR_UNKNOWN) 562 | throw runtime_error("Unknown instruction encountered"); 563 | 564 | if(instr.type == DecodedInstructionType_t::INSTR_STOP) { 565 | continueDisassembling = false; 566 | break; 567 | } 568 | 569 | instructions.push_back(instr); 570 | } 571 | 572 | } 573 | 574 | instructions.push_back(blockMarker); 575 | } 576 | 577 | for(auto& i : eliminateVmr(instructions)) { 578 | formatInstructionInfo(i); 579 | } 580 | 581 | return true; 582 | } 583 | 584 | int main(int argc, char** args) { 585 | const uint32_t baseAddress = 0x400000; 586 | 587 | if(argc < 7) { 588 | printf("Arguments: \n"); 589 | printf("Incorrect number of arguments...\n"); 590 | return -1; 591 | } 592 | 593 | //Initialize udis devirtualizer 594 | ud_init(&ud_obj); 595 | ud_set_mode(&ud_obj, 32); 596 | ud_set_syntax(&ud_obj, UD_SYN_INTEL); 597 | 598 | uint32_t vmInitialIp = strtoul(args[3], NULL, 16); 599 | 600 | string dumpSource = args[1]; 601 | string jmpMapSource = args[6]; 602 | string opcodeMapSource = args[4]; 603 | string decryptSource = args[5]; 604 | 605 | VmInfo vmInfo(dumpSource, strtoul(args[2], NULL, 16), jmpMapSource, opcodeMapSource, decryptSource, 0x400000); 606 | 607 | printf(";X86devirt Disassembler, by Jeremy Wildsmith\n"); 608 | printf(";Assumes image base is at 0x%08X\n\n", baseAddress); 609 | printf(";Attempting to decode instructions starting at 0x%08X\n\n", vmInitialIp); 610 | printf("ORG 0x%08X\n", vmInfo.getBaseAddress()); 611 | printf("[BITS 32]\n"); 612 | 613 | try { 614 | if(!disassembleStub(vmInitialIp, vmInfo)) 615 | return -1; 616 | } catch (runtime_error& e) { 617 | printf("Error occured: %s", e.what()); 618 | } 619 | 620 | return 0; 621 | } --------------------------------------------------------------------------------