├── .gitignore ├── README.md ├── file_analysis ├── README.md ├── byte_frequency.py └── multi_diff.py ├── file_format_hacks ├── README.md ├── zeroSection.py └── zeroSection2.py ├── filesystem_analysis └── README.md ├── idapython ├── ARMdetect.py ├── CCCheck.py ├── Deobfuscate.py ├── FindMain.py ├── JccFlip.py ├── LocFuncAnalyzer.py ├── MalCheck.py ├── NopSled.py ├── README.md ├── RdtscCheck.py └── intCheck.py └── shellcode_analysis ├── README.md ├── sample_shc_32 ├── sample_shc_64 └── shc2exe.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #

Reverse Engineering Playground

2 | 3 | Scripts I made to aid me in everyday reversing or just for fun. Actually...mostly just for fun :) 4 | 5 | * [IDAPython](https://github.com/yellowbyte/idapython-scripts/tree/master/idapython/README.md) 6 | * [File Analysis](https://github.com/yellowbyte/idapython-scripts/tree/master/file_analysis/README.md) 7 | * [Shellcode Analysis](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/shellcode_analysis/README.md) 8 | * [File Format Hacks](https://github.com/yellowbyte/idapython-scripts/tree/master/file_format_hacks/README.md) 9 | -------------------------------------------------------------------------------- /file_analysis/README.md: -------------------------------------------------------------------------------- 1 | ### [FrequencyAnalysis](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/file_analysis/byte_frequency.py) 2 | Displays a file's byte distribution in a histogram plot. It will also display the top (num=?) occurring bytes. 3 | 4 | ### [MultiDiff](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/file_analysis/multi_diff.py) 5 | Binary diff together as many files as you want to identify bytes that are the same across all the files. 6 | -------------------------------------------------------------------------------- /file_analysis/byte_frequency.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | from __future__ import division 4 | 5 | import matplotlib.pyplot as plt 6 | 7 | class FrequencyAnalysis: 8 | """ 9 | Frequency analysis of bytes in a file 10 | """ 11 | 12 | def __init__(self, _bytes): 13 | self.bytes = bytearray(_bytes) 14 | 15 | @property 16 | def distribution(self): 17 | analysis = [0] * 256 18 | for b in self.bytes: 19 | analysis[b] += 1 20 | return analysis 21 | 22 | def display_graph(self, exclude=list()): 23 | desired_distribution = self.distribution 24 | # Normalize distribution 25 | desired_distribution = map(lambda x:(x/max(desired_distribution))*100, 26 | desired_distribution) 27 | for b in exclude: 28 | desired_distribution[b] = 0 29 | plt.hist(range(256), 30 | bins=range(256), 31 | weights=desired_distribution, 32 | edgecolor='black') 33 | plt.show() 34 | 35 | def display_text(self, num=3): 36 | print('Top '+str(num)+' Highest Frequency Bytes Are:') 37 | 38 | distribution = self.distribution 39 | tops = sorted(list(enumerate(distribution)), 40 | key=lambda x:x[1], 41 | reverse=True)[:num] 42 | for b in tops: 43 | print('byte 0x{:x}: {} occurrences'.format(b[0],b[1])) 44 | print('') 45 | print('Total file size: 0x{0:x} ({0}) bytes'.format(len(self.bytes))) 46 | 47 | 48 | if __name__ == '__main__': 49 | filepath = raw_input('Path To File: ') 50 | with open(filepath, 'rb') as f: 51 | _bin = f.read() 52 | file_analysis = FrequencyAnalysis(_bin) 53 | file_analysis.display_text(num=5) 54 | file_analysis.display_graph() 55 | -------------------------------------------------------------------------------- /file_analysis/multi_diff.py: -------------------------------------------------------------------------------- 1 | """ 2 | Use it as commandline tool or import it as library 3 | """ 4 | import sys 5 | 6 | 7 | class MultiDiff(object): 8 | """ 9 | Class to do multiple binary files diff-ing 10 | """ 11 | 12 | def __init__(self, binaries_data, desired_amount_same=1): 13 | self.desired_amount_same = desired_amount_same 14 | self.binaries_data = binaries_data 15 | self.__mutual_bytes = None 16 | self.__current_size = None 17 | self.__data = None 18 | 19 | @property 20 | def mutual_bytes(self): 21 | """ 22 | Get mutual bytes across all the files 23 | """ 24 | if (self.__mutual_bytes is not None) and \ 25 | (self.__data == self.binaries_data): 26 | # Previously have already calculated 27 | return self.__mutual_bytes 28 | self.__mutual_bytes = list() 29 | self.__current_size = 0 30 | self.__data = self.binaries_data 31 | 32 | read_from = self.__data[0] 33 | for i, current_index_bytes in enumerate(zip(*self.__data)): 34 | one_sample = current_index_bytes[0] 35 | # XOR-ing 2 same bytes will equal 0 36 | not_all_same = any(map(lambda x: x^one_sample, 37 | current_index_bytes[1:])) 38 | if not not_all_same: # mutual byte detected 39 | self.__current_size += 1 40 | else: 41 | if self.__current_size >= self.desired_amount_same: 42 | # desired amount of mutual bytes detected. Take note 43 | start, end = (i-self.__current_size, i) 44 | self.__mutual_bytes.append((start, read_from[start:end])) 45 | self.__current_size = 0 46 | return self.__mutual_bytes 47 | 48 | def add_binary(self, binary_data): 49 | """ 50 | Add another binary to diff against 51 | """ 52 | self.binaries_data += binary_data 53 | 54 | def pretty_print(self): 55 | """ 56 | Output to stdout the mutual bytes in a human-friendly format 57 | """ 58 | for offset, matched_bytes in self.mutual_bytes: 59 | _bytes = ''.join(format(x, '02x') for x in matched_bytes) 60 | ascii_representation = ''.join(map( 61 | lambda x: chr(x) if 0x27 <= x <= 0x7e else '.', matched_bytes 62 | )) 63 | print('0x{:x}:\t{}\t{}'.format(offset, _bytes, ascii_representation)) 64 | 65 | 66 | if __name__ == '__main__': 67 | if len(sys.argv) <= 2: 68 | print 'Need to specify at least 2 files' 69 | files = sys.argv[1:] 70 | files_contents = list() 71 | for f in files: 72 | with open(f, 'rb') as current_file: 73 | files_contents.append(bytearray(current_file.read())) 74 | bins_analysis = MultiDiff(files_contents) 75 | bins_analysis.pretty_print() 76 | -------------------------------------------------------------------------------- /file_format_hacks/README.md: -------------------------------------------------------------------------------- 1 | ### [zeroSection](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/file_format_hacks/zeroSection.py) 2 | Zeros out information about section headers from the ELF Header. Simply zero-ing out that information from the ELF Header renders many reversing tools (readelf, radare, objdump) unable to display information regarding a binary's sections, even if the section headers still exist. Experience reverser can still manually identify section header table without relying on the ELF Header, so the best way to hid information on sections is to zero out the section headers instead. This will not affect program execution since only program headers are required for execution. 3 | 4 | ### [zeroSection2](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/file_format_hacks/zeroSection2.py) 5 | Not only zeros out section headers information from the ELF Header, it also zero out the whole section headers table. Even though simply zero-ing out section headers information from ELF Header will already renders many reversing tools unable to identify sections, experienced reverser will still be able to manually identify where the section headers table is located. This script will make it impossible to identify sections at all. 6 | -------------------------------------------------------------------------------- /file_format_hacks/zeroSection.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | 5 | def main(): 6 | print "---------- zeroSection ----------" 7 | 8 | #open the file as stream of binary in read and write mode 9 | filePath = sys.argv[1] 10 | theFile = open(filePath, "r+b") 11 | 12 | #zero out e_shoff, pointer to start of section header table 13 | theFile.seek(0x20) 14 | theFile.write(b'\x00\x00\x00\x00') 15 | 16 | #zero out e_shentsize, e_shnum, e_shstrndx 17 | rest = [0x2e, 0x30, 0x32] #e_shentsize (size of a section header table entry), e_shnum (number of entries in the section header table), e_shstrndx (index of section header table entry) 18 | for offset in rest: 19 | theFile.seek(offset) 20 | theFile.write(b'\x00\x00') 21 | 22 | theFile.close() 23 | 24 | main() 25 | -------------------------------------------------------------------------------- /file_format_hacks/zeroSection2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import struct 5 | 6 | def main(): 7 | print "---------- zeroSection2 ----------" 8 | 9 | #open the file as stream of binary in read and write 10 | filePath = sys.argv[1] 11 | theFile = open(filePath, "r+b") 12 | contents = theFile.read() 13 | 14 | #figure out size of section headers table 15 | start = struct.unpack("i", contents[32:36])[0] 16 | entrySize = struct.unpack("i", contents[46:48]+b'\x00\x00')[0] 17 | entryNum = struct.unpack("i", contents[48:50]+b'\x00\x00')[0] 18 | totalSize = entrySize * entryNum 19 | 20 | #zero out the whole section headers table 21 | theFile.seek(start) 22 | theFile.write(b'\x00' * totalSize) 23 | 24 | #zero out section headers info. from ELF Header 25 | offsets = [0x20, 0x2e, 0x30, 0x32] #e_shoff, e_shentsize, e_shnum, e_shstrndx 26 | bytes = 4 27 | for i in range(len(offsets)): 28 | if i == 1: #from index 1 and on, btyes to update is 2 29 | bytes = 2 30 | theFile.seek(offsets[i]) 31 | theFile.write(b'\x00' * bytes) 32 | 33 | 34 | 35 | main() 36 | -------------------------------------------------------------------------------- /filesystem_analysis/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yellowbyte/reverse-engineering-playground/805225b178c0243902ac4f3e2ce9e87fc7993cd6/filesystem_analysis/README.md -------------------------------------------------------------------------------- /idapython/ARMdetect.py: -------------------------------------------------------------------------------- 1 | from idc import * 2 | 3 | 4 | def initialPattern(instr_addr, structures): 5 | ''' 6 | find the initial pattern that signals a possible input pin being used, output pin being used, or I2C communication 7 | ''' 8 | instruction = GetDisasm(instr_addr) 9 | for struct in structures.keys(): 10 | if(structures[struct] in instruction and ("LDR" in instruction or "MOV" in instruction)): 11 | return instruction.split()[1][:-1]+" "+hex(instr_addr)+" "+struct #1: reg, 2: addr, 3: struct 12 | return "" 13 | 14 | def PatternEnd(instruction): 15 | ''' 16 | initial pattern already found. Figure out if current instruction is where the pattern ends. And if it is, print the type of pattern 17 | ''' 18 | global identify 19 | global reg 20 | global address 21 | global struct_type 22 | 23 | if(reg in instruction and ("STR" not in instruction and "LDR" not in instruction)): #Not the immediate value for the struct (pattern matching fails) 24 | identify = False 25 | result = initialPattern(addr, structures) 26 | if(len(result) != 0): 27 | reg, address, struct_type = result.split() 28 | elif(reg in instruction and "STR" in instruction): #pattern confirms (output) 29 | identify = False 30 | print(" struct:"+struct_type+" address: "+address+" (writing to register)") 31 | elif (reg in instruction and "LDR" in instruction): #pattern confirms (input) 32 | identify = False 33 | print(" struct:"+struct_type+" address: "+address+" (reading from register)") 34 | 35 | def Analyze(): 36 | ''' 37 | main routine. NOTE: makes sure to update the structures dictionary on line 39 to the chip that the binary is compiled for 38 | ''' 39 | print("------------------------------BEGIN--------------------------------") 40 | 41 | #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!IMPORTANT!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 42 | # The structures dictionary needs to be updated to the peripheral boundary addresses for the chip that the binary is compiled for 43 | # On the STM32F0 Discovery Board the chip is a STM32F0. The peripheral boundary addresses can be find on page 45 of the 44 | # Reference Manual(http://datasheet.octopart.com/STM32F072CBT6-STMicroelectronics-datasheet-21772404.pdf). 45 | # Also note that on the structures below, I didn't include all the structure addresses, I only include the one that I am interested in. 46 | structures = {"RCC":"0x40021000", "GPIOC":"0x48000800", "GPIOA":"0x48000000"} 47 | 48 | functions = {} 49 | begin = ScreenEA() 50 | identify = False #begin identifying form (pattern matching) 51 | valid = {} 52 | 53 | global identify 54 | global reg 55 | global address 56 | global struct_type 57 | 58 | for funcAddr in Functions(SegStart(begin), SegEnd(begin)): 59 | functions[GetFunctionName(funcAddr)] = funcAddr #a function dictionary is created to support pattern matching in another function 60 | 61 | for funcAddr in Functions(SegStart(begin), SegEnd(begin)): 62 | print("Inside Function: "+GetFunctionName(funcAddr)+" ("+hex(funcAddr)+")") 63 | addresses = list(FuncItems(funcAddr)) #return list of instructions in the function 64 | for addr in addresses: 65 | instruction = GetDisasm(addr) 66 | if("STR" in instruction and "#0x28" in instruction): 67 | print("~~~~~~~~~~ Possible I2C communication ~~~~~~~~~~") 68 | if(identify): #already matched the initial line for the pattern. Now checks if the pattern exists 69 | if("sub" in instruction): #pattern matching continues onto another function 70 | func = instruction.split()[1] #function name 71 | func_addresses = list(FuncItems(functions[func])) 72 | for fe in func_addresses: 73 | PatternEnd(GetDisasm(fe)) 74 | PatternEnd(instruction) 75 | else: #start of pattern matching hasn't been identified yet 76 | result = initialPattern(addr, structures) 77 | if(len(result) != 0): 78 | reg, address, struct_type = result.split() 79 | identify = True 80 | 81 | 82 | Analyze() 83 | -------------------------------------------------------------------------------- /idapython/CCCheck.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | def main(): 6 | print '--------------- CCCheck ---------------' 7 | 8 | current = MinEA() 9 | end = MaxEA() 10 | found = False 11 | 12 | while current < end: 13 | current = FindBinary(current, SEARCH_DOWN|SEARCH_NEXT, 'CC') 14 | if current != BADADDR and SegName(current) == '.text': 15 | print hex(current), GetDisasm(current) 16 | found = True 17 | 18 | if found: 19 | print '*** No 0xCC byte found' 20 | 21 | 22 | main() 23 | -------------------------------------------------------------------------------- /idapython/Deobfuscate.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | def main(): 6 | print '--------------- Deobfuscate ---------------' 7 | 8 | mask = [0xc1, 0x8f, 0x04, 0x08] 9 | ea = ScreenEA() 10 | maxAddr = MaxEA() 11 | i = 0 12 | 13 | while ea < maxAddr: 14 | PatchByte(ea, Byte(ea) ^ mask[i]) 15 | i = (i+1) % 4 16 | ea = NextAddr(ea) 17 | 18 | 19 | main() 20 | -------------------------------------------------------------------------------- /idapython/FindMain.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | def main(): 6 | print '--------------- FindMain ---------------' 7 | 8 | for func in idautils.Functions(): 9 | if GetFunctionName(func).lstrip('_') != 'start': 10 | continue 11 | for addr in idautils.FuncItems(func): 12 | if GetMnem(addr) == 'call': 13 | MakeName(LocByName(GetOpnd(PrevHead(addr, func), 0).split()[1]), 'main') 14 | Jump(LocByName('main')) 15 | break 16 | 17 | 18 | main() 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /idapython/JccFlip.py: -------------------------------------------------------------------------------- 1 | def setup(): 2 | # There are many equivalences in the jcc instructions. Because of that my list is a lot shorter than the actual list 3 | # showing all available jcc instructions. For example, JLE and JNG are equivalent, thus their representation in hex, 4 | # 7e, are equal. Also, my list doesn't include the more obscure jcc instructions such as those that jump depending 5 | # on the parity, sign, and overflow flags. 6 | jumpMirror = { 7 | 0x77:0x76, # JA:JBE 8 | 0x75:0x74, # JNZ:JZ 9 | 0x73:0x72, # JAE:JB 10 | 0x71:0x70, # JNO:JO 11 | 0x7f:0x7e, # JG:JLE 12 | 0x7d:0x7c, # JGE:JL 13 | 0x7b:0x7a, # JNP:JP 14 | 0x79:0x78, # JNS:JS 15 | } 16 | 17 | # Add in the pairs in reverse order. For example, the jumpMirror will match JA to JB but not JB to JA. This fixes that 18 | for key in jumpMirror.keys(): 19 | jumpMirror[jumpMirror[key]] = key 20 | 21 | 22 | def main(): 23 | print '---------- ConditionalFlip ----------' 24 | 25 | setup() 26 | ea = ScreenEA() 27 | PatchByte(ea, jumpMirror[Byte(ea)]) 28 | 29 | 30 | main() 31 | -------------------------------------------------------------------------------- /idapython/LocFuncAnalyzer.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | def findArguments(funcAddr, parentAddr): 6 | ''' 7 | find the number of arguments for a function 8 | ''' 9 | funcAddr = PrevHead(funcAddr, parentAddr) 10 | argNum = 0 11 | 12 | # While the instruction is not a call or jcc or pass the start of a function 13 | while GetMnem(funcAddr) != "call" and GetMnem(funcAddr)[0] != "j" and funcAddr != parentAddr: 14 | # A mov instruction that updates the stack. In GCC compiled 32-bits binary, mov instruction is used to put function arguments on stack instead of push instruction 15 | if GetMnem(funcAddr) == "mov" and "esp" in GetOpnd(funcAddr, 0): 16 | argNum += 1 17 | funcAddr = PrevHead(funcAddr, parentAddr) 18 | 19 | return argNum 20 | 21 | 22 | def main(): 23 | print '--------------- LocFuncAnalyzer ---------------' 24 | 25 | data = [0, 1, 2, 3, 4, 5] 26 | 27 | for func in idautils.Functions(): 28 | flags = GetFunctionFlags(func) 29 | codeRef = 0 30 | dataRef = 0 31 | NotLocalFunc = False 32 | 33 | if SegName(func) != ".text" or GetFunctionName(func).lstrip("_") == "start": # Ignore library functions and start 34 | continue 35 | for addr in idautils.XrefsTo(func, 1): 36 | #if it's not called from .text section then it's not a local function created by the programmer 37 | if SegName(addr.frm) != ".text" or GetFunctionName(addr.frm).lstrip("_") == "start": 38 | NotLocalFunc = True 39 | break 40 | if addr.type in data: 41 | dataRef += 1 42 | else: 43 | codeRef += 1 44 | refAddr = addr.frm 45 | refParentAddr = GetFunctionAttr(addr.frm, FUNCATTR_START) 46 | if NotLocalFunc: 47 | continue 48 | 49 | #If execution gets here, then it is a function that we are interested in 50 | print '*** ', GetFunctionName(func) 51 | print 'Number of Arguments: ', findArguments(refAddr, refParentAddr) 52 | print 'Code References: ', codeRef 53 | print 'Data References: ', dataRef 54 | 55 | 56 | main() 57 | -------------------------------------------------------------------------------- /idapython/MalCheck.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | MalFunc = { 6 | "CheckRemoteDebuggerPresent" : "Checks to see if a specific process (including your own) is being debugged", 7 | "IsDebuggerPresent" : "Checks to see if the current process is being debugged", 8 | "MapVirtualKey" : "Translates a virtual-key code into a character value. Used by keylogging malware", 9 | "GetTickCount" : "Retrieves the number of milliseconds since bootup", 10 | "EnableExecuteProtectionSupport" : "Used to modify the Data Execution Protection (DEP) settings of the host, making it more susceptible to attack", 11 | "GetTickCount" : "Retrieves the number of milliseconds since bootup. Used to gather timing information", 12 | "IsDebuggerPresent" : "Checks to see if the current process is being debugged", 13 | } 14 | 15 | 16 | def main(): 17 | print "--------------- MalCheck ---------------" 18 | 19 | functions = [GetFunctionName(func) for func in idautils.Functions()] 20 | badFunc = [func for func in functions if func in MalFunc.keys()] 21 | 22 | if(len(badFunc) == 0): 23 | print "no low-hanging fruits detected" 24 | else: 25 | for func in badFunc: 26 | print func, ":", MalFunc[func] 27 | 28 | if __name__ == '__main__': 29 | main() 30 | -------------------------------------------------------------------------------- /idapython/NopSled.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | print '---------- NopSled ----------' 3 | 4 | start = SelStart() 5 | end = SelEnd() 6 | if start == end: #user did not select multiple lines. User only wants to nop current instruction 7 | start = ScreenEA() 8 | end = NextHead(start) 9 | while start < end: 10 | PatchByte(start, 0x90) 11 | start += 1 12 | 13 | 14 | main() 15 | -------------------------------------------------------------------------------- /idapython/README.md: -------------------------------------------------------------------------------- 1 | ### [ARMdetect](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/ARMdetect.py) 2 | Identifies all sections in a ARM binary that is setting up (writing to) a pin, reading a pin (using the pin as input pin), or interfacing with other devices on the board using I2C. For full documentation, check out this repo: https://github.com/yellowbyte/Reverse_Engineering_Embedded_Devices. 3 | 4 | ### [CCCheck](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/CCCheck.py) 5 | The 0xCC byte is the byte representing int 3, or software breakpoint. When you make a software breakpoint on an instruction, the debugger replaces the first byte of the instruction to 0xCC. When the CPU hits the int 3 instruction, the OS will signal SIGTRAP to the debugged program. But since the program is being debugged, the debugger will catch it instead, effectively halting the execution temporatory. The 0xCC byte can also be added to the program itself by the original software developers to thwart off people trying to reverse engineer their program since running the program under a debugger will stop it at random 0xCC instructions. This script checks the .text section for the 0xCC bytes and prints the addresses of where the 0xCC bytes are located if they exist. Being able to quickly identify where all the manually added 0xCC bytes are makes the initial dynamic analysis process smoother. 6 | 7 | ### [Deobfuscate](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/Deobfuscate.py) 8 | Deobfuscates a portion of the code and data for a crackme by Tosh. This script will directly patch the bytes in IDA so IDA will show the correct deobfuscated listing rather than writing the deobfuscated listing to a separate file. This enhances static analysis and makes solving this crackme challenge a lot faster. Full write-up of this particular crackme can be viewed on my blog (http://yellowbyte.blogspot.com/2017/01/elf-anti-debug-root-me-cracking.html). 9 | 10 | ### [FindMain](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/FindMain.py) 11 | In a stripped ELF executable, IDA will not be able to identify main. The name of the main function will be indistinguishable from the other local functions, in the form sub_"address of where it is located." This script will automatically find and rename main as "main" and then move cursor position in IDA's disassembly listing to beginning of main. This script currently only works for GNU Compiler Collection (GCC) compiled ELF executables. 12 | 13 | ### [intCheck](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/intCheck.py) 14 | Interrupts are either generated by external sources, such as I/O devices, or by processor-detected exceptions in the running code. Although interrupts can be represented in assembly in the form: int 'interrupt number', they should not appear in the executable section of a binary. Detection of it in the executable section could mean that it is being used as a debugging deterrent, with the exception of int 0x80, which is used to make syscall. For example, int 0x2c raises a debug assertion exception but if the binary is running under WinDbg, WinDbg will consume this exception. Check can then be made to see if the corresponding exception handler is called. If it's not called, then the binary must be under the influence of a debugger. One thing to note is that this script will not catch single byte Int 0x3 (representing software breakpoint) since its representation in hex is just the 0xCC byte. To detect the single byte Interrupt 0x3, use my CCCheck script. 15 | 16 | ### [JccFlip](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/JccFlip.py) 17 | Changes a jcc instruction to its opposite representation. For example, JA to JB. This is helpful when one is patching a binary to bypass the binary's authorization routine. 18 | 19 | ### [LocFuncAnalyzer](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/LocFuncAnalyzer.py) 20 | In a stripped ELF binary, local functions are deprived of its original name. This is why local functions are not usually the starting point when doing analysis since without its original name, all local functions look exactly the same as one another. This script aims to change that. For each local function, it prints out the numbers of code references, data references, and arguments. You might be wondering how those information may be helpful. Well, a large number of code references is suspicious. For example, in a binary where readable texts are obfuscated, the text de-obfuscator function will have a large number of code references. The numbers of data references is also helpful to know since having any number of data references can mean that the function is being called indirectly, which is suspicious. Knowing the numbers of arguments for a local function can also be helpful. For example, if you are solving a crackme and there is a local function that takes two arguments, that function could be the authentication function that checks your input against the correct input. Being able to identify the authentication function quickly makes it easier to solve the crackme. 21 | 22 | ### [MalCheck](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/MalCheck.py) 23 | Checks an executable for usage of API that has a high chance of being used maliciously or for anti-reversing purposes such as IsDebuggerPresent. It's always a good idea to check for low-hanging fruits before doing any deeper analysis. The "potentially malicious" functions that I came up with are from the book "Practical Malware Analysis." 24 | 25 | ### [NopSled](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/NopSled.py) 26 | Either convert the instructions that user select/highlight or the instruction that the mouse cursor is on to NOPs. This can be useful to clean up Anti-Reversing techniques that add in dead/useless code to make it harder to analyze the disassembly. 27 | 28 | ### [RdtscCheck](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/idapython/RdtscCheck.py) 29 | rdtsc instruction puts the number of ticks since the last system reboot in EDX:EAX. There is really no point for a binary to contain this instruction other than for anti-debugging purpose. To use as a debugging deterrent, program will have this instruction at at least two different places in the .text section and then have a compare instruction that compares the time eclipsed between two rdtsc instructions. If a breakpoint is placed anywhere between two rdtsc instructions, then the time eclipsed between the two instructions will be significantly higher, signaling that it is running under a debugger. This script checks the .text section for the rdtsc instructions and print out their addresses if they exist. 30 | -------------------------------------------------------------------------------- /idapython/RdtscCheck.py: -------------------------------------------------------------------------------- 1 | import idautils 2 | import idaapi 3 | 4 | 5 | def main(): 6 | print '--------------- RdtscCheck ---------------' 7 | 8 | current = MinEA() 9 | end = MaxEA() 10 | found = False 11 | 12 | while current < end: 13 | current = FindBinary(current, SEARCH_DOWN|SEARCH_NEXT, '0f 31') 14 | if current != BADADDR and SegName(current) == ".text": 15 | print hex(current), GetDisasm(current) 16 | found = True 17 | if found: 18 | print "*** No rdtsc instruction found" 19 | 20 | 21 | main() 22 | -------------------------------------------------------------------------------- /idapython/intCheck.py: -------------------------------------------------------------------------------- 1 | def main(): 2 | print '---------- intCheck ----------' 3 | 4 | current = MinEA() 5 | end = MaxEA() 6 | found = False 7 | 8 | while current < end: 9 | current = FindBinary(current, SEARCH_DOWN|SEARCH_NEXT, 'cd') 10 | if current != BADADDR and SegName(current) == '.text' and Byte(current+1) != 0x80: 11 | print hex(current), GetDisasm(current) 12 | found = True 13 | if found: 14 | print '*** No manually added interrupt found' 15 | 16 | 17 | main() 18 | -------------------------------------------------------------------------------- /shellcode_analysis/README.md: -------------------------------------------------------------------------------- 1 | ### [shc2exe](https://github.com/yellowbyte/reverse-engineering-playground/blob/master/shellcode_analysis/shc2exe.py) 2 | Given a file containing shellcode, it will wrap it into an executable binary so an analyst can perform live debugging on the shellcode. 3 | ```bash 4 | python shc2exe.py -a x86 sample_shc_32 5 | ``` 6 | The sample provided shellcodes, sample_shc_32 and sample_shc_64, are taken from [shell-storm](http://shell-storm.org/shellcode/) and note that it currently only support shellcode for x86 and x86-64 ISA. 7 | -------------------------------------------------------------------------------- /shellcode_analysis/sample_shc_32: -------------------------------------------------------------------------------- 1 | \x31\xc0\x50\x68\x2f\x2f\x73\x68\x68\x2f\x62\x69\x6e\x89\xe3\x89\xc1\x89\xc2\xb0\x0b\xcd\x80\x31\xc0\x40\xcd\x80 2 | -------------------------------------------------------------------------------- /shellcode_analysis/sample_shc_64: -------------------------------------------------------------------------------- 1 | \x6a\x42\x58\xfe\xc4\x48\x99\x52\x48\xbf\x2f\x62\x69\x6e\x2f\x2f\x73\x68\x57\x54\x5e\x49\x89\xd0\x49\x89\xd2\x0f\x05 2 | -------------------------------------------------------------------------------- /shellcode_analysis/shc2exe.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | 3 | import os 4 | import glob 5 | import lief 6 | import click 7 | import struct 8 | 9 | from capstone import * 10 | 11 | CURRENT_DIR = os.getcwd() 12 | 13 | MD_32 = Cs(CS_ARCH_X86, CS_MODE_32) 14 | MD_64 = Cs(CS_ARCH_X86, CS_MODE_64) 15 | 16 | TEMPLATE_32 = "section .text\n" \ 17 | "global _start\n" \ 18 | "\n" \ 19 | "_start:\n{}" \ 20 | "\n" \ 21 | "xor ebx, ebx\n" \ 22 | "mov eax, 0x1\n" \ 23 | "int 0x80\n" \ 24 | 25 | TEMPLATE_64 = ".intel_syntax noprefix\n" \ 26 | ".global _start\n" \ 27 | ".text\n" \ 28 | "\n" \ 29 | "_start:\n{}" \ 30 | "\n" \ 31 | "xor rdi, rdi\n" \ 32 | "mov rax, 0x3c\n" \ 33 | "syscall\n" \ 34 | 35 | 36 | class BinaryBuilder(object): 37 | """ 38 | This class builds an executable binary from shellcode 39 | """ 40 | 41 | def __init__(self, shellcode, executable_name, arch): 42 | self.shc = shellcode 43 | self.name = executable_name 44 | self.arch = { 45 | "MD": MD_32 if arch == "x86" else MD_64, 46 | "template": TEMPLATE_32 if arch == "x86" else TEMPLATE_64, 47 | "extension": ".asm" if arch == "x86" else ".s", 48 | "assemble": "nasm -f elf32 -o {0}.o {0}.asm" if arch == "x86" else 49 | "gcc -c {0}.s", # Why GCC instead of NASM for x86-64: 50 | # Capstone incorrectly disassembled some x86-64 51 | # mov instruction as `movabs` instead of just 52 | # `mov`. `movabs` is valid AT&T syntax but not 53 | # valid Intel syntax. Capstone is disassembling 54 | # all other instruction as Intel syntax but with 55 | # just the caveat of that mov instruction in AT&T. 56 | # NASM can only assemble Intel syntax assembly, thus 57 | # failed to assemble when it encountered `movabs` 58 | "link": "ld -m elf_i386 -o {0} {0}.o" if arch == "x86" else 59 | "ld -o {0} {0}.o" 60 | } 61 | self.filename = os.path.join( 62 | CURRENT_DIR, 63 | self.name+self.arch["extension"]) 64 | self._disasm = None 65 | self._assembly = None 66 | self._binary = None 67 | self._shc_disasm() 68 | self._inject() 69 | self._compile() 70 | self._parse_binary() 71 | 72 | @property 73 | def shellcode(self): 74 | """ 75 | Disassembled shellcode 76 | """ 77 | return self._disasm 78 | 79 | @property 80 | def assembly(self): 81 | """ 82 | Content of a compilable assembly file with the shellcode injected 83 | """ 84 | return self._assembly 85 | 86 | @property 87 | def va(self): 88 | """ 89 | Virtual Address pointing to the beginning of the injected shellcode 90 | """ 91 | return hex(self._binary.header.entrypoint) 92 | 93 | @staticmethod 94 | def to_disk(content, filename): 95 | """ 96 | Save data to file on disk 97 | """ 98 | with open(filename, "w") as _file: 99 | _file.write(content) 100 | 101 | def _shc_disasm(self): 102 | """ 103 | Disassemble shellcode 104 | """ 105 | self._disasm = "\n".join(map( 106 | lambda i: i.mnemonic+" "+i.op_str, 107 | self.arch["MD"].disasm(self.shc, 0x1000))) 108 | 109 | def _inject(self): 110 | """ 111 | Inject assembly snippet into assembly template 112 | """ 113 | self._assembly = self.arch["template"].format( 114 | self._disasm) 115 | 116 | def _parse_binary(self): 117 | """ 118 | Use LIEF to get details regarding the executable binary 119 | """ 120 | self._binary = lief.parse(self.name) 121 | 122 | def _compile(self): 123 | """ 124 | Create executable binary 125 | """ 126 | self.to_disk(self.assembly, self.filename) 127 | 128 | # assemble to object file 129 | os.system(self.arch["assemble"].format( 130 | self.name)) 131 | 132 | # link to get executable 133 | os.system(self.arch["link"].format( 134 | self.name)) 135 | self._cleanup() 136 | 137 | def _cleanup(self): 138 | """ 139 | Delete artifacts left by the compilation process 140 | """ 141 | for _file in glob.glob(os.path.join(CURRENT_DIR, self.name+".*")): 142 | os.remove(_file) 143 | 144 | 145 | def get_shellcode(filepath): 146 | """ 147 | Get shellcode from file and then sanitize it for usage 148 | """ 149 | with open(filepath) as shellcode_file: 150 | _bytes = shellcode_file.read().strip().split(r"\x") 151 | _bytes = [struct.pack("B", int(b, 16)) for b in _bytes if b] 152 | shellcode = "".join(_bytes) 153 | return shellcode 154 | 155 | @click.command() 156 | @click.argument("path", required=True) 157 | @click.argument("filename", default="executable", required=False) 158 | @click.option("--arch", "-a", default="x86", help="What is the target ISA?") 159 | def main(path, arch, filename): 160 | """ 161 | *** CURRENTLY ONLY SUPPORT X86 and X86-64 *** 162 | """ 163 | shellcode = get_shellcode(path) 164 | 165 | _bin = BinaryBuilder(shellcode, filename, arch) 166 | print "Inserted shellcode is at address: {}".format(_bin.va) 167 | 168 | if __name__ == "__main__": 169 | main() 170 | --------------------------------------------------------------------------------