├── README.md
├── coloring_call_jmp.py
├── non_zero_xor_search.py
├── sc_hashes.json
├── shellcode_hash_search.py
├── sqlite2json.py
└── stackstrings.py

/README.md:
--------------------------------------------------------------------------------
# shellcode_hashes

shellcode_hashes was inspired by the [script of the same name in flare](https://github.com/fireeye/flare-ida/tree/master/shellcode_hashes).
It finds the symbol name that matches each [hash](https://www.fireeye.com/blog/threat-research/2012/11/precalculated-string-hashes-reverse-engineering-shellcode.html) used in the shellcode.
It uses the database created by the flare script.

## sqlite2json.py
Since Ghidra could not import sqlite, I created a script to convert the database to JSON.
Convert it with the following command:
```
python sqlite2json.py
```

## shellcode_hash_search.py

Open the target shellcode and execute the script.

![ch03_shellcodehash](https://user-images.githubusercontent.com/18203311/64575824-a5bf6700-d3b0-11e9-8294-c6b045c127a5.png)


![ch03_shellcodehash_decompile](https://user-images.githubusercontent.com/18203311/64575814-9c35ff00-d3b0-11e9-8cb8-3b686ae553a9.png)

# non_zero_xor_search.py
Finds XOR instructions whose source and destination operands are not equivalent.
Each match is registered as a bookmark.

![ch03_non-zero_xor](https://user-images.githubusercontent.com/18203311/64575818-9fc98600-d3b0-11e9-8732-bccf8d0e3c1f.png)

# coloring_call_jmp.py

Colors CALL and conditional jump instructions.
32 | Color the following instructions 33 | * CALL  34 | * JE 35 | * JZ 36 | * JNE 37 | * JNZ 38 | * JA 39 | * JAE 40 | * JBE 41 | * JB 42 | * JL 43 | * JLE 44 | * JG 45 | * JGE 46 | 47 | ![ch03_coloring_call_jmp](https://user-images.githubusercontent.com/18203311/64575795-87596b80-d3b0-11e9-847b-f46ab6aefa4b.png) 48 | 49 | # stackstrings.py 50 | 51 | Deobfuscate stackstrings used by Godzilla Loader. 52 | 53 | ### before 54 | ![stackstrings_execute_before](https://user-images.githubusercontent.com/18203311/65371013-13fe0680-dc9a-11e9-910a-37329767a26a.png) 55 | 56 | ### after 57 | ![stackstrings_execute_after](https://user-images.githubusercontent.com/18203311/65371015-15c7ca00-dc9a-11e9-80c2-5028a8c3d03f.png) 58 | 59 | ### console output 60 | ![stackstrings_console_result](https://user-images.githubusercontent.com/18203311/65371016-16f8f700-dc9a-11e9-981c-552a7e9152a4.png) 61 | -------------------------------------------------------------------------------- /coloring_call_jmp.py: -------------------------------------------------------------------------------- 1 | #coloring call and jmp instruction 2 | # @author: Allsafe 3 | # @category: tools 4 | 5 | from java.awt import Color 6 | 7 | CALL_COLOR = Color(3,169,244) 8 | CONDITIONAL_COLOR = Color(205,220,57) 9 | 10 | call_color_count = 0 11 | conditional_color_count = 0 12 | 13 | #get all memory ranges 14 | addr_ranges = currentProgram.getMemory().getAddressRanges() 15 | 16 | for addr_range in addr_ranges: 17 | insts = currentProgram.getListing().getInstructions(addr_range.getMinAddress(), True) 18 | for inst in insts: 19 | flow_type = inst.getFlowType() 20 | if flow_type.isCall(): 21 | setBackgroundColor(inst.getAddress(), CALL_COLOR) 22 | call_color_count += 1 23 | elif flow_type.isConditional(): 24 | setBackgroundColor(inst.getAddress(), CONDITIONAL_COLOR) 25 | conditional_color_count += 1 26 | 27 | 28 | print('colored Call: {}'.format(call_color_count)) 29 | print('colored Conditional: 
{}'.format(conditional_color_count)) -------------------------------------------------------------------------------- /non_zero_xor_search.py: -------------------------------------------------------------------------------- 1 | #search non zero xor 2 | # @author: Allsafe 3 | # @category: tools 4 | 5 | from ghidra.program.model.listing import CodeUnit 6 | 7 | def add_bookmark_comment(addr, text): 8 | cu = currentProgram.getListing().getCodeUnitAt(addr) 9 | createBookmark(addr, "non zero xor", text) 10 | cu.setComment(CodeUnit.EOL_COMMENT, text) 11 | 12 | def getFunctionNameAtAddress(address): 13 | function = getFunctionContaining(address) 14 | if function is not None: 15 | return function.getName() 16 | else: 17 | return "No function at specified address" 18 | 19 | #get all memory ranges 20 | ranges = currentProgram.getMemory().getAddressRanges() 21 | print("--------------------------------") 22 | 23 | instructions = currentProgram.getListing().getInstructions(True) 24 | 25 | for ins in instructions: 26 | mnemonic = ins.getMnemonicString() 27 | if mnemonic == "XOR": 28 | operand1 = ins.getOpObjects(0) 29 | operand2 = ins.getOpObjects(1) 30 | if operand1 != operand2: 31 | print("{} {} '{}'".format(ins.address, getFunctionNameAtAddress(ins.address), ins)) 32 | add_bookmark_comment(ins.address, str(ins)) 33 | ins = getInstructionAfter(ins) 34 | -------------------------------------------------------------------------------- /shellcode_hash_search.py: -------------------------------------------------------------------------------- 1 | #search shellcode hashes 2 | # @author: Allsafe 3 | # @category: tools 4 | import json 5 | from ghidra.program.model.listing import CodeUnit 6 | from ghidra.util.exception import CancelledException 7 | from ghidra.program.model.scalar import Scalar 8 | def add_bookmark_comment(addr, text): 9 | cu = currentProgram.getListing().getCodeUnitAt(addr) 10 | createBookmark(addr, "shellcode_hash", text) 11 | cu.setComment(CodeUnit.EOL_COMMENT, 
text) 12 | try: 13 | sc_hashes_file = askFile("sc_hashes.json", "sc_hashes.json").getPath() 14 | except CancelledException as e: 15 | print str(e) 16 | exit() 17 | with open(sc_hashes_file, 'r') as f: 18 | sc_hashes = json.load(f) 19 | def db_search(data): 20 | if isinstance(data[0], Scalar): 21 | decimal_data= int(str(data[0]), 16) 22 | try: 23 | return sc_hashes['symbol_hashes'][str(decimal_data)] 24 | except: 25 | return -1 26 | 27 | #get all instructions 28 | instructions = currentProgram.getListing().getInstructions(True) 29 | 30 | print("--------------------------------") 31 | 32 | for ins in instructions: 33 | mnemonic = ins.getMnemonicString() 34 | if mnemonic == "MOV": 35 | operand2 = ins.getOpObjects(1) 36 | symbol_info = db_search(operand2) 37 | if symbol_info != -1 and symbol_info != None: 38 | text = "{} {} [{}]{}".format(ins.address, sc_hashes['hash_types'][str(symbol_info['hash_type'])], sc_hashes['source_libs'][str(symbol_info['lib_key'])], symbol_info['symbol_name']) 39 | print(text) 40 | add_bookmark_comment(ins.address, text) 41 | elif mnemonic == "PUSH": 42 | operand1 = ins.getOpObjects(0) 43 | symbol_info = db_search(operand1) 44 | if symbol_info != -1 and symbol_info != None: 45 | text = "{} {} [{}]{}".format(ins.address, sc_hashes['hash_types'][str(symbol_info['hash_type'])], sc_hashes['source_libs'][str(symbol_info['lib_key'])], symbol_info['symbol_name']) 46 | print(text) 47 | add_bookmark_comment(ins.address, text) 48 | -------------------------------------------------------------------------------- /sqlite2json.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import json 3 | 4 | hash_types_dict={} 5 | source_libs_dict={} 6 | symbol_hashes_dict={} 7 | 8 | conn = sqlite3.connect('sc_hashes.db') 9 | c = conn.cursor() 10 | 11 | c.execute("SELECT * FROM hash_types") 12 | hash_types = c.fetchall() 13 | 14 | for type in hash_types: 15 | hash_types_dict.update({type[0]:type[2]}) 16 | 17 | 
c.execute("SELECT * FROM source_libs") 18 | source_libs = c.fetchall() 19 | 20 | for source_lib in source_libs: 21 | source_libs_dict.update({source_lib[0]:source_lib[1]}) 22 | 23 | c.execute("SELECT * FROM symbol_hashes") 24 | symbol_hashes = c.fetchall() 25 | 26 | for symbol_hashe in symbol_hashes: 27 | symbol_hashes_dict.update({symbol_hashe[1]:{"hash_type":symbol_hashe[2], "lib_key":symbol_hashe[3], "symbol_name":symbol_hashe[4]}}) 28 | 29 | 30 | sc_hashes = { 31 | "hash_types":hash_types_dict, 32 | "source_libs":source_libs_dict, 33 | "symbol_hashes":symbol_hashes_dict 34 | } 35 | 36 | with open("sc_hashes.json", 'w') as f: 37 | json.dump(sc_hashes, f) 38 | -------------------------------------------------------------------------------- /stackstrings.py: -------------------------------------------------------------------------------- 1 | # Deobfuscate stackstrings used by Godzilla Loader 2 | # @category: Godzilla Loader 3 | 4 | import string 5 | 6 | 7 | def set_comments(stackstrings): 8 | for addr, value in stackstrings.items(): 9 | if len(value) <= 4: 10 | continue 11 | listing = currentProgram.getListing() 12 | codeUnit = listing.getCodeUnitAt(toAddr(addr)) 13 | codeUnit.setComment(codeUnit.EOL_COMMENT, value) 14 | print('%08x: %s' % (addr, value)) 15 | 16 | def is_ascii(value): 17 | if value == 0 or chr(value) in string.printable: 18 | return True 19 | return False 20 | 21 | def is_ascii_dword(value): 22 | for i in range(4): 23 | if not is_ascii((value >> (i * 8)) & 0xff): 24 | return False 25 | return True 26 | 27 | def get_ascii_from_dword(value): 28 | return ''.join([chr((value >> (i * 8)) & 0xff) for i in range(4)]).rstrip('\x00') 29 | 30 | def decode_ss2(stackstrings): 31 | result = {} 32 | prev_offset = None 33 | 34 | for offset, ss in sorted(stackstrings.items()): 35 | if prev_offset == None: 36 | value = ss[0] 37 | start_addr = ss[1] 38 | elif ss[0] == '\x00': 39 | if len(value) > 4: 40 | result[start_addr] = value 41 | prev_offset = None 42 | 
continue 43 | elif prev_offset + 2 == offset: 44 | value += ss[0] 45 | prev_offset = offset 46 | return result 47 | 48 | def handle_ss2_clear(reg_state, inst): 49 | reg = inst.getOpObjects(0)[0].getName()[1:] 50 | reg_state[reg] = '\x00' 51 | return reg_state 52 | 53 | def handle_ss2_push(inst): 54 | value = chr(inst.getOpObjects(0)[0].getUnsignedValue()) 55 | return value 56 | 57 | def handle_ss2_pop(reg_state, inst, tos_value): 58 | dst_reg = inst.getOpObjects(0)[0].getName()[1:] 59 | reg_state[dst_reg] = tos_value 60 | return reg_state 61 | 62 | def handle_ss2_copy(reg_state, inst): 63 | dst_reg = inst.getOpObjects(0)[0].getName()[1:] 64 | src_reg = inst.getOpObjects(1)[0].getName()[1:] 65 | src_value = reg_state.get(src_reg, '\x00') 66 | reg_state[dst_reg] = src_value 67 | return reg_state 68 | 69 | def handle_ss2_store(reg_state, inst): 70 | dst = inst.getOpObjects(0) 71 | if isinstance(dst[0], ghidra.program.model.lang.Register): 72 | if len(dst) == 2: 73 | dst_offset = dst[1].getSignedValue() 74 | else: 75 | dst_offset = 0 76 | else: 77 | dst_offset = dst[0].getSignedValue() 78 | src_reg = inst.getOpObjects(1)[0].getName() 79 | src_value = reg_state.get(src_reg, '\x00') 80 | return dst_offset, src_value 81 | 82 | def deobfuscate_stackstrings2(func): 83 | stackstrings = {} 84 | tos_value = '\x00' 85 | reg_state = {} 86 | 87 | func_addr = func.getEntryPoint() 88 | inst = getInstructionAt(func_addr) 89 | while inst and getFunctionContaining(inst.getAddress()) == func: 90 | # print('[%08x] %s' % (inst.getAddress().getOffset(), inst.toString())) 91 | if is_ss2_clear(inst): 92 | reg_state = handle_ss2_clear(reg_state, inst) 93 | elif is_ss2_push(inst): 94 | tos_value = handle_ss2_push(inst) 95 | elif is_ss2_pop(inst): 96 | reg_state = handle_ss2_pop(reg_state, inst, tos_value) 97 | elif is_ss2_store(inst): 98 | offset, value = handle_ss2_store(reg_state, inst) 99 | inst_addr = inst.getAddress().getOffset() 100 | stackstrings[offset] = (value, inst_addr) 101 | elif 
is_ss2_copy(inst): 102 | reg_state = handle_ss2_copy(reg_state, inst) 103 | inst = inst.getNext() 104 | result = decode_ss2(stackstrings) 105 | return result 106 | 107 | def is_ss2_clear(inst): 108 | mnemonic = inst.getMnemonicString() 109 | if not mnemonic.startswith('XOR'): 110 | return False 111 | 112 | dst = inst.getOpObjects(0) 113 | if len(dst) != 1 or not isinstance(dst[0], ghidra.program.model.lang.Register) or \ 114 | dst[0].minimumByteSize != 4: 115 | return False 116 | 117 | src = inst.getOpObjects(1) 118 | if len(src) != 1 or not isinstance(src[0], ghidra.program.model.lang.Register) or \ 119 | src[0].minimumByteSize != 4: 120 | return False 121 | 122 | src_reg = src[0].getName() 123 | dst_reg = dst[0].getName() 124 | 125 | if src_reg != dst_reg: 126 | return False 127 | 128 | return True 129 | 130 | def is_ss2_push(inst): 131 | mnemonic = inst.getMnemonicString() 132 | if not mnemonic.startswith('PUSH') or inst.getLength() != 2: 133 | return False 134 | 135 | op = inst.getOpObjects(0)[0] 136 | if not isinstance(op, ghidra.program.model.scalar.Scalar): 137 | return False 138 | 139 | value = op.getUnsignedValue() & 0xff 140 | if not chr(value) in string.printable: 141 | return False 142 | 143 | return True 144 | 145 | def is_ss2_pop(inst): 146 | mnemonic = inst.getMnemonicString() 147 | if not mnemonic.startswith('POP'): 148 | return False 149 | 150 | op = inst.getOpObjects(0)[0] 151 | if not isinstance(op, ghidra.program.model.lang.Register): 152 | return False 153 | 154 | return True 155 | 156 | def is_ss2_copy(inst): 157 | mnemonic = inst.getMnemonicString() 158 | if not mnemonic.startswith('MOV'): 159 | return False 160 | 161 | dst = inst.getOpObjects(0) 162 | if len(dst) != 1 or not isinstance(dst[0], ghidra.program.model.lang.Register): 163 | return False 164 | 165 | src = inst.getOpObjects(1) 166 | if len(src) != 1 or not isinstance(src[0], ghidra.program.model.lang.Register) or \ 167 | dst[0].getName() == 'ESP': 168 | return False 169 | 170 | 
return True 171 | 172 | def is_ss2_store(inst): 173 | mnemonic = inst.getMnemonicString() 174 | if not mnemonic.startswith('MOV'): 175 | return False 176 | 177 | src = inst.getOpObjects(1) 178 | if len(src) != 1 or not isinstance(src[0], ghidra.program.model.lang.Register) or \ 179 | src[0].minimumByteSize != 2: 180 | return False 181 | 182 | dst = inst.getOpObjects(0) 183 | 184 | if len(dst) == 1 and isinstance(dst[0], ghidra.program.model.lang.Register) and \ 185 | dst[0].getName() == 'EBP': 186 | return True 187 | 188 | if len(dst) == 2 and isinstance(dst[0], ghidra.program.model.lang.Register) and \ 189 | dst[0].getName() == 'EBP' and isinstance(dst[1], ghidra.program.model.scalar.Scalar): 190 | return True 191 | 192 | if len(dst) == 2 and isinstance(dst[1], ghidra.program.model.lang.Register) and \ 193 | dst[1].getName() == 'EBP' and isinstance(dst[0], ghidra.program.model.scalar.Scalar): 194 | return True 195 | 196 | return False 197 | 198 | 199 | def decode_ss1(stackstrings): 200 | result = {} 201 | prev_offset = None 202 | 203 | for offset, ss in sorted(stackstrings.items()): 204 | if prev_offset == None: 205 | if len(ss[0]) == 4: 206 | value = ss[0] 207 | start_addr = ss[1] 208 | prev_offset = offset 209 | elif prev_offset + 4 == offset: 210 | if len(ss[0]) == 4: 211 | value += ss[0] 212 | prev_offset = offset 213 | else: 214 | value += ss[0] 215 | result[start_addr] = value 216 | prev_offset = None 217 | else: 218 | result[start_addr] = value 219 | prev_offset = offset 220 | value = ss[0] 221 | start_addr = ss[1] 222 | 223 | return result 224 | 225 | def is_ss1_store(inst): 226 | mnemonic = inst.getMnemonicString() 227 | if not mnemonic.startswith('MOV'): 228 | return False 229 | 230 | dst = inst.getOpObjects(0) 231 | if len(dst) != 2 or not isinstance(dst[0], ghidra.program.model.lang.Register) or \ 232 | dst[0].getName() != 'EBP' or not isinstance(dst[1], ghidra.program.model.scalar.Scalar): 233 | return False 234 | 235 | offset = 
dst[1].getSignedValue() 236 | src = inst.getOpObjects(1) 237 | if len(src) != 1 or not isinstance(src[0], ghidra.program.model.scalar.Scalar): 238 | return False 239 | 240 | value = src[0].getUnsignedValue() 241 | if not is_ascii_dword(value): 242 | return False 243 | 244 | return True 245 | 246 | def handle_ss1_store(inst): 247 | offset = inst.getOpObjects(0)[1].getSignedValue() 248 | value = get_ascii_from_dword(inst.getOpObjects(1)[0].getUnsignedValue()) 249 | return offset, value 250 | 251 | def deobfuscate_stackstrings1(func): 252 | stackstrings = {} 253 | func_addr = func.getEntryPoint() 254 | inst = getInstructionAt(func_addr) 255 | while inst and getFunctionContaining(inst.getAddress()) == func: 256 | # print('[%08x] %s' % (inst.getAddress().getOffset(), inst.toString())) 257 | if is_ss1_store(inst): 258 | offset, value = handle_ss1_store(inst) 259 | inst_addr = inst.getAddress().getOffset() 260 | stackstrings[offset] = (value, inst_addr) 261 | # print('[%08x:%08x] %s' % (inst_addr, offset, value)) 262 | inst = inst.getNext() 263 | result = decode_ss1(stackstrings) 264 | return result 265 | 266 | def deobfuscate_godzilla_loader(): 267 | func = getFirstFunction() 268 | while func: 269 | stackstrings1 = deobfuscate_stackstrings1(func) 270 | set_comments(stackstrings1) 271 | stackstrings2 = deobfuscate_stackstrings2(func) 272 | set_comments(stackstrings2) 273 | func = getFunctionAfter(func) 274 | 275 | deobfuscate_godzilla_loader() --------------------------------------------------------------------------------