├── .gitignore ├── bytesearch ├── maze_cfg_cleanup.py ├── maze_deobf_utils.py ├── maze_function_analysis.py └── maze_functions.py ├── images └── maze_featureimg.png └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /bytesearch/maze_cfg_cleanup.py: -------------------------------------------------------------------------------- 1 | import idautils, idaapi, idc, ida_search, ida_ua, ida_name 2 | import string 3 | 4 | idaapi.require("maze_functions") 5 | idaapi.require("maze_function_analysis") 6 | idaapi.require("maze_deobf_utils") 7 | 8 | # 9 | # Registers, if needed 10 | # 11 | gp_registers = {0:'eax',1:'ecx',2:'edx',3:'ebx',4:'esp',5:'ebp',6:'esi',7:'edi'} 12 | 13 | 14 | def CheckIsASCII(Ascii_ea): 15 | ''' 16 | @brief Do a quick to see if an ASCII string is present. 17 | 18 | @return length 19 | ''' 20 | length = 0 21 | 22 | byte = -1 23 | iter_ea = Ascii_ea 24 | while byte != 0: 25 | byte = get_wide_byte(iter_ea) 26 | if chr(c) not in string.printable: 27 | break 28 | length += 1 29 | iter_ea += 1 30 | 31 | return length 32 | 33 | def ZeroOutInstruction(Targ_insn): 34 | ''' 35 | @brief Replace instruction with NULL bytes. 36 | 37 | @detail Currently, this function replaces opcode bytes with NULL bytes, but this 38 | may get switched to 0xCC. I don't know which one messes with the bytes around it less. 39 | ''' 40 | 41 | #print "reached" 42 | if type(Targ_insn) == ida_ua.insn_t: 43 | idx = 0 44 | insn_ea = Targ_insn.ea 45 | #print "Type ok: ", hex(insn_ea), hex(Targ_insn.ip) 46 | while idx < Targ_insn.size: 47 | #print "Patching: ", hex(insn_ea) 48 | patch_byte(insn_ea,0x00) 49 | 50 | insn_ea += 1 51 | idx += 1 52 | 53 | def GetModuleName(Strlit_ea): 54 | ''' 55 | @brief Get the module name, this was already checked by the IsValidName from 56 | a previous call, so it's knownt that it is the module name. 57 | 58 | @return BOOL 59 | ''' 60 | 61 | start_ea = Strlit_ea 62 | module_name = "" 63 | char = get_wide_byte(start_ea) 64 | 65 | while ida_name.is_strlit_cp(char): 66 | module_name += chr(char) 67 | start_ea += 1 68 | char = get_wide_byte(start_ea) 69 | 70 | if get_wide_byte(start_ea) != 0x00: 71 | # 72 | # Final char in ASCII string should be a NULL byte 73 | # 74 | 75 | module_name = "" 76 | 77 | return module_name 78 | 79 | def CleanupPatchArea(Start_ea, Size): 80 | ''' 81 | @brief Delete instructuions that may exist where a patched instruction is 82 | going to exist. 83 | ''' 84 | 85 | for idx in range(Size): 86 | del_items(Start_ea+idx,1) 87 | 88 | plan_and_wait(Start_ea,Size) 89 | 90 | def CheckJCCSameEdge(jcc_ea1, jcc_ea2): 91 | ''' 92 | @brief Check if both edges of a JCC instruction are the same. 93 | 94 | @return BOOL 95 | ''' 96 | 97 | return get_operand_value(jcc_ea1,0) == get_operand_value(jcc_ea2,0) 98 | 99 | def IdentifyFunctionEpilogue(EpligueStartIns): 100 | ''' 101 | @detail Start with the initial instruction and build a basic block. Determine 102 | if the final instruction of the block is either a return instruction 103 | or an equivelant. 104 | 105 | Anything jumping to the value stored in [ESP-4] is suspect, and likely an 106 | attempt to return to the caller after cleanuping esp by incremented the stack 107 | pointer in some manner. 108 | ''' 109 | 110 | terminators = ["jmp", "retn"] 111 | 112 | 113 | def FindCallTypeOneCFGObfuscation(): 114 | ''' 115 | @detail Find all of the CFG Obfuscations that match the following pattern (Type One): 116 | 117 | 68 E8 52 44 6E push offset loc_6E4452E8 118 | FF E7 jmp edi ; or any register 119 | 120 | The jump destination for both is the same. 121 | ''' 122 | 123 | opcodes = "68 ? ? ? ? FF" 124 | 125 | end_ea = ida_ida.cvar.inf.max_ea 126 | 127 | addr_list = set() 128 | cfg1_ea = ida_search.find_binary(0, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 129 | 130 | 131 | 132 | while cfg1_ea != idc.BADADDR: 133 | # 134 | # Iterate over all found addresses and add to list 135 | # 136 | 137 | 138 | 139 | push_instr_ea = cfg1_ea 140 | push_insn = ida_ua.insn_t() 141 | ida_ua.decode_insn(push_insn, push_instr_ea) 142 | push_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(push_insn) 143 | 144 | #print "Find Type One 1: 0x%08x, target 0x%08x" % (cfg1_ea, push_insn_target_ea) 145 | 146 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 147 | #print "Find Type One 2: 0x%08x" % cfg1_ea 148 | 149 | jmp_instr_ea = push_instr_ea + push_insn.size 150 | jmp_dism = generate_disasm_line(jmp_instr_ea,1) 151 | 152 | if 'jmp' in jmp_dism[:3]: 153 | # 154 | # Ensure mnemonic is correct 155 | # 156 | 157 | jmp_insn = ida_ua.insn_t() 158 | ida_ua.decode_insn(jmp_insn, jmp_instr_ea) 159 | 160 | jmp_operand = jmp_insn.ops[0] 161 | 162 | #print "Find Type One 3: 0x%08x %d" % (cfg1_ea, jmp_operand.type) 163 | 164 | if jmp_operand.type in [1, 3, 4]: 165 | addr_list.add(push_instr_ea) 166 | 167 | 168 | 169 | cfg1_ea =ida_search.find_binary(cfg1_ea+4, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 170 | 171 | return addr_list 172 | 173 | 174 | def PatchCallTypeOneCFGObfuscation(instr_ea): 175 | ''' 176 | @detail The patch for type one is to reorder the instructions, place the patched CALL instruction 177 | where the PUSH instruction starts, and place the patched JMP instruction after the new CALL 178 | instruction, overwriting the JMP that is currently in place. 179 | 180 | 68 E8 52 44 6E push 181 | FF E7 jmp edi ; or any register 182 | 183 | Patch: 184 | 185 | FF D7 call edi ; or any register 186 | <5 bytes> JMP 187 | 188 | The second opcode, the 'E7' in the original JMP is decreased by 0x10, making it a call statement. 189 | The call and jmp instructions should always be the same size, but the second opcode is decreased by 190 | 0x10. 191 | ''' 192 | 193 | push_instr_ea = instr_ea 194 | 195 | # 196 | # Get push instruction data 197 | # 198 | push_insn = ida_ua.insn_t() 199 | ida_ua.decode_insn(push_insn, push_instr_ea) 200 | push_insn_target = maze_deobf_utils.GetInstuctionTargetAddress(push_insn) 201 | 202 | # 203 | # Get JMP instruction data 204 | # 205 | jmp_insn_ea = push_instr_ea + push_insn.size 206 | jmp_insn = ida_ua.insn_t() 207 | ida_ua.decode_insn(jmp_insn, jmp_insn_ea) 208 | 209 | # 210 | # Get address for patched JMP and CALL instructions 211 | # 212 | patch_jmp_ea = push_instr_ea + jmp_insn.size 213 | patch_call_ea = push_instr_ea 214 | 215 | # 216 | # Calculate the offset for the address that was pushed to the 217 | # stack by the push instruction and will now be used by the JMP 218 | # 219 | # offset = target_address - address_of_insn_after_CALL 220 | # 221 | patch_jmp_target_ea = push_insn_target 222 | patch_jmp_dest_offset = (patch_jmp_target_ea - (patch_jmp_ea + 5)) & 0xFFFFFFFF 223 | 224 | # 225 | # Patch opcode from JMP to CALL 226 | # 227 | idx = 0 228 | del_items(patch_call_ea,1) 229 | for idx in range(jmp_insn.size): 230 | byte = get_wide_byte(jmp_insn_ea+idx) 231 | if idx == 1: 232 | patch_byte(patch_call_ea+idx, byte-0x10) 233 | else: 234 | patch_byte(patch_call_ea+idx, byte) 235 | idx += 1 236 | create_insn(patch_call_ea) 237 | 238 | # 239 | # Create JMP from PUSH instruction 240 | # 241 | CleanupPatchArea(patch_jmp_ea,5) 242 | patch_byte(patch_jmp_ea, 0xE9) 243 | patch_dword(patch_jmp_ea+1,patch_jmp_dest_offset) 244 | create_insn(patch_jmp_ea) 245 | plan_and_wait(patch_jmp_ea,patch_jmp_ea+5) 246 | 247 | 248 | 249 | plan_and_wait(patch_call_ea, patch_call_ea+jmp_insn.size) 250 | 251 | # 252 | # Make JMP destination code 253 | # 254 | del_items(patch_jmp_target_ea,1) 255 | jmp_target_insn = ida_ua.insn_t() 256 | ida_ua.decode_insn(jmp_target_insn, patch_jmp_target_ea) 257 | create_insn(patch_jmp_target_ea) 258 | plan_and_wait(patch_jmp_target_ea, patch_jmp_target_ea+jmp_target_insn.size) 259 | 260 | 261 | def FindCallTypeTwoCFGObfuscation(): 262 | ''' 263 | @detail Find all of the CFG Obfuscations that match the following pattern (Type Two): 264 | 265 | push 266 | JZ xxxxxxxxxx 267 | JNZ xxxxxxxxxx 268 | 269 | The jump destination for both is the same. 270 | ''' 271 | 272 | opcodes = "68 ? ? ? ? 0f 84 ? ? ? ? 0F 85" 273 | 274 | end_ea = ida_ida.cvar.inf.max_ea 275 | 276 | addr_list = set() 277 | cfg2_ea = ida_search.find_binary(0, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 278 | 279 | # 280 | # useful for identifying what was filtered out and why 281 | # 282 | all_found_from_search = set() 283 | found_pushes = set() 284 | missing_pushes = set() 285 | found_jzjnz_mnems = set() 286 | missing_jzjnz_mnems = set() 287 | found_different_edge = set() 288 | 289 | 290 | while cfg2_ea != idc.BADADDR: 291 | # 292 | # Iterate over all found addresses and add to list 293 | # 294 | 295 | push_instr_ea = cfg2_ea 296 | 297 | push_insn = ida_ua.insn_t() 298 | ida_ua.decode_insn(push_insn, push_instr_ea) 299 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 300 | 301 | push_op = push_insn.ops[0] 302 | 303 | jz_ea = push_instr_ea + push_insn.size 304 | jz_insn = ida_ua.insn_t() 305 | ida_ua.decode_insn(jz_insn, jz_ea) 306 | 307 | if maze_deobf_utils.CheckValidTargettingInstr(jz_insn, "jz"): 308 | jz_op = jz_insn.ops[0] 309 | 310 | # 311 | # Get the target address for the JZ 312 | # 313 | jz_target = 0 314 | if jz_op.type == 5: 315 | jz_target = jz_op.value 316 | elif jz_op.type == 6 or jz_op.type == 7: 317 | jz_target = jz_op.addr 318 | 319 | jnz_ea = jz_ea + jz_insn.size 320 | jnz_insn = ida_ua.insn_t() 321 | ida_ua.decode_insn(jnz_insn, jnz_ea) 322 | 323 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_insn, "jnz"): 324 | jnz_op = jnz_insn.ops[0] 325 | 326 | # 327 | # Get the target address for the JNZ 328 | # 329 | jnz_target = 0 330 | if jnz_op.type == 5: 331 | jnz_target = jnz_op.value 332 | elif jnz_op.type == 6 or jnz_op.type == 7: 333 | jnz_target = jnz_op.addr 334 | 335 | if jnz_target == jz_target: 336 | # 337 | # Destinations for Type Two must be the same 338 | # 339 | 340 | addr_list.add(push_instr_ea) 341 | 342 | 343 | cfg2_ea =ida_search.find_binary(cfg2_ea+4, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 344 | 345 | return addr_list 346 | 347 | 348 | def PatchCallTypeTwoCFGObfuscation(instr_ea): 349 | ''' 350 | @detail Find all of the CFG Obfuscations that match the following pattern (Type Two): 351 | 352 | 68 19 28 45 6E push 353 | 0F 84 05 3C 01 00 JZ xxxxxxxxxx 354 | 0F 85 FF 3B 01 00 JNZ xxxxxxxxxx 355 | 356 | The jump destination for both is the same. These are going to be patched to look like the 357 | following: 358 | 359 | 90 NOP 360 | 90 NOP 361 | 90 NOP 362 | 90 NOP 363 | 90 NOP 364 | 90 NOP 365 | CALL xxxxxxxxxx 366 | NOP 367 | <5 bytes> JMP 368 | ''' 369 | 370 | push_instr_ea = instr_ea 371 | 372 | push_insn = ida_ua.insn_t() 373 | ida_ua.decode_insn(push_insn, push_instr_ea) 374 | push_insn_target = maze_deobf_utils.GetInstuctionTargetAddress(push_insn) 375 | 376 | jz_instr_ea = push_instr_ea+push_insn.size 377 | jz_insn = ida_ua.insn_t() 378 | ida_ua.decode_insn(jz_insn, jz_instr_ea) 379 | jz_insn_target = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 380 | 381 | jnz_instr_ea = jz_instr_ea + jz_insn.size 382 | jnz_insn = ida_ua.insn_t() 383 | ida_ua.decode_insn(jnz_insn, jnz_instr_ea) 384 | 385 | call_instr_ea = jz_instr_ea + 1 386 | jmp_instr_ea = jnz_instr_ea + 1 387 | 388 | call_target = jz_insn_target 389 | jmp_target = push_insn_target 390 | 391 | # 392 | # Calculate the offset for the address that was pushed to the 393 | # stack by the push instruction and will now be used by the JMP 394 | # 395 | # offset = target_address - address_of_insn_after_CALL 396 | # 397 | jmp_dest_offset = (jmp_target - (jmp_instr_ea + 5)) & 0xFFFFFFFF 398 | 399 | # 400 | # Calculate the offset for the address that was the target for the 401 | # two JCC instructions and will now be used in a CALL 402 | # 403 | # offset = target_address - address_of_insn_after_CALL 404 | # 405 | call_dest_offset = (call_target - (call_instr_ea + 5)) & 0xFFFFFFFF 406 | 407 | 408 | #print hex(call_instr_ea), hex(call_instr_ea + 5 + new_call_dest_offset) + hex(new_call_dest_offset) 409 | 410 | # 411 | # Patch PUSH;JZ;JNZ 412 | # 413 | patch_byte(push_instr_ea, 0x90) 414 | patch_byte(push_instr_ea+1, 0x90) 415 | patch_byte(push_instr_ea+2, 0x90) 416 | patch_byte(push_instr_ea+3, 0x90) 417 | patch_byte(push_instr_ea+4, 0x90) 418 | patch_byte(call_instr_ea-1, 0x90) 419 | patch_byte(call_instr_ea, 0xE8) 420 | patch_dword(call_instr_ea+1, call_dest_offset) 421 | patch_byte(jmp_instr_ea-1, 0x90) 422 | patch_byte(jmp_instr_ea, 0xE9) 423 | patch_dword(jmp_instr_ea+1,jmp_dest_offset) 424 | 425 | # 426 | # Delete NOP; CALL; JMP 427 | # 428 | del_items(push_instr_ea,1) 429 | 430 | # 431 | # cleanup post JMP bytes 432 | # 433 | # 434 | patch_byte(jmp_instr_ea+5, 0x00) 435 | patch_byte(jmp_instr_ea+6, 0x00) 436 | patch_byte(jmp_instr_ea+7, 0x00) 437 | patch_byte(jmp_instr_ea+8, 0x00) 438 | 439 | # 440 | # Redifine NOP; CALL; JMP 441 | # 442 | create_insn(push_instr_ea) 443 | plan_and_wait(push_instr_ea, push_instr_ea+1) 444 | 445 | # 446 | # Make JUMP Target Code 447 | # 448 | del_items(jmp_target,1) 449 | jmp_target_insn = ida_ua.insn_t() 450 | ida_ua.decode_insn(jmp_target_insn, jmp_target) 451 | create_insn(jmp_target) 452 | plan_and_wait(jmp_target, jmp_target+jmp_target_insn.size) 453 | 454 | # 455 | # Make Call Target Code 456 | # 457 | del_items(call_target,1) 458 | call_target_insn = ida_ua.insn_t() 459 | ida_ua.decode_insn(call_target_insn, call_target) 460 | create_insn(call_target) 461 | plan_and_wait(call_target, call_target+call_target_insn.size) 462 | 463 | def WalkCallTypeThreeControlFlow(CallAddress, StartAddress): 464 | ''' 465 | @detail Start with the JNZ edge of a JZ/JNZ obfuscation and walk down until the 466 | the JNZ target address matches the Call address. This will handle multiple 467 | JZ/JNZ blocks for Call Type Three Obfuscations. The assumption is that when 468 | a call is obfuscated in this manner it has to always be called. 469 | 470 | Unused and can probably be deleted. 471 | 472 | ''' 473 | 474 | print "Call Target: %08x, Starting at: %08x" % (CallAddress,StartAddress) 475 | jnz_call_target_ea = StartAddress 476 | 477 | is_cal_type_three = False 478 | 479 | while jnz_call_target_ea != CallAddress: 480 | 481 | jnz_ea = jnz_call_target_ea 482 | jnz_insn = ida_ua.insn_t() 483 | ida_ua.decode_insn(jnz_insn, jnz_ea) 484 | 485 | if not maze_deobf_utils.CheckValidTargettingInstr(jnz_insn, "jnz"): 486 | ''' 487 | Break out of the loop if the instruction is not a JNZ 488 | ''' 489 | print "Failed at %08x" % jnz_ea 490 | break 491 | 492 | jnz_call_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 493 | 494 | if jnz_call_target_ea == CallAddress: 495 | is_cal_type_three = True 496 | #print "\tnext address: %08x" % jnz_call_target_ea 497 | 498 | return is_cal_type_three 499 | 500 | def FindCallTypeThreeCFGObfuscation(): 501 | ''' 502 | @detail Find all of the CFG Obfuscations that match the following pattern (Type Three): 503 | 504 | push 505 | 0000 JZ xxxxxxxx 506 | 0001 JNZ 0010 507 | .... 508 | 0010 JNZ xxxxxxxx 509 | 0011 JZ
;unreachable 510 | 511 | 512 | The jump destination for both is the same. 513 | ''' 514 | 515 | near_opcodes = "68 ? ? ? ? 0f 84 ? ? ? ? 75" 516 | short_opcodes = "68 ? ? ? ? 74 ? 75" 517 | 518 | end_ea = ida_ida.cvar.inf.max_ea 519 | 520 | addr_list = set() 521 | 522 | obfuscated_list = [] 523 | cfg = ida_search.find_binary(0, end_ea, near_opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 524 | while cfg != idc.BADADDR: 525 | 526 | obfuscated_list.append(cfg) 527 | cfg =ida_search.find_binary(cfg+4, end_ea, near_opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 528 | 529 | cfg = ida_search.find_binary(0, end_ea, short_opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 530 | while cfg != idc.BADADDR: 531 | 532 | obfuscated_list.append(cfg) 533 | cfg =ida_search.find_binary(cfg+4, end_ea, short_opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 534 | 535 | for cfg3_ea in obfuscated_list: 536 | 537 | 538 | push_instr_ea = cfg3_ea 539 | 540 | push_insn = ida_ua.insn_t() 541 | ida_ua.decode_insn(push_insn, push_instr_ea) 542 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 543 | 544 | push_op = push_insn.ops[0] 545 | 546 | jz_ea = push_instr_ea + push_insn.size 547 | jz_insn = ida_ua.insn_t() 548 | ida_ua.decode_insn(jz_insn, jz_ea) 549 | 550 | if maze_deobf_utils.CheckValidTargettingInstr(jz_insn, "jz"): 551 | # 552 | # Get the target address for the first JZ 553 | # 554 | 555 | jz_calltarget_ea = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 556 | 557 | jnz_ea = jz_ea + jz_insn.size 558 | jnz_insn = ida_ua.insn_t() 559 | ida_ua.decode_insn(jnz_insn, jnz_ea) 560 | 561 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_insn, "jnz"): 562 | 563 | jnz_calltarget_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 564 | 565 | if jnz_calltarget_ea != jz_calltarget_ea: 566 | # 567 | # These two values do not match for Call Type 3 568 | # 569 | 570 | addr_list.add(push_instr_ea) 571 | 572 | return addr_list 573 | 574 | 575 | def PatchJZJNZControlFlow(StartAddress, TargetAddress=0): 576 | """ 577 | @detail This code will zero out a JZ/JNZ code block and undefine the targets for 578 | each instruction. 579 | """ 580 | 581 | # 582 | # JNZ Instruction 583 | # 584 | jnz_ea = StartAddress 585 | jnz_insn = ida_ua.insn_t() 586 | ida_ua.decode_insn(jnz_insn, jnz_ea) 587 | jnz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 588 | 589 | # 590 | # Handle JZ instruction 591 | # 592 | jz_insn_ea = jnz_ea + jnz_insn.size 593 | jz_insn = ida_ua.insn_t() 594 | ida_ua.decode_insn(jz_insn, jz_insn_ea) 595 | jz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 596 | 597 | 598 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_ea, "jnz"): 599 | ZeroOutInstruction(jnz_insn) 600 | 601 | if maze_deobf_utils.CheckValidTargettingInstr(jz_insn_ea, "jz"): 602 | ZeroOutInstruction(jz_insn) 603 | 604 | 605 | 606 | del_items(jz_insn_target_ea,1) 607 | 608 | if jnz_insn_target_ea != TargetAddress: 609 | # 610 | # Undefine only if it isn't the address of a call instruction 611 | # 612 | 613 | del_items(jnz_insn_target_ea,1) 614 | 615 | print "[PatchJZJNZControlFlow], JNZ: %08x, JZ: %08x, JNZT: %08x, JZT: %08x" % (jnz_ea, jz_insn_ea, jnz_insn_target_ea, jz_insn_target_ea) 616 | 617 | 618 | def WalkAndPatchJZJNZControlFlow(TargetAddress, StartAddress): 619 | """ 620 | @detail Start with the JNZ edge of a JZ/JNZ obfuscation and walk down until the 621 | the JNZ target address matches the Call address. This will handle multiple 622 | JZ/JNZ blocks for Call Type Three Obfuscations. The assumption is that when 623 | a call is obfuscated in this manner it has to always be called. 624 | 625 | """ 626 | 627 | #print "Call Target: %08x, Starting at: %08x" % (CallAddress,StartAddress) 628 | jnz_call_target_ea = StartAddress 629 | 630 | 631 | while jnz_call_target_ea != TargetAddress: 632 | 633 | print "WalkAndPatchJZJNZControlFlow, start: %08x" % jnz_call_target_ea 634 | jnz_ea = jnz_call_target_ea 635 | jnz_insn = ida_ua.insn_t() 636 | ida_ua.decode_insn(jnz_insn, jnz_ea) 637 | 638 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_insn, "jnz"): 639 | ''' 640 | Current instruction is JNZ with valid target address 641 | ''' 642 | 643 | # 644 | # Patch control flow iff the JNZ target is also a JNZ 645 | # 646 | jnz_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 647 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_target_ea, "jnz"): 648 | print "WalkAndPatchJZJNZControlFlow, patch: %08x, Target Address: %08x" % (jnz_calljnz_target_ea_target_ea, TargetAddress) 649 | PatchJZJNZControlFlow(jnz_target_ea, TargetAddress) 650 | 651 | jnz_call_target_ea = jnz_target_ea 652 | 653 | def PatchCallTypeThreeCFGObfuscation(Instr_ea): 654 | """ 655 | @detail Patch all of the CFG Obfuscations that match the Call Type Three: 656 | 657 | 68 19 28 45 6E push 658 | 0F 84 05 3C 01 00 JZ xxxxxxxx 659 | 75 JNZ 0010 660 | .... 661 | 0010 JNZ xxxxxxxx 662 | 0011 JZ
663 | 664 | There can be a multitude of JNZ/JZ blocks that ultimately end up executing the function address xxxxxxxx. The 665 | patch is straightfoward: 666 | 667 | 90 NOP 668 | 90 NOP 669 | 90 NOP 670 | 90 NOP 671 | 90 NOP 672 | CALL xxxxxxxx (5 bytes) 673 | JMP (2 bytes) 674 | """ 675 | 676 | #print "Type 3: %08x" % Instr_ea 677 | 678 | # 679 | # Handle the push instruction 680 | # 681 | push_instr_ea = Instr_ea 682 | push_insn = ida_ua.insn_t() 683 | ida_ua.decode_insn(push_insn, push_instr_ea) 684 | push_insn_target = maze_deobf_utils.GetInstuctionTargetAddress(push_insn) 685 | 686 | deobf_jmp_target_ea = push_insn_target 687 | 688 | # 689 | # Handle JZ instruction 690 | # 691 | jz_insn_ea = push_instr_ea + push_insn.size 692 | jz_insn = ida_ua.insn_t() 693 | ida_ua.decode_insn(jz_insn, jz_insn_ea) 694 | jz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 695 | 696 | deobf_call_target_ea = jz_insn_target_ea 697 | 698 | 699 | # 700 | # JNZ Instruction 701 | # 702 | jnz_ea = jz_insn_ea + jz_insn.size 703 | jnz_insn = ida_ua.insn_t() 704 | ida_ua.decode_insn(jnz_insn, jnz_ea) 705 | 706 | WalkAndPatchJZJNZControlFlow(deobf_call_target_ea, jnz_ea) 707 | 708 | deobf_call_ea = push_instr_ea+1 709 | deobf_jmp_ea = push_instr_ea+6 710 | 711 | # 712 | # Calculate the offset for the relative JMP instruction, the value that 713 | # was previously the target for the PUSH instruction 714 | # 715 | # offset = target_address - address_of_insn_after_JMP 716 | # 717 | deobf_jmp_dest_offset = (deobf_jmp_target_ea - (deobf_jmp_ea+5)) & 0xFFFFFFFF 718 | 719 | # 720 | # Calculate the offset for the relative CALL instruction, the value that 721 | # was previously the target for the JZ instruction 722 | # 723 | # offset = target_address - address_of_insn_after_CALL 724 | # 725 | deobf_call_dest_offset = (deobf_call_target_ea - (deobf_call_ea + 5)) & 0xFFFFFFFF 726 | 727 | #print "Push instr ea: %08x, Call Instr ea: %08x, Call Instr Offset: %08x" % (push_instr_ea,deobf_call_ea,deobf_call_dest_offset) 728 | #print "JMP Ea: %08x, Offset %08x" % (deobf_jmp_ea, deobf_jmp_dest_offset) 729 | 730 | 731 | # 732 | # Cleanup area after JMP instruction 733 | # 734 | clenaup_junk_code_ea = deobf_jmp_ea + 5 735 | patch_byte(clenaup_junk_code_ea, 0x00) 736 | patch_byte(clenaup_junk_code_ea+1, 0x00) 737 | patch_byte(clenaup_junk_code_ea+2, 0x00) 738 | patch_byte(clenaup_junk_code_ea+3, 0x00) 739 | 740 | # 741 | # Patch JNZ and JZ w/ Call;JMP 742 | # 743 | patch_byte(push_instr_ea, 0x90) 744 | patch_byte(deobf_call_ea, 0xE8) 745 | patch_dword(deobf_call_ea+1,deobf_call_dest_offset) 746 | patch_byte(deobf_jmp_ea, 0xE9) 747 | patch_dword(deobf_jmp_ea+1,deobf_jmp_dest_offset) 748 | 749 | del_items(push_instr_ea,1) 750 | 751 | # 752 | # Make instructions starting with the first NOP (push_instr_ea) 753 | # 754 | create_insn(push_instr_ea) 755 | plan_and_wait(push_instr_ea, push_instr_ea+push_insn.size) 756 | 757 | # 758 | # Make JMP destination code 759 | # 760 | del_items(deobf_jmp_target_ea,1) 761 | deobf_jmp_dst_insn = ida_ua.insn_t() 762 | ida_ua.decode_insn(deobf_jmp_dst_insn, deobf_jmp_target_ea) 763 | create_insn(deobf_jmp_target_ea) 764 | plan_and_wait(deobf_jmp_target_ea, deobf_jmp_target_ea+deobf_jmp_dst_insn.size) 765 | 766 | # 767 | # Make CALL destination code 768 | # 769 | del_items(deobf_call_target_ea,1) 770 | deobf_call_dst_insn = ida_ua.insn_t() 771 | ida_ua.decode_insn(deobf_call_dst_insn, deobf_call_target_ea) 772 | create_insn(deobf_call_target_ea) 773 | plan_and_wait(deobf_call_target_ea, deobf_call_target_ea+deobf_call_dst_insn.size) 774 | 775 | def FindAbsoluteJumps(): 776 | """ 777 | @brief Locates short JZ, near JZ, and near JNZ obfuscations. 778 | 779 | @detail Locates short JZ, near JZ, and near JNZ obfuscations. For example: 780 | 781 | 000: 74 55 jz short loc_6E456049 782 | 002: 75 04 jnz short loc_008 783 | 004: 0E push cs 784 | 005: 02 00 add al, [eax] 785 | ;----------------------------------------------------------- 786 | 007: 00 db 0 787 | ;----------------------------------------------------------- 788 | 008: 75 0A jnz short loc_6E456006 789 | 00A: 74 04 jz short loc_6E456002 790 | 00C: DB 1A fistp dword ptr [edx] 791 | 792 | This obfuscation is actually an absolute jump. In some occasions, there are multiple sequences of these 793 | JZ/JNZ obfuscations chained together. This will only find the obfuscations, the distinction between each 794 | isn't important until it comes time to deobfuscate. 795 | 796 | "74 ? 75" type short jumps can cause issues because the byte-search is generic and can hit on things that are not 797 | instructions. Verfiication is done to determined if the match is in a CODE segment. This will help avoid hitting on 798 | strings located in data segments. 799 | 800 | @return A list containing each address for the located JZ/JNZ obfuscation 801 | 802 | """ 803 | 804 | jz_short_jmp = "74 ? 75" 805 | jz_near_jmp = "0F 84 ? ? ? ? 75" 806 | jnz_near_jmp = "74 ? 0F 85" 807 | 808 | end_ea = ida_ida.cvar.inf.max_ea 809 | 810 | addr_list = set() 811 | 812 | abs_jmp_list = set() 813 | 814 | obfuscated_list = [] 815 | 816 | abs_jmp_ea = ida_search.find_binary(0, end_ea, jz_short_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 817 | while abs_jmp_ea != idc.BADADDR: 818 | #print "Short jump: %08x" % abs_jmp_ea 819 | obfuscated_list.append(abs_jmp_ea) 820 | abs_jmp_ea =ida_search.find_binary(abs_jmp_ea+4, end_ea, jz_short_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 821 | 822 | abs_jmp_ea = ida_search.find_binary(0, end_ea, jz_near_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 823 | while abs_jmp_ea != idc.BADADDR: 824 | obfuscated_list.append(abs_jmp_ea) 825 | abs_jmp_ea =ida_search.find_binary(abs_jmp_ea+4, end_ea, jz_near_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 826 | 827 | abs_jmp_ea = ida_search.find_binary(0, end_ea, jnz_near_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 828 | while abs_jmp_ea != idc.BADADDR: 829 | obfuscated_list.append(abs_jmp_ea) 830 | abs_jmp_ea =ida_search.find_binary(abs_jmp_ea+4, end_ea, jnz_near_jmp, 0, SEARCH_DOWN | SEARCH_CASE) 831 | 832 | 833 | #print "[START] Find absolute jumps." 834 | 835 | 836 | for abs_jmp_ea in obfuscated_list: 837 | # 838 | # Walk each found obfuscation 839 | # 840 | 841 | # 842 | # Get, Decode, and Verify JZ instruction 843 | # 844 | jz_insn_ea = abs_jmp_ea 845 | jz_insn = ida_ua.insn_t() 846 | ida_ua.decode_insn(jz_insn, jz_insn_ea) 847 | 848 | #print "%08x - check" % abs_jmp_ea 849 | if maze_deobf_utils.CheckValidTargettingInstr(jz_insn, "jz"): 850 | 851 | 852 | prev_insn_ea = jz_insn_ea - 5 853 | prev_insn = ida_ua.insn_t() 854 | ida_ua.decode_insn(prev_insn, prev_insn_ea) 855 | if maze_deobf_utils.CheckValidTargettingInstr(prev_insn, "push"): 856 | # 857 | # if previous instruction is a push
, 858 | # this is an obfuscated call, skip to next located absolute jump 859 | # 860 | 861 | continue 862 | 863 | # 864 | # Get, Decode, and Verify JZ instruction 865 | # 866 | jnz_insn_ea = jz_insn_ea + jz_insn.size 867 | jnz_insn = ida_ua.insn_t() 868 | ida_ua.decode_insn(jnz_insn, jnz_insn_ea) 869 | 870 | #print "%08x - JZ" % abs_jmp_ea 871 | if maze_deobf_utils.CheckValidTargettingInstr(jnz_insn, "jnz"): 872 | # 873 | # Absolute JMP found 874 | # 875 | 876 | #print "%08x - verified" % abs_jmp_ea 877 | addr_list.add(jz_insn_ea) 878 | 879 | return addr_list 880 | 881 | def FollowJNZForAbsoluteJumpTarget(Insn_ea): 882 | """ 883 | @detail Follow the JNZ branch of an absolute JMP obfuscation until 884 | an instruction other than JNZ is reached. Once this occurs, return 885 | the address of the non-JNZ instruction. 886 | 887 | @return jnz_target_ea Target address for absolute JMP 888 | """ 889 | 890 | jnz_insn_target_ea = idc.BADADDR 891 | jnz_insn_ea = Insn_ea 892 | insn_dism = generate_disasm_line(jnz_insn_ea,1) 893 | 894 | if insn_dism.startswith("jnz"): 895 | while insn_dism.startswith("jnz"): 896 | # 897 | # Follow target until instruction is not a JNZ 898 | # 899 | 900 | # 901 | # Get JNZ Instruction 902 | # 903 | jnz_insn = ida_ua.insn_t() 904 | ida_ua.decode_insn(jnz_insn, jnz_insn_ea) 905 | jnz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 906 | 907 | # 908 | # Get disassembly for JNZ target 909 | # 910 | jnz_insn_ea = jnz_insn_target_ea 911 | insn_dism = generate_disasm_line(jnz_insn_ea,1) 912 | 913 | else: 914 | # 915 | # Instruction is not JNZ, target address is Insn_ea 916 | # 917 | 918 | jnz_insn_target_ea = Insn_ea 919 | 920 | return jnz_insn_target_ea 921 | 922 | def PatchAbsoluteJump(Insn_ea): 923 | """ 924 | @breif Deobfuscate absolute jump obfuscation types 925 | 926 | @detail Patches Type One and Type Two absolute jumps. There is no need to worry about 927 | any of the located instruction matching the CALL obfuscations, those have already 928 | been dealt with. 929 | 930 | 0000 JZ xxxxxxxx 931 | 0001 JNZ
932 | 933 | The JZ/JNZ block is walked looking for the target address of the abosolute JMP, the initial JZ is not touched. 934 | 935 | 0000 JZ xxxxxxxx 936 | 0001 JMP 937 | """ 938 | 939 | print "Absolute Jump Address: %08x" % (Insn_ea) 940 | jz_insn_ea = Insn_ea 941 | 942 | abs_jump_ea = idc.BADADDR 943 | abs_jmp_target_ea = idc.BADADDR 944 | 945 | # 946 | # Get JZ instruction 947 | # 948 | jz_insn = ida_ua.insn_t() 949 | ida_ua.decode_insn(jz_insn, jz_insn_ea) 950 | jz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 951 | 952 | # 953 | # Get JNZ Instruction 954 | # 955 | jnz_insn_ea = jz_insn_ea + jz_insn.size 956 | jnz_insn = ida_ua.insn_t() 957 | ida_ua.decode_insn(jnz_insn, jnz_insn_ea) 958 | jnz_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jnz_insn) 959 | 960 | abs_jmp_ea = jz_insn_ea + jz_insn.size 961 | abs_jmp_target_ea = FollowJNZForAbsoluteJumpTarget(jnz_insn_target_ea) 962 | 963 | WalkAndPatchJZJNZControlFlow(abs_jmp_target_ea, jnz_insn_ea) 964 | 965 | if jz_insn_target_ea == jnz_insn_target_ea: 966 | 967 | abs_jmp_target_offset = (abs_jmp_target_ea - (abs_jmp_ea+5)) & 0xFFFFFFFF 968 | 969 | del_items(jz_insn_ea,1) 970 | 971 | ZeroOutInstruction(jnz_insn) 972 | 973 | else: 974 | 975 | 976 | 977 | # 978 | # Calculate the offset for the relative JMP instruction. 979 | # 980 | # offset = target_address - address_of_insn_after_JMP 981 | # 982 | abs_jmp_target_offset = (abs_jmp_target_ea - (abs_jmp_ea+5)) & 0xFFFFFFFF 983 | 984 | print "JMP EA: %08x, Absolute JMP Target EA: %08x, Offset: %08x" % (abs_jmp_ea, abs_jmp_target_ea, abs_jmp_target_offset) 985 | 986 | del_items(jz_insn_ea,1) 987 | 988 | # 989 | # Apply patch 990 | # 991 | patch_byte(abs_jmp_ea, 0xE9) 992 | patch_dword(abs_jmp_ea+1, abs_jmp_target_offset) 993 | 994 | create_insn(jz_insn_ea) 995 | 996 | def FindObfuscatedWindowsAPICalls(): 997 | ''' 998 | @detail This obfuscation is used to resolve and call Windows procedures. Here is the obfuscation: 999 | 1000 | 68 3F 25 00 00 push 253Fh ; unknown 1001 | 68 DC 20 DE 30 push 30DE20DCh ; hash 1002 | E8 0D 00 00 00 call loc_6E44BCBF ; procedure call 1003 | 6B 65 72 6E 65 6C 33 32 2E 64 6C 6C 00 'kernel32.dll',0 1004 | 6E44BCBF 68 00 BD 44 6E push offset callwindowsproc_6E44BD00 ; ret addr 1005 | 6E44BCC4 0F 84 56 58 FE FF jz findLibrary 1006 | 6E44BCCA 75 04 jnz short loc_6E44BCD0 1007 | 1008 | It is not used for every call to a Windows procedure. However, this function will identify the 1009 | following pattern: 1010 | 1011 | 68 3F 25 00 00 push 253Fh ; unknown 1012 | 68 DC 20 DE 30 push 30DE20DCh ; hash 1013 | E8 0D 00 00 00 call loc_6E44BCBF ; procedure call 1014 | 6B 65 72 6E 65 6C 33 32 2E 64 6C 6C 00 aKernel32Dll_44 db 'kernel32.dll',0 1015 | 1016 | The match will search for the PUSH->PUSH->CALL and then validate that the bytes following 1017 | call instruction is an ASCII string that ends with ".dll" 1018 | 1019 | ''' 1020 | 1021 | 1022 | opcodes = "68 ? ? ? ? 68 ? ? ? ? E8" 1023 | 1024 | end_ea = ida_ida.cvar.inf.max_ea 1025 | 1026 | addr_list = set() 1027 | 1028 | obfuscated_list = [] 1029 | cfg6_ea = ida_search.find_binary(0, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 1030 | 1031 | while cfg6_ea != idc.BADADDR: 1032 | 1033 | first_push_instr_ea = cfg6_ea 1034 | first_push_insn = ida_ua.insn_t() 1035 | ida_ua.decode_insn(first_push_insn, first_push_instr_ea) 1036 | 1037 | #print "First Push %08x" % first_push_instr_ea 1038 | if maze_deobf_utils.CheckValidInstrImmediate(first_push_insn,"push"): 1039 | 1040 | second_push_instr_ea = first_push_instr_ea + first_push_insn.size 1041 | second_push_insn = ida_ua.insn_t() 1042 | ida_ua.decode_insn(second_push_insn, second_push_instr_ea) 1043 | 1044 | #print "Second Push %08x" % second_push_instr_ea 1045 | if maze_deobf_utils.CheckValidInstrImmediate(second_push_insn,"push"): 1046 | 1047 | call_instr_ea = second_push_instr_ea + second_push_insn.size 1048 | call_insn = ida_ua.insn_t() 1049 | ida_ua.decode_insn(call_insn, call_instr_ea) 1050 | call_insn_disasm = generate_disasm_line(call_instr_ea,1) 1051 | 1052 | #print "Call %08x" % call_instr_ea 1053 | if call_insn_disasm.startswith("call"): 1054 | 1055 | call_insn_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(call_insn) 1056 | #print "Call target %08x" % call_insn_target_ea 1057 | if maze_deobf_utils.CheckValidTarget(call_instr_ea, call_insn_target_ea): 1058 | 1059 | module_name_ea = call_instr_ea + call_insn.size 1060 | 1061 | #print "Module Name %08x" % module_name_ea 1062 | if maze_deobf_utils.CheckIsDllName(module_name_ea): 1063 | addr_list.add(first_push_instr_ea) 1064 | 1065 | # 1066 | # Next match 1067 | # 1068 | cfg6_ea = ida_search.find_binary(cfg6_ea+5, end_ea, opcodes, 0, SEARCH_DOWN | SEARCH_CASE) 1069 | 1070 | return addr_list 1071 | 1072 | def PatchObfuscatedWindowsAPICalls(Insn_ea): 1073 | ''' 1074 | @detail This patch is a bit tricky and can be easily messed with by the malware's author. Here is the 1075 | original 1076 | 1077 | 68 3F 25 00 00 push 253Fh ; unknown 1078 | 68 DC 20 DE 30 push 30DE20DCh ; hash 1079 | E8 0D 00 00 00 call loc_6E44BCBF ; procedure call 1080 | 6B 65 72 6E 65 6C 33 32 2E 64 6C 6C 00 'kernel32.dll',0 1081 | 6E44BCBF 68 00 BD 44 6E push offset callwindowsproc_6E44BD00 ; ret addr 1082 | 6E44BCC4 0F 84 56 58 FE FF jz findLibrary 1083 | 6E44BCCA 75 04 jnz findLibrary 1084 | 1085 | The patch is going to look like the following set of instructions: 1086 | 1087 | 68 3F 25 00 00 push 253Fh ; unknown 1088 | 68 DC 20 DE 30 push 30DE20DCh ; hash 1089 | 68 XX XX XX XX push XXXXXXXX ; address of kernel32.dll string 1090 | E8 3E F4 FC FF call findLibrary 1091 | E9 31 00 00 00 jmp loc_6E44BD00 1092 | XXXXXXXX 65 72 6E 65 6C 33 32 2E 64 6C 6C 00 aKernel32Dll_44 db 'kernel32.dll',0 1093 | 1094 | The address of the "findLibrary" call needs to be found, the module name string 1095 | needs to be moved, and the push to someplace else for this patch to be effective. 1096 | 1097 | My initial patch is not going to worry about moving the module name, only locating 1098 | the address of findLibrary. Fortunately, that is simple enough the original call instruction 1099 | can be followed to get both the address of the push instruction AND the return address that is 1100 | pushed on the stack. 1101 | 1102 | ''' 1103 | 1104 | first_push_ea = Insn_ea 1105 | # 1106 | # Get first Push information 1107 | # 1108 | first_push_insn = ida_ua.insn_t() 1109 | ida_ua.decode_insn(first_push_insn, first_push_ea) 1110 | 1111 | second_push_ea = first_push_ea + first_push_insn.size 1112 | # 1113 | # Get second Push information 1114 | # 1115 | second_push_insn = ida_ua.insn_t() 1116 | ida_ua.decode_insn(second_push_insn, second_push_ea) 1117 | 1118 | orig_call_ea = second_push_ea + second_push_insn.size 1119 | # 1120 | # Get call information, call to get to findlibrary 1121 | # 1122 | orig_call_insn = ida_ua.insn_t() 1123 | ida_ua.decode_insn(orig_call_insn, orig_call_ea) 1124 | orig_call_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(orig_call_insn) 1125 | 1126 | module_name_ea = orig_call_ea + orig_call_insn.size 1127 | 1128 | orig_call_target_insn = ida_ua.insn_t() 1129 | ida_ua.decode_insn(orig_call_target_insn, orig_call_target_ea) 1130 | 1131 | push_retaddr_insn_ea = orig_call_target_ea 1132 | push_ret_addr_insn = ida_ua.insn_t() 1133 | ida_ua.decode_insn(push_ret_addr_insn, push_retaddr_insn_ea) 1134 | 1135 | # 1136 | # Get address of find library and return address 1137 | # 1138 | if maze_deobf_utils.CheckValidTargettingInstr(push_ret_addr_insn,"push"): 1139 | 1140 | #print "Type6 push: %08x" % push_retaddr_insn_ea 1141 | 1142 | findlibrary_return_address = maze_deobf_utils.GetInstuctionTargetAddress(push_ret_addr_insn) 1143 | 1144 | 1145 | 1146 | # 1147 | # JZ calllibrary instruction 1148 | # 1149 | findlibrary_jz_ea = push_retaddr_insn_ea + push_ret_addr_insn.size 1150 | findlibrary_jz_insn = ida_ua.insn_t() 1151 | ida_ua.decode_insn(findlibrary_jz_insn, findlibrary_jz_ea) 1152 | 1153 | if maze_deobf_utils.CheckValidTargettingInstr(findlibrary_jz_insn,"jz"): 1154 | 1155 | #print "Type6 jz: %08x" % findlibrary_jz_ea 1156 | 1157 | findlibrary_address = maze_deobf_utils.GetInstuctionTargetAddress(findlibrary_jz_insn) 1158 | 1159 | push_modulename_insn_ea = orig_call_ea 1160 | call_find_library_ea = orig_call_ea + 5 1161 | jmp_ret_addr_ea = orig_call_ea + 10 1162 | 1163 | call_findlibrary_offset = (findlibrary_address - (call_find_library_ea + 5)) & 0xFFFFFFFF 1164 | jmp_ret_addr_offset = (findlibrary_return_address - (jmp_ret_addr_ea + 5)) & 0xFFFFFFFF 1165 | 1166 | # 1167 | # JNZ calllibrary instruction, to be zero'd out 1168 | # 1169 | findlibrary_jnz_ea = findlibrary_jz_ea + findlibrary_jz_insn.size 1170 | findlibrary_jnz_insn = ida_ua.insn_t() 1171 | ida_ua.decode_insn(findlibrary_jnz_insn, findlibrary_jnz_ea) 1172 | jnz_calllibrary_target = maze_deobf_utils.GetInstuctionTargetAddress(findlibrary_jnz_insn) 1173 | 1174 | # 1175 | # Prepare module name related addresses 1176 | # 1177 | new_modulename_ea = jmp_ret_addr_ea + 5 1178 | module_name = GetModuleName(module_name_ea) 1179 | 1180 | if module_name: 1181 | del_items(push_modulename_insn_ea,1) 1182 | 1183 | ''' 1184 | START cleanup of previous instructions 1185 | ''' 1186 | 1187 | # 1188 | # Zero out previous string 1189 | # 1190 | module_name_len = len(module_name) 1191 | 1192 | zero_name_idx = 0 1193 | while zero_name_idx < module_name_len: 1194 | patch_byte( module_name_ea+zero_name_idx, 0x00 ) 1195 | zero_name_idx += 1 1196 | 1197 | if jnz_calllibrary_target != findlibrary_address: 1198 | # 1199 | # Check if Type Three, if so, Zero out the next set of JZ/JNZ instructions as well 1200 | # 1201 | 1202 | junk_type3_jnz_insn_ea = jnz_calllibrary_target 1203 | junk_type3_jnz_insn = ida_ua.insn_t() 1204 | ida_ua.decode_insn(junk_type3_jnz_insn, junk_type3_jnz_insn_ea) 1205 | 1206 | junk_type3_jz_insn_ea = junk_type3_jnz_insn_ea + junk_type3_jnz_insn.size 1207 | junk_type3_jz_insn = ida_ua.insn_t() 1208 | ida_ua.decode_insn(junk_type3_jz_insn, junk_type3_jz_insn_ea) 1209 | 1210 | # 1211 | # Zero them out 1212 | # 1213 | ZeroOutInstruction(junk_type3_jnz_insn) 1214 | ZeroOutInstruction(junk_type3_jz_insn) 1215 | 1216 | # 1217 | # NULL out original instructions related to FindLibrary 1218 | # 1219 | ZeroOutInstruction(push_ret_addr_insn) 1220 | ZeroOutInstruction(findlibrary_jz_insn) 1221 | ZeroOutInstruction(findlibrary_jnz_insn) 1222 | ''' 1223 | END cleanup of previous instructions 1224 | ''' 1225 | 1226 | module_name_idx = new_modulename_ea 1227 | for char in module_name: 1228 | # 1229 | # Write module name to new address 1230 | # 1231 | 1232 | patch_byte(module_name_idx,ord(char)) 1233 | module_name_idx += 1 1234 | #print hex(module_name_idx), char 1235 | patch_byte(module_name_idx,0x00) 1236 | 1237 | # 1238 | # Add push module name string instruction 1239 | # 1240 | CleanupPatchArea(push_modulename_insn_ea,5) 1241 | patch_byte(push_modulename_insn_ea,0x68) 1242 | patch_dword(push_modulename_insn_ea+1, new_modulename_ea) 1243 | 1244 | # 1245 | # Add call findlibrary instruction 1246 | # 1247 | CleanupPatchArea(call_find_library_ea,5) 1248 | patch_byte(call_find_library_ea,0xE8) 1249 | patch_dword(call_find_library_ea+1, call_findlibrary_offset) 1250 | 1251 | # 1252 | # Add JMP ret addr instruction 1253 | # 1254 | CleanupPatchArea(jmp_ret_addr_ea,5) 1255 | patch_byte(jmp_ret_addr_ea,0xE9) 1256 | patch_dword(jmp_ret_addr_ea+1, jmp_ret_addr_offset) 1257 | 1258 | create_insn(push_modulename_insn_ea) 1259 | create_insn(call_find_library_ea) 1260 | create_insn(jmp_ret_addr_ea) 1261 | create_strlit(new_modulename_ea, module_name_idx) 1262 | 1263 | 1264 | def WalkBasicBlockUntilTerminator(Start_ea): 1265 | ''' 1266 | @detail This function will identify the start and ending instructions for a basic block. 1267 | 1268 | Notice, call instructions are not terminators and will not be followed, this is not part of a 1269 | recursive descent parser. All it does is find the start and end of a basic: ingress and egress. 1270 | 1271 | It tracks start address, end address, all addresses in-between, and call instructions. 1272 | 1273 | @return address of last instruction in the basic block 1274 | ''' 1275 | 1276 | # 1277 | # All jump terminators 1278 | # 1279 | terminators = ['retn','jmp','jnz','jz','jo','jno', 'js','jns', 'je','jne', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jbe','jna', 'ja','jnbe', 'jl','jnge','jge','jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 1280 | 1281 | bb_start_ea = Start_ea 1282 | curr_insn_ea = bb_start_ea 1283 | 1284 | bb_end_ea = -1 1285 | 1286 | while curr_insn_ea != idc.BADADDR: 1287 | # 1288 | # Not ideal, but we will have to rely on other methods to determine if 1289 | # a false positive is reached. However, if the address provided is always a legitimate instruction 1290 | # it has to lead to a terminator at some point. 1291 | # 1292 | 1293 | curr_insn = ida_ua.insn_t() 1294 | decoded_success = ida_ua.decode_insn(curr_insn, curr_insn_ea) 1295 | 1296 | if decoded_success > 0: 1297 | # 1298 | # Ensure instruction was properly decoded 1299 | # 1300 | 1301 | curr_insn_dism = generate_disasm_line(curr_insn_ea,1) 1302 | 1303 | # 1304 | # Undefine instruction 1305 | # 1306 | del_items(curr_insn_ea,1) 1307 | 1308 | if curr_insn_dism.startswith( tuple( terminators ) ): 1309 | # 1310 | # End of block found 1311 | # 1312 | 1313 | bb_end_ea = curr_insn_ea 1314 | break 1315 | 1316 | curr_insn_ea = curr_insn_ea + curr_insn.size 1317 | 1318 | # 1319 | # Redefine instruction as code 1320 | # 1321 | create_insn(curr_insn_ea) 1322 | else: 1323 | break 1324 | 1325 | return bb_end_ea 1326 | 1327 | def GetFunctionEpiloguesOne(): 1328 | ''' 1329 | @detail Identify function epilogues with two different search pattenrs (below). Once the two byte 1330 | patterns have been identified, each instruction will be walked from the first byte pattern 1331 | down to the second. This should build a basic block for the function. 1332 | 1333 | 6E454237 81 C4 08 08 00 00 add esp, 808h 1334 | 1335 | The first byte pattern looks for cleanup the space allocated for local variables on the stack 1336 | for the stack frame. This is typically followed by restoring registers that were preserved by 1337 | the prologue, and terminated by a return-like instruction. 1338 | 1339 | 6E454245 FF 64 24 FC jmp dword ptr [esp-4] 1340 | 1341 | The second byte pattern is the return instruction. In this case, the "jmp esp-4" instruction is 1342 | proceeded by some form of incrementing the stack pointer by four. This increment will shift esp 1343 | past the return address that pushed by the call (thus, esp-4 for the jmp). 1344 | 1345 | If the first byte pattern reaches any version of the return instruction (retn or jmp esp-4) then 1346 | the address of the first instruction, address of the second instruction, the restored registers, 1347 | and the value that esp was shifted by the "add esp, x" instructions are all saved. 1348 | 1349 | The purpsoe of knowning which registers are restored and the value added to esp is going to come up 1350 | when attempting to identify the prologue. Knowing this information will help know what to expect at the 1351 | start of the prologue AND if the prologue goes with this epilogue. That is both the registers and the 1352 | value for esp should be the same for both epilogue and prologue. 1353 | 1354 | @return epilogues_found { start_ea:{ "epilogue":FunctionEpilogue } } 1355 | ''' 1356 | 1357 | dword_stackframe_cleanup_opcode = "81 C4" 1358 | byte_stackframe_cleanup_opcode = "83 C4" 1359 | return_insn_opcode = "FF 64 24 FC" 1360 | 1361 | end_ea = ida_ida.cvar.inf.max_ea 1362 | stackframe_cleanup_set = set() 1363 | return_insn_set = set() 1364 | 1365 | epilogues_found = {} 1366 | stack_immediates_found = {} 1367 | 1368 | 1369 | # 1370 | # Find stack cleanup instructions 1371 | # 1372 | epi1_ea = ida_search.find_binary(0, end_ea, dword_stackframe_cleanup_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1373 | while epi1_ea != idc.BADADDR: 1374 | stackframe_cleanup_set.add(epi1_ea) 1375 | epi1_ea = ida_search.find_binary(epi1_ea+5, end_ea, dword_stackframe_cleanup_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1376 | 1377 | # 1378 | # Find stack cleanup instructions 1379 | # 1380 | epi1_ea = ida_search.find_binary(0, end_ea, byte_stackframe_cleanup_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1381 | while epi1_ea != idc.BADADDR: 1382 | stackframe_cleanup_set.add(epi1_ea) 1383 | epi1_ea = ida_search.find_binary(epi1_ea+5, end_ea, byte_stackframe_cleanup_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1384 | 1385 | # 1386 | # Find return instructions 1387 | # 1388 | epi1_ea = ida_search.find_binary(0, end_ea, return_insn_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1389 | while epi1_ea != idc.BADADDR: 1390 | return_insn_set.add(epi1_ea) 1391 | epi1_ea = ida_search.find_binary(epi1_ea+7, end_ea, return_insn_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1392 | 1393 | for epi1_ea in stackframe_cleanup_set: 1394 | # 1395 | # Start with the stack frame cleanup addresses. This seems to be the most natural way to start. 1396 | # A stack frame cleanup address should lead to either an actual retn instruction or one of the 1397 | # return addresses in return_insn_set. If it doesn't, put it in an unmatched set and ignore it. 1398 | # 1399 | 1400 | stackframe_cleanup_immediate = 0 1401 | bb_start_ea = 0 1402 | bb_end_ea = 0 1403 | unmatched_stackframe_set = set() 1404 | unmatched_return_set = set() 1405 | saved_registers = [] 1406 | 1407 | addesp_ea = epi1_ea 1408 | addesp_insn = ida_ua.insn_t() 1409 | ida_ua.decode_insn(addesp_insn, addesp_ea) 1410 | 1411 | addesp_dism = generate_disasm_line(addesp_ea,1) 1412 | 1413 | epilogue_data = {} 1414 | saved_immediates = [] 1415 | 1416 | 1417 | if (addesp_dism.startswith("add")) and ("esp," in addesp_dism): 1418 | if addesp_insn.ops[1].type == 5: 1419 | # 1420 | # verify second operand is an immediate (type 5) 1421 | # 1422 | 1423 | stackframe_cleanup_immediate = addesp_insn.ops[1].value 1424 | 1425 | next_insn_ea = addesp_ea + addesp_insn.size 1426 | ctr = 0 1427 | max_depth = 50 1428 | while ctr < max_depth: 1429 | # 1430 | # Only walk to max_depth 1431 | # 1432 | 1433 | ctr += 1 1434 | next_insn = ida_ua.insn_t() 1435 | decoded_success = ida_ua.decode_insn(next_insn, next_insn_ea) 1436 | 1437 | if decoded_success > 0: 1438 | # 1439 | # Ensure instruction was properly decoded 1440 | # 1441 | #print "epi1_ea: %08x, Insn_3a: %08x" % (addesp_ea,next_insn_ea) 1442 | next_insn_dism = generate_disasm_line(next_insn_ea,1) 1443 | if (next_insn_dism.startswith("jmp")) and ("esp" in next_insn_dism): 1444 | if next_insn_ea in return_insn_set: 1445 | # 1446 | # Find jmp instruction in set 1447 | # 1448 | 1449 | bb_end_ea = next_insn_ea 1450 | bb_start_ea = addesp_ea 1451 | break 1452 | else: 1453 | # 1454 | # break if JMP of any other kind 1455 | # 1456 | break 1457 | 1458 | elif next_insn_dism.startswith("retn"): 1459 | # 1460 | # straight return instruction, save 1461 | # 1462 | 1463 | bb_end_ea = next_insn_ea 1464 | bb_start_ea = addesp_ea 1465 | break 1466 | 1467 | elif next_insn_dism.startswith("pop"): 1468 | if next_insn.ops[0].type == 1: 1469 | # 1470 | # Pop general register, restore pre-function reg value 1471 | # 1472 | 1473 | saved_registers.append(next_insn.ops[0].reg) 1474 | 1475 | # 1476 | # Next two checks look for JZ->JNZ and JNZ->JZ, break out of loop 1477 | # if this is true. 1478 | # 1479 | elif next_insn_dism.startswith("jz"): 1480 | next_next_insn_ea = next_insn_ea + next_insn.size 1481 | next_next_insn = ida_ua.insn_t() 1482 | decoded_success = ida_ua.decode_insn(next_next_insn, next_next_insn_ea) 1483 | 1484 | if decoded_success > 0: 1485 | next_next_isn_disasm = generate_disasm_line(next_next_insn_ea,1) 1486 | if next_next_isn_disasm.startswith("jnz"): 1487 | break 1488 | elif next_insn_dism.startswith("jnz"): 1489 | 1490 | next_next_insn_ea = next_insn_ea + next_insn.size 1491 | next_next_insn = ida_ua.insn_t() 1492 | decoded_success = ida_ua.decode_insn(next_next_insn, next_next_insn_ea) 1493 | 1494 | if decoded_success > 0: 1495 | next_next_isn_disasm = generate_disasm_line(next_next_insn_ea,1) 1496 | if next_next_isn_disasm.startswith("jz"): 1497 | break 1498 | 1499 | next_insn_ea = next_insn_ea + next_insn.size 1500 | 1501 | #print "Epilogue end: %08x" % bb_end_ea 1502 | 1503 | if bb_start_ea > 0: 1504 | basic_block = maze_functions.BasicBlock(bb_start_ea, bb_end_ea) 1505 | basic_block.FillOutBlock() 1506 | epilogue = maze_functions.FunctionEpilogue(basic_block, stackframe_cleanup_immediate, saved_registers) 1507 | #epilogue_data = [bb_start_ea, bb_end_ea, unmatched_stackframe_set, unmatched_return_set, saved_registers, stackframe_cleanup_immediate] 1508 | 1509 | # 1510 | # stack immediates 1511 | # 1512 | if stackframe_cleanup_immediate in stack_immediates_found.keys(): 1513 | #stack_immediates_found[stackframe_cleanup_immediate].append(stackframe_cleanup_immediate) 1514 | stack_immediates_found[stackframe_cleanup_immediate].append(bb_start_ea) 1515 | else: 1516 | saved_immediates.append(bb_start_ea) 1517 | stack_immediates_found[stackframe_cleanup_immediate] = saved_immediates 1518 | 1519 | epilogues_found[bb_start_ea] = epilogue 1520 | 1521 | # 1522 | # I don't like doing the return values this way, but whatever 1523 | # 1524 | return [epilogues_found, stack_immediates_found] 1525 | 1526 | def GetFunctionProloguesOne(StackImmeiatesFound, Epilogues): 1527 | ''' 1528 | @detail Identify function prologues with one search pattern (below). Once this search pattern 1529 | has been identified, the immediate value will be checked against the immediate values 1530 | that are used by Epilogues, then the registers pushed will be compared to those poppped, and then 1531 | the code walks forward to find the correct basic block. 1532 | 1533 | 6E453F64 81 EC 08 08 00 00 sub esp, 808h 1534 | 1535 | The first byte pattern looks for cleanup the space allocated for local variables on the stack 1536 | for the stack frame. This is typically proceeded by saving registers by pushing the register 1537 | values to the stack. 1538 | 1539 | The purpsoe of knowning which registers are restored and the value added to esp is going to come up 1540 | when attempting to identify the prologue. Knowing this information will help know what to expect at the 1541 | start of the prologue AND if the prologue goes with this epilogue. That is both the registers and the 1542 | value for esp should be the same for both epilogue and prologue. 1543 | 1544 | @return prologues_found A dictionary of Prologue objects where the keys are the start addresses for the prologue 1545 | ''' 1546 | 1547 | stackframe_reserve_opcodes_0 = "81 EC" 1548 | stackframe_reserve_opcodes_1 = "83 EC" 1549 | 1550 | end_ea = ida_ida.cvar.inf.max_ea 1551 | stackframe_reserve_set = set() 1552 | return_insn_set = set() 1553 | 1554 | stack_immediates_found = StackImmeiatesFound 1555 | 1556 | prologues_found = {} 1557 | 1558 | # 1559 | # Find stack cleanup instructions 1560 | # 1561 | prol1_ea = ida_search.find_binary(0, end_ea, stackframe_reserve_opcodes_0, 0, SEARCH_DOWN | SEARCH_CASE) 1562 | while prol1_ea != idc.BADADDR: 1563 | stackframe_reserve_set.add(prol1_ea) 1564 | prol1_ea = ida_search.find_binary(prol1_ea+5, end_ea, stackframe_reserve_opcodes_0, 0, SEARCH_DOWN | SEARCH_CASE) 1565 | 1566 | prol1_ea = ida_search.find_binary(0, end_ea, stackframe_reserve_opcodes_1, 0, SEARCH_DOWN | SEARCH_CASE) 1567 | while prol1_ea != idc.BADADDR: 1568 | stackframe_reserve_set.add(prol1_ea) 1569 | prol1_ea = ida_search.find_binary(prol1_ea+5, end_ea, stackframe_reserve_opcodes_1, 0, SEARCH_DOWN | SEARCH_CASE) 1570 | 1571 | for prol1_ea in stackframe_reserve_set: 1572 | 1573 | stackframe_reserve_immediate = 0 1574 | bb_start_ea = 0 1575 | bb_end_ea = 0 1576 | 1577 | subesp_ea = prol1_ea 1578 | subesp_insn = ida_ua.insn_t() 1579 | ida_ua.decode_insn(subesp_insn, subesp_ea) 1580 | 1581 | subesp_dism = generate_disasm_line(subesp_ea,1) 1582 | 1583 | if (subesp_dism.startswith("sub")) and ("esp," in subesp_dism): 1584 | if subesp_insn.ops[1].type == 5: 1585 | # 1586 | # verify second operand is an immediate (type 5) 1587 | # 1588 | 1589 | 1590 | stackframe_reserve_immediate = subesp_insn.ops[1].value 1591 | 1592 | if stackframe_reserve_immediate in stack_immediates_found.keys(): 1593 | # 1594 | # Look for start address of the last basic block (epilogue) 1595 | # 1596 | 1597 | #print "Stack immediate found." 1598 | 1599 | possible_epilogues = stack_immediates_found[stackframe_reserve_immediate] 1600 | for epilogue_ea in possible_epilogues: 1601 | # 1602 | # There can be multiple exit points for a function. 1603 | # 1604 | 1605 | #print "possible epilogue ea: %08x" % epilogue_ea 1606 | 1607 | if epilogue_ea in Epilogues.keys(): 1608 | epilogue = Epilogues[epilogue_ea] 1609 | registers = epilogue.registers 1610 | #print "Prologue %08x, Epilogue %08x" % (subesp_ea, epilogue_ea) 1611 | 1612 | register_match = False 1613 | idx = 1 1614 | for popreg in registers: 1615 | # 1616 | # should unwind in the same order as the push instructions 1617 | # 1618 | 1619 | pushreg_ea = subesp_ea - idx 1620 | pushreg_insn = ida_ua.insn_t() 1621 | ida_ua.decode_insn(pushreg_insn, pushreg_ea) 1622 | 1623 | pushreg_dism = generate_disasm_line(pushreg_ea,1) 1624 | if pushreg_dism.startswith("push"): 1625 | if pushreg_insn.ops[0].type == 1: 1626 | if popreg == pushreg_insn.ops[0].reg: 1627 | # 1628 | # mnemonic, operand type, and reg value verified 1629 | # 1630 | 1631 | register_match = True 1632 | idx += 1 1633 | 1634 | else: 1635 | register_match = False 1636 | break 1637 | else: 1638 | register_match = False 1639 | break 1640 | else: 1641 | register_match = False 1642 | break 1643 | 1644 | if register_match: 1645 | prologue_start_ea = subesp_ea - idx + 1 1646 | bb_end_ea = WalkBasicBlockUntilTerminator(prologue_start_ea) 1647 | 1648 | 1649 | if bb_end_ea != idc.BADADDR: 1650 | 1651 | #print "Subesp ea: %08x, Start ea %08x, End ea %08x" % (subesp_ea, prologue_start_ea, bb_end_ea) 1652 | 1653 | idc.create_insn(prologue_start_ea) 1654 | 1655 | if prologue_start_ea in prologues_found.keys(): 1656 | # 1657 | # multiple epilogues 1658 | # 1659 | 1660 | prologue = prologues_found[prologue_start_ea] 1661 | prologue.possible_epilogues.append(epilogue_ea) 1662 | prologues_found[prologue_start_ea] = prologue 1663 | 1664 | else: 1665 | # 1666 | # create a new prologue 1667 | # 1668 | 1669 | basic_block = maze_functions.BasicBlock(prologue_start_ea, bb_end_ea) 1670 | basic_block.FillOutBlock() 1671 | prologue = maze_functions.FunctionPrologue(basic_block, stackframe_reserve_immediate, registers) 1672 | prologue.possible_epilogues.append(epilogue_ea) 1673 | prologues_found[prologue_start_ea] = prologue 1674 | 1675 | return prologues_found 1676 | 1677 | def BuildFunctions(FunctionPrologues, FunctionEpilogues): 1678 | ''' 1679 | @detail Walk over each prologue and pass it the maze_functions.RecursiveDescent.DoDescentParser() 1680 | method to be parsed. Once the method returns, the function has been walked and can be 1681 | defined. 1682 | ''' 1683 | 1684 | idx = 0 1685 | 1686 | ea = FunctionPrologues.keys()[0] 1687 | curr_segm_start_ea = idc.get_segm_start(ea) 1688 | curr_segm_end_ea = idc.get_segm_end(ea) 1689 | 1690 | func_creation_errors = [] 1691 | 1692 | for prologue_ea in FunctionPrologues.keys(): 1693 | 1694 | #if prologue_ea != 0x00409020: 1695 | # continue 1696 | #print "Function: %08x" % prologue_ea 1697 | 1698 | wrong_functions = set() 1699 | 1700 | prologue = FunctionPrologues[prologue_ea] 1701 | prologue_bb = prologue.basic_block 1702 | prologue_bb_end_ea = prologue_bb.end_ea 1703 | 1704 | jcc_queue = [] 1705 | undefined_function_queue = [] 1706 | 1707 | # 1708 | # Walk from Prologue to Epilogue 1709 | # 1710 | print "Check function: %08x" % (prologue_ea) 1711 | rec_descent = maze_functions.RecursiveDescent(prologue_ea) 1712 | rec_descent.DoDescentParser(prologue) 1713 | 1714 | # 1715 | # Get epilogue 1716 | # 1717 | if len(prologue.connected_epilogues) < 1: 1718 | print "[FUNCTION Build Error] No Epilogue Found %08x, having IDA determine end of function" % prologue_ea 1719 | func_creation_errors.append(prologue_ea) 1720 | continue 1721 | 1722 | if prologue.connected_epilogues[0] not in FunctionEpilogues.keys(): 1723 | print "[FUNCTION Build Error] Matching Epilogue Not Found %08x, End %08x" % (prologue_ea, prologue.connected_epilogues[0]) 1724 | func_creation_errors.append(prologue_ea) 1725 | continue 1726 | 1727 | epilogue = FunctionEpilogues[prologue.connected_epilogues[0]] 1728 | epilogue_end_ea = epilogue.basic_block.end_ea 1729 | epilogue_end_insn = ida_ua.insn_t() 1730 | ida_ua.decode_insn(epilogue_end_insn, epilogue_end_ea) 1731 | function_end_ea = epilogue_end_ea + epilogue_end_insn.size 1732 | 1733 | plan_and_wait(prologue_ea,function_end_ea,0) 1734 | 1735 | # 1736 | # Create function 1737 | # 1738 | add_func_result = ida_funcs.add_func(prologue_ea,function_end_ea) 1739 | 1740 | print "Function Created: %s %08x, end ea: %08x" % (add_func_result, prologue_ea, function_end_ea) 1741 | 1742 | for wrong_func_ea in rec_descent.wrong_functions: 1743 | # 1744 | # redefined functions that were undefined in the DoDescentParser method. 1745 | # 1746 | #print "wrong func, start %08x, end %08x" % (wrong_func_ea[0],wrong_func_ea[1]) 1747 | ida_funcs.add_func(wrong_func_ea,idc.BADADDR) 1748 | 1749 | for err_func_ea in func_creation_errors: 1750 | ida_funcs.add_func(err_func_ea,idc.BADADDR) 1751 | 1752 | idx +=1 1753 | 1754 | 1755 | idc.plan_and_wait(curr_segm_start_ea,curr_segm_end_ea,0) 1756 | 1757 | print "Number of functions created: %d" % (idx) 1758 | 1759 | 1760 | def DeprecateCheckAllFunctionsEndAddresses(FunctionPrologues, FunctionEpilogues): 1761 | ''' 1762 | @brief Check all known functions to ensure function start and end_ea are correct 1763 | ''' 1764 | 1765 | for known_func_ea in Functions(): 1766 | # 1767 | # Iterate over each defined function in IDA 1768 | # 1769 | 1770 | for prologue_ea in FunctionPrologues.keys(): 1771 | # 1772 | # Iterater over identified prologues 1773 | # 1774 | 1775 | prologue = FunctionPrologues[prologue_ea] 1776 | prologue_bb = prologue.basic_block 1777 | prologue_start_ea = prologue_bb.start_ea 1778 | 1779 | if prologue_start_ea == known_func_ea: 1780 | 1781 | # 1782 | # ida specified end address of the function 1783 | # 1784 | ida_specified_func_end_ea = idc.find_func_end(known_func_ea) 1785 | 1786 | # 1787 | # Get epilogue, walk each address in known epilogues of the prologue 1788 | # 1789 | function_end_addresses = [] 1790 | for epilogue_ea in prologue.connected_epilogues: 1791 | epilogue = FunctionEpilogues[epilogue_ea] 1792 | epilogue_end_ea = epilogue.basic_block.end_ea 1793 | epilogue_end_insn = ida_ua.insn_t() 1794 | ida_ua.decode_insn(epilogue_end_insn, epilogue_end_ea) 1795 | function_end_ea = epilogue_end_ea + epilogue_end_insn.size 1796 | 1797 | function_end_addresses.append(function_end_ea) 1798 | 1799 | if ida_specified_func_end_ea in function_end_addresses: 1800 | # 1801 | # bail if this is correct 1802 | # 1803 | break 1804 | 1805 | if len(function_end_addresses) < 1: 1806 | continue 1807 | 1808 | ida_funcs.del_func(known_func_ea) 1809 | idc.plan_and_wait(known_func_ea,function_end_addresses[0]) 1810 | idc.del_items(known_func_ea,1) 1811 | ida_funcs.add_func(known_func_ea,function_end_addresses[0]) 1812 | idc.plan_and_wait(known_func_ea,function_end_addresses[0]) 1813 | 1814 | print "Incorrect function ends: %08x, %08x" % (known_func_ea, ida_specified_func_end_ea) 1815 | 1816 | def BuildFunctions2(FunctionPrologues, FunctionEpilogues): 1817 | ''' 1818 | @detail Walk over each prologue and pass it the maze_functions.RecursiveDescent.DoDescentParser() 1819 | method to be parsed. Once the method returns, the function has been walked and can be 1820 | defined. 1821 | ''' 1822 | 1823 | idx = 0 1824 | 1825 | for prologue_ea in FunctionPrologues.keys(): 1826 | 1827 | wrong_functions = set() 1828 | 1829 | prologue = FunctionPrologues[prologue_ea] 1830 | prologue_bb = prologue.basic_block 1831 | prologue_bb_end_ea = prologue_bb.end_ea 1832 | 1833 | jcc_queue = [] 1834 | undefined_function_queue = [] 1835 | 1836 | # 1837 | # Get epilogue 1838 | # 1839 | epilogue = FunctionEpilogues[prologue.connected_epilogues[0]] 1840 | epilogue_end_ea = epilogue.basic_block.end_ea 1841 | epilogue_end_insn = ida_ua.insn_t() 1842 | ida_ua.decode_insn(epilogue_end_insn, epilogue_end_ea) 1843 | function_end_ea = epilogue_end_ea + epilogue_end_insn.size 1844 | 1845 | 1846 | # 1847 | # Call recursion function 1848 | # 1849 | #print "Check function: %08x" % (prologue_ea) 1850 | rec_descent = maze_function_analysis.RecursiveDescent(prologue_ea, None) 1851 | curr_function = rec_descent.DoDescentParser3() 1852 | 1853 | 1854 | for bblock in curr_function.rogue_basic_blocks: 1855 | 1856 | print "[Current Function, Start: %08x, End: %08x" % (curr_function.start_ea, curr_function.end_ea) 1857 | incorrect_func_ea = bblock.incorrect_function_ea 1858 | 1859 | incorrect_ida_func = ida_funcs.get_func(incorrect_func_ea) 1860 | if incorrect_ida_func: 1861 | 1862 | # 1863 | # Delete IDA's incorrect function 1864 | # 1865 | print "Deleting Incorrect IDA function: %08x" % incorrect_ida_func.start_ea 1866 | ida_funcs.del_func(incorrect_ida_func.start_ea) 1867 | 1868 | # 1869 | # Define the new function and wait 1870 | # 1871 | correct_ida_func = ida_funcs.get_func(curr_function.start_ea) 1872 | if correct_ida_func: 1873 | # 1874 | # Delete if the correct function has already been defined 1875 | # 1876 | ida_funcs.del_func(correct_ida_func.start_ea) 1877 | 1878 | print "Creating Corrected IDA function: %08x" % curr_function.start_ea 1879 | ida_funcs.add_func(curr_function.start_ea, curr_function.end_ea) 1880 | 1881 | if bblock.start_ea != incorrect_ida_func.start_ea: 1882 | # 1883 | # Recreate function only if the basic block wasn't the prologue 1884 | # 1885 | print "Re-creating IDA function: %08x" % incorrect_ida_func.start_ea 1886 | ida_funcs.add_func(incorrect_ida_func.start_ea, incorrect_ida_func.end_ea) 1887 | 1888 | 1889 | def CheckAllFunctionsEndAddresses(): 1890 | ''' 1891 | @brief Identify and correct functions that either have not been defined or have incorrect start/end addresses 1892 | 1893 | @detail Multiple steps are involved for created/recreating functions. The first method walks each Function that 1894 | IDA is aware of and verifies the start and end points are correct. 1895 | ''' 1896 | 1897 | for known_func_ea in Functions(): 1898 | # 1899 | # Iterate over each defined function in IDA 1900 | # 1901 | 1902 | curr_ida_func = ida_funcs.get_func(known_func_ea) 1903 | 1904 | 1905 | rec_descent = maze_function_analysis.RecursiveDescent(known_func_ea, None) 1906 | curr_function = rec_descent.DoDescentParser3() 1907 | 1908 | for bblock in curr_function.basic_blocks: 1909 | if curr_ida_func.end_ea in bblock.instruction_addresses: 1910 | # 1911 | # if the end_ea as currently defined by IDA exists within the body of a basic block, then the function 1912 | # is incorrectly defined, this will correct that problem. 1913 | # 1914 | 1915 | print "Mis-match: Start %08x, End: %08x, Fake: %08x" % (curr_function.start_ea, curr_function.end_ea, curr_ida_func.end_ea) 1916 | 1917 | ida_funcs.del_func(curr_function.start_ea) 1918 | ida_funcs.add_func(curr_function.start_ea, curr_function.end_ea) 1919 | 1920 | func_prologue_opcode = "55 89 E5" 1921 | 1922 | end_ea = ida_ida.cvar.inf.max_ea 1923 | 1924 | prologues_found = {} 1925 | 1926 | # 1927 | # Find stack cleanup instructions 1928 | # 1929 | prol1_ea = ida_search.find_binary(0, end_ea, func_prologue_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1930 | while prol1_ea != idc.BADADDR: 1931 | found_func = ida_funcs.get_func(prol1_ea) 1932 | 1933 | if found_func: 1934 | prol1_ea = ida_search.find_binary(prol1_ea+5, end_ea, func_prologue_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1935 | continue 1936 | 1937 | 1938 | 1939 | rec_descent = maze_function_analysis.RecursiveDescent(prol1_ea, None) 1940 | curr_function = rec_descent.DoDescentParser3() 1941 | 1942 | print "Found function: Start %08x, End %08x" % (curr_function.start_ea, curr_function.end_ea) 1943 | 1944 | ida_funcs.add_func(curr_function.start_ea, curr_function.end_ea) 1945 | 1946 | prol1_ea = ida_search.find_binary(prol1_ea+5, end_ea, func_prologue_opcode, 0, SEARCH_DOWN | SEARCH_CASE) 1947 | 1948 | #end_addresses, funcs_to_delete = rec_descent.DoDescentParser() 1949 | 1950 | #if (ida_func.end_ea not in end_addresses) and (len(end_addresses) > 0): 1951 | # print "Start address: %08x, End address: %08x, Found address: %08x" % (ida_func.start_ea, ida_func.end_ea,end_addresses[0]) 1952 | # for ea in funcs_to_delete: 1953 | # print "Deleting: %08x" % ea 1954 | # ida_funcs.del_func(ea) 1955 | 1956 | # ida_funcs.add_func(ida_func.start_ea, end_addresses[0]) 1957 | 1958 | # for ea in funcs_to_delete: 1959 | # print "Adding: %08x" % ea 1960 | # ida_funcs.add_func(ea, idc.BADADDR) 1961 | 1962 | 1963 | #if (ida_func.end_ea not in end_address) and (len(end_address) > 0): 1964 | # print "Start address: %08x, End address: %08x, Found address: %08x" % (ida_func.start_ea, ida_func.end_ea,end_address[0]) 1965 | # ida_funcs.del_func(known_func_ea) 1966 | # ida_funcs.add_func(ida_func.start_ea, end_address[0]) 1967 | 1968 | 1969 | 1970 | 1971 | def main(): 1972 | 1973 | typeone_addresses = set() 1974 | typetwo_addresses = set() 1975 | typethree_addresses = set() 1976 | typefour_addresses = set() 1977 | typefive_addresses = set() 1978 | typesix_addresses = set() 1979 | typeseven_address = set() 1980 | 1981 | obf_windowsapi_calls = set() 1982 | calltypethree_addresses = set() 1983 | absolute_jumps = set() 1984 | 1985 | find_obfuscations = True 1986 | do_patches = True 1987 | 1988 | if find_obfuscations: 1989 | obf_windowsapi_calls = FindObfuscatedWindowsAPICalls() 1990 | if len(obf_windowsapi_calls) > 0 and do_patches: 1991 | for obf_windowsapi_call_ea in obf_windowsapi_calls: 1992 | print "Windows API Call: %08x" % obf_windowsapi_call_ea 1993 | PatchObfuscatedWindowsAPICalls(obf_windowsapi_call_ea) 1994 | #break 1995 | 1996 | typetwo_addresses = FindCallTypeTwoCFGObfuscation() 1997 | if len(typetwo_addresses) > 0 and do_patches: 1998 | for typetwo_ea in typetwo_addresses: 1999 | print "Call Type Two: %08x" % (typetwo_ea) 2000 | PatchCallTypeTwoCFGObfuscation(typetwo_ea) 2001 | #break 2002 | 2003 | typeone_addresses = FindCallTypeOneCFGObfuscation() 2004 | if len(typeone_addresses) > 0 and do_patches: 2005 | #print len(typeone_addresses) 2006 | for typeone_ea in typeone_addresses: 2007 | print "Call Type One: %08x" % (typeone_ea) 2008 | PatchCallTypeOneCFGObfuscation(typeone_ea) 2009 | # #break 2010 | 2011 | calltypethree_addresses = FindCallTypeThreeCFGObfuscation() 2012 | if len(calltypethree_addresses) and do_patches: 2013 | for typethree_ea in calltypethree_addresses: 2014 | print "Call Type Three: %08x" % (typethree_ea) 2015 | PatchCallTypeThreeCFGObfuscation(typethree_ea) 2016 | #break 2017 | 2018 | absolute_jumps = FindAbsoluteJumps() 2019 | if len(absolute_jumps) > 0 and do_patches: 2020 | for absolut_jmp_ea in absolute_jumps: 2021 | print "Absolute Jump: %08x" % (absolut_jmp_ea) 2022 | PatchAbsoluteJump(absolut_jmp_ea) 2023 | #break 2024 | 2025 | 2026 | typeone_epilogues, epilogue_immediates = GetFunctionEpiloguesOne() 2027 | typeone_prologues = GetFunctionProloguesOne(epilogue_immediates,typeone_epilogues) 2028 | BuildFunctions(typeone_prologues,typeone_epilogues) 2029 | 2030 | CheckAllFunctionsEndAddresses() 2031 | 2032 | # 2033 | # Updated version of BuildFunctions (in progress) 2034 | # 2035 | #BuildFunctions2(typeone_prologues, typeone_epilogues) 2036 | 2037 | 2038 | #print "Number of Type One epilogues: %d" % len(typeone_epilogues.keys()) 2039 | #print "Number of Type One prologues: %d" % len(typeone_prologues.keys()) 2040 | #print "Aboslute Jump count: ", len(absolute_jumps) 2041 | 2042 | main() -------------------------------------------------------------------------------- /bytesearch/maze_deobf_utils.py: -------------------------------------------------------------------------------- 1 | import idautils, idaapi, idc, ida_search, ida_ua, ida_name 2 | 3 | def CheckInSegment(Curr_ea, Target_ea): 4 | ''' 5 | @brief Check if the Target_ea is in the same segment as the Curr_ea and that it is SEG_CODE 6 | ''' 7 | 8 | curr_segm_start_ea = idc.get_segm_start(Curr_ea) 9 | curr_segm_end_ea = idc.get_segm_end(Curr_ea) 10 | 11 | return Target_ea >= curr_segm_start_ea and Target_ea <= curr_segm_end_ea 12 | 13 | def CheckSegmentIsCode(Curr_ea): 14 | return idc.get_segm_attr(Curr_ea, idc.SEGATTR_TYPE) == idc.SEG_CODE 15 | 16 | def CheckValidInstrImmediate(Curr_insn_t, Expected_mnem): 17 | ''' 18 | @detail Takes single operand instruction and validates the instruction matches Expected_mnem and 19 | that the operand is an immediate (type 5). 20 | 21 | ''' 22 | isValid = False 23 | 24 | mnem = Expected_mnem.lower() 25 | 26 | if type(Curr_insn_t) == ida_ua.insn_t : 27 | # 28 | # Correct type 29 | # 30 | 31 | insn = Curr_insn_t 32 | insn_ea = insn.ea 33 | insn_dism = idc.generate_disasm_line(insn_ea,1) 34 | 35 | if insn_dism[:5].startswith(mnem): 36 | operands = insn.ops 37 | if len(operands) > 0: 38 | idx = 0 39 | for op in operands: 40 | if op.type != 0: 41 | if op.type == 5: 42 | isValid = True 43 | 44 | if idx > 0: 45 | # 46 | # More than one operand 47 | # 48 | isVAlid = False 49 | idx += 1 50 | 51 | return isValid 52 | 53 | def CheckValidTarget(Curr_ea, Target_ea): 54 | ''' 55 | @detail Take the address from the target of a current JMP instruction and verify that the 56 | operand type is immediate (5), the target address is located within the the same section, 57 | the segment is code. 58 | 59 | Not implemented: an xref check may be needed 60 | 61 | @returns BOOL 62 | ''' 63 | 64 | valid_jump_target = False 65 | instr_operand = idautils.DecodeInstruction(Curr_ea).Op1 66 | 67 | if instr_operand.type in [5, 6, 7]: 68 | # 69 | # Type is an immediate, immediate far address, immediate near address 70 | # 71 | 72 | if CheckInSegment(Curr_ea, Target_ea) and CheckSegmentIsCode(Curr_ea): 73 | valid_jump_target = True 74 | 75 | return valid_jump_target 76 | 77 | def CheckValidTargettingInstr(Curr_insn_t, Expected_mnem): 78 | ''' 79 | @detail Takes a push, jz, jnz, instruction and verifies that the operand 80 | and the target of the address is valid. 81 | 82 | @returns BOOL 83 | ''' 84 | 85 | isValid = False 86 | 87 | mnem = Expected_mnem.lower() 88 | 89 | #print type(Curr_insn_t), Expected_mnem 90 | 91 | if (type(Curr_insn_t) == ida_ua.insn_t) and (mnem in ['push', 'jz', 'jnz']) : 92 | # 93 | # Typecheck for Curr_insn_t 94 | # 95 | #print "valid check 0" 96 | 97 | insn_ea = Curr_insn_t.ea 98 | insn_dism = idc.generate_disasm_line(insn_ea,1) 99 | 100 | if mnem in insn_dism[:5]: 101 | # 102 | # Ensure expected mnemonic in disassembly 103 | # 104 | 105 | #print "valid check 1" 106 | # 107 | # There shoud be a single populated operand in the ops array 108 | # I believe I read that the array always contains 8 op_t objects ( could be x86 only ) 109 | # Only the one at idx 0 should have a type other than 0. 110 | # 111 | operands = Curr_insn_t.ops 112 | if len(operands) > 0: 113 | #print "valid check 2" 114 | for op in operands: 115 | if op.type != 0: 116 | if (op.type in [5, 6, 7]) and (not isValid): 117 | # 118 | # Type is an immediate, immediate far address, immediate near address 119 | # 120 | 121 | if op.type == 5: 122 | target_ea = op.value 123 | elif op.type == 6 or op.type == 7: 124 | target_ea = op.addr 125 | #print "valid check 3", hex(insn_ea), hex(target_ea) 126 | if CheckInSegment(insn_ea, target_ea) and CheckSegmentIsCode(insn_ea): 127 | # 128 | # Located in the correct segment 129 | # 130 | 131 | #print "valid check 4" 132 | isValid = True 133 | elif isValid: 134 | # 135 | # If other operand type 136 | # 137 | #print "valid check 5" 138 | isValid = False 139 | break 140 | 141 | 142 | return isValid 143 | 144 | def CheckValidInstrImmediate(Curr_insn_t, Expected_mnem): 145 | ''' 146 | @detail Takes single operand instruction and validates the instruction matches Expected_mnem and 147 | that the operand is an immediate (type 5). 148 | 149 | ''' 150 | isValid = False 151 | 152 | mnem = Expected_mnem.lower() 153 | 154 | if type(Curr_insn_t) == ida_ua.insn_t : 155 | # 156 | # Correct type 157 | # 158 | 159 | insn = Curr_insn_t 160 | insn_ea = insn.ea 161 | insn_dism = idc.generate_disasm_line(insn_ea,1) 162 | 163 | if insn_dism.startswith(mnem): 164 | operands = insn.ops 165 | if len(operands) > 0: 166 | idx = 0 167 | for op in operands: 168 | if op.type != 0: 169 | if op.type == 5: 170 | isValid = True 171 | 172 | if idx > 0: 173 | # 174 | # More than one operand 175 | # 176 | isVAlid = False 177 | idx += 1 178 | 179 | return isValid 180 | 181 | def CheckFirstOperandIsIndirect(Curr_insn_ea): 182 | """ 183 | @brief Check only if the first operand contains a register. This 184 | is for call, jmp, and JCC instruction types. No check is 185 | done on the mnemonic. 186 | 187 | @return True if operand is type 1, 3, or 4 188 | """ 189 | 190 | isIndirect = False 191 | curr_insn = Curr_insn_ea 192 | 193 | if type(curr_insn) == ida_ua.insn_t: 194 | 195 | insn_operand = curr_insn.ops[0] 196 | 197 | if insn_operand.type in [1, 3, 4]: 198 | isIndirect = True 199 | 200 | return isIndirect 201 | 202 | def GetInstuctionTargetAddress(Target_insn): 203 | ''' 204 | @brief Return the operand value for the JZ, JNZ, or push isntruciton 205 | ''' 206 | 207 | target_ea = 0 208 | 209 | if type(Target_insn) == ida_ua.insn_t: 210 | #print "GITA: type match" 211 | target_op = Target_insn.ops[0] 212 | #print "GITA: ", target_op.type, hex(target_op.value), hex(target_op.addr) 213 | if target_op.type == 5: 214 | target_ea = target_op.value 215 | elif target_op.type == 6 or target_op.type == 7: 216 | target_ea = target_op.addr 217 | 218 | return target_ea 219 | 220 | def CheckIsDllName(Strlit_ea): 221 | ''' 222 | @brief Receive an address and verify that the bytes at that address are both 223 | ASCII and in with ".dll" 224 | 225 | @return BOOL 226 | ''' 227 | is_module_name = False 228 | 229 | start_ea = Strlit_ea 230 | module_name = "" 231 | char = idc.get_wide_byte(start_ea) 232 | 233 | while ida_name.is_strlit_cp(char): 234 | module_name += chr(char) 235 | start_ea += 1 236 | char = idc.get_wide_byte(start_ea) 237 | 238 | if idc.get_wide_byte(start_ea) == 0x00: 239 | # 240 | # Final char in ASCII string should be a NULL byte 241 | # 242 | 243 | if module_name.endswith(".dll"): 244 | is_module_name = True 245 | 246 | return is_module_name 247 | 248 | def CheckInstructionIsFunctionTerminator(Insn_ea): 249 | """ 250 | @detail Check if an instruction is RETN or equivilent. 251 | """ 252 | 253 | curr_insn_ea = Insn_ea 254 | 255 | is_return = False 256 | 257 | curr_insn_dism = idc.generate_disasm_line(curr_insn_ea,1) 258 | 259 | if curr_insn_dism.startswith("retn"): 260 | is_return = True 261 | 262 | elif curr_insn_dism.startswith("jmp"): 263 | 264 | jmp_insn = ida_ua.insn_t() 265 | ida_ua.decode_insn(jmp_insn, curr_insn_ea) 266 | 267 | if CheckFirstOperandIsIndirect(jmp_insn): 268 | # 269 | # Check for esp+4 270 | # 271 | 272 | op = jmp_insn.ops[0] 273 | 274 | if (op.addr == 0xfffffffc) and (op.reg == 4): 275 | # 276 | # Check if displacement is -4 and register is ESP (4) 277 | # 278 | 279 | is_return = True 280 | 281 | return is_return -------------------------------------------------------------------------------- /bytesearch/maze_function_analysis.py: -------------------------------------------------------------------------------- 1 | import ida_ua, idc, idaapi, ida_funcs 2 | 3 | idaapi.require("maze_deobf_utils") 4 | #import maze_deobf_utils as mdu 5 | 6 | class RecursiveDescent(object): 7 | ''' 8 | @brief Recurse descent disassembly for a single function (no calls followed) 9 | ''' 10 | 11 | def __init__(self, Start_ea, Approach): 12 | 13 | self.deferred_targets = [] 14 | self.instructions_walked = [] 15 | self.wrong_functions = [] 16 | 17 | self.entry_point = Start_ea 18 | self.deferred_targets.append(Start_ea) 19 | 20 | self.emulators = Eumulators() 21 | 22 | self.deobf_approach = Approach 23 | 24 | def GetInstuctionTargetAddress(self,Target_insn): 25 | ''' 26 | @brief Return the operand value for a unirary instruction that contains a target 27 | address (JMP, JNZ, JZ, push, call, etc). 28 | ''' 29 | 30 | target_ea = 0 31 | 32 | if type(Target_insn) == ida_ua.insn_t: 33 | #print "GITA: type match" 34 | target_op = Target_insn.ops[0] 35 | #print "GITA: ", target_op.type, hex(target_op.value), hex(target_op.addr) 36 | if target_op.type == 5: 37 | target_ea = target_op.value 38 | elif target_op.type == 6 or target_op.type == 7: 39 | target_ea = target_op.addr 40 | 41 | return target_ea 42 | 43 | def DoDescentParser2(self): 44 | ''' 45 | @brief Walk the function leveraging a recursive descent parser 46 | 47 | @detail Walks a function based on an Approach. This is unused in the byte-search implementation. 48 | 49 | @return function object 50 | ''' 51 | 52 | # 53 | # jmps = [eval("idaapi."+name) for name in dir(idaapi) if "NN_j" in name] 54 | # 55 | jcc_terminators = ['jnz','jz','jo','jno', 'js','jns', 'je','jne', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jbe','jna', 'ja','jnbe', 'jl','jnge','jge','jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 56 | 57 | print "Starting recursive decent:, starting at: %08x" % (self.deferred_targets[0]) 58 | 59 | func_end_ea = [] 60 | do_things = False 61 | 62 | while len(self.deferred_targets) > 0: 63 | 64 | curr_insn_ea = self.deferred_targets.pop() 65 | 66 | bblock = BasicBlock(curr_insn_ea) 67 | 68 | if curr_insn_ea in self.instructions_walked: 69 | # 70 | # skip instructions that were already walked 71 | # 72 | 73 | continue 74 | 75 | print "Next BB: %08x" % curr_insn_ea 76 | 77 | while curr_insn_ea != idc.BADADDR: 78 | 79 | print "Current ip: %08x" % (curr_insn_ea) 80 | 81 | self.instructions_walked.append(curr_insn_ea) 82 | 83 | bblock.AddInsnAddress(curr_insn_ea) 84 | 85 | # 86 | # Verify current instruction information 87 | # 88 | curr_insn = ida_ua.insn_t() 89 | decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea) 90 | if decode_result < 1: 91 | # 92 | # break if instruction invalid 93 | # 94 | bblock.end_ea = curr_insn_ea 95 | break 96 | 97 | # 98 | # Get instruction disasembly 99 | # 100 | curr_insn_dism = idc.generate_disasm_line(curr_insn_ea,1) 101 | 102 | # 103 | # Check Instruction matches Obfuscated Call 104 | # 105 | if curr_insn_dism.startswith("push") and do_things: 106 | 107 | push_insn = curr_insn 108 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 109 | # 110 | # Check for different CALL instruction types 111 | # 112 | 113 | # 114 | # Type One 115 | # 116 | ret_addr_ea = self.deobf_approach.CheckCallTypeOne(curr_insn_ea) 117 | if ret_addr_ea != idc.BADADDR: 118 | self.deferred_targets.append(ret_addr_ea) 119 | bblock.end_ea = curr_insn_ea 120 | break 121 | 122 | # 123 | # Type Two 124 | # 125 | ret_addr_ea = self.deobf_approach.CheckCallTypeTwo(curr_insn_ea) 126 | if ret_addr_ea != idc.BADADDR: 127 | self.deferred_targets.append(ret_addr_ea) 128 | bblock.end_ea = curr_insn_ea 129 | break 130 | 131 | # 132 | # Type Three 133 | # 134 | ret_addr_ea = self.deobf_approach.CheckCallTypeThree(curr_insn_ea) 135 | if ret_addr_ea != idc.BADADDR: 136 | self.deferred_targets.append(ret_addr_ea) 137 | bblock.end_ea = curr_insn_ea 138 | break 139 | 140 | # 141 | # Check instruction matches an obfuscated Windows API Call 142 | # 143 | if maze_deobf_utils.CheckValidInstrImmediate(push_insn,"push"): 144 | ret_addr_ea = self.deobf_approach.CheckObfuscatedWindowsAPICall(curr_insn_ea) 145 | if ret_addr_ea != idc.BADADDR: 146 | 147 | print "Obfuscated Windows API Call: %08x, Target: %08x" % (curr_insn_ea, ret_addr_ea) 148 | 149 | self.deferred_targets.append(ret_addr_ea) 150 | bblock.end_ea = curr_insn_ea 151 | break 152 | 153 | # 154 | # Check Instruction matches Obfuscated Absolute Jumps 155 | # 156 | if curr_insn_dism.startswith("jz") and do_things: 157 | 158 | jz_insn = curr_insn 159 | if maze_deobf_utils.CheckValidTargettingInstr(jz_insn, "jz"): 160 | 161 | jz_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jz_insn) 162 | 163 | next_insn_ea = curr_insn_ea + jz_insn.size 164 | next_insn = ida_ua.insn_t() 165 | ida_ua.decode_insn(next_insn, next_insn_ea) 166 | if maze_deobf_utils.CheckValidTargettingInstr(next_insn, "jnz"): 167 | # 168 | # Instruction is an absolute jump 169 | # get absolute jump target address. 170 | # 171 | 172 | print "JZ/JNZ block: %08x" % curr_insn_ea 173 | abs_jmp_target_ea = self.deobf_approach.GetObfuscJMPTarget(next_insn_ea) 174 | if abs_jmp_target_ea != idc.BADADDR: 175 | print "JNZ %08x Adding Target: %08x" % (next_insn_ea, abs_jmp_target_ea) 176 | print "JZ %08x Adding Target: %08x" % (curr_insn_ea, jz_target_ea) 177 | 178 | self.deferred_targets.append(jz_target_ea) 179 | self.deferred_targets.append(abs_jmp_target_ea) 180 | bblock.end_ea = curr_insn_ea 181 | break 182 | 183 | if curr_insn_dism.startswith( tuple( jcc_terminators ) ): 184 | # 185 | # JCC conditionals 186 | # 187 | 188 | jcc_insn = curr_insn 189 | jmp_target_ea = self.GetInstuctionTargetAddress(jcc_insn) 190 | if jmp_target_ea not in self.deferred_targets: 191 | self.deferred_targets.append(jmp_target_ea) 192 | 193 | # 194 | # Add fall through address, BB ends at JCC conditional 195 | # 196 | self.deferred_targets.append(curr_insn_ea + jcc_insn.size) 197 | bblock.end_ea = curr_insn_ea 198 | break 199 | 200 | if maze_deobf_utils.CheckInstructionIsFunctionTerminator(curr_insn_ea): 201 | # 202 | # Return instruction 203 | # 204 | 205 | bblock.end_ea = curr_insn_ea 206 | bblock.is_epilogue = True 207 | func_end_ea.append(idc.next_head(curr_insn_ea)) 208 | break 209 | 210 | if curr_insn_dism.startswith("jmp"): 211 | 212 | jmp_insn = curr_insn 213 | jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jmp_insn) 214 | 215 | if jmp_target_ea not in self.deferred_targets: 216 | self.deferred_targets.append(jmp_target_ea) 217 | 218 | bblock.end_ea = curr_insn_ea 219 | break 220 | 221 | 222 | curr_insn_ea = curr_insn_ea + curr_insn.size 223 | 224 | return func_end_ea 225 | 226 | def DoDescentParser(self): 227 | ''' 228 | @brief Walk the function leveraging a recursive descent parser 229 | 230 | @detail Starting with a prologue walk each instruction until the associated epilogue is reached. For functions 231 | with multiple epilogues, iterate over each one. 232 | 233 | As each instruction is traversed, do the following three 234 | things: 235 | 236 | - Undefine the instruction 237 | - Mark the instruction as code 238 | - Check to see if the instruction is already a member of another function 239 | 240 | If an instruction is a member of another function, undefine that function and place it in a queue. At the end 241 | of traversing each function, a new function is going to be created with the new prologue and the new epilogue. 242 | In addition, the undefined function queue is going to be iterated over and each function will be redefined. This 243 | should clean up messy function 244 | 245 | much thanks to the author of "Practical Binary Analysis" for the break down of the algorithm in Chapter 8. 246 | 247 | @return function object 248 | ''' 249 | 250 | # 251 | # jmps = [eval("idaapi."+name) for name in dir(idaapi) if "NN_j" in name] 252 | # 253 | jcc_terminators = ['jnz','jz','jo','jno', 'js','jns', 'je','jne', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jbe','jna', 'ja','jnbe', 'jl','jnge','jge','jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 254 | 255 | #print "Starting recursive decent:, starting at: %08x" % (self.deferred_targets[0]) 256 | 257 | func_end_ea = [] 258 | ea_part_of_another_func = [] 259 | do_things = False 260 | 261 | curr_func = None 262 | 263 | while len(self.deferred_targets) > 0: 264 | 265 | curr_insn_ea = self.deferred_targets.pop() 266 | 267 | if not curr_func: 268 | curr_func = ida_funcs.get_func(curr_insn_ea) 269 | 270 | else: 271 | target_func = ida_funcs.get_func(curr_insn_ea) 272 | if target_func and (target_func.start_ea != curr_func.start_ea) : 273 | if (target_func.start_ea not in ea_part_of_another_func): 274 | ea_part_of_another_func.append(target_func.start_ea) 275 | 276 | 277 | bblock = BasicBlock(curr_insn_ea) 278 | 279 | if curr_insn_ea in self.instructions_walked: 280 | # 281 | # skip instructions that were already walked 282 | # 283 | 284 | continue 285 | 286 | #print "Next BB: %08x" % curr_insn_ea 287 | 288 | while curr_insn_ea != idc.BADADDR: 289 | 290 | #print "Current ip: %08x" % (curr_insn_ea) 291 | 292 | self.instructions_walked.append(curr_insn_ea) 293 | 294 | bblock.AddInsnAddress(curr_insn_ea) 295 | 296 | # 297 | # Verify current instruction information 298 | # 299 | curr_insn = ida_ua.insn_t() 300 | decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea) 301 | if decode_result < 1: 302 | # 303 | # break if instruction invalid 304 | # 305 | bblock.end_ea = curr_insn_ea 306 | break 307 | 308 | # 309 | # Get instruction disasembly 310 | # 311 | curr_insn_dism = idc.generate_disasm_line(curr_insn_ea,1) 312 | 313 | if curr_insn_dism.startswith( tuple( jcc_terminators ) ): 314 | # 315 | # JCC conditionals 316 | # 317 | 318 | jcc_insn = curr_insn 319 | jmp_target_ea = self.GetInstuctionTargetAddress(jcc_insn) 320 | if jmp_target_ea not in self.deferred_targets: 321 | self.deferred_targets.append(jmp_target_ea) 322 | 323 | # 324 | # Add fall through address, BB ends at JCC conditional 325 | # 326 | self.deferred_targets.append(curr_insn_ea + jcc_insn.size) 327 | bblock.end_ea = curr_insn_ea 328 | break 329 | 330 | if maze_deobf_utils.CheckInstructionIsFunctionTerminator(curr_insn_ea): 331 | # 332 | # Return instruction 333 | # 334 | 335 | bblock.end_ea = curr_insn_ea 336 | bblock.is_epilogue = True 337 | func_end_ea.append(idc.next_head(curr_insn_ea)) 338 | break 339 | 340 | if curr_insn_dism.startswith("jmp"): 341 | 342 | jmp_insn = curr_insn 343 | jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jmp_insn) 344 | 345 | if jmp_target_ea not in self.deferred_targets: 346 | self.deferred_targets.append(jmp_target_ea) 347 | 348 | bblock.end_ea = curr_insn_ea 349 | break 350 | 351 | 352 | curr_insn_ea = curr_insn_ea + curr_insn.size 353 | 354 | return [func_end_ea, ea_part_of_another_func] 355 | 356 | def DoDescentParser3(self): 357 | ''' 358 | @brief Walk the function leveraging a recursive descent parser 359 | 360 | @detail Starting at the entry point, it walks a function, creates a basic block, and associates those blocks with a 361 | Function object. 362 | 363 | much thanks to the author of "Practical Binary Analysis" for the break down of the algorithm in Chapter 8. 364 | 365 | @return function object 366 | ''' 367 | 368 | # 369 | # jmps = [eval("idaapi."+name) for name in dir(idaapi) if "NN_j" in name] 370 | # 371 | jcc_terminators = ['jnz','jz','jo','jno', 'js','jns', 'je','jne', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jbe','jna', 'ja','jnbe', 'jl','jnge','jge','jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 372 | 373 | #print "Starting recursive decent:, starting at: %08x" % (self.deferred_targets[0]) 374 | 375 | func_end_ea = [] 376 | ea_part_of_another_func = [] 377 | do_things = False 378 | 379 | func_start_ea = idc.BADADDR 380 | expected_func = Function() 381 | 382 | while len(self.deferred_targets) > 0: 383 | 384 | curr_insn_ea = self.deferred_targets.pop() 385 | 386 | if curr_insn_ea in self.instructions_walked: 387 | # 388 | # skip instructions that were already walked 389 | # 390 | 391 | continue 392 | 393 | bblock = BasicBlock(curr_insn_ea) 394 | expected_func.AddBlock(bblock) 395 | 396 | if expected_func.start_ea == idc.BADADDR: 397 | # 398 | # Set the function's expected start address 399 | # 400 | expected_func.start_ea = curr_insn_ea 401 | 402 | if bblock.incorrect_function_ea == idc.BADADDR: 403 | # 404 | # Check if the BasicBlock is part of another function 405 | # 406 | 407 | if bblock.CheckPartOfAnotherFunction(curr_insn_ea, expected_func.start_ea): 408 | expected_func.rogue_basic_blocks.append(bblock) 409 | 410 | 411 | 412 | #print "Next BB: %08x" % curr_insn_ea 413 | 414 | while curr_insn_ea != idc.BADADDR: 415 | # 416 | # Walks the basic block 417 | # 418 | 419 | #print "Current ip: %08x" % (curr_insn_ea) 420 | 421 | self.instructions_walked.append(curr_insn_ea) 422 | 423 | bblock.AddInsnAddress(curr_insn_ea) 424 | 425 | # 426 | # Verify current instruction information 427 | # 428 | curr_insn = ida_ua.insn_t() 429 | decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea) 430 | if decode_result < 1: 431 | # 432 | # break if instruction invalid 433 | # 434 | bblock.end_ea = curr_insn_ea 435 | break 436 | 437 | # 438 | # Get instruction disasembly 439 | # 440 | curr_insn_dism = idc.generate_disasm_line(curr_insn_ea,1) 441 | 442 | if curr_insn_dism.startswith( tuple( jcc_terminators ) ): 443 | # 444 | # JCC conditionals 445 | # 446 | 447 | jcc_insn = curr_insn 448 | jmp_target_ea = self.GetInstuctionTargetAddress(jcc_insn) 449 | if jmp_target_ea not in self.deferred_targets: 450 | self.deferred_targets.append(jmp_target_ea) 451 | 452 | # 453 | # Add fall through address, BB ends at JCC conditional 454 | # 455 | self.deferred_targets.append(curr_insn_ea + jcc_insn.size) 456 | bblock.end_ea = curr_insn_ea 457 | break 458 | 459 | if maze_deobf_utils.CheckInstructionIsFunctionTerminator(curr_insn_ea): 460 | # 461 | # Return instruction 462 | # 463 | 464 | bblock.end_ea = curr_insn_ea 465 | bblock.is_epilogue = True 466 | expected_func.end_ea = idc.next_head(curr_insn_ea) 467 | expected_func.AddExitPoint(curr_insn_ea) 468 | break 469 | 470 | if curr_insn_dism.startswith("jmp"): 471 | 472 | jmp_insn = curr_insn 473 | jmp_target_ea = maze_deobf_utils.GetInstuctionTargetAddress(jmp_insn) 474 | 475 | if jmp_target_ea not in self.deferred_targets: 476 | self.deferred_targets.append(jmp_target_ea) 477 | 478 | bblock.end_ea = curr_insn_ea 479 | break 480 | 481 | 482 | curr_insn_ea = curr_insn_ea + curr_insn.size 483 | 484 | return expected_func 485 | 486 | class Function(object): 487 | ''' 488 | @brief Representation of a function. 489 | ''' 490 | def __init__(self): 491 | self.basic_blocks = [] 492 | self.rogue_basic_blocks = [] 493 | self.start_ea = idc.BADADDR 494 | self.end_ea = idc.BADADDR 495 | self.exit_points = set() 496 | 497 | def AddBlock(self, BB): 498 | if BB: 499 | self.basic_blocks.append(BB) 500 | 501 | def AddExitPoint(self, Curr_insn_ea): 502 | self.exit_points.add(Curr_insn_ea) 503 | 504 | class Eumulators(object): 505 | ''' 506 | @brief Identify and Deobfuscate the various obfuscations 507 | 508 | Currently unused. 509 | ''' 510 | 511 | def __init(self): 512 | pass 513 | 514 | def CheckZeroFlagAbsoluteJMP(self, EffectiveAddress): 515 | ''' 516 | ''' 517 | pass 518 | 519 | def CheckZFEmulatedCall(self, EffectiveAddress): 520 | ''' 521 | @brief Identify emulated function calls. 522 | 523 | @detail 524 | 525 | push 526 | 0000 JZ xxxxxxxx 527 | 0001 JNZ 0010 528 | 529 | 530 | @returns The return address that is pushed to the stack for the CALL instruction 531 | ''' 532 | 533 | rtrn_addr = idc.BADADDR 534 | 535 | push_instr_ea = EffectiveAddress 536 | 537 | push_insn = ida_ua.insn_t() 538 | ida_ua.decode_insn(push_insn, push_instr_ea) 539 | 540 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 541 | """ 542 | Valid PUSH instruction. 543 | """ 544 | 545 | jz_insn_ea = push_instr_ea + push_insn.size() 546 | jz_insn = ida_ua.insn_t() 547 | ida_ua.decode_insn(jz_insn, jz_insn_ea) 548 | 549 | if maze_deobf_utils.CheckValidTargettingInstr(push_insn, "push"): 550 | """ 551 | Valid PUSH instruction. 552 | """ 553 | 554 | return rtrn_addr 555 | 556 | 557 | 558 | class BasicBlock(object): 559 | ''' 560 | @brief Class used for describing basic blocks. 561 | ''' 562 | 563 | def __init__(self,Start_ea=0, End_ea=0): 564 | self.start_ea = Start_ea 565 | self.end_ea = End_ea 566 | 567 | self.traversed = False 568 | 569 | self.instruction_addresses = [] 570 | self.non_terminating_egress = set() 571 | 572 | self.is_prologue = False 573 | self.is_epilogue = False 574 | 575 | self.incorrect_function_ea = idc.BADADDR 576 | 577 | 578 | def AddInsnAddress(self, Insn_ea): 579 | self.instruction_addresses.append(Insn_ea) 580 | 581 | def AddNonTerminatingEgress(self,Target_ea): 582 | self.non_terminating_egress.append(Target_ea) 583 | 584 | def FillOutBlock(self): 585 | end_insn = ida_ua.insn_t() 586 | ida_ua.decode_insn(end_insn, self.end_ea) 587 | 588 | curr_insn_ea = self.start_ea 589 | while curr_insn_ea != self.end_ea + end_insn.size: 590 | # 591 | # Walk and add each address to the list of instruction addresses 592 | # 593 | curr_insn = ida_ua.insn_t() 594 | ida_ua.decode_insn(curr_insn, curr_insn_ea) 595 | self.instruction_addresses.append(curr_insn_ea) 596 | curr_insn_ea = curr_insn_ea + curr_insn.size 597 | 598 | def CheckPartOfAnotherFunction(self, Curr_insn_ea, Expected_func_start_ea): 599 | ''' 600 | @brief Check if the start address of the function currently being walked 601 | matches the start address of whatever function in which 602 | the current instruction exists. 603 | ''' 604 | is_rouge = False 605 | 606 | testing_func = ida_funcs.get_func(Curr_insn_ea) 607 | if testing_func: 608 | if testing_func.start_ea != Expected_func_start_ea: 609 | self.incorrect_function_ea = testing_func.start_ea 610 | is_rouge = True 611 | 612 | return is_rouge 613 | 614 | 615 | class FunctionEpilogue(object): 616 | ''' 617 | @brief Describes a function epilogue 618 | ''' 619 | 620 | def __init__(self, BasicBlock, StackSpaceImmediate, Registers): 621 | 622 | self.basic_block = BasicBlock 623 | self.stack_space_immediate = StackSpaceImmediate 624 | self.registers = Registers 625 | 626 | class FunctionPrologue(object): 627 | ''' 628 | @brief Describes a function prologue 629 | ''' 630 | 631 | def __init__(self, BasicBlock, StackSpaceImmediate, Registers): 632 | 633 | self.basic_block = BasicBlock 634 | self.stack_space_immediate = StackSpaceImmediate 635 | self.registers = Registers 636 | 637 | # 638 | # Connected epilogues have been verified via walking the function 639 | # 640 | self.connected_epilogues = [] 641 | 642 | # 643 | # Possible epilogues are address that are unverified addresses 644 | # that could be an epilogue for this function. 645 | # 646 | self.possible_epilogues = [] 647 | -------------------------------------------------------------------------------- /bytesearch/maze_functions.py: -------------------------------------------------------------------------------- 1 | import ida_ua, idc, idaapi 2 | 3 | idaapi.require("maze_deobf_utils") 4 | 5 | """ 6 | This file and the "maze_funtion_analysis.py" containd uplicate method and class names. 7 | The "maze_funtion_analysis.py" is going to be the updated version of this file. 8 | 9 | """ 10 | 11 | class RecursiveDescent(object): 12 | ''' 13 | @brief Recurse descent disassembly for a single function (no calls followed) 14 | ''' 15 | 16 | def __init__(self, Start_ea): 17 | 18 | self.deferred_targets = [] 19 | self.instructions_walked = [] 20 | self.wrong_functions = [] 21 | 22 | self.entry_point = Start_ea 23 | self.deferred_targets.append(Start_ea) 24 | 25 | 26 | def DoDescentParser(self,Prologue): 27 | ''' 28 | @brief Walk the function leveraging a recursive descent parser. 29 | 30 | @detail Starting with a prologue walk each instruction until the associated epilogue is reached. For functions 31 | with multiple epilogues, iterate over each one. 32 | 33 | As each instruction is traversed, do the following three 34 | things: 35 | 36 | - Undefine the instruction 37 | - Mark the instruction as code 38 | - Check to see if the instruction is already a member of another function 39 | 40 | If an instruction is a member of another function, undefine that function and place it in a queue. At the end 41 | of traversing each function, a new function is going to be created with the new prologue and the new epilogue. 42 | In addition, the undefined function queue is going to be iterated over and each function will be redefined. This 43 | should clean up messy function 44 | 45 | much thanks to the author of "Practical Malware Analysis" for the break down of the algorithm in Chapter 8. 46 | ''' 47 | 48 | # 49 | # jmps = [eval("idaapi."+name) for name in dir(idaapi) if "NN_j" in name] 50 | # 51 | jcc_terminators = ['jnz','jz','jo','jno', 'js','jns', 'je','jne', 'jb', 'jnae', 'jc', 'jnb', 'jae', 'jnc', 'jbe','jna', 'ja','jnbe', 'jl','jnge','jge','jnl','jle','jng','jg','jnle','jp','jpe','jnp','jpo','jcxz','jecxz'] 52 | 53 | #print EndAddresses 54 | 55 | while len(self.deferred_targets) > 0: 56 | 57 | curr_insn_ea = self.deferred_targets.pop() 58 | 59 | if curr_insn_ea in self.instructions_walked: 60 | # 61 | # skip instructions that were already walked 62 | # 63 | continue 64 | 65 | #for target in self.deferred_targets: 66 | # print "deferred target: %08x" % target 67 | 68 | 69 | while curr_insn_ea not in Prologue.possible_epilogues: 70 | # 71 | # walk only to a known epilogue 72 | # 73 | 74 | #print "Current EA: %08x" % (curr_insn_ea) 75 | 76 | self.instructions_walked.append(curr_insn_ea) 77 | 78 | # 79 | # Verify current instruction information 80 | # 81 | curr_insn = ida_ua.insn_t() 82 | decode_result = ida_ua.decode_insn(curr_insn, curr_insn_ea) 83 | if decode_result < 1: 84 | # 85 | # break if instruction invalid 86 | # 87 | break 88 | 89 | represented_insn_dism = idc.generate_disasm_line(curr_insn_ea,0) 90 | curr_insn_dism = idc.generate_disasm_line(curr_insn_ea,1) 91 | if curr_insn_dism != represented_insn_dism: 92 | # 93 | # If the item shown at this address in IDA does not match 94 | # what should be shown (due to obfuscation), fix it 95 | # 96 | 97 | #print "Instructions don't match: %08x" % (curr_insn_ea) 98 | idc.del_items(curr_insn_ea,1) 99 | #idc.plan_and_wait(curr_insn_ea, curr_insn_ea+curr_insn.size) 100 | idc.create_insn(curr_insn_ea) 101 | #idc.plan_and_wait(curr_insn_ea, curr_insn_ea+curr_insn.size) 102 | 103 | curr_func_name = idc.get_func_name(curr_insn_ea) 104 | if curr_func_name: 105 | # 106 | # check if in function, undefine function, add to list to redefine later 107 | # 108 | 109 | #print "Part of another function: %08x" % (curr_insn_ea) 110 | curr_func_ea = idc.get_name_ea_simple(curr_func_name) 111 | func_end_ea = idc.find_func_end(curr_func_ea) 112 | idc.del_func(curr_func_ea) 113 | 114 | for curr_function in self.wrong_functions: 115 | if curr_function not in curr_function: 116 | self.wrong_functions.append([curr_func_ea, func_end_ea]) 117 | 118 | 119 | if curr_insn_dism.startswith( tuple( jcc_terminators ) ): 120 | # 121 | # JCC conditionals, recursion control case 122 | # 123 | 124 | #print "Adding jcc target: %08x" % (curr_insn_ea) 125 | jmp_target_ea = self.GetInstuctionTargetAddress(curr_insn) 126 | if jmp_target_ea not in self.deferred_targets: 127 | self.deferred_targets.append(jmp_target_ea) 128 | 129 | curr_insn_ea = curr_insn_ea + curr_insn.size 130 | 131 | elif curr_insn_dism.startswith("jmp"): 132 | jmp_target_ea = self.GetInstuctionTargetAddress(curr_insn) 133 | 134 | #print "Adding jump target: %08x" % (curr_insn_ea) 135 | if jmp_target_ea not in self.deferred_targets: 136 | self.deferred_targets.append(jmp_target_ea) 137 | break 138 | 139 | elif curr_insn_dism.startswith("retn"): 140 | break 141 | 142 | else: 143 | curr_insn_ea = curr_insn_ea + curr_insn.size 144 | 145 | if curr_insn_ea in Prologue.possible_epilogues: 146 | Prologue.connected_epilogues.append(curr_insn_ea) 147 | continue 148 | 149 | 150 | def GetInstuctionTargetAddress(self,Target_insn): 151 | ''' 152 | @brief Return the operand value for a unirary instruction that contains a target 153 | address (JMP, JNZ, JZ, push, call, etc). 154 | 155 | Can probably be deleted because the same function is in the "maze_deobf_utils.py" 156 | ''' 157 | 158 | target_ea = 0 159 | 160 | if type(Target_insn) == ida_ua.insn_t: 161 | #print "GITA: type match" 162 | target_op = Target_insn.ops[0] 163 | #print "GITA: ", target_op.type, hex(target_op.value), hex(target_op.addr) 164 | if target_op.type == 5: 165 | target_ea = target_op.value 166 | elif target_op.type == 6 or target_op.type == 7: 167 | target_ea = target_op.addr 168 | 169 | return target_ea 170 | 171 | 172 | class BasicBlock(object): 173 | ''' 174 | @brief Class used for describing basic blocks. 175 | ''' 176 | 177 | def __init__(self,Start_ea=0, End_ea=0): 178 | self.start_ea = Start_ea 179 | self.end_ea = End_ea 180 | 181 | self.traversed = False 182 | 183 | self.instruction_addresses = [] 184 | self.non_terminating_egress = set() 185 | 186 | self.is_prologue = False 187 | self.is_epilogue = False 188 | 189 | def AddInsnAddress(self, Insn_ea): 190 | self.instruction_addresses.append(ea) 191 | 192 | def AddNonTerminatingEgress(self,Target_ea): 193 | self.non_terminating_egress.append(Target_ea) 194 | 195 | def FillOutBlock(self): 196 | end_insn = ida_ua.insn_t() 197 | ida_ua.decode_insn(end_insn, self.end_ea) 198 | 199 | curr_insn_ea = self.start_ea 200 | while curr_insn_ea != self.end_ea + end_insn.size: 201 | # 202 | # Walk and add each address to the list of instruction addresses 203 | # 204 | curr_insn = ida_ua.insn_t() 205 | ida_ua.decode_insn(curr_insn, curr_insn_ea) 206 | self.instruction_addresses.append(curr_insn_ea) 207 | curr_insn_ea = curr_insn_ea + curr_insn.size 208 | 209 | 210 | class FunctionEpilogue(object): 211 | ''' 212 | @brief Describes a function epilogue 213 | ''' 214 | 215 | def __init__(self, BasicBlock, StackSpaceImmediate, Registers): 216 | 217 | self.basic_block = BasicBlock 218 | self.stack_space_immediate = StackSpaceImmediate 219 | self.registers = Registers 220 | 221 | class FunctionPrologue(object): 222 | ''' 223 | @brief Describes a function prologue 224 | ''' 225 | 226 | def __init__(self, BasicBlock, StackSpaceImmediate, Registers): 227 | 228 | self.basic_block = BasicBlock 229 | self.stack_space_immediate = StackSpaceImmediate 230 | self.registers = Registers 231 | 232 | # 233 | # Connected epilogues have been verified via walking the function 234 | # 235 | self.connected_epilogues = [] 236 | 237 | # 238 | # Possible epilogues are address that are unverified addresses 239 | # that could be an epilogue for this function. 240 | # 241 | self.possible_epilogues = [] 242 | -------------------------------------------------------------------------------- /images/maze_featureimg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shamrockhoax/mazedecoder/a9258ade718369ea099a9f1be7d98342253dedcc/images/maze_featureimg.png -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | # Deobfuscating Maze's Control Flow Obfuscations 6 | This is the source code related to my blogpost [The Many Paths Through Maze](https://www.crowdstrike.com/blog/maze-ransomware-deobfuscation/). Forgive me, but currently the source code is written using Python 2.7. 7 | 8 | ## Deobfuscation Methods 9 | 10 | Currently, only the Byte-search Method discussed in the blog post is covered. Eventually, I'd like to add a few different methods. 11 | 12 | ### Bytesearch Method 13 | Relies upon searching for specific bytes to identify the obfuscations. 14 | 15 | * Takes a bit to run, to many "plan_and_wait()" functions, and I print logs to the output window 16 | * Functions that don't get auto-defined after patching should now be definable in IDA via pressing 'p' 17 | * The main-branch works, but I am working on some improvements 18 | * Some functions have orphaned basic blocks, it's annoying and I'm working on a solution 19 | * Comments are being updated 20 | * bytesearch/maze_cfg_cleanup.py 21 | * Execute this script to decode the IDB 22 | 23 | 24 | ## IOCs 25 | 26 | **Hashes** 27 | 28 | * 2a6c602769ac15bd837f9ff390acc443d023ee62f76e1be8236dd2dd957eef3d 29 | 30 | 31 | ## Further Reading 32 | * [Escape from the Maze pt 1](https://www.blueliv.com/cyber-security-and-cyber-threat-intelligence-blog-blueliv/escape-from-the-maze/) Research by Blueliv 33 | * [A Malware Researcher's Guide to Reversing Maze](https://labs.bitdefender.com/2020/03/a-malware-researchers-guide-to-reversing-maze/) by Mihai Neagu (@mneagu8d) and Bogdan BOTEZATU (@bbotezatu) 34 | * Leverages IDA's Processor Module Extensions 35 | * [Transparent Deobfuscation With IDA Processor Module Extensions](https://www.msreverseengineering.com/blog/2015/6/29/transparent-deobfuscation-with-ida-processor-module-extensions) by Rolf Rolles --------------------------------------------------------------------------------