├── ARMInstructionFrequencies.txt ├── DumpFunctionBytes.py ├── README.md ├── allocator.h ├── arm_frequency.py ├── find_hardref.py ├── func_references.py ├── ida_hooks.py ├── machofinder.py ├── mark_interesting.py ├── minset.py ├── operan_offset_base.py ├── prolog_finder.py ├── propagate_types.py ├── references.py ├── renamer.py ├── simple_jack.py ├── string_finder.py └── struct_hint.py /ARMInstructionFrequencies.txt: -------------------------------------------------------------------------------- 1 | From armMissinglinuxMissingandroideabiMissingobjdump Missingd libchromeview_prebuilt.so > sample.s 2 | This .so is 2gb in size with symbols so it is a pretty good indicator of the actual 3 | distribution of the instructions. 4 | 5 | Instruction mov is used 1428331 times | decode_mov 6 | Instruction ldr is used 1181133 times | decode_ldr 7 | Instruction add is used 1065935 times | decode_add 8 | Instruction bl is used 835527 times | decode_bl 9 | Instruction b is used 547924 times | decode_b 10 | Instruction str is used 538433 times | decode_str 11 | Instruction cmp is used 330601 times | decode_cmp 12 | Instruction cbz is used 155987 times | decode_cbz 13 | Instruction blx is used 135995 times | decode_blx 14 | Instruction pop is used 118732 times | decode_pop 15 | Instruction push is used 113136 times | decode_push 16 | Instruction ldmia is used 84363 times | decode_ldmia 17 | Instruction ldrb is used 70881 times | decode_ldrb 18 | Instruction sub is used 69208 times | decode_sub 19 | Instruction cbnz is used 61974 times | decode_cbnz 20 | Instruction subs is used 55950 times | decode_subs 21 | Instruction it is used 55679 times | decode_it 22 | Instruction strb is used 52617 times | decode_strb 23 | Instruction stmia is used 40441 times | decode_stmia 24 | Instruction stmdb is used 34525 times | decode_stmdb 25 | Instruction lsl is used 26392 times | decode_lsl 26 | Instruction and is used 24389 times | decode_and 27 | Instruction orr is used 23024 times | decode_orr 
28 | Instruction movw is used 21616 times | decode_movw 29 | Instruction vldr is used 20493 times | decode_vldr 30 | Instruction bx is used 18856 times | decode_bx 31 | Instruction ldrd is used 15264 times | decode_ldrd 32 | Instruction asr is used 15170 times | decode_asr 33 | Instruction strd is used 15058 times | decode_strd 34 | Instruction vmov is used 15012 times | decode_vmov 35 | Instruction rsb is used 14290 times | decode_rsb 36 | Instruction lsr is used 10236 times | decode_lsr 37 | Instruction ldrh is used 9915 times | decode_ldrh 38 | Instruction ubfx is used 9384 times | decode_ubfx 39 | Instruction mul is used 9268 times | decode_mul 40 | Instruction vstr is used 8072 times | decode_vstr 41 | Instruction bic is used 7865 times | decode_bic 42 | Instruction vmrs is used 7408 times | decode_vmrs 43 | Instruction mla is used 7381 times | decode_mla 44 | Instruction adc is used 7004 times | decode_adc 45 | Instruction eor is used 6673 times | decode_eor 46 | Instruction tst is used 6197 times | decode_tst 47 | Instruction uxtb is used 6075 times | decode_uxtb 48 | Instruction strh is used 6019 times | decode_strh 49 | Instruction mvn is used 4914 times | decode_mvn 50 | Instruction neg is used 4309 times | decode_neg 51 | Instruction sbc is used 3545 times | decode_sbc 52 | Instruction bfi is used 3496 times | decode_bfi 53 | Instruction uxth is used 2234 times | decode_uxth Missing 54 | Instruction vpop is used 2185 times | decode_vpop Missing 55 | Instruction vpush is used 2080 times | decode_vpush Missing 56 | Instruction ldrsb is used 1939 times | decode_ldrsb 57 | Instruction tbb is used 1483 times | decode_tbb 58 | Instruction ldrsh is used 1446 times | decode_ldrsh 59 | Instruction dmb is used 1247 times | decode_dmb 60 | Instruction ldmdb is used 1191 times | decode_ldmdb 61 | Instruction bfc is used 1167 times | decode_bfc 62 | Instruction movt is used 841 times | decode_movt 63 | Instruction teq is used 824 times | decode_teq 64 | Instruction 
strex is used 787 times | decode_strex 65 | Instruction ldrex is used 787 times | decode_ldrex 66 | Instruction cmn is used 738 times | decode_cmn 67 | Instruction sxtb is used 731 times | decode_sxtb Missing 68 | Instruction umull is used 523 times | decode_umull 69 | Instruction tbh is used 502 times | decode_tbh 70 | Instruction sbfx is used 493 times | decode_sbfx Missing 71 | Instruction orn is used 382 times | decode_orn Missing 72 | Instruction sxth is used 366 times | decode_sxth Missing 73 | Instruction vorr is used 329 times | decode_vorr Missing 74 | Instruction smulbb is used 287 times | decode_smulbb 75 | Instruction veor is used 271 times | decode_veor Missing 76 | Instruction smull is used 264 times | decode_smull 77 | Instruction nop is used 204 times | decode_nop 78 | Instruction rev is used 161 times | decode_rev Missing 79 | Instruction vldmia is used 129 times | decode_vldmia Missing 80 | Instruction ror is used 114 times | decode_ror 81 | Instruction vand is used 113 times | decode_vand Missing 82 | Instruction smlal is used 113 times | decode_smlal 83 | Instruction mls is used 85 times | decode_mls 84 | Instruction vstmia is used 76 times | decode_vstmia Missing 85 | Instruction umlal is used 75 times | decode_umlal 86 | Instruction svc is used 72 times | decode_svc 87 | Instruction uxtab is used 69 times | decode_uxtab Missing 88 | Instruction clz is used 68 times | decode_clz 89 | Instruction smlabb is used 55 times | decode_smlabb 90 | Instruction pld is used 52 times | decode_pld 91 | Instruction ldm is used 41 times | decode_ldm 92 | Instruction sxtab is used 40 times | decode_sxtab Missing 93 | Instruction addw is used 38 times | decode_addw 94 | Instruction vswp is used 28 times | decode_vswp Missing 95 | Instruction uxtah is used 19 times | decode_uxtah Missing 96 | Instruction vstmdb is used 13 times | decode_vstmdb Missing 97 | Instruction rsc is used 12 times | decode_rsc 98 | Instruction sxtah is used 12 times | decode_sxtah 
Missing 99 | Instruction rrx is used 9 times | decode_rrx 100 | Instruction stfe is used 8 times | decode_stfe 101 | Instruction stfp is used 8 times | decode_stfp 102 | Instruction stm is used 8 times | decode_stm 103 | Instruction ldfp is used 8 times | decode_ldfp 104 | Instruction ldfe is used 8 times | decode_ldfe 105 | Instruction vldmdb is used 5 times | decode_vldmdb Missing 106 | Instruction rbit is used 4 times | decode_rbit Missing 107 | Instruction bkpt is used 3 times | decode_bkpt 108 | Instruction vmvn is used 3 times | decode_vmvn Missing 109 | Instruction ldc is used 2 times | decode_ldc 110 | Instruction fldmiax is used 1 times | decode_fldmiax 111 | Instruction ldcl is used 1 times | decode_ldcl 112 | Instruction ssat is used 1 times | decode_ssat Missing 113 | Instruction stc is used 1 times | decode_stc Missing 114 | Instruction fstmiax is used 1 times | decode_fstmiax -------------------------------------------------------------------------------- /DumpFunctionBytes.py: -------------------------------------------------------------------------------- 1 | """ 2 | Script to dump the current function as a C/C++ shellcode blob. 3 | It replaces the calls to external functions with a call to a 4 | trampoline entry that you can change to point to your own implementation 5 | of that function. 
def to_hex(bytes, cformat=False):
    """
    Render a byte string as hexadecimal text.

    bytes   -- raw bytes (str on Python 2, bytes/bytearray otherwise).
    cformat -- when True every byte is emitted as a C escape ("\\x55\\x48"),
               otherwise bytes are separated by single spaces ("55 48").

    Returns an empty string for empty input in both formats.
    """
    digits = hexlify(bytes)

    # hexlify() hands back a byte string on Python 3; normalise to text so
    # the result can be joined with ordinary string separators.
    if not isinstance(digits, str):
        digits = digits.decode("ascii")

    pairs = [digits[i:i + 2] for i in range(0, len(digits), 2)]
    if cformat:
        # Prefix every pair individually so that empty input does not
        # produce a dangling "\x".
        return "".join("\\x" + pair for pair in pairs)

    return " ".join(pairs)


def dump_function_bytes(fn_ea, align=False):
    """
    Read the raw bytes of the function that contains 'fn_ea'.

    fn_ea -- any address inside the function to dump.
    align -- when True the returned bytes are padded with int3 (0xcc) up to
             the next multiple of 32 bytes.

    Returns a tuple (start, size, contents) where 'start' is the function
    start address, 'size' is the unpadded function size and 'contents' are
    the (optionally padded) bytes.

    Raises ValueError if there is no function at 'fn_ea'.
    """
    f = idaapi.get_func(fn_ea)
    if not f:
        raise ValueError("No function at address 0x%.8x" % fn_ea)

    start = f.startEA
    size = f.endEA - start
    contents = GetManyBytes(start, size)

    if align:
        # Pad the bytes that are actually returned to the caller.
        rem = len(contents) % 32
        if rem:
            contents += b"\xcc" * (32 - rem)

    return (start, size, contents)
76 | jumps_to_fix = [(i.address, i) for i in invalid_jump_sites] 77 | calls_to_fix = [(i.address, i) for i in call_sites] 78 | mem_ref_to_fix = [(i.address, i) for i in non_local_mem_access_sites] 79 | 80 | # Collect the start of every basic block for pretty printing. 81 | basic_block_start = [] 82 | basic_block_start.extend([i.address + i.size for i in (jump_sites + ret_sites)]) 83 | basic_block_start.extend([i.operands[0].value.imm for i in imm_jump_sites]) 84 | 85 | if DEBUG: 86 | # Debug dump of the function. 87 | print "// Function disassembly:" 88 | for i in instructions: 89 | if i.address in basic_block_start: 90 | print "// " + ("-" * 80) 91 | print "// 0x%.8x: %-32s %-8s %s" %(i.address, to_hex(i.bytes), i.mnemonic, i.op_str) 92 | 93 | # Print a list of functions that need to be implemented for the blob to work. 94 | if len(calls_to_fix): 95 | print "// " 96 | print "// Function calls to replace:" 97 | for e in calls_to_fix: 98 | tmp = "%s %s" %(e[1].mnemonic, e[1].op_str) 99 | print "// off=0x%.8x ins='%s' bytes=%s size=%d imm=0x%.8x" % (e[0], tmp, to_hex(e[1].bytes), e[1].size, e[1].operands[0].value.imm) 100 | 101 | # Warn the user that the function is not complete and should be correctly extracted. 
102 | if len(jumps_to_fix): 103 | print "// " 104 | print "// Invalid jumps:" 105 | for e in jumps_to_fix: 106 | tmp = "%s %s" %(e[1].mnemonic, e[1].op_str) 107 | print "// off=0x%.8x ins='%s' bytes=%s size=%d" % (e[0], tmp, to_hex(e[1].bytes), e[1].size) 108 | 109 | if len(mem_ref_to_fix): 110 | print "// " 111 | print "// Potentially invalid memory accesses:" 112 | for e in mem_ref_to_fix: 113 | tmp = "%s %s" %(e[1].mnemonic, e[1].op_str) 114 | print "// off=0x%.8x ins='%s' bytes=%s size=%d" % (e[0], tmp, to_hex(e[1].bytes), e[1].size) 115 | 116 | print '#include ' 117 | print '#include ' 118 | print '#include \n' 119 | print 'using namespace std;\n' 120 | print 'string build_blob();\n' 121 | print 'void *alloc_rwx(size_t size) {' 122 | print ' void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, 0, 0);' 123 | print ' if (!mem) {' 124 | print ' cout << "Could not alloc RWX memory." << endl;' 125 | print ' return nullptr;' 126 | print ' }\n' 127 | print ' cout << "Allocated RwX memory at: " << mem << endl;' 128 | print ' return mem;' 129 | print '}\n' 130 | print 'int main(int argc, char **argv) {' 131 | print ' string blob = build_blob();' 132 | print ' void *mem = alloc_rwx(blob.size());' 133 | print ' memcpy(mem, &blob[0], blob.size());\n' 134 | print ' // TODO: Change the signature of the function pointer.' 135 | print ' ((int (*)(void)) mem)();' 136 | print '}\n' 137 | print 'string build_blob() {' 138 | 139 | # Make the calls point to our dispatch table. 140 | print ' string dispatch_table;' 141 | imm_to_offset = {} 142 | cur_trampoline_offset = len(fbytes) 143 | 144 | for (call_off, call_ins) in calls_to_fix: 145 | operand = call_ins.operands[0] 146 | if operand.type != X86_OP_IMM: 147 | raise RuntimeException("We don't handle non immediate calls yet.") 148 | 149 | # Get the destination of the call. 150 | operand_imm = operand.value.imm 151 | 152 | # Check if we already have an entry for this immediate value. 
153 | if not imm_to_offset.has_key(operand_imm): 154 | # Add the current offset to the dictionary. 155 | imm_to_offset[operand_imm] = cur_trampoline_offset 156 | 157 | tmp_name = "function_%d_address" % imm_to_offset[operand_imm] 158 | tmp = 'dispatch_table.append("\\x48\\xb8", 2);' 159 | print " // TODO: Replace %s with the correct implementation. Called at offset 0x%.8x." % (tmp_name, call_off) 160 | print " uintptr_t %s = reinterpret_cast(0x4040404040404040);" % tmp_name 161 | print " %-60s // 0x%.8x: movabs rax, %s" % (tmp, cur_trampoline_offset, tmp_name) 162 | cur_trampoline_offset += 10 163 | 164 | tmp = 'dispatch_table.append("\\xff\\xe0", 2);' 165 | print " dispatch_table.append(reinterpret_cast(&%s), sizeof(%s));" % (tmp_name, tmp_name) 166 | print " %-60s // 0x%.8x: jmp rax" % (tmp, cur_trampoline_offset) 167 | print 168 | cur_trampoline_offset += 2 169 | 170 | # Get the trampoline offset for the current immediate. 171 | trampoline_offset = imm_to_offset[operand_imm] 172 | 173 | # Adjust the trampoline offset with the offset of the current call. 174 | trampoline_offset -= call_off 175 | 176 | # Create a trampoline call and replace the original call. 177 | trampoline_call = make_call_immediate(trampoline_offset) 178 | 179 | # Replace original call bytes. 
180 | fbytes = fbytes[:call_off] + trampoline_call + fbytes[call_off + len(trampoline_call):] 181 | 182 | print ' // Dumped [0x%.8x-0x%.8x]' % (start, start + size) 183 | print ' string shellcode;' 184 | instructions = list(md.disasm(fbytes, 0)) 185 | for i in instructions: 186 | tmp = 'shellcode.append("%s", %d);' % (to_hex(i.bytes, cformat=True), len(i.bytes)) 187 | print ' %-60s // 0x%.8x: %-8s %s' % (tmp, i.address, i.mnemonic, i.op_str) 188 | print 189 | 190 | print " return shellcode + dispatch_table;" 191 | print "}" 192 | print 193 | 194 | main() 195 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Utilities 2 | ========= 3 | 4 | Uncategorized utilities that do not need their own repository. 5 | 6 | Simple Jack Symbol Porting tool 7 | ------------------------------- 8 | 9 | Small dumb utility to port obvious function matches across two IDA databases. 10 | 11 | Instruction frequency counter (arm_frequency.py) 12 | ---------------------------------------------------- 13 | 14 | The script 'arm_frequency.py' takes as input the output of objdump 15 | on an ARM binary. 16 | It will show the ammount of times every instruction was used, sorted 17 | by the most used ones. 18 | 19 | XRef printer (func_references.py) 20 | --------------------------------- 21 | 22 | Small utility to print all the function calls to a given function. 23 | This is generally used to look for calls to malloc like function. 24 | 25 | DumpFunctionBytes.py 26 | -------------------- 27 | 28 | IDA Python script that dumps the current function (you need to position 29 | the cursor on the start of the function) as a shellcode. 30 | It does a very limited analysis of the function in order to let you know 31 | that you need to fix call sites to functions. 32 | 33 | The main idea of this is being able to extract a function from a binary 34 | and use it. 
35 | 36 | **Usage:** 37 | 38 | Open IDA Pro and set the cursor to the beginning of the function you want to dump. 39 | Run the script; it will print the C++ code to stdout, so copy it and paste it into 40 | a C++ file. 41 | 42 | Now that you have the output in a file, look for `TODO` markers and fill in the needed 43 | manual information. 44 | 45 | **Example output:** 46 | 47 | ```c++ 48 | #include 49 | #include 50 | #include 51 | 52 | using namespace std; 53 | 54 | string build_blob(); 55 | 56 | void *alloc_rwx(size_t size) { 57 | void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, 0, 0); 58 | if (!mem) { 59 | cout << "Could not alloc RWX memory." << endl; 60 | return nullptr; 61 | } 62 | 63 | cout << "Allocated RwX memory at: " << mem << endl; 64 | return mem; 65 | } 66 | 67 | int main(int argc, char **argv) { 68 | string blob = build_blob(); 69 | void *mem = alloc_rwx(blob.size()); 70 | memcpy(mem, &blob[0], blob.size()); 71 | 72 | // TODO: Change the signature of the function pointer. 73 | ((int (*)(void)) mem)(); 74 | } 75 | 76 | string build_blob() { 77 | string dispatch_table; 78 | // TODO: Replace function_46_address with the correct implementation. 
79 | uintptr_t function_46_address = reinterpret_cast(&printf); 80 | dispatch_table.append("\x48\xb8", 2); // 0x0000002e: movabs rax, function_46_address 81 | dispatch_table.append(reinterpret_cast(&function_46_address), sizeof(function_46_address)); 82 | dispatch_table.append("\xff\xe0", 2); // 0x00000038: jmp rax 83 | 84 | // Dumped [0x100000f30-0x100000f5e] 85 | string shellcode; 86 | shellcode.append("\x55", 1); // 0x00000000: push rbp 87 | shellcode.append("\x48\x89\xe5", 3); // 0x00000001: mov rbp, rsp 88 | shellcode.append("\x48\x83\xec\x10", 4); // 0x00000004: sub rsp, 0x10 89 | shellcode.append("\x48\x8d\x3d\x6b\x00\x00\x00", 7); // 0x00000008: lea rdi, qword ptr [rip + 0x6b] 90 | shellcode.append("\xc7\x45\xfc\x00\x00\x00\x00", 7); // 0x0000000f: mov dword ptr [rbp - 4], 0 91 | shellcode.append("\xb0\x00", 2); // 0x00000016: mov al, 0 92 | shellcode.append("\xe8\x11\x00\x00\x00", 5); // 0x00000018: call 0x2e 93 | shellcode.append("\x8b\x4d\xfc", 3); // 0x0000001d: mov ecx, dword ptr [rbp - 4] 94 | shellcode.append("\x83\xc1\x64", 3); // 0x00000020: add ecx, 0x64 95 | shellcode.append("\x89\x45\xf8", 3); // 0x00000023: mov dword ptr [rbp - 8], eax 96 | shellcode.append("\x89\xc8", 2); // 0x00000026: mov eax, ecx 97 | shellcode.append("\x48\x83\xc4\x10", 4); // 0x00000028: add rsp, 0x10 98 | shellcode.append("\x5d", 1); // 0x0000002c: pop rbp 99 | shellcode.append("\xc3", 1); // 0x0000002d: ret 100 | 101 | return shellcode + dispatch_table; 102 | } 103 | ``` 104 | -------------------------------------------------------------------------------- /allocator.h: -------------------------------------------------------------------------------- 1 | /* 2 | * allocator.h 3 | * 4 | * Created on: Feb 11, 2016 5 | * Author: anon 6 | */ 7 | 8 | #ifndef ALLOCATOR_H_ 9 | #define ALLOCATOR_H_ 10 | 11 | namespace os { 12 | 13 | #include 14 | 15 | void *alloc_rwx_memory(size_t size) { 16 | auto tmp = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, 17 | MAP_ANON | 
MAP_PRIVATE, -1, 0); 18 | 19 | return tmp == MAP_FAILED ? nullptr : tmp; 20 | } 21 | 22 | } 23 | 24 | namespace allocator { 25 | 26 | #include 27 | 28 | class Lock; 29 | 30 | class Mutex { 31 | public: 32 | Mutex() { 33 | pthread_mutexattr_t attr; 34 | pthread_mutexattr_init(&attr); 35 | pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); 36 | pthread_mutex_init(&m_mutex, &attr); 37 | } 38 | 39 | ~Mutex() { 40 | pthread_mutex_destroy(&m_mutex); 41 | } 42 | 43 | private: 44 | friend class Lock; 45 | 46 | void lock() { 47 | pthread_mutex_lock(&m_mutex); 48 | } 49 | 50 | void unlock() { 51 | pthread_mutex_unlock(&m_mutex); 52 | } 53 | 54 | pthread_mutex_t m_mutex; 55 | }; 56 | 57 | class Lock { 58 | public: 59 | Lock(Mutex &mutex) : 60 | m_mutex(mutex) { 61 | m_mutex.lock(); 62 | } 63 | 64 | ~Lock() { 65 | m_mutex.unlock(); 66 | } 67 | 68 | private: 69 | Mutex &m_mutex; 70 | }; 71 | 72 | // Default size of each SLAB is 16 mb. 73 | class BumpAllocator { 74 | public: 75 | BumpAllocator() : 76 | m_memory(0), m_used(0) { 77 | } 78 | 79 | void *allocate(size_t size) { 80 | Lock lock(m_mutex); 81 | 82 | if (!m_memory) { 83 | if (!init()) { 84 | return nullptr; 85 | } 86 | } 87 | 88 | if (fits(size)) { 89 | if (!init()) { 90 | return nullptr; 91 | } 92 | } 93 | 94 | auto tmp = m_memory; 95 | m_memory = static_cast(m_memory) + size; 96 | m_used += size; 97 | 98 | return tmp; 99 | } 100 | 101 | private: 102 | bool init() { 103 | Lock lock(m_mutex); 104 | 105 | m_used = 0; 106 | m_memory = os::alloc_rwx_memory(BumpAllocator::SLAB_SIZE); 107 | if (m_memory == nullptr) { 108 | return false; 109 | } 110 | 111 | return true; 112 | } 113 | 114 | inline bool remaining() { 115 | return BumpAllocator::SLAB_SIZE - m_used; 116 | } 117 | 118 | inline bool fits(size_t size) { 119 | return size < remaining(); 120 | } 121 | 122 | static const size_t SLAB_SIZE = 16777216; 123 | 124 | void *m_memory; 125 | size_t m_used; 126 | Mutex m_mutex; 127 | }; 128 | 129 | } 130 | 131 | #endif /* 
# Mnemonics whose trailing "s" belongs to the name and is not the
# 'setsflags' suffix.
setsflags_skip = ["cps", "mls", "mrs", "smmls", "srs", "subs", "vabs", "vcls", "vfms", "vmls", "vmrs", "vnmls", "qabs", "vrecps", "vrsqrts"]

# Mnemonics that end with the letters of a condition code but carry none.
skip_list = ["cbnz", "svc", "lsls", "sbcs", "bics", "rscs", "movs", "muls", "mls", "teq", "adcs", "smmls", "vcls", "vmls"]

cond_codes = ["eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le"]


def _strip_setsflags(name):
    """Remove the trailing 'setsflags' indicator ("s") unless it is part of the name."""
    if name.endswith("s") and name not in setsflags_skip:
        return name[:-1]

    return name


def drop_garbage(opname):
    """
    Reduce an objdump mnemonic to its base instruction name.

    The width specifier (".w" / ".n"), the condition code suffix and the
    'setsflags' suffix are removed. Every IT variant collapses to "it".
    An opname that is empty once its suffixes are stripped yields "".
    """
    if opname.startswith("it"):
        return "it"

    # Drop the wide specifier.
    t = opname[:-2] if opname.endswith((".w", ".n")) else opname
    if not t:
        return t

    # Some opcodes end with the letters of condition codes but have nothing to do with them.
    if t in skip_list:
        return _strip_setsflags(t)

    # Drop condition codes.
    for code in cond_codes:
        if t.endswith(code):
            t2 = t[:-2]
            if len(t2) <= 2 and t2 not in ["b", "bl", "bx"]:
                print("You need to add the following opname to the 'skip_list' %s" % t)

            t = t2
            break

    return _strip_setsflags(t)
# Match the opcode, opname and arguments of objdump's output for ARM.
# Raw string so the regex escapes are not treated as string escapes.
regex_str = r"\s*[0-9a-f]+\:\s+([0-9a-f]+\s+[0-9a-f]*)\s+([a-zA-Z.]+)\s+(.+)"
regex = re.compile(regex_str)

# Assembler data directives that objdump prints in the mnemonic column.
data_directives = [".byte", ".word", ".dword", ".qword", ".short"]

# Number of occurrences of every (normalized) opname.
opname_freq = defaultdict(int)

if len(sys.argv) < 2:
    sys.exit("Usage: %s <objdump output file>" % sys.argv[0])

with open(sys.argv[1]) as f:
    for line in f:
        r = regex.search(line)
        if not r:
            continue

        opcode, opname, args = r.groups()
        if opname.lower() in data_directives:
            continue

        opname_freq[drop_garbage(opname)] += 1


# Report the most used instructions first.
for el in sorted(opname_freq.items(), key=operator.itemgetter(1), reverse=True):
    print("Instruction %10s is used %10d times | decode_%s" % (el[0], el[1], el[0]))
class HardReferencesChoose(Choose2):
    """
    Chooser window listing hard references.

    Every row holds four text columns: the address holding the reference,
    the referenced address and the disassembly text of both.
    """

    def __init__(self, title, refs):
        """
        title -- window title.
        refs  -- iterable of (address, reference) integer pairs.
        """
        Choose2.__init__(self, title, [ ["Address", 10], ["Reference", 10], ["Address Text", 15], ["Reference Text", 15] ])
        self.n = 0

        self.items = []
        for address, reference in refs:
            self.items.append(self.make_item(address, reference, GetDisasm(address), GetDisasm(reference)))

        self.icon = 0
        self.selcount = 0
        self.deflt = -1
        self.popup_names = ["NOSE"]

    def _row_address(self, n):
        """Return the integer address stored in the first column of row 'n'."""
        return int(self.items[n][0], 16)

    def OnClose(self):
        pass

    def OnEditLine(self, n):
        # jumpto() needs an address, not the whole row of strings.
        idaapi.jumpto(self._row_address(n))

    def OnInsertLine(self):
        pass

    def OnSelectLine(self, n):
        self.selcount += 1
        idaapi.jumpto(self._row_address(n))

    def OnGetLine(self, n):
        return self.items[n]

    def OnGetSize(self):
        return len(self.items)

    def OnDeleteLine(self, n):
        del self.items[n]
        return n

    def OnRefresh(self, n):
        return n

    def OnGetIcon(self, n):
        # Alternate between two icons on odd and even rows.
        if n % 2:
            return 10

        return 11

    def show(self):
        """Display the chooser. Returns False when Show() reports a negative value."""
        return self.Show() >= 0

    def make_item(self, address, reference, addr_asm, ref_asm):
        """Build one row and bump the row counter."""
        r = ["0x%08x" % address, "0x%08x" % reference, addr_asm, ref_asm]
        self.n += 1
        return r

    def OnGetLineAttr(self, n):
        # No custom colouring of rows.
        pass
def find_any_refs(image_start, image_end, mark=False):
    """
    Scan [image_start, image_end) in steps of four bytes and collect every
    dword whose value lies inside that same range.

    When 'mark' is set, each hit is converted to a dword in the database.
    Returns a list of (address, value) tuples; every hit is also printed.
    """
    found = []
    for ea in xrange(image_start, image_end, 4):
        value = Dword(ea)

        # Values outside of the image cannot be references into it.
        if not (image_start <= value < image_end):
            continue

        if mark:
            MakeDword(ea)

        print("Found hard ref at 0x%.8x to 0x%.8x (%s)" % (ea, value, GetDisasm(ea)))
        found.append((ea, value))

    return found
import idaapi

FunctionName = namedtuple('FunctionName', ['ea', 'name'])
ImportEntry = namedtuple('ImportEntry', ['ea', 'name', 'ord'])
BasicBlock = namedtuple('BasicBlock', ['start_ea', 'end_ea', 'function'])
FunctionSignature = namedtuple('FunctionSignature', ['name', 'nargs'])
Instruction = namedtuple('Instruction', ['ea', 'string'])
FunctionArgument = namedtuple('FunctionArgument', ['argument', 'instruction'])
InterestingResult = namedtuple('InterestingResult', ['caller_name', 'callee_name', 'call_address', 'arguments'])

# Filled by EnumImportNamesCallback while the imports are enumerated.
imports_list = []
function_names = []


def EnumImportNamesCallback(ea, name, ord_):
    """
    Callback used to enumerate all the import entries.

    Imports without a name are not recorded. Always returns True so the
    enumeration keeps going.
    """
    if name:
        imports_list.append(ImportEntry(ea, name, ord_))

    return True


def GetAllImportEntries():
    """
    Return a list of all the import entries.

    The module level 'imports_list' is emptied first so that calling this
    function more than once does not produce duplicated entries.
    """
    del imports_list[:]

    for i in xrange(0, idaapi.get_import_module_qty()):
        # Skip import modules that have no name.
        if not idaapi.get_import_module_name(i):
            continue

        idaapi.enum_import_names(i, EnumImportNamesCallback)

    return imports_list


def GetAllFunctionNames():
    """
    Return a list of all the function names in the database.
    """
    return [FunctionName(x, GetFunctionDemangledName(x)) for x in idautils.Functions()]
def GetInstructions(start_ea, end_ea):
    """
    Return a list of all the instructions found between 'start_ea' and 'end_ea'.

    Only heads flagged as code are returned. The range is whatever
    idautils.Heads() walks for these bounds (presumably 'end_ea' is
    exclusive -- confirm against the IDAPython documentation).
    """
    return [
        Instruction(head, GetDisasm(head))
        for head in idautils.Heads(start_ea, end_ea)
        if idaapi.isCode(idaapi.getFlags(head))
    ]


def IsArgumentSetter(ea):
    """
    Architecture dependent utility function that assumes that all the push
    instructions are argument setters.

    Returns True for a push instruction and False otherwise.
    """
    return GetMnem(ea).lower() == "push"


def GetFunctionArgument(ins):
    """
    Architecture dependent function that takes an instruction and returns
    the function argument.
    In this case we assume that the first operand is the function argument (for push instructions).
    """
    return FunctionArgument(GetOpnd(ins.ea, 0), ins)


def IsFunctionCall(instruction):
    """
    Return true if 'instruction' is a function call ('call' or 'jmp').
    """
    return GetMnem(instruction.ea).lower() in ["call", "jmp"]
def GetFunctionDemangledName(xref_addr):
    """
    Return the demangled name of the function at a given address 'xref_addr'.

    Falls back to the raw function name when it cannot be demangled.
    """
    tmp = GetFunctionName(xref_addr)
    name = Demangle(tmp, GetLongPrm(INF_SHORT_DN))
    return name if name else tmp


def IDAArgumentToSize(arg):
    """
    Try to convert an IDA 'argument' to an integer. If the argument is a
    register or a memory reference return -1 to place them last in the sorted list.

    Hexadecimal immediates use the 'h' suffix and always start with a digit
    ("0Ah"), which tells them apart from register names such as "ah".
    """
    if not arg:
        return -1

    try:
        if arg[-1] in ("h", "H"):
            # A leading letter means a register or symbol name, not a number.
            if not arg[0].isdigit():
                return -1

            return int(arg[:-1], 16)

        return int(arg, 10)

    except ValueError:
        return -1


def sort_func(element):
    """
    Sort by argument size. Results without recovered arguments sort last.
    """
    if not element.arguments:
        return -1

    return IDAArgumentToSize(element.arguments[0].argument)
192 | for a in GetAllImportEntries(): 193 | if func_sig.name in a.name: 194 | interesting_functions.append(a) 195 | 196 | results = [] 197 | for function in interesting_functions: 198 | # Get all code xrefs to 199 | xrefs = idautils.CodeRefsTo(function.ea, 0) 200 | for call_address in xrefs: 201 | arguments = GetFunctionCallArguments(func_sig, call_address) 202 | caller_name = GetFunctionDemangledName(call_address) 203 | callee_name = function.name 204 | 205 | results.append(InterestingResult(caller_name, callee_name, call_address, arguments)) 206 | 207 | #print "// Calling function %s " % caller_name 208 | #print "// Call address 0x%.8x" % call_address 209 | #print "%s(%s);" % (function.name, ",".join(map(lambda x: '"%s"' % x.argument, arguments))) 210 | #print 211 | 212 | # Sort by alloc size. 213 | results.sort(key=sort_func, reverse=True) 214 | for result in results: 215 | print "// Caller function %s " % result.caller_name 216 | print "// Call address 0x%.8x" % result.call_address 217 | print "// Alloc size 0x%.8x" % IDAArgumentToSize(result.arguments[0].argument) 218 | print "%s(%s);" % (result.callee_name, ",".join(map(lambda x: '"%s"' % x.argument, result.arguments))) 219 | print 220 | 221 | if __name__ == "__main__": 222 | print_references("operator new", 1) 223 | -------------------------------------------------------------------------------- /ida_hooks.py: -------------------------------------------------------------------------------- 1 | import idc 2 | import idautils 3 | 4 | def hexdump(src, length=16): 5 | FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' 
for x in range(256)]) 6 | lines = [] 7 | for c in xrange(0, len(src), length): 8 | chars = src[c:c+length] 9 | hex = ' '.join(["%02x" % ord(x) for x in chars]) 10 | printable = ''.join(["%s" % ((ord(x) <= 127 and FILTER[ord(x)]) or '.') for x in chars]) 11 | lines.append("%04x %-*s %s\n" % (c, length*3, hex, printable)) 12 | return ''.join(lines) 13 | 14 | def handle_bp1(): 15 | """ 16 | Handler for the buffer append routine. 17 | 18 | .text:70F75BE0 push edi ; size 19 | .text:70F75BE1 push edx ; src 20 | .text:70F75BE2 push eax ; dst 21 | .text:70F75BE3 call _memcpy_0 22 | """ 23 | size = GetRegValue("edi") 24 | src = GetRegValue("edx") 25 | dst = GetRegValue("eax") 26 | ip = GetRegValue("eip") 27 | 28 | print "memcpy(dst=0x%.8x, src=0x%.8x, size=0x%.8x)" % (dst, src, size) 29 | 30 | # Read the data 31 | data = GetManyBytes(src, size, use_dbg=True) 32 | if not data: 33 | print "Error reading src=0x%.8x" % (src) 34 | return 35 | 36 | hex_data = hexdump(data) 37 | 38 | print hex_data 39 | print 40 | 41 | def install_bp1(): 42 | bpaddr = 0x70F75BE3 43 | AddBpt(bpaddr) 44 | SetBptCnd(bpaddr, "handle_bp0()") 45 | 46 | def handle_bp0(): 47 | """ 48 | .text:6FE414D2 mov esi, [esp+44h+lpString1] 49 | .text:6FE414D6 mov edi, [esp+44h+lp] 50 | """ 51 | string_address = GetRegValue("esi") 52 | data = GetManyBytes(string_address, 32, use_dbg=True) 53 | if not data: 54 | print "Error reading src=0x%.8x" % (src) 55 | return 56 | 57 | hex_data = hexdump(data) 58 | 59 | print "Data @ 0x%.8x" % string_address 60 | print hex_data 61 | print 62 | 63 | def install_bp0(): 64 | bpaddr = 0x6FE414D6 65 | AddBpt(bpaddr) 66 | SetBptCnd(bpaddr, "handle_bp0()") 67 | 68 | def handle_bp2(): 69 | """ 70 | .text:6FE30290 ; int __cdecl SHA1_sub_70F70290_calc(void *data) 71 | .text:6FE30290 SHA1_sub_70F70290_calc proc near ; CODE XREF: SHA1_sub_70F703A0_get+32p 72 | .text:6FE30290 ; SHA1_sub_70F71350+4Dp ... 
73 | .text:6FE30296 mov ebp, [esp+44h+data] 74 | .text:6FE3029A push edi 75 | .text:6FE3029B mov edi, eax ; eax=strlen 76 | """ 77 | data_address = GetRegValue("ebp") 78 | data_size = GetRegValue("eax") 79 | data = GetManyBytes(data_address, data_size, use_dbg=True) 80 | if not data: 81 | print "Error reading src=0x%.8x" % (src) 82 | return 83 | 84 | hex_data = hexdump(data) 85 | 86 | print "Data @ 0x%.8x" % data_address 87 | print hex_data 88 | print 89 | 90 | def install_bp2(): 91 | bpaddr = 0x6FE3029B 92 | AddBpt(bpaddr) 93 | SetBptCnd(bpaddr, "handle_bp0()") 94 | 95 | 96 | def main(): 97 | RunPlugin("python", 3) 98 | install_bp2() 99 | 100 | if __name__ == '__main__': 101 | main() -------------------------------------------------------------------------------- /machofinder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Hacky script to gather all the mach-o file (and fat). 3 | It prints the union of all the used load commands in all 4 | the files found in the system. 
5 | 6 | Author: Agustin Gianni (agustingianni@gmail.com) 7 | """ 8 | import os 9 | import struct 10 | import shutil 11 | import pickle 12 | 13 | from macholib.MachO import MachO 14 | 15 | #define FAT_MAGIC 0xcafebabe 16 | #define FAT_CIGAM 0xbebafeca /* NXSwapLong(FAT_MAGIC) */ 17 | #define MH_MAGIC 0xfeedface /* the mach magic number */ 18 | #define MH_CIGAM 0xcefaedfe /* NXSwapInt(MH_MAGIC) */ 19 | #define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */ 20 | #define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */ 21 | 22 | # magics = [0xfeedface, 0xcefaedfe, 0xfeedfacf, 0xcffaedfe, 0xcafebabe, 0xbebafeca] 23 | # magics = [0xfeedface, 0xfeedfacf] 24 | 25 | def is_interesting(filepath, magics): 26 | try: 27 | data = open(filepath).read(4) 28 | magic = struct.unpack(" %s" % (load_command, path) 146 | 147 | try: 148 | shutil.copyfile(path, os.path.join("/Users/anon/workspace/retools/src/libbinary/macho/testfiles", "test_load_command_%s" % load_command.lower())) 149 | 150 | except: 151 | continue 152 | 153 | def print_one_file_for_each_section_type(results): 154 | seen_section_types = set() 155 | for path in results: 156 | if "Applications" in path: 157 | continue 158 | 159 | diff = get_macho_section_types(path) - seen_section_types 160 | 161 | if len(diff): 162 | seen_section_types.update(diff) 163 | print "%s -> [%s]" % (path, ", ".join(diff)) 164 | 165 | for new_type in diff: 166 | try: 167 | shutil.copyfile(path, os.path.join("/Users/anon/workspace/retools/src/libbinary/macho/testfiles", "test_section_type_%s" % new_type.lower())) 168 | except: 169 | continue 170 | 171 | def print_one_file_for_each_section_name(results): 172 | seen_section_types = set() 173 | for path in results: 174 | if "Applications" in path: 175 | continue 176 | diff = get_macho_section_name(path) - seen_section_types 177 | 178 | if len(diff): 179 | seen_section_types.update(diff) 180 | for new_type in diff: 181 | if "__DWARF" in new_type: 182 | print "%-40s" % new_type, path 183 
| # try: 184 | # shutil.copyfile(path, os.path.join("/Users/anon/workspace/retools/src/libbinary/macho/testfiles/section_names", "test_section_name_%s" % new_type)) 185 | # except: 186 | # continue 187 | 188 | def get_macho_filetype(filepath): 189 | try: 190 | macho = MachO(filepath) 191 | 192 | except (ValueError, struct.error, IOError): 193 | return set() 194 | 195 | types = set() 196 | for header in macho.headers: 197 | types.add(header.filetype) 198 | 199 | return types 200 | 201 | def print_onefile_for_each_filetype(results): 202 | seen_filetypes = set() 203 | for path in results: 204 | filetypes = get_macho_filetype(path) 205 | diff = filetypes - seen_filetypes 206 | 207 | if len(diff): 208 | print path, diff 209 | seen_filetypes.update(diff) 210 | 211 | def locate_macho_files(root): 212 | results = [] 213 | for root, subFolders, files in os.walk(root): 214 | for name in files: 215 | abs_name = os.path.join(root, name) 216 | if os.path.isfile(abs_name) and is_interesting(abs_name, [0xfeedface, 0xfeedfacf]): 217 | results.append(abs_name) 218 | 219 | return results 220 | 221 | def locate_fat(root): 222 | i = 0 223 | results = [] 224 | for root, subFolders, files in os.walk(root): 225 | for name in files: 226 | i += 1 227 | if not i % 5000: 228 | print "Processed %d files ..." % i 229 | 230 | abs_name = os.path.join(root, name) 231 | if os.path.isfile(abs_name) and is_interesting(abs_name, [0xcafebabe, 0xbebafeca]): 232 | results.append(abs_name) 233 | 234 | return results 235 | 236 | def locate_big_endian(root): 237 | i = 0 238 | results = [] 239 | for root, subFolders, files in os.walk(root): 240 | for name in files: 241 | i += 1 242 | if not i % 5000: 243 | print "Processed %d files ..." 
% i 244 | 245 | abs_name = os.path.join(root, name) 246 | if os.path.isfile(abs_name) and is_interesting(abs_name, [0xcefaedfe, 0xcffaedfe]): 247 | results.append(abs_name) 248 | 249 | return results 250 | 251 | def print_segment_section_names(results): 252 | a = set() 253 | for filepath in results: 254 | try: 255 | macho = MachO(filepath) 256 | 257 | except (ValueError, struct.error, IOError): 258 | continue 259 | 260 | for header in macho.headers: 261 | for command in header.commands: 262 | try: 263 | if command[1].nsects: 264 | #print command[1] 265 | for sect in command[2]: 266 | segname = sect.segname.replace("\x00", "") 267 | sectname = sect.sectname.replace("\x00", "") 268 | sectype = sect_type[sect.flags & 0xff] 269 | #a.add((segname, sectname, sectype)) 270 | print sectype, segname, sectname, filepath 271 | 272 | except AttributeError, e: 273 | pass 274 | 275 | # print "Results:" 276 | # for r in a: 277 | # print "%s %s %s" % (r[0], r[1], r[2]) 278 | 279 | def get_smallest_macho(files): 280 | files_with_sizes = [] 281 | 282 | import os 283 | import hashlib 284 | 285 | for file in files: 286 | if "invalid" in file or "svn" in file: 287 | continue 288 | 289 | try: 290 | cur_size = os.path.getsize(file) 291 | 292 | except OSError: 293 | continue 294 | 295 | files_with_sizes.append((cur_size, file)) 296 | 297 | for el in sorted(files_with_sizes)[:10000]: 298 | dest_file = os.path.join("/Users/anon/fuzzing/afl-2.10b/macho_inputs", hashlib.md5(el[1]).hexdigest()) 299 | try: 300 | shutil.copyfile(el[1], dest_file) 301 | 302 | except: 303 | continue 304 | 305 | # if not os.path.exists("saved.bigendian.paths.db"): 306 | # print "Saving to disk!" 307 | # output = open("saved.bigendian.paths.db", "wb") 308 | # results = locate_big_endian("/") 309 | # pickle.dump(results, output) 310 | # print "Found %d fat big endian files" % len(results) 311 | 312 | results = [] 313 | if not os.path.exists("saved.macho.paths.db"): 314 | print "Saving to disk!" 
315 | output = open("saved.macho.paths.db", "wb") 316 | results = locate_macho_files("/") 317 | pickle.dump(results, output) 318 | print "Found %d macho files" % len(results) 319 | 320 | results = pickle.load(open("saved.macho.paths.db", "rb")) 321 | print "Found %d macho files" % len(results) 322 | get_smallest_macho(results) -------------------------------------------------------------------------------- /mark_interesting.py: -------------------------------------------------------------------------------- 1 | """ 2 | Small idapython script that finds all the signed comparisions and marks 3 | them with a color. 4 | It will also find the sign extension instructions and it will also mark them. 5 | 6 | Author: Agustin Gianni (agustingianni@gmail.com) 7 | """ 8 | 9 | import idautils 10 | import idc 11 | 12 | from idaapi import * 13 | 14 | def rgb_to_bgr(color): 15 | r = color >> 16 16 | g = color >> 8 & 0xff 17 | b = color & 0xff 18 | return (b << 16) | (g << 8) | r 19 | 20 | SIGNED_COLOR = rgb_to_bgr(0xFC8B8B) 21 | INTERESTING_COLOR = rgb_to_bgr(0xFC8B8B) 22 | UNSIGNED_COLOR = rgb_to_bgr(0x8BFCB5) 23 | CALL_COLOR = rgb_to_bgr(0xC7F8FF) 24 | 25 | signed_ins = ["JL", "JNGE", "JGE", "JNL", "JLE", "JNG", "JG", "JNLE"] 26 | unsigned_ins = ["JB", "JNAE", "JC", "JNB", "JAE", "JNC", "JBE", "JNA", "JA", "JNBE"] 27 | interesting_ins = ["MOVSX", "MOVSXD", "LODS", "STOS", "REP", "LOOP"] 28 | 29 | def set_instruction_color(ea, color): 30 | idc.SetColor(ea, idc.CIC_ITEM, color) 31 | 32 | def is_call(mnemonic): 33 | return mnemonic.upper() == "CALL" 34 | 35 | def is_signed(mnemonic): 36 | return mnemonic.upper() in signed_ins 37 | 38 | def is_unsigned(mnemonic): 39 | return mnemonic.upper() in unsigned_ins 40 | 41 | def is_interesting(mnemonic): 42 | return mnemonic.upper() in interesting_ins 43 | 44 | from collections import defaultdict 45 | freq = {"signed" : defaultdict(int), "interesting" : defaultdict(int)} 46 | 47 | i = 0 48 | for seg_ea in Segments(): 49 | for head in 
Heads(seg_ea, SegEnd(seg_ea)): 50 | i += 1 51 | 52 | if not isCode(GetFlags(head)): 53 | continue 54 | 55 | mnemonic = GetMnem(head) 56 | 57 | if is_unsigned(mnemonic): 58 | set_instruction_color(head, UNSIGNED_COLOR) 59 | 60 | elif is_signed(mnemonic): 61 | set_instruction_color(head, SIGNED_COLOR) 62 | freq["signed"][GetFunctionName(head)] += 1 63 | 64 | elif is_interesting(mnemonic): 65 | set_instruction_color(head, INTERESTING_COLOR) 66 | freq["interesting"][GetFunctionName(head)] += 1 67 | 68 | elif is_call(mnemonic): 69 | set_instruction_color(head, CALL_COLOR) 70 | 71 | import operator 72 | sorted_x = sorted(freq["interesting"].items(), key=operator.itemgetter(1)) 73 | for x in sorted_x: 74 | print "Function '%s' has %d interesting properties" % (x[0], x[1]) 75 | -------------------------------------------------------------------------------- /minset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tool to calculate the minimum set of files that have 3 | approximatelly the best coverage. 4 | 5 | The tool needs an input directory that contains the input 6 | files which were used along with the trace files. 7 | The name of the trace file must be .log. This 8 | way when we are processing the trace, we can refer to the 9 | input file. 10 | 11 | Example: 12 | 13 | $ ls /tmp/InputFiles 14 | -rw------- 1 anon staff 47K Jan 4 01:16 ffeb4d186925e9538707719e8374d217e1030943.log 15 | -rw------- 1 anon staff 8.8K Jan 3 14:05 ffeb4d186925e9538707719e8374d217e1030943 16 | 17 | $ python minset.py -i /tmp/InputFiles -o /tmp/OutputFiles 18 | Size of the universe: 6257 19 | Current covered=0 new=4053 20 | Current covered=4053 new=581 21 | ... 22 | Original trace count: 1484 23 | Min set trace count : 48 24 | 25 | This will copy the input files that cover the most blocks 26 | from the input directory into the output directory. 
27 | """ 28 | import os 29 | import re 30 | import sys 31 | import struct 32 | import shutil 33 | import argparse 34 | from collections import namedtuple 35 | 36 | TraceFile = namedtuple("TraceFile", [ 37 | "path", 38 | "entries" 39 | ]) 40 | 41 | BasicBlock = namedtuple("BasicBlock", [ 42 | "start", 43 | "size", 44 | "id" 45 | ]) 46 | 47 | 48 | def load_drcov_trace(filename): 49 | """ 50 | A drcov trace file consists of a preamble with ascii data up to 51 | the start of the basic block table. The basic block table can be 52 | located by searching for the ascii header: 53 | 54 | 'BB Table: bbs' 55 | 56 | Following the header, the table consists of N structures: 57 | 58 | struct __attribute__((packed)) drcov_bb { 59 | uint32_t start; 60 | uint16_t size; 61 | uint16_t id; 62 | }; 63 | """ 64 | trace = open(filename, "rb").read() 65 | match = re.search(r"BB Table:\s+(\d+)\s+bbs\n", trace) 66 | if not match: 67 | return TraceFile(filename, set()) 68 | 69 | block_count = int(match.group(1)) 70 | 71 | # An empty trace is possible so bail before tryint to process. 72 | if not block_count: 73 | return TraceFile(filename, set()) 74 | 75 | # Size of a single basic block entry. 76 | entry_size = 8 77 | 78 | # Extract the basic block table. 79 | table = trace[match.end():] 80 | if len(table) != entry_size * block_count: 81 | print "The size of the table does not match the count of basic blocks." 82 | 83 | # Unpack the entries into a set to avoid repeats. 
84 | entries = set() 85 | for i in range(0, block_count): 86 | block_offset = i * entry_size 87 | block_data = table[block_offset:block_offset+entry_size] 88 | start, size, id_ = struct.unpack(" %s" % (cur_ea, GetDisasm(cur_ea)) 39 | 40 | def find_push_thumb(mark=False): 41 | def is_push(opcode): 42 | return (0xfffffe00 & opcode) == 0x0000b400 43 | 44 | image_start = idc.MinEA() 45 | image_end = idc.MaxEA() 46 | image_size = image_end - image_start 47 | 48 | print "Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size) 49 | 50 | for cur_ea in xrange(image_start, image_end, 2): 51 | cur_word = Word(cur_ea) 52 | if is_push(cur_word) and "DCB" in GetDisasm(cur_ea): 53 | print "Found candidate at 0x%.8x -> %s" % (cur_ea, GetDisasm(cur_ea)) 54 | if mark: 55 | MakeCode(cur_ea) 56 | print "Marked candidate at 0x%.8x -> %s" % (cur_ea, GetDisasm(cur_ea)) 57 | 58 | 59 | def find_nops(mark=False): 60 | """ 61 | Look for THUMB nops (0xbf00). 62 | """ 63 | def is_nop(opcode): 64 | return opcode == 0xbf00 65 | 66 | image_start = idc.MinEA() 67 | image_end = idc.MaxEA() 68 | image_size = image_end - image_start 69 | 70 | print "Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size) 71 | 72 | for cur_ea in xrange(image_start, image_end, 2): 73 | cur_word = Word(cur_ea) 74 | if is_nop(cur_word): 75 | print "Found candidate at 0x%.8x -> %s" % (cur_ea, GetDisasm(cur_ea)) 76 | if mark: 77 | MakeCode(cur_ea) 78 | 79 | find_push_thumb(mark=False) -------------------------------------------------------------------------------- /propagate_types.py: -------------------------------------------------------------------------------- 1 | def GetFunctionDemangledName(xref_addr): 2 | """ 3 | Return the demanlged name of a function at a given address 'xref_addr'. 
4 | """ 5 | tmp = GetFunctionName(xref_addr) 6 | name = Demangle(tmp, GetLongPrm(INF_SHORT_DN)) 7 | return name if name else tmp 8 | 9 | ea = AskAddr(ScreenEA(), "Give me the address of what you want to propagate") 10 | t = AskStr(GetType(ea), "Type for the data references") 11 | t2 = AskStr(GetType(ea), "Type for the stub references") 12 | 13 | if t[-1] != ';': 14 | t += ';' 15 | 16 | if t2[-1] != ';': 17 | t2 += ';' 18 | 19 | modified = set() 20 | 21 | for data_ref in DataRefsTo(ea): 22 | if data_ref in modified: 23 | continue 24 | 25 | print "Setting type at 0x%.8x -> %s" % (data_ref, t) 26 | SetType(data_ref, t) 27 | modified.add(data_ref) 28 | 29 | names = set() 30 | # Now for each data reference check if there are code references. 31 | for code_ref in DataRefsTo(data_ref): 32 | if code_ref in modified or code_ref == data_ref: 33 | continue 34 | 35 | f = idaapi.get_func(code_ref) 36 | if not f: 37 | continue 38 | 39 | if f.startEA in modified: 40 | continue 41 | 42 | name = GetFunctionDemangledName(f.startEA) 43 | if "stub" in name.lower(): 44 | # If the name of the code reference contains 'stub' then set also the type. 
45 | print "Setting type at 0x%.8x : %s -> %s" % (f.startEA, name, t2) 46 | modified.add(f.startEA) 47 | SetType(f.startEA, t2) 48 | -------------------------------------------------------------------------------- /references.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import idautils 3 | from idc import GetFunctionName, GetDisasm, GetMnem, GetOpnd, Demangle, GetLongPrm, INF_SHORT_DN 4 | 5 | __author__ = 'anon' 6 | 7 | import idaapi 8 | 9 | FunctionName = namedtuple('FunctionName', ['ea', 'name']) 10 | ImportEntry = namedtuple('ImportEntry', ['ea', 'name', 'ord']) 11 | BasicBlock = namedtuple('BasicBlock', ['start_ea', 'end_ea', 'function']) 12 | FunctionSignature = namedtuple('FunctionSignature', ['name', 'nargs']) 13 | Instruction = namedtuple('Instruction', ['ea', 'string']) 14 | FunctionArgument = namedtuple('FunctionArgument', ['argument', 'instruction']) 15 | 16 | imports_list = [] 17 | function_names = [] 18 | 19 | 20 | def EnumImportNamesCallback(ea, name, ord_): 21 | if name: 22 | imports_list.append(ImportEntry(ea, name, ord_)) 23 | 24 | return True 25 | 26 | 27 | def GetAllImportEntries(): 28 | for i in xrange(0, idaapi.get_import_module_qty()): 29 | name = idaapi.get_import_module_name(i) 30 | if not name: 31 | pass 32 | 33 | idaapi.enum_import_names(i, EnumImportNamesCallback) 34 | 35 | return imports_list 36 | 37 | 38 | def GetAllFunctionNames(): 39 | func_name_list = [] 40 | for x in idautils.Functions(): 41 | func_name_list.append(FunctionName(x, GetFunctionDemangledName(x))) 42 | 43 | return func_name_list 44 | 45 | 46 | def GetAddressBasicBlock(ea): 47 | f = idaapi.get_func(ea) 48 | if not f: 49 | raise RuntimeError("No basic block at address %.8x" % ea) 50 | 51 | for block in idaapi.FlowChart(f): 52 | if block.startEA <= ea and block.endEA > ea: 53 | return BasicBlock(block.startEA, block.endEA, f) 54 | 55 | raise RuntimeError("No basic block at address %.8x" % 
ea) 56 | 57 | def GetInstructions(start_ea, end_ea): 58 | ins = [] 59 | for head in idautils.Heads(start_ea, end_ea): 60 | if idaapi.isCode(idaapi.getFlags(head)): 61 | ins.append(Instruction(head, GetDisasm(head))) 62 | 63 | return ins 64 | 65 | def IsArgumentSetter(ea): 66 | if GetMnem(ea).lower() == "push": 67 | return True 68 | 69 | def GetFunctionArgument(ins): 70 | opnd = GetOpnd(ins.ea, 0) 71 | return FunctionArgument(opnd, ins) 72 | 73 | def GetFunctionCallArguments(func_sig, xref_addr): 74 | args = [] 75 | bb = GetAddressBasicBlock(xref_addr) 76 | 77 | instructions = GetInstructions(bb.start_ea, bb.end_ea) 78 | instructions = filter(lambda x: x.ea <= xref_addr, instructions) 79 | 80 | if GetMnem(instructions[-1].ea).lower() not in ["call", "jmp"]: 81 | print instructions[-1] 82 | raise RuntimeError("Bullshit") 83 | 84 | n_found_args = 0 85 | for ins in reversed(instructions[:-1]): 86 | if n_found_args == func_sig.nargs: 87 | break 88 | 89 | if IsArgumentSetter(ins.ea): 90 | n_found_args += 1 91 | args.append(GetFunctionArgument(ins)) 92 | 93 | return args 94 | 95 | def GetFunctionDemangledName(xref_addr): 96 | tmp = GetFunctionName(xref_addr) 97 | name = Demangle(tmp, GetLongPrm(INF_SHORT_DN)) 98 | return name if name else tmp 99 | 100 | func_sig = FunctionSignature("operator new", 1) 101 | 102 | interesting_functions = [] 103 | 104 | for a in GetAllFunctionNames(): 105 | if func_sig.name in a.name: 106 | print a.name 107 | interesting_functions.append(a) 108 | 109 | for a in GetAllImportEntries(): 110 | if func_sig.name in a.name: 111 | interesting_functions.append(a) 112 | 113 | 114 | InterestingResult = namedtuple('InterestingResult', ['caller_name', 'callee_name', 'call_address', 'arguments']) 115 | 116 | results = [] 117 | for function in interesting_functions: 118 | # Get all code xrefs to 119 | xrefs = idautils.CodeRefsTo(function.ea, 0) 120 | for call_address in xrefs: 121 | arguments = GetFunctionCallArguments(func_sig, call_address) 122 | 
caller_name = GetFunctionDemangledName(call_address) 123 | callee_name = function.name 124 | 125 | results.append(InterestingResult(caller_name, callee_name, call_address, arguments)) 126 | 127 | #print "// Calling function %s " % caller_name 128 | #print "// Call address 0x%.8x" % call_address 129 | #print "%s(%s);" % (function.name, ",".join(map(lambda x: '"%s"' % x.argument, arguments))) 130 | #print 131 | 132 | def IDAArgumentToSize(arg): 133 | a = -1 134 | 135 | if arg[-1] in ['h', 'H']: 136 | 137 | a = int(arg[:-1], 16) 138 | 139 | else: 140 | try: 141 | a = int(arg, 10) 142 | 143 | except ValueError: 144 | a = -1 145 | 146 | return a 147 | 148 | def sort_func(element): 149 | arg = element.arguments[0].argument 150 | return IDAArgumentToSize(arg) 151 | 152 | # Sort by alloc size. 153 | results.sort(key=sort_func, reverse=True) 154 | for result in results: 155 | print "// Caller function %s " % result.caller_name 156 | print "// Call address 0x%.8x" % result.call_address 157 | print "// Alloc size 0x%.8x" % IDAArgumentToSize(result.arguments[0].argument) 158 | print "%s(%s);" % (result.callee_name, ",".join(map(lambda x: '"%s"' % x.argument, result.arguments))) 159 | print 160 | -------------------------------------------------------------------------------- /renamer.py: -------------------------------------------------------------------------------- 1 | """ 2 | Rename files in a directory to its sha1 sum plus an extension. 
3 | """ 4 | import os 5 | import sys 6 | import hashlib 7 | 8 | 9 | def sha1_file(fn): 10 | f = open(fn, 'rb') 11 | r = hashlib.sha1(f.read()).hexdigest() 12 | f.close() 13 | return r 14 | 15 | 16 | directory = os.path.abspath(sys.argv[1]) 17 | extension = sys.argv[2] 18 | 19 | print "Doing directory `%s`" % directory 20 | 21 | for fn in os.listdir(directory): 22 | if fn == ".DS_Store": 23 | continue 24 | 25 | orig_name = os.path.join(directory, fn) 26 | hexh = sha1_file(orig_name) + extension 27 | new_name = os.path.join(directory, hexh) 28 | 29 | print('%s -> %s' % (orig_name, new_name)) 30 | 31 | os.rename(orig_name, new_name) 32 | -------------------------------------------------------------------------------- /simple_jack.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple Jack symbol porting tool by goose (agustingianni@gmail.com). 3 | 4 | This tool exists because for some reason diaphora does not assign 5 | enough priority to "perfect" matches. The main idea is to get 6 | as many symbols right so later on you can run diaphora and iterate. 7 | 8 | The drill is simple, hash the bytes of all the functions in a db 9 | and save them to a file. We will call this the 'primary' database. 10 | Do the same for another DB (we call this the 'secondary' database) 11 | and compare both. We only import identical matches and the only 12 | info we import from the 'primary' is the function name. 13 | 14 | This script has two modes: 15 | 16 | SCRIPT_MODE_DUMP: 17 | 18 | Used to create the primary and secondary database. 19 | You should run the script twice, first in the primary 20 | binary, which has your symbolicated binary, and second 21 | in the secondary binary, that is the one that will 22 | receive the information imported from the primary. 23 | 24 | SCRIPT_MODE_DIFF: 25 | 26 | Used once you've generated both the primary and secondary 27 | databases. It will read them both and perform the diffing. 
28 | Once it finds matches, it will import the function name 29 | from the primary into the secondary. 30 | 31 | This mode is destructive, that is, it will change your 32 | IDB. Only run it if you are positive that you like the 33 | results. 34 | """ 35 | 36 | import pickle 37 | import hashlib 38 | 39 | import idaapi 40 | from idc import * 41 | from idaapi import * 42 | from idautils import * 43 | 44 | # Available modes, pick one. 45 | SCRIPT_MODE_DUMP = 0 46 | SCRIPT_MODE_DIFF = 1 47 | 48 | # IMPORTANT: Manually set this to the mode you need, because fuck idapython. 49 | CURRENT_SCRIPT_MODE = SCRIPT_MODE_DUMP 50 | 51 | # Set this to true if you want to see some debugging output. 52 | GLOBAL_DEBUG = False 53 | 54 | 55 | def hash_bytes(bytes): 56 | return hashlib.md5(bytes).hexdigest() 57 | 58 | 59 | def log(msg): 60 | Message("[%s] %s\n" % (time.asctime(), msg)) 61 | 62 | 63 | def load_db(db_name): 64 | db = None 65 | log("Loading DB from %s" % db_name) 66 | with open(db_name, 'rb') as input: 67 | db = pickle.load(input) 68 | 69 | return db 70 | 71 | 72 | def save_db(db, db_name): 73 | log("Saving DB to %s" % db_name) 74 | with open(db_name, 'wb') as output: 75 | pickle.dump(db, output, pickle.HIGHEST_PROTOCOL) 76 | 77 | 78 | def build_db(): 79 | collision_keys = set() 80 | 81 | func_list = [] 82 | segments = list(Segments()) 83 | for seg_ea in segments: 84 | func_list.extend(list(Functions(seg_ea, SegEnd(seg_ea)))) 85 | 86 | total_funcs = len(func_list) 87 | 88 | log("Total number of functions to export: %u" % total_funcs) 89 | 90 | functions_db = {} 91 | for f in func_list: 92 | # Get the function for this address. 93 | func = get_func(f) 94 | if not func: 95 | log("Cannot get a function object for 0x%x" % f) 96 | continue 97 | 98 | # Get the number of instructions. 99 | n_ins = 0 100 | flow = FlowChart(func) 101 | for block in flow: 102 | n_ins += len(list(Heads(block.startEA, block.endEA))) 103 | 104 | # Get the name of the function without demangling. 
105 | name = GetFunctionName(f) 106 | 107 | # Calculate the size of the function. 108 | size = func.endEA - func.startEA 109 | 110 | # Do some sanity checks. 111 | assert (size == func.size()), "Invalid size." 112 | assert (func.startEA < func.endEA), "Invalid startEA / endEA values." 113 | 114 | # Get the hash of the function. 115 | ins_hash = hash_bytes(idc.GetManyBytes(func.startEA, size)) 116 | 117 | # Check if we collide with another entry. 118 | if functions_db.has_key(ins_hash): 119 | log("Function @ 0x%.8x collides with function @ 0x%.8x" % 120 | (func.startEA, functions_db[ins_hash][2])) 121 | 122 | # Keep track of the collision. 123 | collision_keys.add(ins_hash) 124 | continue 125 | 126 | # Create an entry in the DB. 127 | functions_db[ins_hash] = (name, n_ins, func.startEA) 128 | 129 | if GLOBAL_DEBUG: 130 | log("Function name:%s start:0x%.8x end:0x%.8x size:%u n_ins:%u hash:%s" % 131 | (name, func.startEA, func.endEA, size, n_ins, ins_hash)) 132 | 133 | # Delete the collision otherwise we may match functions incorrectly. 134 | for collision_key in collision_keys: 135 | del functions_db[collision_key] 136 | 137 | return functions_db 138 | 139 | 140 | def do_diff(): 141 | primary_db_path = AskFile(0, "primary.db", "Select the primary db file.") 142 | if primary_db_path is None: 143 | log("No file selected, exiting") 144 | return False 145 | 146 | secondary_db_path = AskFile( 147 | 0, "secondary.db", "Select the secondary db file.") 148 | 149 | if secondary_db_path is None: 150 | log("No file selected, exiting") 151 | return False 152 | 153 | # Load the databases 154 | primary_db = load_db(primary_db_path) 155 | secondary_db = load_db(secondary_db_path) 156 | 157 | log("Diffing ...") 158 | 159 | # Proceed with the diffing. 160 | matches = 0 161 | for primary_hash, primary_val in primary_db.iteritems(): 162 | # Check if 'primary_hash' from the primary is present in the secondary. 
163 | if not secondary_db.has_key(primary_hash): 164 | continue 165 | 166 | # Hashes match. 167 | secondary_val = secondary_db[primary_hash] 168 | 169 | # Only match functions with a different name. 170 | if primary_val[0] == secondary_val[0]: 171 | continue 172 | 173 | function_ea = secondary_val[2] 174 | function_name_old = secondary_val[0] 175 | function_name_new = primary_val[0] 176 | 177 | # if GLOBAL_DEBUG: 178 | log("Function @ 0x%.8x -> From '%s' to '%s'" % 179 | (function_ea, function_name_old, function_name_new)) 180 | 181 | # Set the secondary function name. 182 | if not MakeNameEx(function_ea, function_name_new, SN_NOWARN | SN_NOCHECK): 183 | log("Error setting function name to '%s'" % (function_name_new)) 184 | 185 | matches += 1 186 | 187 | log("Number of matches: %u" % matches) 188 | 189 | 190 | def do_save(): 191 | db_path = AskFile(1, "*.db", "Select the file to store the db.") 192 | if db_path is None: 193 | log("No file selected, exiting") 194 | return False 195 | 196 | # Build the db for the current IDB. 197 | db = build_db() 198 | 199 | log("Number of entries in the DB: %u" % len(db)) 200 | 201 | # Write the DB to disk. 202 | save_db(db, db_path) 203 | 204 | 205 | if CURRENT_SCRIPT_MODE == SCRIPT_MODE_DUMP: 206 | do_save() 207 | 208 | elif CURRENT_SCRIPT_MODE == SCRIPT_MODE_DIFF: 209 | do_diff() 210 | 211 | else: 212 | log("Invalid script mode") 213 | -------------------------------------------------------------------------------- /string_finder.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility to find all the strings inside an ill formed IDA Database. 3 | The script does an exhaustive search of all the streams of ASCII characters 4 | ending with a 0x00. Then we verify that the stream contains at least one 5 | english word. If it does, then we define it as a string. 6 | 7 | Coded by Agustin Gianni (agustin.gianni@gmail.com). 
# string_finder.py (continued): the module docstring that opens on the
# preceding line ends here; what follows is the body of that script.
import idaapi
import idc
import idautils
import string

try:
    import enchant
except ImportError:
    # Only method_1 needs pyenchant; keep the name defined so that method_1
    # can report the problem instead of failing with a NameError.
    enchant = None
    print("You need to install pyenchant to use method_1")


def is_printable(input_char):
    """Return True when input_char is found in string.printable."""
    return input_char in string.printable


def method_1():
    """
    Print the runs of printable characters that look like English text.

    Every address between idc.MinEA() and idc.MaxEA() is visited. A run of
    printable bytes terminated by 0x00 is reported when at least two of its
    words are longer than two characters, do not start with a digit and are
    accepted by the en_US dictionary. Candidates are only printed, nothing
    is defined in the database.

    This method does not work very well so far.
    """
    if enchant is None:
        print("You need to install pyenchant to use method_1")
        return

    dictionary = enchant.Dict("en_US")

    # Get the image base of the database.
    image_start = idc.MinEA()
    image_end = idc.MaxEA()
    image_size = image_end - image_start

    print("Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size))

    cur_string = ""
    for cur_ea in xrange(image_start, image_end):
        byte_ = chr(idc.Byte(cur_ea))
        if is_printable(byte_):
            cur_string += byte_
            continue

        # If this is a terminating byte, check if the collected bytes form a string.
        if byte_ == '\x00' and cur_string:
            # cur_ea is the terminator; the run starts len(cur_string) bytes
            # before it. Report that address, like method_2 does.
            string_ea = cur_ea - len(cur_string)
            candidate = cur_string.replace("-", " ").replace("_", " ")

            nwords = 0
            for word in candidate.split():
                if len(word) > 2 and word[0] not in string.digits and dictionary.check(word):
                    nwords += 1
                    if nwords > 1:
                        print("0x%.8x : %s" % (string_ea, candidate))
                        break

        # Any non printable byte ends the current run.
        cur_string = ""


def method_2():
    """
    Simple way to convert a table of strings into strings on IDA.

    Starting at the address given by the user, every run of printable bytes
    terminated by 0x00 is printed and turned into a string with MakeStr. The
    scan stops at the first 0x00 that is not preceded by printable bytes, or
    at idc.MaxEA().
    """
    start = idc.AskAddr(idc.ScreenEA(), "Where do I start looking for strings?")
    end = idc.MaxEA()

    cur_string = ""
    for cur_ea in xrange(start, end):
        byte_ = chr(idc.Byte(cur_ea))
        if is_printable(byte_):
            cur_string += byte_
            continue

        # If this is a terminating byte, check if the collected bytes form a string.
        if byte_ == '\x00':
            if not cur_string:
                print("Last string at 0x%.8x" % (cur_ea))
                break

            string_ea = cur_ea - len(cur_string)
            print("0x%.8x : %s" % (string_ea, cur_string))
            # The end address is exclusive, so add one to cover the terminator.
            idc.MakeStr(string_ea, cur_ea + 1)

        # Any non printable byte ends the current run.
        cur_string = ""


method_2()


# --------------------------------------------------------------------------------
# /struct_hint.py:
# --------------------------------------------------------------------------------
"""
Stupid tool to infer what's the underlying structure used by a function.
Highly heuristic. Don't trust it blindly, just try to use what it
gives you and work from that.
"""
from idautils import *
from idaapi import get_func


def FunctionInstructionsBlocks(function):
    """Return the heads between function.startEA and function.endEA that are code."""
    return [head for head in Heads(function.startEA, function.endEA) if isCode(GetFlags(head))]


# Size in bytes of each "<qualifier> ptr" memory operand prefix. An xmmword is
# 16 bytes, the same width GetRegisterSize reports for the xmm registers.
qualifier2size = {"byte": 1, "word": 2, "dword": 4, "qword": 8, "xmmword": 16}


def GetQualifierSize(qualifier):
    """Return the size in bytes named by qualifier; raises KeyError if unknown."""
    return qualifier2size[qualifier]


reg128 = ["xmm0", "xmm1", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9"]
reg64 = ["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rip"]
reg32 = ["eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", "eip", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"]
reg16 = ["ax", "bx", "cx", "dx", "si", "di", "bp", "sp", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"]
reg8 = ["ah", "al", "bh", "bl", "ch", "cl", "dh", "dl", "sil", "dil", "bpl", "spl", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"]

# Register name lists paired with their width in bytes, widest first.
_REGISTER_CLASSES = ((reg128, 16), (reg64, 8), (reg32, 4), (reg16, 2), (reg8, 1))


def GetRegisterSize(register):
    """
    Return the width in bytes of the named register.

    Raises Exception when the name is in none of the register lists.
    """
    for names, width in _REGISTER_CLASSES:
        if register in names:
            return width

    raise Exception("Invalid register name: %s" % register)


def GuessSize(ea):
    """
    Guess how many bytes the instruction at ea reads or writes.

    The first displacement operand (operand 0, then operand 1) whose text
    contains "ptr" decides the size through its qualifier. Otherwise the
    first register operand decides it through its width.

    Raises Exception when neither rule applies, and KeyError when the
    qualifier is not in qualifier2size.
    """
    for operand in (0, 1):
        text = GetOpnd(ea, operand)
        if GetOpType(ea, operand) == o_displ and "ptr" in text:
            # The qualifier is the first token, as in "dword ptr [...]".
            return GetQualifierSize(text.split()[0])

    for operand in (0, 1):
        if GetOpType(ea, operand) == o_reg:
            return GetRegisterSize(GetOpnd(ea, operand))

    raise Exception("Cannot guess the size of ins: '%s'" % GetDisasm(ea))


ea = ScreenEA()
cur_func = get_func(ea)
if cur_func is None:
    # NOTE(review): presumably get_func gives None when the cursor is outside
    # any function; fail with a readable message instead of an AttributeError.
    raise Exception("No function found at 0x%.8x" % ea)

base_reg = AskStr("r15", "What's the register used as the base for the struct access?.")
if base_reg is None:
    # NOTE(review): presumably None means the dialog was dismissed - confirm.
    raise Exception("No base register was given")

base_reg = base_reg.lower()

# A missing answer is treated like "unknown" (0).
size = AskLong(0, "Enter the size of the structure, leave 0 if unknown.") or 0

# One (offset, size, reason) tuple per displacement access through base_reg.
offsets = []

for ea in FunctionInstructionsBlocks(cur_func):
    # Filter instructions that do not touch our base register.
    dis = GetDisasm(ea)
    if base_reg not in dis.lower():
        continue

    for operand in (0, 1):
        if GetOpType(ea, operand) == o_displ and base_reg in GetOpnd(ea, operand).lower():
            offsets.append((GetOperandValue(ea, operand), GuessSize(ea), ("0x%.8x: " % ea) + dis))


size2type = {1: "uint8_t", 2: "uint16_t", 4: "uint32_t", 8: "uint64_t", 16: "__m128"}


def GuessField(i, offset, size):
    """
    Return the C declaration for field number i.

    Sizes present in size2type become "fld_<i>" of the matching type; any
    other size becomes a "pad_<i>" byte array of that length. offset is
    accepted for symmetry with the callers and is not used.
    """
    if size in size2type:
        return "%-8s fld_%d;" % (size2type[size], i)

    return "%-8s pad_%d[%d];" % ("uint8_t", i, size)


def MakeUnique(offsets):
    """
    Drop consecutive entries that repeat the offset of the previous one.

    offsets must already be sorted by offset. Raises Exception when two
    consecutive entries share an offset but disagree on the size.
    """
    unique = []

    # None marks "no previous entry", so that no real offset can be mistaken
    # for the sentinel.
    previous = None
    for entry in offsets:
        if previous is None or previous[0] != entry[0]:
            unique.append(entry)
        elif previous[1] != entry[1]:
            raise Exception("Conflict found at offset %.8x with sizes %d and %d" % (previous[0], previous[1], entry[1]))

        previous = entry

    return unique


def _PrintFields(entries, index, cur_offset):
    """
    Print one declaration per entry, with padding wherever there is a gap.

    index is the number given to the next field and cur_offset is the offset
    where the layout printed so far ends. Returns both values updated.
    """
    for offset, field_size, reason in entries:
        if cur_offset != offset:
            print(" %-20s // off=%.2xh-%.2xh reason=padding" % (GuessField(index, cur_offset, offset - cur_offset), cur_offset, offset))
            index += 1

        cur_offset = offset + field_size
        print(" %-20s // off=%.2xh-%.2xh reason=%s" % (GuessField(index, offset, field_size), offset, offset + field_size, reason))
        index += 1

    return index, cur_offset


offsets = sorted(offsets, key=lambda tup: tup[0])

i = 0
cur_offset = 0

if not offsets:
    print("// No displacement access through '%s' was found." % base_reg)
else:
    try:
        # The structure ends where the access with the highest offset ends.
        inferred_size = offsets[-1][0] + offsets[-1][1]

        print("// User size: 0x%.4x" % size)
        print("// Inferred size: 0x%.4x" % inferred_size)
        print("struct UnknownStructure {")

        i, cur_offset = _PrintFields(MakeUnique(offsets), i, cur_offset)

        # Check if the user hinted the final size and pad it if needed.
        if size != 0 and size > inferred_size:
            rem_size = size - inferred_size
            print(" %-20s // off=%.2xh-%.2xh reason=padding" % (GuessField(i, inferred_size, rem_size), inferred_size, inferred_size + rem_size))
        elif size != 0 and size < inferred_size:
            # A negative pad would not be a valid declaration; warn instead.
            print(" // WARNING: user size 0x%.4x is smaller than the inferred size 0x%.4x" % (size, inferred_size))

        print("};")
    except Exception as error:
        # Show why the layout failed, then dump every access without
        # removing duplicates so that the conflict can be inspected.
        print("DEBUG:")
        print("// %s" % error)
        i, cur_offset = _PrintFields(offsets, 0, 0)

# --------------------------------------------------------------------------------