├── ARMInstructionFrequencies.txt
├── DumpFunctionBytes.py
├── README.md
├── allocator.h
├── arm_frequency.py
├── find_hardref.py
├── func_references.py
├── ida_hooks.py
├── machofinder.py
├── mark_interesting.py
├── minset.py
├── operan_offset_base.py
├── prolog_finder.py
├── propagate_types.py
├── references.py
├── renamer.py
├── simple_jack.py
├── string_finder.py
└── struct_hint.py


/ARMInstructionFrequencies.txt:
--------------------------------------------------------------------------------
  1 | From arm-linux-androideabi-objdump -d libchromeview_prebuilt.so > sample.s
  2 | This .so is 2gb in size with symbols so it is a pretty good indicator of the actual
  3 | distribution of the instructions.
  4 | 
  5 | Instruction        mov is used    1428331 times | decode_mov
  6 | Instruction        ldr is used    1181133 times | decode_ldr
  7 | Instruction        add is used    1065935 times | decode_add
  8 | Instruction         bl is used     835527 times | decode_bl
  9 | Instruction          b is used     547924 times | decode_b
 10 | Instruction        str is used     538433 times | decode_str
 11 | Instruction        cmp is used     330601 times | decode_cmp
 12 | Instruction        cbz is used     155987 times | decode_cbz
 13 | Instruction        blx is used     135995 times | decode_blx
 14 | Instruction        pop is used     118732 times | decode_pop
 15 | Instruction       push is used     113136 times | decode_push
 16 | Instruction      ldmia is used      84363 times | decode_ldmia
 17 | Instruction       ldrb is used      70881 times | decode_ldrb
 18 | Instruction        sub is used      69208 times | decode_sub
 19 | Instruction       cbnz is used      61974 times | decode_cbnz
 20 | Instruction       subs is used      55950 times | decode_subs
 21 | Instruction         it is used      55679 times | decode_it
 22 | Instruction       strb is used      52617 times | decode_strb
 23 | Instruction      stmia is used      40441 times | decode_stmia
 24 | Instruction      stmdb is used      34525 times | decode_stmdb
 25 | Instruction        lsl is used      26392 times | decode_lsl
 26 | Instruction        and is used      24389 times | decode_and
 27 | Instruction        orr is used      23024 times | decode_orr
 28 | Instruction       movw is used      21616 times | decode_movw
 29 | Instruction       vldr is used      20493 times | decode_vldr
 30 | Instruction         bx is used      18856 times | decode_bx
 31 | Instruction       ldrd is used      15264 times | decode_ldrd
 32 | Instruction        asr is used      15170 times | decode_asr
 33 | Instruction       strd is used      15058 times | decode_strd
 34 | Instruction       vmov is used      15012 times | decode_vmov
 35 | Instruction        rsb is used      14290 times | decode_rsb
 36 | Instruction        lsr is used      10236 times | decode_lsr
 37 | Instruction       ldrh is used       9915 times | decode_ldrh
 38 | Instruction       ubfx is used       9384 times | decode_ubfx
 39 | Instruction        mul is used       9268 times | decode_mul
 40 | Instruction       vstr is used       8072 times | decode_vstr
 41 | Instruction        bic is used       7865 times | decode_bic
 42 | Instruction       vmrs is used       7408 times | decode_vmrs
 43 | Instruction        mla is used       7381 times | decode_mla
 44 | Instruction        adc is used       7004 times | decode_adc
 45 | Instruction        eor is used       6673 times | decode_eor
 46 | Instruction        tst is used       6197 times | decode_tst
 47 | Instruction       uxtb is used       6075 times | decode_uxtb
 48 | Instruction       strh is used       6019 times | decode_strh
 49 | Instruction        mvn is used       4914 times | decode_mvn
 50 | Instruction        neg is used       4309 times | decode_neg
 51 | Instruction        sbc is used       3545 times | decode_sbc
 52 | Instruction        bfi is used       3496 times | decode_bfi
 53 | Instruction       uxth is used       2234 times | decode_uxth    Missing
 54 | Instruction       vpop is used       2185 times | decode_vpop    Missing
 55 | Instruction      vpush is used       2080 times | decode_vpush   Missing
 56 | Instruction      ldrsb is used       1939 times | decode_ldrsb
 57 | Instruction        tbb is used       1483 times | decode_tbb
 58 | Instruction      ldrsh is used       1446 times | decode_ldrsh
 59 | Instruction        dmb is used       1247 times | decode_dmb
 60 | Instruction      ldmdb is used       1191 times | decode_ldmdb
 61 | Instruction        bfc is used       1167 times | decode_bfc
 62 | Instruction       movt is used        841 times | decode_movt
 63 | Instruction        teq is used        824 times | decode_teq
 64 | Instruction      strex is used        787 times | decode_strex
 65 | Instruction      ldrex is used        787 times | decode_ldrex
 66 | Instruction        cmn is used        738 times | decode_cmn
 67 | Instruction       sxtb is used        731 times | decode_sxtb    Missing
 68 | Instruction      umull is used        523 times | decode_umull
 69 | Instruction        tbh is used        502 times | decode_tbh
 70 | Instruction       sbfx is used        493 times | decode_sbfx    Missing
 71 | Instruction        orn is used        382 times | decode_orn     Missing
 72 | Instruction       sxth is used        366 times | decode_sxth    Missing
 73 | Instruction       vorr is used        329 times | decode_vorr    Missing
 74 | Instruction     smulbb is used        287 times | decode_smulbb
 75 | Instruction       veor is used        271 times | decode_veor    Missing
 76 | Instruction      smull is used        264 times | decode_smull
 77 | Instruction        nop is used        204 times | decode_nop
 78 | Instruction        rev is used        161 times | decode_rev     Missing
 79 | Instruction     vldmia is used        129 times | decode_vldmia  Missing
 80 | Instruction        ror is used        114 times | decode_ror
 81 | Instruction       vand is used        113 times | decode_vand    Missing
 82 | Instruction      smlal is used        113 times | decode_smlal
 83 | Instruction        mls is used         85 times | decode_mls
 84 | Instruction     vstmia is used         76 times | decode_vstmia  Missing
 85 | Instruction      umlal is used         75 times | decode_umlal
 86 | Instruction        svc is used         72 times | decode_svc
 87 | Instruction      uxtab is used         69 times | decode_uxtab   Missing
 88 | Instruction        clz is used         68 times | decode_clz
 89 | Instruction     smlabb is used         55 times | decode_smlabb
 90 | Instruction        pld is used         52 times | decode_pld
 91 | Instruction        ldm is used         41 times | decode_ldm
 92 | Instruction      sxtab is used         40 times | decode_sxtab   Missing
 93 | Instruction       addw is used         38 times | decode_addw
 94 | Instruction       vswp is used         28 times | decode_vswp    Missing
 95 | Instruction      uxtah is used         19 times | decode_uxtah   Missing
 96 | Instruction     vstmdb is used         13 times | decode_vstmdb  Missing
 97 | Instruction        rsc is used         12 times | decode_rsc
 98 | Instruction      sxtah is used         12 times | decode_sxtah   Missing
 99 | Instruction        rrx is used          9 times | decode_rrx
100 | Instruction       stfe is used          8 times | decode_stfe
101 | Instruction       stfp is used          8 times | decode_stfp
102 | Instruction        stm is used          8 times | decode_stm
103 | Instruction       ldfp is used          8 times | decode_ldfp
104 | Instruction       ldfe is used          8 times | decode_ldfe
105 | Instruction     vldmdb is used          5 times | decode_vldmdb  Missing
106 | Instruction       rbit is used          4 times | decode_rbit    Missing
107 | Instruction       bkpt is used          3 times | decode_bkpt
108 | Instruction       vmvn is used          3 times | decode_vmvn    Missing
109 | Instruction        ldc is used          2 times | decode_ldc
110 | Instruction    fldmiax is used          1 times | decode_fldmiax
111 | Instruction       ldcl is used          1 times | decode_ldcl
112 | Instruction       ssat is used          1 times | decode_ssat    Missing
113 | Instruction        stc is used          1 times | decode_stc     Missing
114 | Instruction    fstmiax is used          1 times | decode_fstmiax


--------------------------------------------------------------------------------
/DumpFunctionBytes.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Script to dump the current function as a C/C++ shellcode blob.
  3 | It replaces the calls to external functions with a call to a
  4 | trampoline entry that you can change to point to your own implementation
  5 | of that function.
  6 | 
  7 | Author: Agustin Gianni (agustingianni@gmail.com)
  8 | """
  9 | 
 10 | import sys
 11 | if not "/usr/local/lib/python2.7/site-packages" in sys.path:
 12 |     sys.path.append("/usr/local/lib/python2.7/site-packages")
 13 | 
 14 | from capstone import *
 15 | from capstone.x86_const import *
 16 | from binascii import hexlify
 17 | 
 18 | DEBUG = False
 19 | 
 20 | def to_hex(bytes, cformat=False):
 21 |     tmp = hexlify(bytes)
 22 |     if cformat:
 23 |         return "\\x" + "\\x".join([tmp[i:i+2] for i in range(0, len(tmp), 2)])
 24 | 
 25 |     return " ".join([tmp[i:i+2] for i in range(0, len(tmp), 2)])
 26 | 
 27 | def dump_function_bytes(fn_ea, align=False):
 28 |     f = idaapi.get_func(fn_ea)
 29 |     start = f.startEA
 30 |     size = f.endEA - start
 31 |     contents = GetManyBytes(start, size)
 32 | 
 33 |     if align:
 34 |         rem = len(fbytes) % 32
 35 |         fbytes += "\xcc" * (32 - rem) if rem else ""
 36 | 
 37 |     return (start, size, contents)
 38 | 
 39 | def make_call_immediate(offset):
 40 |     import struct
 41 |     return "\xe8" + struct.pack("<L", offset - 5)
 42 | 
 43 | def main():
 44 |     # Get the function from IDA.
 45 |     start, size, fbytes = dump_function_bytes(ScreenEA())
 46 | 
 47 |     # Collect and print debugging information about the function.
 48 |     to_replace = []
 49 |     invalid_jumps = []
 50 | 
 51 |     md = Cs(CS_ARCH_X86, CS_MODE_64)
 52 |     md.detail = True
 53 | 
 54 |     # Disassemble the function.
 55 |     instructions = list(md.disasm(fbytes, 0))
 56 | 
 57 |     # Lambdas to query for interesting disassembly bits.
 58 |     is_jump = lambda i: X86_GRP_JUMP in i.groups
 59 |     is_imm_jump = lambda i: is_jump(i) and i.operands[0].type == X86_OP_IMM
 60 |     is_invalid_imm_jump = lambda i: i.operands[0].value.imm < start or i.operands[0].value.imm >= (start + size)
 61 |     is_call = lambda i: X86_GRP_CALL in i.groups
 62 |     is_ret  = lambda i: X86_GRP_RET in i.groups
 63 |     is_flow_changing = lambda i: is_jump(i) or is_call(i) or is_ret(i)
 64 |     is_mem_access = lambda i: X86_OP_MEM in [x.type for x in i.operands]
 65 |     is_non_local_mem_access = lambda i: not any(x in i.op_str for x in ["rsp", "esp", "rbp", "ebp"])
 66 | 
 67 |     # Instruction query results.
 68 |     jump_sites = filter(is_jump, instructions)
 69 |     imm_jump_sites = filter(is_imm_jump, instructions)
 70 |     call_sites = filter(is_call, instructions)
 71 |     ret_sites = filter(is_ret, instructions)
 72 |     invalid_jump_sites = filter(is_invalid_imm_jump, filter(is_imm_jump, jump_sites))
 73 |     non_local_mem_access_sites = filter(is_non_local_mem_access, filter(is_mem_access, instructions))
 74 | 
 75 |     # Collect information about jumps and calls that need manual work.
 76 |     jumps_to_fix = [(i.address, i) for i in invalid_jump_sites]
 77 |     calls_to_fix = [(i.address, i) for i in call_sites]
 78 |     mem_ref_to_fix = [(i.address, i) for i in non_local_mem_access_sites]
 79 | 
 80 |     # Collect the start of every basic block for pretty printing.
 81 |     basic_block_start = []
 82 |     basic_block_start.extend([i.address + i.size for i in (jump_sites + ret_sites)])
 83 |     basic_block_start.extend([i.operands[0].value.imm for i in imm_jump_sites])
 84 | 
 85 |     if DEBUG:
 86 |         # Debug dump of the function.
 87 |         print "// Function disassembly:"
 88 |         for i in instructions:
 89 |             if i.address in basic_block_start:
 90 |                 print "// " + ("-" * 80) 
 91 |             print "// 0x%.8x: %-32s %-8s %s" %(i.address, to_hex(i.bytes), i.mnemonic, i.op_str)
 92 |     
 93 |         # Print a list of functions that need to be implemented for the blob to work.
 94 |         if len(calls_to_fix):
 95 |             print "// "
 96 |             print "// Function calls to replace:"
 97 |             for e in calls_to_fix:
 98 |                 tmp = "%s %s" %(e[1].mnemonic, e[1].op_str)
 99 |                 print "// off=0x%.8x ins='%s' bytes=%s size=%d imm=0x%.8x" % (e[0], tmp, to_hex(e[1].bytes), e[1].size, e[1].operands[0].value.imm)
100 | 
101 |         # Warn the user that the function is not complete and should be correctly extracted.
102 |         if len(jumps_to_fix):
103 |             print "// "
104 |             print "// Invalid jumps:"
105 |             for e in jumps_to_fix:
106 |                 tmp = "%s %s" %(e[1].mnemonic, e[1].op_str)
107 |                 print "// off=0x%.8x ins='%s' bytes=%s size=%d" % (e[0], tmp, to_hex(e[1].bytes), e[1].size)
108 | 
109 |         if len(mem_ref_to_fix):
110 |             print "// "
111 |             print "// Potentially invalid memory accesses:"
112 |             for e in mem_ref_to_fix:
113 |                 tmp = "%s %s" %(e[1].mnemonic, e[1].op_str)
114 |                 print "// off=0x%.8x ins='%s' bytes=%s size=%d" % (e[0], tmp, to_hex(e[1].bytes), e[1].size)
115 | 
116 |     print '#include <string>'
117 |     print '#include <iostream>'
118 |     print '#include <sys/mman.h>\n'
119 |     print 'using namespace std;\n'
120 |     print 'string build_blob();\n'
121 |     print 'void *alloc_rwx(size_t size) {'
122 |     print '    void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, 0, 0);'
123 |     print '    if (!mem) {'
124 |     print '        cout << "Could not alloc RWX memory." << endl;'
125 |     print '        return nullptr;'
126 |     print '    }\n'
127 |     print '    cout << "Allocated RwX memory at: " << mem << endl;'
128 |     print '    return mem;'
129 |     print '}\n'
130 |     print 'int main(int argc, char **argv) {'
131 |     print '    string blob = build_blob();'
132 |     print '    void *mem = alloc_rwx(blob.size());'
133 |     print '    memcpy(mem, &blob[0], blob.size());\n'
134 |     print '    // TODO: Change the signature of the function pointer.'
135 |     print '    ((int (*)(void)) mem)();'
136 |     print '}\n'
137 |     print 'string build_blob() {'
138 | 
139 |     # Make the calls point to our dispatch table.
140 |     print '    string dispatch_table;'
141 |     imm_to_offset = {}
142 |     cur_trampoline_offset = len(fbytes)
143 | 
144 |     for (call_off, call_ins) in calls_to_fix:
145 |         operand = call_ins.operands[0]
146 |         if operand.type != X86_OP_IMM:
147 |             raise RuntimeException("We don't handle non immediate calls yet.")
148 | 
149 |         # Get the destination of the call.
150 |         operand_imm = operand.value.imm
151 | 
152 |         # Check if we already have an entry for this immediate value.
153 |         if not imm_to_offset.has_key(operand_imm):
154 |             # Add the current offset to the dictionary.
155 |             imm_to_offset[operand_imm] = cur_trampoline_offset
156 | 
157 |             tmp_name = "function_%d_address" % imm_to_offset[operand_imm]
158 |             tmp = 'dispatch_table.append("\\x48\\xb8", 2);'
159 |             print "    // TODO: Replace %s with the correct implementation. Called at offset 0x%.8x." % (tmp_name, call_off)
160 |             print "    uintptr_t %s = reinterpret_cast<uintptr_t>(0x4040404040404040);" % tmp_name
161 |             print "    %-60s // 0x%.8x: movabs   rax, %s" % (tmp, cur_trampoline_offset, tmp_name)
162 |             cur_trampoline_offset += 10
163 | 
164 |             tmp = 'dispatch_table.append("\\xff\\xe0", 2);'
165 |             print "    dispatch_table.append(reinterpret_cast<const char *>(&%s), sizeof(%s));" % (tmp_name, tmp_name)
166 |             print "    %-60s // 0x%.8x: jmp      rax" % (tmp, cur_trampoline_offset)
167 |             print
168 |             cur_trampoline_offset += 2
169 | 
170 |         # Get the trampoline offset for the current immediate.
171 |         trampoline_offset = imm_to_offset[operand_imm]
172 | 
173 |         # Adjust the trampoline offset with the offset of the current call.
174 |         trampoline_offset -= call_off
175 | 
176 |         # Create a trampoline call and replace the original call.
177 |         trampoline_call = make_call_immediate(trampoline_offset)
178 | 
179 |         # Replace original call bytes.
180 |         fbytes = fbytes[:call_off] + trampoline_call + fbytes[call_off + len(trampoline_call):]
181 | 
182 |     print '    // Dumped [0x%.8x-0x%.8x]' % (start, start + size)
183 |     print '    string shellcode;'
184 |     instructions = list(md.disasm(fbytes, 0))
185 |     for i in instructions:
186 |         tmp = 'shellcode.append("%s", %d);' % (to_hex(i.bytes, cformat=True), len(i.bytes))
187 |         print '    %-60s // 0x%.8x: %-8s %s' % (tmp, i.address, i.mnemonic, i.op_str)
188 |     print
189 | 
190 |     print "    return shellcode + dispatch_table;"
191 |     print "}"
192 |     print
193 | 
194 | main()
195 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Utilities
  2 | =========
  3 | 
  4 | Uncategorized utilities that do not need their own repository.
  5 | 
  6 | Simple Jack Symbol Porting tool
  7 | -------------------------------
  8 | 
  9 | Small dumb utility to port obvious function matches across two IDA databases.
 10 | 
 11 | Instruction frequency counter (arm_frequency.py)
 12 | ----------------------------------------------------
 13 | 
 14 | The script 'arm_frequency.py' takes as input the output of objdump
 15 | on an ARM binary.
 16 | It will show the number of times every instruction was used, sorted
 17 | by the most used ones.
 18 | 
 19 | XRef printer (func_references.py)
 20 | ---------------------------------
 21 | 
 22 | Small utility to print all the function calls to a given function.
 23 | This is generally used to look for calls to malloc-like functions.
 24 | 
 25 | DumpFunctionBytes.py
 26 | --------------------
 27 | 
 28 | IDA Python script that dumps the current function (you need to position
 29 | the cursor on the start of the function) as a shellcode.
 30 | It does a very limited analysis of the function in order to let you know
 31 | that you need to fix call sites to functions.
 32 | 
 33 | The main idea of this is being able to extract a function from a binary
 34 | and use it.
 35 | 
 36 | **Usage:**
 37 | 
 38 | Open IDA Pro and set the cursor to the beginning of the function you want to dump.
 39 | Run the script, it will print the C++ code to stdout, so copy it and paste it on
 40 | a C++ file.
 41 | 
 42 | Now that you have the output in a file, look for `TODO` markers and fill in the needed
 43 | manual information.
 44 | 
 45 | **Example output:**
 46 | 
 47 | ```c++
 48 | #include <string>
 49 | #include <iostream>
 50 | #include <sys/mman.h>
 51 | 
 52 | using namespace std;
 53 | 
 54 | string build_blob();
 55 | 
 56 | void *alloc_rwx(size_t size) {
 57 |     void *mem = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, 0, 0);
 58 |     if (!mem) {
 59 |         cout << "Could not alloc RWX memory." << endl;
 60 |         return nullptr;
 61 |     }
 62 | 
 63 |     cout << "Allocated RwX memory at: " << mem << endl;
 64 |     return mem;
 65 | }
 66 | 
 67 | int main(int argc, char **argv) {
 68 |     string blob = build_blob();
 69 |     void *mem = alloc_rwx(blob.size());
 70 |     memcpy(mem, &blob[0], blob.size());
 71 | 
 72 |     // TODO: Change the signature of the function pointer.
 73 |     ((int (*)(void)) mem)();
 74 | }
 75 | 
 76 | string build_blob() {
 77 |     string dispatch_table;
 78 |     // TODO: Replace function_46_address with the correct implementation.
 79 |     uintptr_t function_46_address = reinterpret_cast<uintptr_t>(&printf);
 80 |     dispatch_table.append("\x48\xb8", 2);                        // 0x0000002e: movabs   rax, function_46_address
 81 |     dispatch_table.append(reinterpret_cast<const char *>(&function_46_address), sizeof(function_46_address));
 82 |     dispatch_table.append("\xff\xe0", 2);                        // 0x00000038: jmp      rax
 83 | 
 84 |     // Dumped [0x100000f30-0x100000f5e]
 85 |     string shellcode;
 86 |     shellcode.append("\x55", 1);                                 // 0x00000000: push     rbp
 87 |     shellcode.append("\x48\x89\xe5", 3);                         // 0x00000001: mov      rbp, rsp
 88 |     shellcode.append("\x48\x83\xec\x10", 4);                     // 0x00000004: sub      rsp, 0x10
 89 |     shellcode.append("\x48\x8d\x3d\x6b\x00\x00\x00", 7);         // 0x00000008: lea      rdi, qword ptr [rip + 0x6b]
 90 |     shellcode.append("\xc7\x45\xfc\x00\x00\x00\x00", 7);         // 0x0000000f: mov      dword ptr [rbp - 4], 0
 91 |     shellcode.append("\xb0\x00", 2);                             // 0x00000016: mov      al, 0
 92 |     shellcode.append("\xe8\x11\x00\x00\x00", 5);                 // 0x00000018: call     0x2e
 93 |     shellcode.append("\x8b\x4d\xfc", 3);                         // 0x0000001d: mov      ecx, dword ptr [rbp - 4]
 94 |     shellcode.append("\x83\xc1\x64", 3);                         // 0x00000020: add      ecx, 0x64
 95 |     shellcode.append("\x89\x45\xf8", 3);                         // 0x00000023: mov      dword ptr [rbp - 8], eax
 96 |     shellcode.append("\x89\xc8", 2);                             // 0x00000026: mov      eax, ecx
 97 |     shellcode.append("\x48\x83\xc4\x10", 4);                     // 0x00000028: add      rsp, 0x10
 98 |     shellcode.append("\x5d", 1);                                 // 0x0000002c: pop      rbp
 99 |     shellcode.append("\xc3", 1);                                 // 0x0000002d: ret
100 | 
101 |     return shellcode + dispatch_table;
102 | }
103 | ```
104 | 


--------------------------------------------------------------------------------
/allocator.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * allocator.h
  3 |  *
  4 |  *  Created on: Feb 11, 2016
  5 |  *      Author: anon
  6 |  */
  7 | 
  8 | #ifndef ALLOCATOR_H_
  9 | #define ALLOCATOR_H_
 10 | 
 11 | #include <sys/mman.h>
 12 | 
 13 | namespace os {
 14 | 
 15 | // Maps `size` bytes of anonymous, private RWX memory; returns nullptr on failure.
 16 | inline void *alloc_rwx_memory(size_t size) {
 17 | 	auto tmp = mmap(0, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
 18 | 
 19 | 	return tmp == MAP_FAILED ? nullptr : tmp;
 20 | }
 21 | 
 22 | }
 23 | 
 24 | namespace allocator {
 25 | 
 26 | #include <pthread.h>
 27 | 
 28 | class Lock;
 29 | 
 30 | // Recursive pthread mutex. lock()/unlock() are private; use the `Lock` guard.
 31 | class Mutex {
 32 | public:
 33 | 	Mutex() {
 34 | 		pthread_mutexattr_t attr;
 35 | 		pthread_mutexattr_init(&attr);
 36 | 		pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
 37 | 		pthread_mutex_init(&m_mutex, &attr);
 38 | 		// The attribute object is not needed once the mutex is initialized.
 39 | 		pthread_mutexattr_destroy(&attr);
 40 | 	}
 41 | 
 42 | 	~Mutex() {
 43 | 		pthread_mutex_destroy(&m_mutex);
 44 | 	}
 45 | 
 46 | private:
 47 | 	// Only the Lock guard may acquire or release the mutex.
 48 | 	friend class Lock;
 49 | 
 50 | 	void lock() { pthread_mutex_lock(&m_mutex); }
 51 | 
 52 | 	void unlock() { pthread_mutex_unlock(&m_mutex); }
 53 | 
 54 | 	pthread_mutex_t m_mutex;
 55 | };
 56 | 
 57 | // RAII guard: acquires the given Mutex on construction and releases it on
 58 | // destruction.
 59 | class Lock {
 60 | 	Mutex &m_mutex;  // referenced, not owned
 61 | 
 62 | public:
 63 | 	Lock(Mutex &mutex) : m_mutex(mutex) {
 64 | 		m_mutex.lock();
 65 | 	}
 66 | 
 67 | 	~Lock() {
 68 | 		m_mutex.unlock();
 69 | 	}
 70 | };
 71 | 
 72 | // Bump allocator handing out RWX memory from fixed-size slabs; allocate() is
 73 | // serialized by a recursive mutex. Default size of each SLAB is 16 MiB.
 74 | class BumpAllocator {
 75 | public:
 76 | 	BumpAllocator() :
 77 | 			m_memory(0), m_used(0) {
 78 | 	}
 79 | 
 80 | 	// Returns `size` bytes carved from the current slab, mapping a new slab
 81 | 	// when needed. Returns nullptr if `size` exceeds SLAB_SIZE or mapping
 82 | 	// fails. Replaced slabs are never unmapped and no alignment is applied.
 83 | 	void *allocate(size_t size) {
 84 | 		Lock lock(m_mutex);
 85 | 
 86 | 		if (size > BumpAllocator::SLAB_SIZE) {
 87 | 			return nullptr;
 88 | 		}
 89 | 
 90 | 		// Map a slab on first use, or a fresh one when the request does not fit.
 91 | 		if ((!m_memory || !fits(size)) && !init()) {
 92 | 			return nullptr;
 93 | 		}
 94 | 
 95 | 		auto tmp = m_memory;
 96 | 		m_memory = static_cast<char *>(m_memory) + size;
 97 | 		m_used += size;
 98 | 
 99 | 		return tmp;
100 | 	}
101 | 
102 | private:
103 | 	// Maps a new slab and makes it current; returns false if mapping fails.
104 | 	bool init() {
105 | 		Lock lock(m_mutex);
106 | 
107 | 		m_used = 0;
108 | 		m_memory = os::alloc_rwx_memory(BumpAllocator::SLAB_SIZE);
109 | 		return m_memory != nullptr;
110 | 	}
111 | 
112 | 	// Bytes still unused in the current slab.
113 | 	inline size_t remaining() {
114 | 		return BumpAllocator::SLAB_SIZE - m_used;
115 | 	}
116 | 
117 | 	// True when `size` more bytes fit in the current slab.
118 | 	inline bool fits(size_t size) {
119 | 		return size <= remaining();
120 | 	}
121 | 
122 | 	static const size_t SLAB_SIZE = 16777216;
123 | 
124 | 	void *m_memory;
125 | 	size_t m_used;
126 | 	Mutex m_mutex;
127 | };
128 | 
129 | }
130 | 
131 | #endif /* ALLOCATOR_H_ */
132 | 


--------------------------------------------------------------------------------
/arm_frequency.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Small script to parse the output of arm-linux-androideabi-objdump
 3 | giving as a result the number of times a particular opname is used.
 4 | 
 5 | This was used to prioritize the implementation of the most used instructions
 6 | in a disassembler / emulator.
 7 | 
 8 | Agustin Gianni (agustingianni@gmail.com)
 9 | """
10 | import re
11 | import sys
12 | import operator
13 | from collections import defaultdict
14 | 
15 | setsflags_skip = ["cps", "mls", "mrs", "smmls", "srs", "subs", "vabs", "vcls", "vfms", "vmls", "vmrs", "vnmls", "qabs", "vrecps", "vrsqrts"]
16 | skip_list = ["cbnz", "svc", "lsls", "sbcs", "bics", "rscs", "movs", "muls", "mls", "teq", "adcs", "smmls", "vcls", "vmls"]
17 | cond_codes = ["eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", "hi", "ls", "ge", "lt", "gt", "le"]
18 | def drop_garbage(opname):
19 | 	if opname.startswith("it"):
20 | 		return "it"
21 | 
22 | 	# Drop the wide specifier.
23 | 	t = opname[:-2] if (opname.endswith(".w") or opname.endswith(".n")) else opname
24 | 
25 | 	# Some opcodes end with the leters of condition codes but have nothing to do with them.
26 | 	if t in skip_list:
27 | 		# Remove the 'setsflags' indicator.
28 | 		if t[-1] == "s" and t not in setsflags_skip:
29 | 			return t[:-1]
30 | 
31 | 		return t
32 | 
33 | 	# Drop condition codes.
34 | 	for code in cond_codes:
35 | 		if t.endswith(code):
36 | 			t2 = t[:-2]
37 | 			if len(t2) <= 2 and not t2 in ["b", "bl", "bx"]:
38 | 				print "You need to add the following opname to the 'skip_list' %s" % t
39 | 
40 | 			t = t2
41 | 			break
42 | 
43 | 	# Remove the 'setsflags' indicator.
44 | 	if t[-1] == "s" and t not in setsflags_skip:
45 | 		return t[:-1]
46 | 
47 | 	return t
48 | 
49 | # Match the opcode, opname and arguments of objdump's output for ARM.
50 | regex_str = "\s*[0-9a-f]+\:\s+([0-9a-f]+\s+[0-9a-f]*)\s+([a-zA-Z.]+)\s+(.+)"
51 | regex = re.compile(regex_str)
52 | 
53 | opname_freq = defaultdict(lambda: 0, {})
54 | 
55 | with open(sys.argv[1]) as f:
56 |     for line in f:
57 |         r = regex.search(line)
58 |         if not r:
59 |         	continue
60 | 
61 |         opcode, opname, args = r.groups()
62 |         if opname.lower() in [".byte", ".word", ".dword", ".qword", ".short"]:
63 |         	continue
64 | 
65 |         opname = drop_garbage(opname)
66 | 
67 |         opname_freq[opname] += 1
68 | 
69 | 
70 | for el in sorted(opname_freq.iteritems(), key=operator.itemgetter(1), reverse=True):
71 | 	print "Instruction %10s is used %10d times | decode_%s" % (el[0], el[1], el[0])


--------------------------------------------------------------------------------
/find_hardref.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Script to find hardcoded references inside an IDA database.
  3 | This is used to find pointers to stuff inside firmware like
  4 | Apple's iBoot.
  5 | 
  6 | Author: Agustin Gianni (agustingianni@gmail.com)
  7 | """
  8 | import idaapi
  9 | import idc
 10 | import idautils
 11 | import struct
 12 | 
 13 | class HardReferencesChoose(Choose2):
 14 |     def __init__(self, title, refs):
 15 |         Choose2.__init__(self, title, [ ["Address", 10], ["Reference", 10], ["Address Text", 15], ["Reference Text", 15] ])
 16 |         self.n = 0
 17 |         
 18 |         self.items = []
 19 |         for ref in refs:
 20 |             address = ref[0]
 21 |             reference = ref[1]
 22 |             self.items += [self.make_item(address, reference, GetDisasm(address), GetDisasm(reference))]
 23 |         
 24 |         self.icon = 0
 25 |         self.selcount = 0
 26 |         self.deflt = -1
 27 |         self.popup_names = ["NOSE"]
 28 | 
 29 |     def OnClose(self):
 30 |         pass
 31 | 
 32 |     def OnEditLine(self, n):
 33 |         idaapi.jumpto(self.items[n])
 34 | 
 35 |     def OnInsertLine(self):
 36 |         pass
 37 | 
 38 |     def OnSelectLine(self, n):
 39 |         self.selcount += 1
 40 |         idaapi.jumpto(int(self.items[n][0], 16))
 41 | 
 42 |     def OnGetLine(self, n):
 43 |         return self.items[n]
 44 | 
 45 |     def OnGetSize(self):
 46 |         n = len(self.items)
 47 |         return n
 48 | 
 49 |     def OnDeleteLine(self, n):
 50 |         del self.items[n]
 51 |         return n
 52 | 
 53 |     def OnRefresh(self, n):
 54 |         #print("refresh %d" % n)
 55 |         return n
 56 | 
 57 |     def OnGetIcon(self, n):
 58 |         if n % 2:
 59 |             return 10
 60 |         
 61 |         return 11
 62 | 
 63 |     def show(self):
 64 |         t = self.Show()
 65 |         if t < 0:
 66 |             return False
 67 |         return True
 68 | 
 69 |     def make_item(self, address, reference, addr_asm, ref_asm):        
 70 |         r = ["0x%08x" % address, "0x%08x" %  reference, addr_asm, ref_asm]
 71 |         self.n += 1
 72 |         return r
 73 | 
 74 |     def OnGetLineAttr(self, n):
 75 |         #if n % 2:
 76 |         #    return [0xF00000, 0]
 77 |         pass
 78 | 
 79 | def find_string_table_refs(string_table_start, string_table_end, mark=False):
 80 |     image_start = idc.MinEA()
 81 |     image_end = idc.MaxEA()
 82 |     image_size = image_end - image_start
 83 | 
 84 |     print "Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size)
 85 | 
 86 |     for cur_ea in xrange(image_start, image_end, 4):
 87 |         cur_long = Dword(cur_ea)
 88 |         if cur_long >= string_table_start and cur_long < string_table_end:
 89 |             if mark:
 90 |                 SetType(cur_ea, "char *ptr;")
 91 | 
 92 |             print "Found string table ref at 0x%.8x to 0x%.8x (%s)" % (cur_ea, cur_long, GetDisasm(cur_ea))
 93 | 
 94 | def find_any_refs(image_start, image_end, mark=False):
 95 |     image_size = image_end - image_start
 96 | 
 97 |     refs = []
 98 |     for cur_ea in xrange(image_start, image_end, 4):
 99 |         cur_long = Dword(cur_ea)
100 |         if cur_long >= image_start and cur_long < image_end:
101 |             if mark:
102 |                 MakeDword(cur_ea)
103 | 
104 |             print "Found hard ref at 0x%.8x to 0x%.8x (%s)" % (cur_ea, cur_long, GetDisasm(cur_ea))
105 |             refs.append((cur_ea, cur_long))
106 | 
107 |     return refs
108 | 
109 | # Image limits: the scan below covers idc.MinEA() up to idc.MaxEA().
110 | image_start = idc.MinEA()   
111 | image_end = idc.MaxEA()
112 | 
113 | # Limits of the iBoot string table.
114 | # string_table_start = 0x5FF345C0
115 | # string_table_end = 0x5FF40ACD
116 | # 
117 | # find_string_table_refs(string_table_start, string_table_end, mark=True)
118 | refs = find_any_refs(image_start, image_end, mark=False)  # mark=False: MakeDword is not called
119 | choose = HardReferencesChoose("Hard references", refs)
120 | choose.show()


--------------------------------------------------------------------------------
/func_references.py:
--------------------------------------------------------------------------------
  1 | """
  2 | IDAPython script to search for the cross-references to a given function and the arguments of each call.
  3 | 
  4 | The approach is very simple and probably wrong in many corner cases. We assume that
  5 | arguments are set via 'push' instructions and inside just one basic block (the basic
  6 | block that contains the function call).
  7 | """
  8 | 
  9 | from collections import namedtuple
 10 | import idautils
 11 | from idc import GetFunctionName, GetDisasm, GetMnem, GetOpnd, Demangle, GetLongPrm, INF_SHORT_DN
 12 | 
 13 | __author__ = 'Agustin Gianni (agustin.gianni@gmail.com).'
 14 | 
 15 | import idaapi
 16 | 
# Lightweight record types shared by the helpers below.
# A function in the database: its address and (demangled) name.
FunctionName = namedtuple('FunctionName', ['ea', 'name'])
# An import entry as handed to the enumeration callback: address, name, ordinal.
ImportEntry = namedtuple('ImportEntry', ['ea', 'name', 'ord'])
# A basic block [start_ea, end_ea) together with the function that contains it.
BasicBlock = namedtuple('BasicBlock', ['start_ea', 'end_ea', 'function'])
# What we are searching for: a (partial) function name and its argument count.
FunctionSignature = namedtuple('FunctionSignature', ['name', 'nargs'])
# One instruction: its address and its disassembly text.
Instruction = namedtuple('Instruction', ['ea', 'string'])
# One recovered call argument: the operand text and the instruction that set it.
FunctionArgument = namedtuple('FunctionArgument', ['argument', 'instruction'])
# One reported call site.
InterestingResult = namedtuple('InterestingResult', ['caller_name', 'callee_name', 'call_address', 'arguments'])

# Filled by EnumImportNamesCallback and returned by GetAllImportEntries.
imports_list = []
# Not referenced by any function in this file.
function_names = []
 27 | 
 28 | 
 29 | def EnumImportNamesCallback(ea, name, ord_):
 30 |     """
 31 |     Callback used to enumerate all the import entries.
 32 |     """
 33 |     if name:
 34 |         imports_list.append(ImportEntry(ea, name, ord_))
 35 | 
 36 |     return True
 37 | 
 38 | 
 39 | def GetAllImportEntries():
 40 |     """
 41 |     Return a list of all the import entries.
 42 |     """
 43 |     for i in xrange(0, idaapi.get_import_module_qty()):
 44 |         name = idaapi.get_import_module_name(i)
 45 |         if not name:
 46 |             pass
 47 | 
 48 |         idaapi.enum_import_names(i, EnumImportNamesCallback)
 49 | 
 50 |     return imports_list
 51 | 
 52 | 
 53 | def GetAllFunctionNames():
 54 |     """
 55 |     Return a list of all the funtion names in the database.
 56 |     """
 57 |     func_name_list = []
 58 |     for x in idautils.Functions():
 59 |         func_name_list.append(FunctionName(x, GetFunctionDemangledName(x)))
 60 | 
 61 |     return func_name_list
 62 | 
 63 | 
 64 | def GetAddressBasicBlock(ea):
 65 |     """
 66 |     Get the corresponding basic block for a given address.
 67 |     """
 68 |     f = idaapi.get_func(ea)
 69 |     if not f:
 70 |         raise RuntimeError("No basic block at address %.8x" % ea)
 71 | 
 72 |     for block in idaapi.FlowChart(f):
 73 |         if block.startEA <= ea and block.endEA > ea:
 74 |             return BasicBlock(block.startEA, block.endEA, f)
 75 | 
 76 |     raise RuntimeError("No basic block at address %.8x" % ea)
 77 | 
 78 | def GetInstructions(start_ea, end_ea):
 79 |     """
 80 |     Return a list of all the instructions in the range [start_ea, end_ea].
 81 |     """
 82 |     ins = []
 83 |     for head in idautils.Heads(start_ea, end_ea):
 84 |         if idaapi.isCode(idaapi.getFlags(head)):
 85 |             ins.append(Instruction(head, GetDisasm(head)))
 86 | 
 87 |     return ins
 88 | 
 89 | def IsArgumentSetter(ea):
 90 |     """
 91 |     Architecture dependant utility function that assumes that all the push instructions
 92 |     ar argument setters.
 93 |     """
 94 |     if GetMnem(ea).lower() == "push":
 95 |         return True
 96 | 
 97 | def GetFunctionArgument(ins):
 98 |     """
 99 |     Architecture dependant function that takes an instruction and returns
100 |     the function argument.
101 |     In this case we assume that the first operand is the function argument (for push instructions).
102 |     """
103 |     opnd = GetOpnd(ins.ea, 0)
104 |     return FunctionArgument(opnd, ins)
105 | 
def IsFunctionCall(instruction):
    """
    Return True if the mnemonic at 'instruction.ea' is 'call' or 'jmp'
    (compared case-insensitively).
    """
    mnemonic = GetMnem(instruction.ea).lower()
    return mnemonic in ("call", "jmp")
111 | 
def GetFunctionCallArguments(func_sig, xref_addr):
    """
    Given a function signature (func_sig) return the arguments of the
    function call at 'xref_addr'.

    The basic block containing 'xref_addr' is walked backwards from the call
    and up to 'func_sig.nargs' argument setters are collected, nearest first.
    Fewer (possibly zero) arguments are returned when the block does not
    contain enough setters.

    Raises RuntimeError when 'xref_addr' is not inside a basic block or is
    not a 'call'/'jmp' instruction.
    """
    # Get the corresponding basic block for the xref.
    bb = GetAddressBasicBlock(xref_addr)

    # Keep only the instructions up to and including the reference.
    instructions = [
        ins for ins in GetInstructions(bb.start_ea, bb.end_ea)
        if ins.ea <= xref_addr
    ]

    # The list is empty when the block holds no code head at or before the
    # reference; treat that like any other non-call reference instead of
    # failing with an IndexError.
    if not instructions or not IsFunctionCall(instructions[-1]):
        raise RuntimeError("Cross reference is not a 'call', nor a 'jmp'.")

    # We've assumed that all the function's arguments are set within one
    # basic block.
    args = []
    for ins in reversed(instructions[:-1]):
        if len(args) == func_sig.nargs:
            break

        if IsArgumentSetter(ins.ea):
            args.append(GetFunctionArgument(ins))

    return args
144 | 
def GetFunctionDemangledName(xref_addr):
    """
    Return the demangled name of the function containing 'xref_addr', or the
    raw function name when demangling yields nothing.
    """
    raw_name = GetFunctionName(xref_addr)
    return Demangle(raw_name, GetLongPrm(INF_SHORT_DN)) or raw_name
152 | 
def IDAArgumentToSize(arg):
    """
    Try to convert an IDA operand string to an integer.

    Accepts decimal text ("42") and IDA-style hex with an 'h'/'H' suffix
    ("10h", "0FFh"). Anything else (registers, memory references, names, an
    empty string) yields -1 so that such entries sort last.
    """
    if not arg:
        return -1

    text, base = arg, 10

    # Only treat the value as hex when it actually starts with a digit:
    # operands such as the registers "ah"/"bh"/"ch"/"dh" also end in 'h' and
    # would otherwise be misread as the numbers 0xA..0xD.
    if arg[-1] in ('h', 'H') and arg.lstrip("+-")[:1].isdigit():
        text, base = arg[:-1], 16

    try:
        return int(text, base)

    except ValueError:
        return -1
172 | 
def sort_func(element):
    """
    Sort key: the numeric value of the first recovered argument of
    'element' (an InterestingResult), or -1 when it is not a plain number
    or when no argument was recovered at all.
    """
    if not element.arguments:
        # No argument setter was found for this call; sort it with the
        # other "unknown size" entries instead of raising IndexError.
        return -1

    return IDAArgumentToSize(element.arguments[0].argument)
179 | 
def print_references(func_name, func_nargs):
    """
    Print every call to the functions/imports whose name contains
    'func_name', together with up to 'func_nargs' recovered arguments,
    ordered by the numeric value of the first argument (largest first).

    References that cannot be analysed (not a call/jmp, not inside a basic
    block) and calls for which no argument could be recovered are reported
    with a one-line comment and otherwise skipped, so one odd reference no
    longer aborts the whole listing.
    """
    # Declare the function signature with name and number of arguments.
    func_sig = FunctionSignature(func_name, func_nargs)

    # Functions and imports whose name contains the requested name.
    interesting_functions = [
        entry for entry in GetAllFunctionNames() if func_sig.name in entry.name
    ]
    interesting_functions += [
        entry for entry in GetAllImportEntries() if func_sig.name in entry.name
    ]

    results = []
    for function in interesting_functions:
        # Get all code xrefs to the function.
        for call_address in idautils.CodeRefsTo(function.ea, 0):
            try:
                arguments = GetFunctionCallArguments(func_sig, call_address)

            except RuntimeError as error:
                print("// Skipping reference at 0x%.8x: %s" % (call_address, error))
                continue

            if not arguments:
                # Without a first argument there is nothing to sort on or
                # to print as the allocation size.
                print("// No arguments recovered for call at 0x%.8x" % call_address)
                continue

            results.append(InterestingResult(
                GetFunctionDemangledName(call_address),
                function.name,
                call_address,
                arguments))

    # Sort by alloc size.
    results.sort(key=sort_func, reverse=True)
    for result in results:
        print("// Caller function %s " % result.caller_name)
        print("// Call address    0x%.8x" % result.call_address)
        print("// Alloc size      0x%.8x" % IDAArgumentToSize(result.arguments[0].argument))
        print("%s(%s);" % (result.callee_name, ",".join('"%s"' % x.argument for x in result.arguments)))
        print("")
220 | 
# When run as a script: report the calls to every function whose name
# contains "operator new", recovering one argument per call.
if __name__ == "__main__":
    print_references("operator new", 1)
223 | 


--------------------------------------------------------------------------------
/ida_hooks.py:
--------------------------------------------------------------------------------
  1 | import idc
  2 | import idautils
  3 | 
  4 | def hexdump(src, length=16):
  5 |     FILTER = ''.join([(len(repr(chr(x))) == 3) and chr(x) or '.' for x in range(256)])
  6 |     lines = []
  7 |     for c in xrange(0, len(src), length):
  8 |         chars = src[c:c+length]
  9 |         hex = ' '.join(["%02x" % ord(x) for x in chars])
 10 |         printable = ''.join(["%s" % ((ord(x) <= 127 and FILTER[ord(x)]) or '.') for x in chars])
 11 |         lines.append("%04x  %-*s  %s\n" % (c, length*3, hex, printable))
 12 |     return ''.join(lines)
 13 |    
 14 | def handle_bp1():
 15 |     """
 16 |     Handler for the buffer append routine.
 17 | 
 18 |     .text:70F75BE0 push    edi                             ; size
 19 |     .text:70F75BE1 push    edx                             ; src
 20 |     .text:70F75BE2 push    eax                             ; dst
 21 |     .text:70F75BE3 call    _memcpy_0
 22 |     """
 23 |     size = GetRegValue("edi")
 24 |     src = GetRegValue("edx")
 25 |     dst = GetRegValue("eax")
 26 |     ip = GetRegValue("eip")
 27 | 
 28 |     print "memcpy(dst=0x%.8x, src=0x%.8x, size=0x%.8x)" % (dst, src, size)
 29 | 
 30 |     # Read the data
 31 |     data = GetManyBytes(src, size, use_dbg=True)
 32 |     if not data:
 33 |         print "Error reading src=0x%.8x" % (src)
 34 |         return
 35 |     
 36 |     hex_data = hexdump(data)
 37 | 
 38 |     print hex_data
 39 |     print
 40 |  
 41 | def install_bp1():
 42 |     bpaddr = 0x70F75BE3
 43 |     AddBpt(bpaddr)
 44 |     SetBptCnd(bpaddr, "handle_bp0()")
 45 | 
 46 | def handle_bp0():
 47 |     """
 48 |     .text:6FE414D2 mov     esi, [esp+44h+lpString1]
 49 |     .text:6FE414D6 mov     edi, [esp+44h+lp]
 50 |     """
 51 |     string_address = GetRegValue("esi")
 52 |     data = GetManyBytes(string_address, 32, use_dbg=True)
 53 |     if not data:
 54 |         print "Error reading src=0x%.8x" % (src)
 55 |         return
 56 |     
 57 |     hex_data = hexdump(data)
 58 |     
 59 |     print "Data @ 0x%.8x" % string_address
 60 |     print hex_data
 61 |     print
 62 | 
 63 | def install_bp0():
 64 |     bpaddr = 0x6FE414D6
 65 |     AddBpt(bpaddr)
 66 |     SetBptCnd(bpaddr, "handle_bp0()")
 67 | 
 68 | def handle_bp2():
 69 |     """
 70 |     .text:6FE30290 ; int __cdecl SHA1_sub_70F70290_calc(void *data)
 71 |     .text:6FE30290 SHA1_sub_70F70290_calc proc near        ; CODE XREF: SHA1_sub_70F703A0_get+32p
 72 |     .text:6FE30290                                         ; SHA1_sub_70F71350+4Dp ...
 73 |     .text:6FE30296 mov     ebp, [esp+44h+data]
 74 |     .text:6FE3029A push    edi
 75 |     .text:6FE3029B mov     edi, eax                        ; eax=strlen
 76 |     """
 77 |     data_address = GetRegValue("ebp")
 78 |     data_size = GetRegValue("eax")
 79 |     data = GetManyBytes(data_address, data_size, use_dbg=True)
 80 |     if not data:
 81 |         print "Error reading src=0x%.8x" % (src)
 82 |         return
 83 |     
 84 |     hex_data = hexdump(data)
 85 |     
 86 |     print "Data @ 0x%.8x" % data_address
 87 |     print hex_data
 88 |     print
 89 | 
 90 | def install_bp2():
 91 |     bpaddr = 0x6FE3029B
 92 |     AddBpt(bpaddr)
 93 |     SetBptCnd(bpaddr, "handle_bp0()")
 94 | 
 95 | 
def main():
    # NOTE(review): presumably argument 3 asks the IDAPython plugin to
    # register Python as the language used for breakpoint conditions, so
    # that the "handle_bpN()" condition strings are evaluated as Python --
    # confirm against the IDAPython plugin documentation.
    RunPlugin("python", 3)
    # Only the third hook is installed; install_bp0/install_bp1 are not called.
    install_bp2()

if __name__ == '__main__':
    main()


--------------------------------------------------------------------------------
/machofinder.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Hacky script to gather all the Mach-O files (thin and fat).
  3 | It prints the union of all the used load commands in all
  4 | the files found in the system.
  5 | 
  6 | Author: Agustin Gianni (agustingianni@gmail.com)
  7 | """
  8 | import os
  9 | import struct
 10 | import shutil
 11 | import pickle
 12 | 
 13 | from macholib.MachO import MachO
 14 | 
 15 | #define FAT_MAGIC   0xcafebabe
 16 | #define FAT_CIGAM   0xbebafeca /* NXSwapLong(FAT_MAGIC) */
 17 | #define MH_MAGIC    0xfeedface /* the mach magic number */
 18 | #define MH_CIGAM    0xcefaedfe /* NXSwapInt(MH_MAGIC) */
 19 | #define MH_MAGIC_64 0xfeedfacf /* the 64-bit mach magic number */
 20 | #define MH_CIGAM_64 0xcffaedfe /* NXSwapInt(MH_MAGIC_64) */
 21 | 
 22 | # magics = [0xfeedface, 0xcefaedfe, 0xfeedfacf, 0xcffaedfe, 0xcafebabe, 0xbebafeca]
 23 | # magics = [0xfeedface, 0xfeedfacf]
 24 | 
 25 | def is_interesting(filepath, magics):
 26 |     try:
 27 |         data = open(filepath).read(4)
 28 |         magic = struct.unpack("<L", data)[0]
 29 | 
 30 |     except (struct.error, IOError):
 31 |         return False
 32 | 
 33 |     return magic in magics
 34 | 
 35 | def get_macho_load_commands(filepath):
 36 |     commands = set()
 37 | 
 38 |     try:    
 39 |         macho = MachO(filepath)
 40 |     
 41 |     except (ValueError, struct.error):
 42 |         return set()
 43 | 
 44 |     for header in macho.headers:
 45 |         for command in header.commands:
 46 |             commands.add(command[0].get_cmd_name())
 47 | 
 48 |     return commands
 49 | 
 50 | sect_type = {}
 51 | sect_type[0] = "S_REGULAR"
 52 | sect_type[1] = "S_ZEROFILL"
 53 | sect_type[2] = "S_CSTRING_LITERALS"
 54 | sect_type[3] = "S_4BYTE_LITERALS"
 55 | sect_type[4] = "S_8BYTE_LITERALS"
 56 | sect_type[5] = "S_LITERAL_POINTERS"
 57 | sect_type[6] = "S_NON_LAZY_SYMBOL_POINTERS"
 58 | sect_type[7] = "S_LAZY_SYMBOL_POINTERS"
 59 | sect_type[8] = "S_SYMBOL_STUBS"
 60 | sect_type[9] = "S_MOD_INIT_FUNC_POINTERS"
 61 | sect_type[10] = "S_MOD_TERM_FUNC_POINTERS"
 62 | sect_type[11] = "S_COALESCED"
 63 | sect_type[12] = "S_GB_ZEROFILL"
 64 | sect_type[13] = "S_INTERPOSING"
 65 | sect_type[14] = "S_16BYTE_LITERALS"
 66 | sect_type[15] = "S_DTRACE_DOF"
 67 | sect_type[16] = "S_LAZY_DYLIB_SYMBOL_POINTERS"
 68 | sect_type[17] = "S_THREAD_LOCAL_REGULAR"
 69 | sect_type[18] = "S_THREAD_LOCAL_ZEROFILL"
 70 | sect_type[19] = "S_THREAD_LOCAL_VARIABLES"
 71 | sect_type[20] = "S_THREAD_LOCAL_VARIABLE_POINTERS"
 72 | sect_type[21] = "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"
 73 | 
 74 | def get_macho_section_types(filepath):
 75 |     section_types = set()
 76 | 
 77 |     try:    
 78 |         macho = MachO(filepath)
 79 |     
 80 |     except (ValueError, struct.error, IOError):
 81 |         return set()
 82 | 
 83 |     for header in macho.headers:
 84 |         for command in header.commands:
 85 |             try:
 86 |                 if command[1].nsects:
 87 |                     # print command[1]
 88 |                     for sect in command[2]:
 89 |                         segname = sect.segname.replace("\x00", "")
 90 |                         sectname = sect.sectname.replace("\x00", "")
 91 |                         # print "  ", sect_type[sect.flags & 0xff], segname, sectname
 92 | 
 93 |                     # print "-" * 80
 94 |                     section_types.add(sect_type[sect.flags & 0xff])
 95 | 
 96 |             except AttributeError, e:
 97 |                 pass
 98 | 
 99 |     return section_types
100 | 
def get_macho_section_name(filepath):
    """Return the set of "<segment>.<section>" names of all non-empty
    sections in the Mach-O at 'filepath'.

    NUL bytes are removed from both names. Files that cannot be opened or
    parsed yield an empty set.
    """
    try:
        macho = MachO(filepath)

    except (ValueError, struct.error, IOError):
        return set()

    names = set()
    for header in macho.headers:
        for command in header.commands:
            try:
                if not command[1].nsects:
                    continue

                for sect in command[2]:
                    if not sect.size:
                        continue

                    segname = sect.segname.replace("\x00", "")
                    sectname = sect.sectname.replace("\x00", "")
                    names.add("%s.%s" % (segname, sectname))

            except AttributeError:
                # Commands without the expected fields are ignored.
                pass

    return names
124 | 
def print_all_used_load_commands(results):
    """Print the sorted union of the load commands used by the files in
    'results', followed by the number of files.

    An empty 'results' list prints an empty command list.
    """
    # set().union(...) also works with zero operands, whereas the unbound
    # set.union(*[]) raises TypeError for an empty file list.
    all_commands = set().union(
        *(get_macho_load_commands(path) for path in results))

    print("Global list of used load commands")
    print("-" * 80)
    for command in sorted(all_commands):
        print(command)
    print("-" * 80)

    print("Number of files %d" % len(results))
133 | 
def print_one_file_for_each_load_command(results, dest_dir="/Users/anon/workspace/retools/src/libbinary/macho/testfiles"):
    """For every load command, print the first file in 'results' that uses
    it and copy that file to 'dest_dir' as test_load_command_<name>.

    Paths containing "Applications" are skipped. Copy failures are ignored
    (best effort).
    """
    seen_commands = set()
    for path in results:
        if "Applications" in path:
            continue

        for load_command in get_macho_load_commands(path):
            if load_command in seen_commands:
                continue

            seen_commands.add(load_command)
            print("command %-30s -> %s" % (load_command, path))

            try:
                shutil.copyfile(path, os.path.join(dest_dir, "test_load_command_%s" % load_command.lower()))

            # Only file-system errors are expected here; a bare 'except'
            # would also swallow KeyboardInterrupt and programming errors.
            except EnvironmentError:
                continue
152 | 
def print_one_file_for_each_section_type(results, dest_dir="/Users/anon/workspace/retools/src/libbinary/macho/testfiles"):
    """For every section type, print the first file in 'results' that uses
    it and copy that file to 'dest_dir' as test_section_type_<name>.

    Paths containing "Applications" are skipped. Copy failures are ignored
    (best effort).
    """
    seen_section_types = set()
    for path in results:
        if "Applications" in path:
            continue

        diff = get_macho_section_types(path) - seen_section_types
        if not diff:
            continue

        seen_section_types.update(diff)
        print("%s -> [%s]" % (path, ", ".join(diff)))

        for new_type in diff:
            try:
                shutil.copyfile(path, os.path.join(dest_dir, "test_section_type_%s" % new_type.lower()))

            # Only file-system errors are expected here; a bare 'except'
            # would also swallow KeyboardInterrupt and programming errors.
            except EnvironmentError:
                continue
170 | 
def print_one_file_for_each_section_name(results):
    """Print, for every not-yet-seen "<segment>.<section>" name containing
    "__DWARF", the first file in 'results' that has it.

    Paths containing "Applications" are skipped.
    """
    seen_names = set()
    for path in results:
        if "Applications" in path:
            continue

        fresh = get_macho_section_name(path) - seen_names
        if not fresh:
            continue

        seen_names.update(fresh)
        for name in fresh:
            if "__DWARF" not in name:
                continue

            print("%-40s %s" % (name, path))
            # Copying the file next to the other test files is disabled:
            # try:
            #     shutil.copyfile(path, os.path.join("/Users/anon/workspace/retools/src/libbinary/macho/testfiles/section_names", "test_section_name_%s" % new_type))
            # except:
            #     continue
187 | 
def get_macho_filetype(filepath):
    """Return the set of 'filetype' values of all headers in the Mach-O at
    'filepath', or an empty set when the file cannot be opened or parsed."""
    try:
        macho = MachO(filepath)

    except (ValueError, struct.error, IOError):
        return set()

    return set(header.filetype for header in macho.headers)
200 | 
def print_onefile_for_each_filetype(results):
    """Print each path in 'results' that contributes at least one file type
    not seen in an earlier path, together with those new types."""
    seen_filetypes = set()
    for path in results:
        fresh = get_macho_filetype(path) - seen_filetypes
        if not fresh:
            continue

        print("%s %s" % (path, fresh))
        seen_filetypes.update(fresh)
210 | 
def locate_macho_files(root):
    """Walk 'root' and return the paths of all regular files whose first
    four bytes match 0xfeedface or 0xfeedfacf (see is_interesting)."""
    found = []
    for dirpath, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            candidate = os.path.join(dirpath, filename)
            if not os.path.isfile(candidate):
                continue

            if is_interesting(candidate, [0xfeedface, 0xfeedfacf]):
                found.append(candidate)

    return found
220 | 
def locate_fat(root):
    """Walk 'root' and return the paths of all regular files whose first
    four bytes match 0xcafebabe or 0xbebafeca (see is_interesting).

    A progress line is printed every 5000 files visited.
    """
    visited = 0
    found = []
    for dirpath, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            visited += 1
            if visited % 5000 == 0:
                print("Processed %d files ..." % visited)

            candidate = os.path.join(dirpath, filename)
            if not os.path.isfile(candidate):
                continue

            if is_interesting(candidate, [0xcafebabe, 0xbebafeca]):
                found.append(candidate)

    return found
235 | 
def locate_big_endian(root):
    """Walk 'root' and return the paths of all regular files whose first
    four bytes match 0xcefaedfe or 0xcffaedfe (see is_interesting).

    A progress line is printed every 5000 files visited.
    """
    visited = 0
    found = []
    for dirpath, _subdirs, filenames in os.walk(root):
        for filename in filenames:
            visited += 1
            if visited % 5000 == 0:
                print("Processed %d files ..." % visited)

            candidate = os.path.join(dirpath, filename)
            if not os.path.isfile(candidate):
                continue

            if is_interesting(candidate, [0xcefaedfe, 0xcffaedfe]):
                found.append(candidate)

    return found
250 | 
def print_segment_section_names(results):
    """Print one line per section of every Mach-O in 'results':
    "<section type> <segment name> <section name> <file path>".

    Files that cannot be opened or parsed are skipped.
    """
    for filepath in results:
        try:
            macho = MachO(filepath)

        except (ValueError, struct.error, IOError):
            continue

        for header in macho.headers:
            for command in header.commands:
                try:
                    if not command[1].nsects:
                        continue

                    for sect in command[2]:
                        segname = sect.segname.replace("\x00", "")
                        sectname = sect.sectname.replace("\x00", "")
                        sectype = sect_type[sect.flags & 0xff]
                        print("%s %s %s %s" % (sectype, segname, sectname, filepath))

                except AttributeError:
                    # Commands without the expected fields are ignored.
                    pass
278 | 
def get_smallest_macho(files, dest_dir="/Users/anon/fuzzing/afl-2.10b/macho_inputs", limit=10000):
    """Copy the 'limit' smallest files of 'files' into 'dest_dir'.

    Each copy is named after the MD5 hex digest of its source path. Paths
    containing "invalid" or "svn" and files whose size cannot be read are
    skipped; copy failures are ignored (best effort).
    """
    import os
    import hashlib

    files_with_sizes = []
    for path in files:
        if "invalid" in path or "svn" in path:
            continue

        try:
            files_with_sizes.append((os.path.getsize(path), path))

        except OSError:
            continue

    for _size, path in sorted(files_with_sizes)[:limit]:
        # md5 needs bytes; text paths are encoded first.
        digest_input = path if isinstance(path, bytes) else path.encode("utf-8")
        dest_file = os.path.join(dest_dir, hashlib.md5(digest_input).hexdigest())
        try:
            shutil.copyfile(path, dest_file)

        # Only file-system errors are expected here; a bare 'except' would
        # also swallow KeyboardInterrupt and programming errors.
        except EnvironmentError:
            continue
304 | 
# if not os.path.exists("saved.bigendian.paths.db"):
#     print "Saving to disk!"
#     output = open("saved.bigendian.paths.db", "wb")
#     results = locate_big_endian("/")
#     pickle.dump(results, output)
#     print "Found %d fat big endian files" % len(results)

# Build the list of Mach-O paths once and cache it on disk; later runs reuse
# the cache instead of walking the whole file system again.
results = []
if not os.path.exists("saved.macho.paths.db"):
    print("Saving to disk!")
    results = locate_macho_files("/")

    # The cache is only created after the walk succeeded, and the 'with'
    # block closes (and therefore flushes) it before it is read back below.
    with open("saved.macho.paths.db", "wb") as output:
        pickle.dump(results, output)

    print("Found %d macho files" % len(results))

# NOTE: pickle is only safe because this file is written by this very script;
# never point this at a file from an untrusted source.
with open("saved.macho.paths.db", "rb") as cache:
    results = pickle.load(cache)

print("Found %d macho files" % len(results))
get_smallest_macho(results)


--------------------------------------------------------------------------------
/mark_interesting.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Small IDAPython script that finds all the signed comparisons and marks
 3 | them with a color.
 4 | It also finds the sign-extension instructions and marks them as well.
 5 | 
 6 | Author: Agustin Gianni (agustingianni@gmail.com)
 7 | """
 8 | 
 9 | import idautils
10 | import idc
11 | 
12 | from idaapi import *
13 | 
def rgb_to_bgr(color):
    """Convert a 0xRRGGBB color value to 0xBBGGRR.

    Bits above the low 24 are ignored, so the result always fits in 24 bits.
    """
    # Every channel is masked; the red channel used to be left unmasked, so
    # any higher bits of the input leaked into the result.
    r = (color >> 16) & 0xff
    g = (color >> 8) & 0xff
    b = color & 0xff
    return (b << 16) | (g << 8) | r
19 | 
# Highlight colors, written as 0xRRGGBB and passed through rgb_to_bgr before
# being handed to idc.SetColor.
# NOTE: SIGNED_COLOR and INTERESTING_COLOR currently use the same value.
SIGNED_COLOR = rgb_to_bgr(0xFC8B8B)
INTERESTING_COLOR = rgb_to_bgr(0xFC8B8B)
UNSIGNED_COLOR = rgb_to_bgr(0x8BFCB5)
CALL_COLOR = rgb_to_bgr(0xC7F8FF)

# Upper-case mnemonics recognised by the is_* helpers below: conditional jumps
# treated as signed / unsigned comparisons, and other instructions worth a look
# (sign extension, string operations, loops).
signed_ins = ["JL", "JNGE", "JGE", "JNL", "JLE", "JNG", "JG", "JNLE"]
unsigned_ins = ["JB", "JNAE", "JC", "JNB", "JAE", "JNC", "JBE", "JNA", "JA", "JNBE"]
interesting_ins = ["MOVSX", "MOVSXD", "LODS", "STOS", "REP", "LOOP"]
28 | 
def set_instruction_color(ea, color):
    """Set the item color (idc.CIC_ITEM) at address 'ea' to 'color'."""
    idc.SetColor(ea, idc.CIC_ITEM, color)
31 | 
def is_call(mnemonic):
    """Return True if 'mnemonic' is "call", ignoring case."""
    upper = mnemonic.upper()
    return upper == "CALL"
34 | 
def is_signed(mnemonic):
    """Return True if 'mnemonic' (case-insensitive) is listed in signed_ins."""
    upper = mnemonic.upper()
    return upper in signed_ins
37 | 
def is_unsigned(mnemonic):
    """Return True if 'mnemonic' (case-insensitive) is listed in unsigned_ins."""
    upper = mnemonic.upper()
    return upper in unsigned_ins
40 | 
def is_interesting(mnemonic):
    """Return True if 'mnemonic' (case-insensitive) is listed in interesting_ins."""
    upper = mnemonic.upper()
    return upper in interesting_ins
43 | 
from collections import defaultdict
# Per-function hit counters, keyed by function name, for the "signed" and
# "interesting" instruction classes.
freq = {"signed" : defaultdict(int), "interesting" : defaultdict(int)}

# Number of heads visited (code and data alike); not used afterwards.
i = 0
for seg_ea in Segments():
    for head in Heads(seg_ea, SegEnd(seg_ea)):
        i += 1

        # Only instructions are classified and colored.
        if not isCode(GetFlags(head)):
            continue

        mnemonic = GetMnem(head)
        
        # The first matching class wins. Every class gets a color, but only
        # "signed" and "interesting" hits are counted per function.
        if is_unsigned(mnemonic):
            set_instruction_color(head, UNSIGNED_COLOR)
        
        elif is_signed(mnemonic):
            set_instruction_color(head, SIGNED_COLOR)
            freq["signed"][GetFunctionName(head)] += 1

        elif is_interesting(mnemonic):
            set_instruction_color(head, INTERESTING_COLOR)
            freq["interesting"][GetFunctionName(head)] += 1

        elif is_call(mnemonic):
            set_instruction_color(head, CALL_COLOR)

import operator
# Report the "interesting" counters in ascending order of hit count. The
# "signed" counters are collected above but are not printed by this script.
sorted_x = sorted(freq["interesting"].items(), key=operator.itemgetter(1))
for x in sorted_x:
    print "Function '%s' has %d interesting properties" % (x[0], x[1])
75 | 


--------------------------------------------------------------------------------
/minset.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Tool to calculate the minimum set of files that have
  3 | approximately the best coverage.
  4 | 
  5 | The tool needs an input directory that contains the input
  6 | files which were used along with the trace files.
  7 | The name of the trace file must be <input_file>.log. This
  8 | way when we are processing the trace, we can refer to the
  9 | input file.
 10 | 
 11 | Example:
 12 | 
 13 | $ ls /tmp/InputFiles
 14 | -rw-------  1 anon  staff    47K Jan  4 01:16 ffeb4d186925e9538707719e8374d217e1030943.log
 15 | -rw-------  1 anon  staff   8.8K Jan  3 14:05 ffeb4d186925e9538707719e8374d217e1030943
 16 | 
 17 | $ python minset.py -i /tmp/InputFiles -o /tmp/OutputFiles
 18 | Size of the universe: 6257
 19 | Current covered=0 new=4053
 20 | Current covered=4053 new=581
 21 | ...
 22 | Original trace count: 1484
 23 | Min set trace count : 48
 24 | 
 25 | This will copy the input files that cover the most blocks
 26 | from the input directory into the output directory.
 27 | """
 28 | import os
 29 | import re
 30 | import sys
 31 | import struct
 32 | import shutil
 33 | import argparse
 34 | from collections import namedtuple
 35 | 
 36 | TraceFile = namedtuple("TraceFile", [
 37 |     "path",
 38 |     "entries"
 39 | ])
 40 | 
 41 | BasicBlock = namedtuple("BasicBlock", [
 42 |     "start",
 43 |     "size",
 44 |     "id"
 45 | ])
 46 | 
 47 | 
 48 | def load_drcov_trace(filename):
 49 |     """
 50 |     A drcov trace file consists of a preamble with ascii data up to
 51 |     the start of the basic block table. The basic block table can be
 52 |     located by searching for the ascii header:
 53 | 
 54 |         'BB Table: <number> bbs'
 55 | 
 56 |     Following the header, the table consists of N structures:
 57 | 
 58 |         struct __attribute__((packed)) drcov_bb {
 59 |             uint32_t start;
 60 |             uint16_t size;
 61 |             uint16_t id;
 62 |         };    
 63 |     """
 64 |     trace = open(filename, "rb").read()
 65 |     match = re.search(r"BB Table:\s+(\d+)\s+bbs\n", trace)
 66 |     if not match:
 67 |         return TraceFile(filename, set())
 68 | 
 69 |     block_count = int(match.group(1))
 70 | 
 71 |     # An empty trace is possible so bail before tryint to process.
 72 |     if not block_count:
 73 |         return TraceFile(filename, set())
 74 | 
 75 |     # Size of a single basic block entry.
 76 |     entry_size = 8
 77 | 
 78 |     # Extract the basic block table.
 79 |     table = trace[match.end():]
 80 |     if len(table) != entry_size * block_count:
 81 |         print "The size of the table does not match the count of basic blocks."
 82 | 
 83 |     # Unpack the entries into a set to avoid repeats.
 84 |     entries = set()
 85 |     for i in range(0, block_count):
 86 |         block_offset = i * entry_size
 87 |         block_data = table[block_offset:block_offset+entry_size]
 88 |         start, size, id_ = struct.unpack("<LHH", block_data)
 89 |         entries.add(BasicBlock(start, size, id_))
 90 | 
 91 |     return TraceFile(filename, entries)
 92 | 
 93 | 
 94 | parser = argparse.ArgumentParser(description="Coverage Minimum Set")
 95 | parser.add_argument("-i", dest="input_dir")
 96 | parser.add_argument("-o", dest="output_dir")
 97 | 
 98 | args = parser.parse_args()
 99 | if not args.input_dir:
100 |     print "No input directory supplied, use option -i to supply one."
101 |     sys.exit(-1)
102 | 
103 | if not args.output_dir:
104 |     print "No output directory supplied, use option -o to supply one."
105 |     sys.exit(-1)
106 | 
107 | output_dir = os.path.abspath(args.output_dir)
108 | 
109 | # Get all trace files in the traces directory.
110 | input_dir = os.path.abspath(args.input_dir)
111 | traces_paths = filter(lambda fn: fn.endswith(".log"), os.listdir(input_dir))
112 | traces_paths = map(lambda fn: os.path.join(input_dir, fn), traces_paths)
113 | 
114 | # Load the actual traces.
115 | traces = map(load_drcov_trace, traces_paths)
116 | 
# Greedy approximation of the minimum set cover, see
# https://en.wikipedia.org/wiki/Set_cover_problem#Greedy_algorithm.
# set().union(...) is used instead of set.union(*...) so that an input
# directory without any trace yields an empty universe instead of a TypeError.
universe = set().union(*[trace.entries for trace in traces])

# Set of covered blocks.
covered = set()

# List with the actual files that create the min set.
min_set = []

print("Size of the universe: %u" % len(universe))

# Iterate until the 'covered' set equals the 'universe'.
while covered != universe:
    # Get the set that adds the most hits to the 'covered' set.
    subset = max(traces, key=lambda x: len(x.entries - covered))

    print("Current covered=%u new=%u" % (
        len(covered), len(subset.entries - covered)))

    min_set.append(subset)
    covered |= subset.entries

print("Original trace count: %u" % len(traces))
print("Min set trace count : %u" % len(min_set))

# shutil.copy() treats a non-existing destination as a *file* name, so every
# copy would overwrite the previous one; make sure the directory exists.
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

# Get the input file name from the trace file and copy it to the output directory.
for trace in min_set:
    # Only rewrite the file name: a directory component that happens to
    # contain ".log" or "drcov." must not be touched.
    trace_dir, trace_name = os.path.split(trace.path)
    input_name = trace_name.replace(".log", "").replace("drcov.", "")
    shutil.copy(os.path.join(trace_dir, input_name), output_dir)
147 | 


--------------------------------------------------------------------------------
/operan_offset_base.py:
--------------------------------------------------------------------------------
 1 | for seg_ea in Segments():
 2 |     for head in Heads(seg_ea, SegEnd(seg_ea)):
 3 |         if isCode(GetFlags(head)):
 4 |             mnem = GetMnem(head)
 5 |             off = idaapi.get_offbase(head, 1)
 6 |             if mnem == "lea" and (not off in [0, -1, 0xffffffffffffffff, head, 0x5197d]):
 7 |                 # Some of these are wrong, fix them
 8 |                 if off in [0x5e98d, 0x6010d, 0x604ed, 0x61a9d, 0x63c7d, 0x671cd, 0x68afd, \
 9 |                             0x6a88d, 0x6c0ad, 0x6c60d, 0x6cacd, 0x6cf3d, 0x712ed, \
10 |                             0x72c1d, 0x7450d, 0x753dd, 0x7643d, 0x78cbd, 0x78ffd, \
11 |                             0x79a7d, 0x7a56d, 0x7b50d, 0x7dfed, 0x7eb0d, ]:
12 |                     idaapi.set_offset(head, 1, head)
13 | 
14 |                 print "%.8x %d %8x %s" % (head, GetOpType(head, 1), off, GetDisasm(head))
15 | 


--------------------------------------------------------------------------------
/prolog_finder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Find potential ARM procedures prolog.
 3 | 
 4 | The idea is to detect all the common instructions used to build a function prolog, 
 5 | that is instructions like PUSH.
 6 | 
 7 | Coded by Agustin Gianni (agustin.gianni@gmail.com).
 8 | 
 9 | (0xfffffe00, 0x0000b400, eEncodingT1, eSize16)
10 | (0xffffa000, 0xe92d0000, eEncodingT2, eSize32)
11 | (0xffff0fff, 0xf84d0d04, eEncodingT3, eSize32)
12 | (0x0fff0000, 0x092d0000, eEncodingA1, eSize32)
13 | (0x0fff0fff, 0x052d0004, eEncodingA2, eSize32)
14 | """
15 | import idaapi
16 | import idc
17 | import idautils
18 | import struct
19 | 
def find_prologs():
    """
    Print every dword of the image that matches an ARM PUSH encoding.

    The image is scanned from idc.MinEA() to idc.MaxEA() in steps of 4 bytes.
    The (mask, value) pairs are the A1 and A2 entries listed in the module
    docstring. Nothing in the database is modified.
    """
    push_masks = [(0x0fff0000, 0x092d0000), (0x0fff0fff, 0x052d0004)]

    def is_push(opcode):
        return any((mask & opcode) == value for mask, value in push_masks)

    image_start = idc.MinEA()
    image_end = idc.MaxEA()
    image_size = image_end - image_start

    print("Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size))

    for cur_ea in xrange(image_start, image_end, 4):
        # idc.* is spelled out so the function does not depend on the names
        # IDAPython happens to inject into the script namespace.
        if is_push(idc.Dword(cur_ea)):
            print("Found candidate at 0x%.8x -> %s" % (cur_ea, idc.GetDisasm(cur_ea)))
39 | 
def find_push_thumb(mark=False):
    """
    Print every 16-bit word that matches the Thumb PUSH encoding (T1 entry of
    the module docstring) and whose disassembly text contains "DCB".

    The image is scanned from idc.MinEA() to idc.MaxEA() in steps of 2 bytes.

    mark -- when true, call MakeCode() on each candidate and print the
            resulting disassembly.
    """
    def is_push(opcode):
        return (0xfffffe00 & opcode) == 0x0000b400

    image_start = idc.MinEA()
    image_end = idc.MaxEA()
    image_size = image_end - image_start

    print("Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size))

    for cur_ea in xrange(image_start, image_end, 2):
        if not is_push(idc.Word(cur_ea)):
            continue

        # presumably "DCB" restricts the hits to bytes IDA still shows as raw
        # data, i.e. not yet converted to code -- TODO confirm.
        if "DCB" not in idc.GetDisasm(cur_ea):
            continue

        print("Found  candidate at 0x%.8x -> %s" % (cur_ea, idc.GetDisasm(cur_ea)))
        if mark:
            idc.MakeCode(cur_ea)
            print("Marked candidate at 0x%.8x -> %s" % (cur_ea, idc.GetDisasm(cur_ea)))
57 | 
58 | 
def find_nops(mark=False):
    """
    Look for THUMB nops (0xbf00).

    The image is scanned from idc.MinEA() to idc.MaxEA() in steps of 2 bytes
    and every matching word is printed.

    mark -- when true, call MakeCode() on each match.
    """
    def is_nop(opcode):
        return opcode == 0xbf00

    image_start = idc.MinEA()
    image_end = idc.MaxEA()
    image_size = image_end - image_start

    print("Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size))

    for cur_ea in xrange(image_start, image_end, 2):
        if not is_nop(idc.Word(cur_ea)):
            continue

        print("Found candidate at 0x%.8x -> %s" % (cur_ea, idc.GetDisasm(cur_ea)))
        if mark:
            idc.MakeCode(cur_ea)
78 | 
79 | find_push_thumb(mark=False)


--------------------------------------------------------------------------------
/propagate_types.py:
--------------------------------------------------------------------------------
 1 | def GetFunctionDemangledName(xref_addr):
 2 | 	"""
 3 | 	Return the demanlged name of a function at a given address 'xref_addr'.
 4 | 	"""
 5 | 	tmp = GetFunctionName(xref_addr)
 6 | 	name = Demangle(tmp, GetLongPrm(INF_SHORT_DN))
 7 | 	return name if name else tmp
 8 | 
 9 | ea = AskAddr(ScreenEA(), "Give me the address of what you want to propagate")
10 | t = AskStr(GetType(ea), "Type for the data references")
11 | t2 = AskStr(GetType(ea), "Type for the stub references")
12 | 
13 | if t[-1] != ';':
14 | 	t += ';'
15 | 
16 | if t2[-1] != ';':
17 | 	t2 += ';'
18 | 
19 | modified = set()
20 | 
21 | for data_ref in DataRefsTo(ea):
22 | 	if data_ref in modified:
23 | 		continue
24 | 
25 | 	print "Setting type at 0x%.8x -> %s" % (data_ref, t)
26 | 	SetType(data_ref, t)
27 | 	modified.add(data_ref)
28 | 
29 | 	names = set()
30 | 	# Now for each data reference check if there are code references.
31 | 	for code_ref in DataRefsTo(data_ref):
32 | 		if code_ref in modified or code_ref == data_ref:
33 | 			continue
34 | 
35 | 		f = idaapi.get_func(code_ref)
36 | 		if not f:
37 | 			continue
38 | 
39 | 		if f.startEA in modified:
40 | 			continue
41 | 
42 | 		name = GetFunctionDemangledName(f.startEA)
43 | 		if "stub" in name.lower():
44 | 			# If the name of the code reference contains 'stub' then set also the type.
45 | 			print "Setting type at 0x%.8x : %s -> %s" % (f.startEA, name, t2)
46 | 			modified.add(f.startEA)
47 | 			SetType(f.startEA, t2)
48 | 


--------------------------------------------------------------------------------
/references.py:
--------------------------------------------------------------------------------
  1 | from collections import namedtuple
  2 | import idautils
  3 | from idc import GetFunctionName, GetDisasm, GetMnem, GetOpnd, Demangle, GetLongPrm, INF_SHORT_DN
  4 | 
  5 | __author__ = 'anon'
  6 | 
  7 | import idaapi
  8 | 
# Lightweight record types used by the functions below.
# A function address together with its (demangled) name.
FunctionName = namedtuple('FunctionName', ['ea', 'name'])
# One named import as reported to EnumImportNamesCallback.
ImportEntry = namedtuple('ImportEntry', ['ea', 'name', 'ord'])
# Address range of a basic block plus the function object it belongs to.
BasicBlock = namedtuple('BasicBlock', ['start_ea', 'end_ea', 'function'])
# Name fragment to search for and the number of arguments to recover.
FunctionSignature = namedtuple('FunctionSignature', ['name', 'nargs'])
# An instruction address and its disassembly text.
Instruction = namedtuple('Instruction', ['ea', 'string'])
# Text of an argument operand and the instruction that sets it.
FunctionArgument = namedtuple('FunctionArgument', ['argument', 'instruction'])

# Filled by EnumImportNamesCallback while the imports are enumerated.
imports_list = []
# NOTE(review): not referenced anywhere in the visible code.
function_names = []
 18 | 
 19 | 
 20 | def EnumImportNamesCallback(ea, name, ord_):
 21 |     if name:
 22 |         imports_list.append(ImportEntry(ea, name, ord_))
 23 | 
 24 |     return True
 25 | 
 26 | 
 27 | def GetAllImportEntries():
 28 |     for i in xrange(0, idaapi.get_import_module_qty()):
 29 |         name = idaapi.get_import_module_name(i)
 30 |         if not name:
 31 |             pass
 32 | 
 33 |         idaapi.enum_import_names(i, EnumImportNamesCallback)
 34 | 
 35 |     return imports_list
 36 | 
 37 | 
 38 | def GetAllFunctionNames():
 39 |     func_name_list = []
 40 |     for x in idautils.Functions():
 41 |         func_name_list.append(FunctionName(x, GetFunctionDemangledName(x)))
 42 | 
 43 |     return func_name_list
 44 | 
 45 | 
 46 | def GetAddressBasicBlock(ea):
 47 |     f = idaapi.get_func(ea)
 48 |     if not f:
 49 |         raise RuntimeError("No basic block at address %.8x" % ea)
 50 | 
 51 |     for block in idaapi.FlowChart(f):
 52 |         if block.startEA <= ea and block.endEA > ea:
 53 |             return BasicBlock(block.startEA, block.endEA, f)
 54 | 
 55 |     raise RuntimeError("No basic block at address %.8x" % ea)
 56 | 
 57 | def GetInstructions(start_ea, end_ea):
 58 |     ins = []
 59 |     for head in idautils.Heads(start_ea, end_ea):
 60 |         if idaapi.isCode(idaapi.getFlags(head)):
 61 |             ins.append(Instruction(head, GetDisasm(head)))
 62 | 
 63 |     return ins
 64 | 
 65 | def IsArgumentSetter(ea):
 66 |     if GetMnem(ea).lower() == "push":
 67 |         return True
 68 | 
 69 | def GetFunctionArgument(ins):
 70 |     opnd = GetOpnd(ins.ea, 0)
 71 |     return FunctionArgument(opnd, ins)
 72 | 
 73 | def GetFunctionCallArguments(func_sig, xref_addr):
 74 |     args = []
 75 |     bb = GetAddressBasicBlock(xref_addr)
 76 | 
 77 |     instructions = GetInstructions(bb.start_ea, bb.end_ea)
 78 |     instructions = filter(lambda x: x.ea <= xref_addr, instructions)
 79 | 
 80 |     if GetMnem(instructions[-1].ea).lower() not in ["call", "jmp"]:
 81 |         print instructions[-1]
 82 |         raise RuntimeError("Bullshit")
 83 | 
 84 |     n_found_args = 0
 85 |     for ins in reversed(instructions[:-1]):
 86 |         if n_found_args == func_sig.nargs:
 87 |             break
 88 | 
 89 |         if IsArgumentSetter(ins.ea):
 90 |             n_found_args += 1
 91 |             args.append(GetFunctionArgument(ins))
 92 | 
 93 |     return args
 94 | 
 95 | def GetFunctionDemangledName(xref_addr):
 96 |     tmp = GetFunctionName(xref_addr)
 97 |     name = Demangle(tmp, GetLongPrm(INF_SHORT_DN))
 98 |     return name if name else tmp
 99 | 
# The function to look for and the number of arguments to recover per call.
func_sig = FunctionSignature("operator new", 1)

# Functions of the database whose name contains the target; each hit is echoed.
interesting_functions = []
for candidate in GetAllFunctionNames():
    if func_sig.name not in candidate.name:
        continue

    print(candidate.name)
    interesting_functions.append(candidate)

# Matching imports are added silently.
interesting_functions.extend(
    entry for entry in GetAllImportEntries() if func_sig.name in entry.name)


InterestingResult = namedtuple('InterestingResult', ['caller_name', 'callee_name', 'call_address', 'arguments'])

# One InterestingResult per code reference to any of the interesting functions.
results = []
for function in interesting_functions:
    for call_address in idautils.CodeRefsTo(function.ea, 0):
        arguments = GetFunctionCallArguments(func_sig, call_address)
        caller_name = GetFunctionDemangledName(call_address)
        results.append(
            InterestingResult(caller_name, function.name, call_address, arguments))
131 | 
def IDAArgumentToSize(arg):
    """
    Convert the text of an operand into an integer.

    Accepts hexadecimal numbers with an 'h'/'H' suffix ("10h", "0FFh") and
    plain decimal numbers ("42"). Anything else (registers, symbols, empty
    text) yields -1 instead of raising.
    """
    if not arg:
        return -1

    is_hex = arg[-1] in "hH"
    digits = arg[:-1] if is_hex else arg

    # A hexadecimal literal has to start with a digit (after an optional
    # sign). Without this check register names such as "ah" or "bh" would be
    # read as the numbers 0xa and 0xb.
    if is_hex and not digits.lstrip("+-")[:1].isdigit():
        return -1

    try:
        return int(digits, 16 if is_hex else 10)

    except ValueError:
        return -1
147 | 
def sort_func(element):
    """
    Sort key for a result: the numeric value of its first argument.

    Results without any recovered argument sort like unparsable ones (-1)
    instead of raising IndexError.
    """
    if not element.arguments:
        return -1

    return IDAArgumentToSize(element.arguments[0].argument)
151 | 
# Sort by alloc size.
results.sort(key=sort_func, reverse=True)
for result in results:
    # A call for which no argument setter was found has no size to show;
    # report it as -1, the same value used for unparsable arguments.
    if result.arguments:
        alloc_size = IDAArgumentToSize(result.arguments[0].argument)
    else:
        alloc_size = -1

    print("// Caller function %s " % result.caller_name)
    print("// Call address    0x%.8x" % result.call_address)
    print("// Alloc size      0x%.8x" % alloc_size)
    print("%s(%s);" % (result.callee_name, ",".join(map(lambda x: '"%s"' % x.argument, result.arguments))))
    print("")
160 | 


--------------------------------------------------------------------------------
/renamer.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Rename files in a directory to its sha1 sum plus an extension.
 3 | """
 4 | import os
 5 | import sys
 6 | import hashlib
 7 | 
 8 | 
 9 | def sha1_file(fn):
10 |     f = open(fn, 'rb')
11 |     r = hashlib.sha1(f.read()).hexdigest()
12 |     f.close()
13 |     return r
14 | 
15 | 
# Usage: renamer.py <directory> <extension>
if len(sys.argv) < 3:
    print("Usage: %s <directory> <extension>" % sys.argv[0])
    sys.exit(1)

directory = os.path.abspath(sys.argv[1])
extension = sys.argv[2]

print("Doing directory `%s`" % directory)

for fn in os.listdir(directory):
    if fn == ".DS_Store":
        continue

    orig_name = os.path.join(directory, fn)

    # Only regular files can be hashed; sub-directories are left untouched.
    if not os.path.isfile(orig_name):
        continue

    hexh = sha1_file(orig_name) + extension
    new_name = os.path.join(directory, hexh)

    print('%s -> %s' % (orig_name, new_name))

    os.rename(orig_name, new_name)
32 | 


--------------------------------------------------------------------------------
/simple_jack.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Simple Jack symbol porting tool by goose (agustingianni@gmail.com).
  3 | 
  4 | This tool exists because for some reason diaphora does not assign
  5 | enough priority to "perfect" matches. The main idea is to get
  6 | as many symbols right so later on you can run diaphora and iterate.
  7 | 
  8 | The drill is simple, hash the bytes of all the functions in a db
  9 | and save them to a file. We will call this the 'primary' database.
 10 | Do the same for another DB (we call this the 'secondary' database)
 11 | and compare both. We only import identical matches and the only
 12 | info we import from the 'primary' is the function name.
 13 | 
 14 | This script has two modes:
 15 | 
 16 |     SCRIPT_MODE_DUMP:
 17 | 
 18 |         Used to create the primary and secondary database.
 19 |         You should run the script twice, first in the primary
 20 |         binary, which has your symbolicated binary, and second
 21 |         in the secondary binary, that is the one that will
 22 |         receive the information imported from the primary.
 23 | 
 24 |     SCRIPT_MODE_DIFF:
 25 | 
 26 |         Used once you've generated both the primary and secondary
 27 |         databases. It will read them both and perform the diffing.
 28 |         Once it finds matches, it will import the function name
 29 |         from the primary into the secondary.
 30 | 
 31 |         This mode is destructive, that is, it will change your
 32 |         IDB. Only run it if you are positive that you like the
 33 |         results.
 34 | """
 35 | 
 36 | import pickle
 37 | import hashlib
 38 | 
 39 | import idaapi
 40 | from idc import *
 41 | from idaapi import *
 42 | from idautils import *
 43 | 
# Available modes, pick one.
# DUMP hashes the functions of the current IDB and saves them to a file,
# DIFF compares two saved files and imports the matching names.
SCRIPT_MODE_DUMP = 0
SCRIPT_MODE_DIFF = 1

# IMPORTANT: Manually set this to the mode you need; the dispatch at the
# bottom of the script reads this constant to decide what to run.
CURRENT_SCRIPT_MODE = SCRIPT_MODE_DUMP

# Set this to true if you want to see some debugging output.
GLOBAL_DEBUG = False
 53 | 
 54 | 
 55 | def hash_bytes(bytes):
 56 |     return hashlib.md5(bytes).hexdigest()
 57 | 
 58 | 
 59 | def log(msg):
 60 |     Message("[%s] %s\n" % (time.asctime(), msg))
 61 | 
 62 | 
 63 | def load_db(db_name):
 64 |     db = None
 65 |     log("Loading DB from %s" % db_name)
 66 |     with open(db_name, 'rb') as input:
 67 |         db = pickle.load(input)
 68 | 
 69 |     return db
 70 | 
 71 | 
 72 | def save_db(db, db_name):
 73 |     log("Saving DB to %s" % db_name)
 74 |     with open(db_name, 'wb') as output:
 75 |         pickle.dump(db, output, pickle.HIGHEST_PROTOCOL)
 76 | 
 77 | 
 78 | def build_db():
 79 |     collision_keys = set()
 80 | 
 81 |     func_list = []
 82 |     segments = list(Segments())
 83 |     for seg_ea in segments:
 84 |         func_list.extend(list(Functions(seg_ea, SegEnd(seg_ea))))
 85 | 
 86 |     total_funcs = len(func_list)
 87 | 
 88 |     log("Total number of functions to export: %u" % total_funcs)
 89 | 
 90 |     functions_db = {}
 91 |     for f in func_list:
 92 |         # Get the function for this address.
 93 |         func = get_func(f)
 94 |         if not func:
 95 |             log("Cannot get a function object for 0x%x" % f)
 96 |             continue
 97 | 
 98 |         # Get the number of instructions.
 99 |         n_ins = 0
100 |         flow = FlowChart(func)
101 |         for block in flow:
102 |             n_ins += len(list(Heads(block.startEA, block.endEA)))
103 | 
104 |         # Get the name of the function without demangling.
105 |         name = GetFunctionName(f)
106 | 
107 |         # Calculate the size of the function.
108 |         size = func.endEA - func.startEA
109 | 
110 |         # Do some sanity checks.
111 |         assert (size == func.size()), "Invalid size."
112 |         assert (func.startEA < func.endEA), "Invalid startEA / endEA values."
113 | 
114 |         # Get the hash of the function.
115 |         ins_hash = hash_bytes(idc.GetManyBytes(func.startEA, size))
116 | 
117 |         # Check if we collide with another entry.
118 |         if functions_db.has_key(ins_hash):
119 |             log("Function @ 0x%.8x collides with function @ 0x%.8x" %
120 |                 (func.startEA, functions_db[ins_hash][2]))
121 | 
122 |             # Keep track of the collision.
123 |             collision_keys.add(ins_hash)
124 |             continue
125 | 
126 |         # Create an entry in the DB.
127 |         functions_db[ins_hash] = (name, n_ins, func.startEA)
128 | 
129 |         if GLOBAL_DEBUG:
130 |             log("Function name:%s start:0x%.8x end:0x%.8x size:%u n_ins:%u hash:%s" %
131 |                 (name, func.startEA, func.endEA, size, n_ins, ins_hash))
132 | 
133 |     # Delete the collision otherwise we may match functions incorrectly.
134 |     for collision_key in collision_keys:
135 |         del functions_db[collision_key]
136 | 
137 |     return functions_db
138 | 
139 | 
def do_diff():
    """
    Ask for the primary and secondary db files, then rename every function of
    the current IDB whose hash is present in both with the primary's name.

    Entries whose names already agree are skipped. Returns False when a file
    dialog is cancelled, None otherwise.
    """
    db_paths = []
    for default_name, prompt in (
            ("primary.db", "Select the primary db file."),
            ("secondary.db", "Select the secondary db file.")):
        chosen = AskFile(0, default_name, prompt)
        if chosen is None:
            log("No file selected, exiting")
            return False

        db_paths.append(chosen)

    # Load the databases
    primary_db = load_db(db_paths[0])
    secondary_db = load_db(db_paths[1])

    log("Diffing ...")

    # Each value is the tuple stored by build_db(): (name, n_ins, startEA).
    matches = 0
    for func_hash, primary_val in primary_db.iteritems():
        # Hashes only present in the primary cannot be matched.
        if func_hash not in secondary_db:
            continue

        secondary_val = secondary_db[func_hash]
        function_name_new = primary_val[0]
        function_name_old = secondary_val[0]

        # Only match functions with a different name.
        if function_name_new == function_name_old:
            continue

        function_ea = secondary_val[2]
        log("Function @ 0x%.8x -> From '%s' to '%s'" %
            (function_ea, function_name_old, function_name_new))

        # Set the secondary function name.
        renamed = MakeNameEx(function_ea, function_name_new, SN_NOWARN | SN_NOCHECK)
        if not renamed:
            log("Error setting function name to '%s'" % (function_name_new))

        matches += 1

    log("Number of matches: %u" % matches)
188 | 
189 | 
def do_save():
    """
    Ask where to store the db, build it for the current IDB and save it.

    Returns False when the file dialog is cancelled, None otherwise.
    """
    db_path = AskFile(1, "*.db", "Select the file to store the db.")
    if db_path is not None:
        # Build the db for the current IDB and write it to disk.
        db = build_db()
        log("Number of entries in the DB: %u" % len(db))
        save_db(db, db_path)
        return None

    log("No file selected, exiting")
    return False
203 | 
204 | 
# Run the handler that belongs to the configured mode.
_mode_handlers = {
    SCRIPT_MODE_DUMP: do_save,
    SCRIPT_MODE_DIFF: do_diff,
}

_mode_handler = _mode_handlers.get(CURRENT_SCRIPT_MODE)
if _mode_handler is None:
    log("Invalid script mode")

else:
    _mode_handler()
213 | 


--------------------------------------------------------------------------------
/string_finder.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility to find all the strings inside an ill formed IDA Database.
 3 | The script does an exhaustive search of all the streams of ASCII characters
 4 | ending with a 0x00. Then we verify that the stream contains at least one
 5 | english word. If it does, then we define it as a string.
 6 | 
 7 | Coded by Agustin Gianni (agustin.gianni@gmail.com).
 8 | """
 9 | import idaapi
10 | import idc
11 | import idautils
12 | import string
13 | 
14 | try:
15 |     import enchant
16 | except ImportError:
17 |     print "You need to install pyenchant to use method_1"
18 | 
def is_printable(input_char):
    """
    Return True when 'input_char' is found in string.printable.
    """
    printable_chars = string.printable
    return input_char in printable_chars
21 | 
def method_1():
    """
    This method does not work very well so far.

    Scan the whole image byte by byte for runs of printable characters
    terminated by a NUL byte. A run is printed (with '-' and '_' replaced by
    spaces) when it contains more than one word that is longer than two
    characters, does not start with a digit and passes the en_US dictionary
    check. Requires pyenchant.
    """
    dictionary = enchant.Dict("en_US")

    # Get the image base of the database.
    image_start = idc.MinEA()
    image_end = idc.MaxEA()
    image_size = image_end - image_start

    print("Analyzing image from %.8x-%.8x of size %.8x" % (image_start, image_end, image_size))

    cur_string = ""
    for cur_ea in xrange(image_start, image_end):
        byte_ = chr(idc.Byte(cur_ea))
        if is_printable(byte_):
            cur_string += byte_
            continue

        # If this is a terminating byte, check if the collected bytes form a string.
        if byte_ == '\x00' and len(cur_string):
            cur_string = cur_string.replace("-", " ").replace("_", " ")

            nwords = 0
            for word in cur_string.split():
                if len(word) > 2 and dictionary.check(word) and word[0] not in string.digits:
                    nwords += 1
                    if nwords > 1:
                        print("0x%.8x : %s" % (cur_ea, cur_string))
                        break

        # Any non-printable byte ends the current run. Without this reset,
        # printable runs separated by binary junk were glued into one string.
        cur_string = ""
56 | 
def method_2():
    """
    Simple way to convert a table of strings into strings on IDA.

    Starting at an address asked from the user, every run of printable
    characters that is directly followed by a NUL byte is turned into a
    string with MakeStr(). The scan stops at the first NUL byte that has no
    printable character directly before it.
    """
    start = idc.AskAddr(idc.ScreenEA(), "Where do I start looking for strings?")
    if start == idc.BADADDR:
        # The dialog was cancelled or the input was not an address.
        print("No start address supplied.")
        return

    end = idc.MaxEA()

    cur_string = ""
    for cur_ea in xrange(start, end):
        byte_ = chr(idc.Byte(cur_ea))
        if is_printable(byte_):
            cur_string += byte_
            continue

        if byte_ != '\x00':
            # Binary junk: drop the partial run. Otherwise the start address
            # computed below (cur_ea - len(cur_string)) would be wrong,
            # because the collected characters are no longer contiguous.
            cur_string = ""
            continue

        # If this is a terminating byte, check if the collected bytes form a string.
        if not len(cur_string):
            print("Last string at 0x%.8x" % (cur_ea))
            break

        string_start = cur_ea - len(cur_string)
        print("0x%.8x : %s" % (string_start, cur_string))
        idc.MakeStr(string_start, cur_ea + 1)
        cur_string = ""
83 | 
84 | method_2()
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 


--------------------------------------------------------------------------------
/struct_hint.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Stupid tool to infer what's the underlying structure used by a function.
  3 | Highly heuristic. Don't trust it blindly, just try to use what it
  4 | gives you and work from that.
  5 | """
  6 | from idautils import *
  7 | from idaapi import get_func
  8 | 
  9 | def FunctionInstructionsBlocks(function):
 10 |     return filter(lambda x: isCode(GetFlags(x)), list(Heads(function.startEA, function.endEA)))
 11 | 
 12 | qualifier2size = { "byte" : 1, "word" : 2, "dword" : 4, "qword" : 8, "xmmword" : 8 }
 13 | def GetQualifierSize(qualifier):
 14 |     return qualifier2size[qualifier]
 15 | 
 16 | reg128 = ["xmm0", "xmm1", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9"]
 17 | reg64 = ["rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rip"]
 18 | reg32 = ["eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", "eip", "r8d", "r9d", "r10d", "r11d", "r12d", "r13d", "r14d", "r15d"]
 19 | reg16 = ["ax", "bx", "cx", "dx", "si", "di", "bp", "sp", "r8w", "r9w", "r10w", "r11w", "r12w", "r13w", "r14w", "r15w"]
 20 | reg8 = ["ah", "al", "bh", "bl", "ch", "cl", "dh", "dl", "sil", "dil", "bpl", "spl", "r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b"]
 21 | 
 22 | def GetRegisterSize(register):
 23 |     if register in reg128:
 24 |         return 16
 25 |     elif register in reg64:
 26 |         return 8
 27 |     elif register in reg32:
 28 |         return 4
 29 |     elif register in reg16:
 30 |         return 2
 31 |     elif register in reg8:
 32 |         return 1
 33 | 
 34 |     raise Exception("Invalid register name: %s" % register)
 35 | 
 36 | def GuessSize(ea):
 37 |     """
 38 |     Fuck you IDA.
 39 |     """
 40 |     if GetOpType(ea, 0) == o_displ and "ptr" in GetOpnd(ea, 0):
 41 |         return GetQualifierSize(GetOpnd(ea, 0).split()[0])
 42 | 
 43 |     if GetOpType(ea, 1) == o_displ and "ptr" in GetOpnd(ea, 1):
 44 |         return GetQualifierSize(GetOpnd(ea, 1).split()[0])
 45 | 
 46 |     if GetOpType(ea, 0) == o_reg:
 47 |         return GetRegisterSize(GetOpnd(ea, 0))
 48 | 
 49 |     if GetOpType(ea, 1) == o_reg:
 50 |         return GetRegisterSize(GetOpnd(ea, 1))
 51 | 
 52 |     raise Exception("Cannot guess the size of ins: '%s'" % GetDisasm(ea))
 53 | 
 54 | ea = ScreenEA()
 55 | cur_func = get_func(ea)
 56 | 
 57 | base_reg = AskStr("r15", "What's the register used as the base for the struct access?.").lower()
 58 | size = AskLong(0, "Enter the size of the structure, leave 0 if unknown.")
 59 | 
 60 | offsets = []
 61 | 
 62 | for ea in FunctionInstructionsBlocks(cur_func):
 63 |     # Filter instructions that do not touch our base register.
 64 |     dis = GetDisasm(ea)
 65 |     if not base_reg in dis.lower():
 66 |         continue
 67 | 
 68 |     if GetOpType(ea, 0) == o_displ and base_reg in GetOpnd(ea, 0).lower():
 69 |         offsets.append((GetOperandValue(ea, 0), GuessSize(ea), ("0x%.8x: " % ea) + GetDisasm(ea)))
 70 | 
 71 |     if GetOpType(ea, 1) == o_displ and base_reg in GetOpnd(ea, 1).lower():
 72 |         offsets.append((GetOperandValue(ea, 1), GuessSize(ea), ("0x%.8x: " % ea) + GetDisasm(ea)))
 73 | 
 74 | 
 75 | size2type = {1 : "uint8_t", 2 : "uint16_t", 4 : "uint32_t", 8 : "uint64_t", 16 : "__m128"}
 76 | def GuessField(i, offset, size):
 77 |     if size in size2type.keys():
 78 |         return "%-8s fld_%d;" % (size2type[size], i)
 79 | 
 80 | 
 81 |     return "%-8s pad_%d[%d];" % ("uint8_t", i, size)
 82 | 
 83 | 
 84 | def MakeUnique(offsets):
 85 |     unique = []
 86 | 
 87 |     prev_o = -1
 88 |     prev_s = -1
 89 | 
 90 |     for a in offsets:
 91 |         if prev_o == -1:
 92 |             unique.append(a)
 93 |             prev_o = a[0]
 94 |             prev_s = a[1]
 95 |             continue
 96 | 
 97 |         if prev_o == a[0] and prev_s != a[1]:
 98 |             raise Exception("Conflict found at offset %.8x with sizes %d and %d" % (prev_o, prev_s, a[1]))
 99 | 
100 |         elif prev_o != a[0]:
101 |             unique.append(a)
102 | 
103 |         prev_o = a[0]
104 |         prev_s = a[1]
105 | 
106 |     return unique
107 | 
def _print_fields(entries):
    """
    Print one member line per entry of 'entries', with a padding member in
    front of every entry that does not start where the previous one ended.

    Returns the index to use for the next member.
    """
    i = 0
    cur_offset = 0
    for a in entries:
        # Only a real gap needs padding; an overlapping access used to
        # produce a padding array with a negative length.
        if cur_offset < a[0]:
            print("    %-20s // off=%.2xh-%.2xh reason=padding" % (GuessField(i, cur_offset, a[0] - cur_offset), cur_offset, a[0]))
            i += 1

        cur_offset = a[0] + a[1]
        print("    %-20s // off=%.2xh-%.2xh reason=%s" % (GuessField(i, a[0], a[1]), a[0], a[0] + a[1], a[2]))
        i += 1

    return i


offsets = sorted(offsets, key=lambda tup: tup[0])

# End of the access with the highest offset, or 0 when no access was found.
inferred_size = offsets[-1][0] + offsets[-1][1] if offsets else 0

try:
    print("// User size: 0x%.4x" % size)
    print("// Inferred size: 0x%.4x" % inferred_size)
    print("struct UnknownStructure {")

    i = _print_fields(MakeUnique(offsets))

    # Check if the user hinted the final size and pad it if needed. A hint
    # smaller than the inferred size is ignored instead of producing a
    # padding array with a negative length.
    if size > inferred_size:
        rem_size = size - inferred_size
        print("    %-20s // off=%.2xh-%.2xh reason=padding" % (GuessField(i, inferred_size, rem_size), inferred_size, inferred_size + rem_size))

    print("};")
except Exception as e:
    # Show why the structure could not be built, then dump every access
    # (without removing duplicates) to help sorting out the conflict by hand.
    print("DEBUG: %s" % e)
    _print_fields(offsets)
143 | 


--------------------------------------------------------------------------------