├── BinaryInfoExtractor.py └── README.md /BinaryInfoExtractor.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Created on 2014-12-2 3 | 4 | @author: M.R. Farhadi 5 | ''' 6 | 7 | import idaapi 8 | import idautils 9 | import idc 10 | from sets import Set 11 | 12 | 13 | def block_split(output_file, startEA, endEA): 14 | curName = GetFunctionName(startEA); 15 | dem = idc.Demangle(curName, idc.GetLongPrm(INF_SHORT_DN)); 16 | if dem != None: 17 | curName = dem; 18 | 19 | first=startEA 20 | h = idautils.Heads(startEA, endEA) 21 | for i in h: 22 | mnem = idc.GetMnem(i) 23 | if mnem == "call" and i != endEA: 24 | first=idc.NextHead(i, endEA+1) 25 | 26 | # end of block_split 27 | #------------------------------------------------------------------------------------------------------------------------ 28 | 29 | def function_extract(output_file, func, cg_adjmat, funcs_id, callees, asm_filename): 30 | func_name = GetFunctionName(func) 31 | function_start_phrase = func_name + " proc near" 32 | function_end_phrase = func_name + " endp" 33 | 34 | print >> output_file, "+++++++++++++++++++++++++++++" 35 | print >> output_file, "Function Name: %s" % (func_name) 36 | print >> output_file, " Function ID: %s" % (funcs_id[func_name]) 37 | func_asm_start_address = get_line_number(function_start_phrase, asm_filename) 38 | func_asm_end_address = get_line_number(function_end_phrase, asm_filename) 39 | print >> output_file, " ASM File Starting Address: %#s" % (func_asm_start_address) 40 | print >> output_file, " ASM File Ending Address: %#s" % (func_asm_end_address) 41 | print >> output_file, " Binary File Starting Address: %#x" % (func) 42 | print >> output_file, " Binary File Ending Address: %#x" % (FindFuncEnd(func)) 43 | print >> output_file, "" 44 | print >> output_file, " Caller Functions:" 45 | 46 | for ref_ea in CodeRefsTo(func, 0): 47 | caller_name = GetFunctionName(ref_ea) 48 | callees[caller_name] = callees.get(caller_name, Set()) 
#add the functions from "CodesRefsTo" to a dictionary for extracting CG and CG adjacency Matrix 49 | callees[caller_name].add(func_name) 50 | print >> output_file, " %s" % (caller_name) 51 | 52 | # end of function_extract 53 | #------------------------------------------------------------------------------------------------------------------------ 54 | 55 | def cg_extract(output_file, cg_adjmat, funcs_id, callees, func_num): 56 | functions = callees.keys() 57 | 58 | for key in functions: 59 | cg_row =[0]*func_num 60 | print >> output_file, "key: %s " % (key) 61 | if callees.has_key(key): 62 | for calling in callees[key]: 63 | cg_row[funcs_id[calling]] = 1 64 | print >> output_file, "key: %s " % (key) 65 | print >> output_file,"cg_row: ", cg_row 66 | if key in funcs_id: 67 | cg_adjmat[funcs_id[key]].append(cg_row) 68 | 69 | print >> output_file, "CG Adjacency Matrix:\n" 70 | cnt = 0 71 | for cg_row in cg_adjmat: 72 | print >> output_file, "Function ID [%d]: " %(cnt) , cg_row 73 | cnt += 1 74 | 75 | # end of cg_extract 76 | #------------------------------------------------------------------------------------------------------------------------ 77 | 78 | def BB_extract(output_file, func, asmplus_filename): 79 | cnt = 0 80 | f = idaapi.FlowChart(idaapi.get_func(func)) 81 | cfg_adjmat = [] 82 | 83 | for block in f: 84 | cfg_row =[0]*f.size 85 | print >> output_file, "" 86 | print >> output_file, " Basic Block:" 87 | block_split(output_file, block.startEA, block.endEA) 88 | print >> output_file, " BB_ID: [%d]" % (block.id) 89 | 90 | bb_asm_start_address = "{0:x}".format(block.startEA) 91 | bb_asm_end_address = "{0:x}".format(block.endEA) 92 | 93 | print >> output_file, " ASM File Starting Address: %#s" % (get_line_number(bb_asm_start_address.upper(), asmplus_filename)) 94 | print >> output_file, " ASM File Ending Address: %#s" % (get_line_number(bb_asm_end_address.upper(), asmplus_filename) - 1 ) 95 | 96 | print >> output_file, " Binary File Starting Address: %#x" % 
(block.startEA) 97 | print >> output_file, " Binary File Ending Address: %#x" % (block.endEA) 98 | 99 | print >> output_file, " Basic Block Successors:" 100 | 101 | for succ_block in block.succs(): 102 | cfg_row[succ_block.id] = 1 103 | print >> output_file, " Starting Address: %x - Ending Address: %x - BB_ID: [%d]" % (succ_block.startEA, succ_block.endEA, succ_block.id) 104 | #print >> output_file, "Basic Block predecessors:" 105 | #for pred_block in block.preds(): 106 | # print >> output_file, "Starting Address: %x - Ending Address: %x BB_ID:[%d]:" % (pred_block.startEA, pred_block.endEA, pred_block.id) 107 | cfg_adjmat.append(cfg_row) 108 | print >> output_file, "-----------------------------" 109 | 110 | print >> output_file, "CFG Adjacency Matrix for Function: %s\n" % (GetFunctionName(func)) 111 | for cfg_row in cfg_adjmat: 112 | print >> output_file, "BB_ID [%d]: " %(cnt), cfg_row 113 | cnt += 1 114 | print >> output_file, "\n" 115 | 116 | # end of BB_extract 117 | #------------------------------------------------------------------------------------------------------------------------ 118 | 119 | def get_line_number(phrase, file_name): 120 | with open(file_name) as f: 121 | for i, line in enumerate(f, 1): 122 | if phrase in line: 123 | return i 124 | 125 | # end of get_line_number 126 | #------------------------------------------------------------------------------------------------------------------------ 127 | 128 | def controller(): 129 | funcs_id = dict() # to store functions and their IDs 130 | callees = dict() 131 | func_num = 0 132 | func_id = 0 133 | cg_adjmat = [] 134 | info_filename = idc.AskFile(1, "*.*", "Extract Binary File Info") 135 | 136 | basename = idc.GetInputFile() 137 | info_filename = basename + ".info" 138 | asm_filename = basename + ".asm" 139 | asmplus_filename = basename + ".asmplus" 140 | idc.GenerateFile(idc.OFILE_ASM, basename + ".asm", 0, idc.BADADDR, 0) 141 | idc.GenerateFile(idc.OFILE_LST, basename + ".asmplus", 0, idc.BADADDR, 
0) 142 | 143 | output_file = open(info_filename,'w') 144 | asm_file = open(asm_filename,'r') 145 | asmplus_file = open(asm_filename,'r') 146 | 147 | funcs = idautils.Functions() 148 | funcs_iterator = idautils.Functions() 149 | 150 | # scan all functions to extract number of functions and add them to the funcs_id 151 | for i in funcs_iterator: 152 | func_name = GetFunctionName(i) 153 | funcs_id.update({func_name:func_id}) 154 | func_num += 1 155 | func_id += 1 156 | cg_adjmat.append([]) 157 | 158 | for f in funcs: 159 | func_name = GetFunctionName(f) 160 | function_extract(output_file, f, cg_adjmat, funcs_id, callees, asm_filename) # extract functions data 161 | BB_extract(output_file, f, asmplus_filename) # extract basic blocks data, CFG and CFG adjacency matrices 162 | 163 | cg_extract(output_file, cg_adjmat, funcs_id, callees, func_num) # extract CG and CG adjacency matrix 164 | 165 | 166 | # end of controller 167 | #------------------------------------------------------------------------------------------------------------------------ 168 | 169 | q = None 170 | f = None 171 | idc.Wait() 172 | controller() 173 | 174 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IDA_Plugin 2 | 3 | The script can be run in two ways: 4 | 5 | 1- Open IDA with a binary file and press "ALT+F7", then choose the script and click submit, IDA also asks for a text file for output, you may choose an empty text file but the output will not be written there. 6 | 7 | 2- Use IDA command line as following: 8 | idaq -A -S[scriptPath] [binaryPath] 9 | 10 | (For more info on IDA command line switches: https://www.hex-rays.com/products/ida/support/idadoc/417.shtml) 11 | 12 | The script will generate 3 files: 13 | 14 | 1. BinayFileName.text.asm --> The ordinary asm file 15 | 2. BinaryFileName.text.info --> Script output file 16 | 3. 
BinaryFileName.text.asmplus --> assembly code file + segment information to be used for address mapping 17 | 18 | The first file is the ordinary assembly file generated by IDA Pro. 19 | 20 | The second file is the script output file. 21 | 22 | The last one is an assembly code file with assembly instruction segment information to be used for address mapping. 23 | 24 | For each file, this script extracts all functions and prints the CG adjacency matrix. 25 | 26 | Also, for each function, all basic blocks are extracted as well as CFG adjacency matrices. 27 | 28 | The script output structure is: 29 | 30 | Function Name (First Function): 31 | 32 | Function ID 33 | 34 | ASM File Starting Address 35 | 36 | ASM File Ending Address 37 | 38 | Binary File Starting Address 39 | 40 | Binary File Ending Address 41 | 42 | Caller Functions 43 | 44 | 45 | Basic Block 46 | 47 | BB_ID 48 | 49 | ASM File Starting Address 50 | 51 | ASM File Ending Address 52 | 53 | Binary File Starting Address 54 | 55 | Binary File Ending Address 56 | 57 | Basic Block Successors 58 | 59 | . 60 | . 61 | . 62 | (ALL BASIC BLOCKS) 63 | 64 | CFG Adjacency Matrix 65 | 66 | 67 | Function Name: (Next Function) 68 | . 69 | . 70 | . 71 | CG adjacency Matrix 72 | --------------------------------------------------------------------------------