├── 0.IDA ├── CoverageAll.py └── GenCoverageInfo.py ├── 1.Generation └── GenBpFiles.py ├── 2.Coverage └── Coverage.py ├── 3.Analyze └── Analyze.py ├── README.md └── Utilities ├── BBminus.py ├── Download.py ├── ResAnd.py └── ResData.py /0.IDA/CoverageAll.py: -------------------------------------------------------------------------------- 1 | import os 2 | from subprocess import call 3 | 4 | idaPro = 'C:\\Program Files\\IDA 6.8\\idaq.exe' 5 | codeCov = 'c:\\Work\\FF\\GenCoverageInfo.py' 6 | binDir = "C:\\Work\\FF\\bin\\" 7 | 8 | for root, subFolder, files in os.walk(binDir): 9 | for item in files: 10 | fname = os.path.join(binDir, root, item) 11 | f=open(fname, "rb") 12 | if f.read(2) == 'MZ': 13 | print "Analysing '%s'" % fname 14 | #cmdLine = '"%s" -S"%s" -A "%s"' % (idaPro, codeCov, fname) 15 | #print cmdLine 16 | #os.system(cmdLine) 17 | call([idaPro, '-S"' + codeCov + '"', '-A', fname]) 18 | f.close() -------------------------------------------------------------------------------- /0.IDA/GenCoverageInfo.py: -------------------------------------------------------------------------------- 1 | from idautils import * 2 | from idaapi import * 3 | import os 4 | 5 | autoWait() 6 | 7 | baseDir = ".\" 8 | 9 | if not os.path.exists(baseDir + "KavalAntsBB"): 10 | os.makedirs(baseDir + "KavalAntsBB") 11 | 12 | filename = idaapi.get_root_filename().lower() 13 | base = idaapi.get_imagebase() 14 | allBlocks = {} 15 | BBcount = 0 16 | Fcount = 0 17 | 18 | file = open(baseDir + "KavalAntsBB/" + filename + ".bb", 'w') 19 | file.write(filename) 20 | for segment_ea in Segments(): 21 | segment = idaapi.getseg(segment_ea) 22 | if segment.perm & idaapi.SEGPERM_EXEC == 0: 23 | continue 24 | 25 | for location in Functions(SegStart(segment.startEA), SegEnd(segment.startEA)): 26 | Fcount += 1 27 | blocks = idaapi.FlowChart(idaapi.get_func(location)) 28 | for block in blocks: 29 | BBcount += 1 30 | if block.startEA not in allBlocks: 31 | if GetMnem(block.startEA) == "": 32 | print "Skipping %08X because this is not code" % (block.startEA) 33 | print " " + GetDisasm(block.startEA) 34 | break 35 | line = "%08X|%08X|%02X" % ((block.startEA - base), (idaapi.get_fileregion_offset(block.startEA)), (idaapi.get_byte(block.startEA))) 36 | file.write("\n" + line) 37 | allBlocks[block.startEA] = True 38 | file.close() 39 | print "Discovered %d basic blocks in %d functions" % (BBcount, Fcount) 40 | qexit() -------------------------------------------------------------------------------- /1.Generation/GenBpFiles.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import getopt 4 | import sys 5 | 6 | 7 | baseExecDir = ".\\" 8 | baseBbDir = ".\\KavalAntsBB" 9 | 10 | 11 | 12 | #Conf 13 | def help(): 14 | print "Possible arguments: GenBpFiles.py [-h] [-e DIR] [-b DIR]" 15 | print " -h Prints this message to you" 16 | print " -e DIR sets the location where the generator looks for exe and dll files" 17 | print " -b DIR set the location where generator looks for breakpoint files" 18 | 19 | 20 | try: 21 | opts, args = getopt.getopt(sys.argv[1:], "he:b:", ["help"]) 22 | except: 23 | help() 24 | sys.exit() 25 | for opt, arg in opts: 26 | if opt in("-h"): 27 | help() 28 | sys.exit() 29 | if opt in("-e"): 30 | baseExecDir = arg 31 | print arg 32 | if opt in("-b"): 33 | baseBbDir = arg 34 | 35 | 36 | #Load BB files 37 | print "Loading all BB files" 38 | bbFiles = {} 39 | confFiles = {} 40 | for bbFile in os.listdir(baseBbDir): 41 | f = open(baseBbDir + "\\" + bbFile, "r") 42 | fname = f.readline().strip() 43 | bbFiles[fname] = [] 44 | confFiles[fname] = baseBbDir + "/" + bbFile 45 | f.close() 46 | 47 | #Find files 48 | print "Finding files to modify" 49 | for root, subFolder, files in os.walk(baseExecDir): 50 | for item in files: 51 | if item in bbFiles: 52 | bbFiles[item].append(str(os.path.join(baseExecDir, root, item))) 53 | 54 | #Any file not found 55 | for fname in bbFiles: 56 | if len(bbFiles[fname]) == 0: 57 | print ">>>No file named '%s' was found" % fname 58 | del bbFiles[fname] 59 | del confFiles[f.readline().strip()] 60 | 61 | #Lets start the modifications 62 | for fname in bbFiles: 63 | for target in bbFiles[fname]: 64 | print "Modifying %s based of BB-s in %s" % (target, confFiles[fname]) 65 | shutil.copyfile(target, target + "_original") 66 | f = open(confFiles[fname], "r") 67 | fa = open(target, "r+b") 68 | 69 | f.readline() 70 | for line in f: 71 | offset = int(line[9:17], 16) 72 | fa.seek(offset) 73 | fa.write(chr(0xCC)) 74 | 75 | f.close() 76 | fa.close() 77 | print "DONE" -------------------------------------------------------------------------------- /2.Coverage/Coverage.py: -------------------------------------------------------------------------------- 1 | import os 2 | import getopt 3 | import sys 4 | import subprocess 5 | from winappdbg import Debug, Crash, win32, HexDump 6 | from time import time 7 | from winappdbg.util import MemoryAddresses 8 | 9 | 10 | 11 | class Coverage: 12 | verbose = False 13 | bbFiles = {} 14 | bbFilesBreakpints = [] 15 | bbFilesData = {} 16 | bbOriginalName = {} 17 | modules = [] 18 | fileOutput = None 19 | 20 | #Construct 21 | def __init__(self): 22 | self.debugger = Debug( bKillOnExit = True ) 23 | 24 | def setVerbose(self, val): 25 | self.verbose = val 26 | 27 | #cuts after . 28 | def cutDot(self, input): 29 | if input.find(".") == -1: 30 | return input 31 | return input[0:input.find(".")] 32 | 33 | #load basic blocks 34 | def loadBB(self, baseBbDir): 35 | self.bbFiles = {} 36 | count = 0 37 | for bbFile in os.listdir(baseBbDir): 38 | f = open(baseBbDir + "/" + bbFile, "r") 39 | fname = f.readline().strip().lower() 40 | fnameOrig = fname 41 | if ".dll" not in fname and ".exe" not in fname: #Stupid hack to avoid problems in loading libs with other extensions then .dll 42 | fname = self.cutDot(fname) + ".dll" 43 | self.bbOriginalName[fname] = fnameOrig 44 | self.bbFiles[fname] = count 45 | self.bbFilesBreakpints.append({}) 46 | rvaHighest = 0 47 | for line in f: 48 | rva = int(line[0:8], 16) 49 | val = int(line[18:20], 16) 50 | self.bbFilesBreakpints[count][rva] = val 51 | if rva > rvaHighest: 52 | rvaHighest = rva 53 | self.bbFilesData[fname] = [rvaHighest + 10, count] 54 | if self.verbose: 55 | print "Loaded breakpoints for %s with index %02X" % (fname, count) 56 | count += 1 57 | f.close() 58 | 59 | #Register module (original exe image or dll) 60 | def registerModule(self, filename, baseaddr): 61 | filename = filename.lower() 62 | if ".dll" not in filename and ".exe" not in filename: #Stupid hack to avoid problems in loading libs with other extensions then .dll 63 | filename = self.cutDot(filename) + ".dll" 64 | if filename not in self.bbFiles: 65 | return 66 | if self.verbose: 67 | print " Image %s has breakpoints defined" % filename 68 | self.modules.append([baseaddr,baseaddr+self.bbFilesData[filename][0], self.bbFilesData[filename][1]]) 69 | if self.verbose: 70 | print " Image has breakpoints from %08X to %08X with index %02X" % (baseaddr,baseaddr+self.bbFilesData[filename][0],self.bbFilesData[filename][1]) 71 | 72 | #Handle a breakpoint 73 | def breakpoint(self, location): 74 | index = None 75 | for i in xrange(len(self.modules)): 76 | if location>=self.modules[i][0] and location<=self.modules[i][1]: 77 | index = i 78 | break 79 | if index == None: 80 | return None 81 | rva = location - self.modules[index][0] 82 | index = self.modules[index][2] 83 | if rva not in self.bbFilesBreakpints[index]: 84 | return None 85 | self.fileOutput.write("%02X|%08X\n" % (index, rva)) 86 | return self.bbFilesBreakpints[index][rva] 87 | 88 | def startFileRec(self, filename): 89 | self.modules = [] 90 | self.fileOutput = open(filename, "w") 91 | for image in self.bbFiles: 92 | self.fileOutput.write("%s|%02X\n" % (self.bbOriginalName[image], self.bbFiles[image])) 93 | 94 | def endFileRec(self): 95 | self.fileOutput.close() 96 | 97 | #Start program 98 | def start(self, execFile, waitTime = 6, recFilename = "output.txt", kill = True): 99 | self.startFileRec(recFilename) 100 | mainProc = self.debugger.execv( execFile, bFollow = True ) 101 | event = None 102 | endTime = time() + waitTime 103 | while time() < endTime: 104 | if not mainProc.is_alive(): 105 | break 106 | try: 107 | event = self.debugger.wait(1000) 108 | except WindowsError, e: 109 | if e.winerror in (win32.ERROR_SEM_TIMEOUT, win32.WAIT_TIMEOUT): 110 | continue 111 | raise 112 | 113 | if event.get_event_code() == win32.LOAD_DLL_DEBUG_EVENT: 114 | module = event.get_module() 115 | if self.verbose: 116 | print "DLL %s loaded on base %08X" % (module.get_name(), module.get_base()) 117 | self.registerModule(self.cutDot(module.get_name())+".dll", module.get_base()) 118 | elif event.get_event_code() == win32.CREATE_PROCESS_DEBUG_EVENT: 119 | tmp = event.get_filename().split("\\") 120 | modName = tmp[len(tmp)-1] 121 | if self.verbose: 122 | print "Process %s loaded on base %08X" % (modName, event.raw.u.CreateProcessInfo.lpBaseOfImage) 123 | self.registerModule(modName,event.raw.u.CreateProcessInfo.lpBaseOfImage) 124 | elif event.get_event_code() == win32.EXCEPTION_DEBUG_EVENT and event.get_exception_code() == win32.STATUS_BREAKPOINT: 125 | pc = event.get_thread().get_pc()-1 126 | val = self.breakpoint(pc) 127 | if val != None: 128 | event.get_process().write(pc, chr(val)) 129 | event.get_thread().set_pc(pc) 130 | endTime = time() + waitTime 131 | 132 | try: 133 | self.debugger.dispatch() 134 | except: 135 | pass 136 | finally: 137 | self.debugger.cont() 138 | self.endFileRec() 139 | if kill: 140 | self.kill() 141 | 142 | 143 | #Kill processes 144 | def kill(self): 145 | pids = self.debugger.get_debugee_pids() 146 | self.debugger.detach_from_all( True ) 147 | for pid in pids: 148 | try: 149 | proc = self.debugger.system.get_process(pid) 150 | proc.kill() 151 | except: 152 | pass 153 | subprocess.call(["taskkill", "/f", "/pid", str(pid)], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 154 | 155 | 156 | if __name__ == "__main__": 157 | baseBbDir = "./KavalAntsBB" 158 | waitTime = 3 159 | resultFile = "./result.txt" 160 | verbose = False 161 | 162 | def help(): 163 | print "Possible arguments: Coverage.py [-h] [-b DIR] [-f FILE] [-T SEC] [-v] ARGS" 164 | print " -h Prints this message to you" 165 | print " -b DIR set the location where generator looks for breakpoint files" 166 | print " -f FILE set the location where the results are written" 167 | print " -t SEC how long to keep running after last breakpoint" 168 | print " -v script shows some information" 169 | 170 | try: 171 | opts, args = getopt.getopt(sys.argv[1:], "he:b:f:t:v") 172 | if args == None or len(args) == 0: 173 | raise "No arguments" 174 | except: 175 | help() 176 | sys.exit() 177 | for opt, arg in opts: 178 | if opt in("-h"): 179 | help() 180 | sys.exit() 181 | if opt in("-b"): 182 | baseBbDir = arg 183 | if opt in("-f"): 184 | resultFile = arg 185 | if opt in("-t"): 186 | waitTime = int(arg) 187 | if opt in("-v"): 188 | verbose = True 189 | 190 | cov = Coverage() 191 | cov.setVerbose(verbose) 192 | cov.loadBB(baseBbDir) 193 | cov.start(args, waitTime, resultFile) 194 | cov.kill() -------------------------------------------------------------------------------- /3.Analyze/Analyze.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import getopt 4 | import sys 5 | import time 6 | 7 | 8 | dir = ".\\input" 9 | tmpDir = ".\\tmp" 10 | output = ".\\output.txt" 11 | BBcount = 0 12 | modules = {} 13 | basicblocks = {} 14 | results = [] 15 | 16 | 17 | 18 | #Conf 19 | def help(): 20 | print "Possible arguments: GenBpFiles.py [-h] [-d DIR] [-o FILE]" 21 | print " -h Prints this message to you" 22 | print " -d DIR Directory that contains coverage files" 23 | print " -o FILE Result file" 24 | 25 | 26 | try: 27 | opts, args = getopt.getopt(sys.argv[1:], "hd:o:", []) 28 | except: 29 | help() 30 | sys.exit() 31 | for opt, arg in opts: 32 | if opt in("-h"): 33 | help() 34 | sys.exit() 35 | if opt in("-o"): 36 | output = arg 37 | if opt in("-d"): 38 | dir = arg 39 | 40 | #Create tmp dirs 41 | if not os.path.exists(tmpDir + "0"): 42 | os.makedirs(tmpDir + "0") 43 | if not os.path.exists(tmpDir + "1"): 44 | os.makedirs(tmpDir + "1") 45 | 46 | #Prep 47 | filelist = os.listdir(dir) 48 | tmpCount = 0x1 49 | 50 | #First pass through 51 | lastTime = time.time() 52 | for fname in filelist: 53 | f = open(dir + "/" + fname) 54 | 55 | #module list 56 | line = f.readline() 57 | modules = {} 58 | while line != "" and line[2] != "|": 59 | moduleName = line[:line.find("|")].lower() 60 | moduleCode = line[line.find("|")+1:line.find("|")+3] 61 | modules[moduleCode] = moduleName 62 | if moduleName not in basicblocks: 63 | basicblocks[moduleName] = {} 64 | line = f.readline() 65 | 66 | #basicblock 67 | while line.strip() != "": 68 | moduleCode = line[0:2] 69 | bb = line[3:11] 70 | moduleName = modules[moduleCode].lower() 71 | if bb not in basicblocks[moduleName]: 72 | basicblocks[moduleName][bb] = False 73 | BBcount += 1 74 | line = f.readline() 75 | f.close() 76 | 77 | print "First analysis" 78 | print " Files: %d" % len(filelist) 79 | print " BasicBlocks: %d" % BBcount 80 | print " Time spent: %d sec" % (time.time() - lastTime) 81 | 82 | 83 | #Real analysis 84 | freport = open(output, 'w') 85 | srcDir = dir 86 | destDir = tmpDir + "0" 87 | while BBcount>0: 88 | best = 0; 89 | bestName = None 90 | lastTime = time.time() 91 | 92 | #Find largest file 93 | for fname in filelist: 94 | if fname in results: 95 | continue 96 | size = os.path.getsize(srcDir + "/" + fname) 97 | if size > best: 98 | best = size 99 | bestName = fname 100 | 101 | #Best coverage file 102 | f = open(srcDir + "/" + bestName) 103 | best = 0 104 | #module list 105 | line = f.readline() 106 | modules = {} 107 | while line != "" and line[2] != "|": 108 | moduleName = line[:line.find("|")].lower() 109 | moduleCode = line[line.find("|")+1:line.find("|")+3] 110 | modules[moduleCode] = moduleName 111 | if moduleName not in basicblocks: 112 | basicblocks[moduleName] = {} 113 | line = f.readline() 114 | #basicblock 115 | while line.strip() != "": 116 | moduleCode = line[0:2] 117 | bb = line[3:11] 118 | moduleName = modules[moduleCode].lower() 119 | basicblocks[moduleName][bb] = True 120 | line = f.readline() 121 | best+=1 122 | f.close() 123 | 124 | 125 | #Remove covered blocks 126 | for fname in filelist: 127 | f = open(srcDir + "/" + fname, "r") 128 | fout = open(destDir + "/" + fname, "w") 129 | #module list 130 | line = f.readline() 131 | modules = {} 132 | while line != "" and line[2] != "|": 133 | fout.write(line) 134 | moduleName = line[:line.find("|")].lower() 135 | moduleCode = line[line.find("|")+1:line.find("|")+3] 136 | modules[moduleCode] = moduleName 137 | if moduleName not in basicblocks: 138 | basicblocks[moduleName] = {} 139 | line = f.readline() 140 | #basicblock 141 | while line.strip() != "": 142 | moduleCode = line[0:2] 143 | bb = line[3:11] 144 | moduleName = modules[moduleCode].lower() 145 | if not basicblocks[moduleName][bb]: 146 | fout.write(line) 147 | line = f.readline() 148 | f.close() 149 | fout.close() 150 | 151 | BBcount -= best 152 | results.append(bestName) 153 | print "%06d[%03d sec]: %s covered %d basicblocks, %d left" % (len(results), (time.time() - lastTime), bestName, best, BBcount) 154 | freport.write("%s\n" % bestName) 155 | destDir = tmpDir + str(tmpCount) 156 | tmpCount = tmpCount ^ 0x1 157 | srcDir = tmpDir + str(tmpCount) 158 | 159 | freport.close() 160 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # KavalAnts 2 | Code coverage calculation/monitoring tool for Corpus Distillation 3 | -------------------------------------------------------------------------------- /Utilities/BBminus.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import getopt 4 | import sys 5 | 6 | 7 | inputDir = ".\\input" 8 | outputDir = ".\\output" 9 | inputFile = ".\\result.txt" 10 | modules = {} 11 | basicblocks = {} 12 | 13 | 14 | 15 | #Conf 16 | def help(): 17 | print "Possible arguments: GenBpFiles.py [-h] [-d DIR] [-o FILE]" 18 | print " -h Prints this message to you" 19 | print " -d DIR Directory that contains basicblocks files" 20 | print " -i FILE File that contains basicblocks to remove" 21 | print " -o DIR Result directory" 22 | 23 | 24 | try: 25 | opts, args = getopt.getopt(sys.argv[1:], "hd:i:o:", []) 26 | except: 27 | help() 28 | sys.exit() 29 | for opt, arg in opts: 30 | if opt in("-h"): 31 | help() 32 | sys.exit() 33 | if opt in("-d"): 34 | inputDir = arg 35 | if opt in("-i"): 36 | inputFile = arg 37 | if opt in("-o"): 38 | outputDir = arg 39 | 40 | 41 | #input file 42 | print "Reading input file %s" % inputFile 43 | f = open(inputFile) 44 | #module list 45 | line = f.readline() 46 | modules = {} 47 | while line != "" and line[2] != "|": 48 | moduleName = line[:line.find("|")] 49 | moduleCode = line[line.find("|")+1:line.find("|")+3] 50 | modules[moduleCode] = moduleName 51 | if moduleName not in basicblocks: 52 | basicblocks[moduleName] = {} 53 | line = f.readline() 54 | #basicblock 55 | while line.strip() != "": 56 | moduleCode = line[0:2] 57 | bb = line[3:11] 58 | moduleName = modules[moduleCode] 59 | if bb not in basicblocks[moduleName]: 60 | basicblocks[moduleName][bb] = 1 61 | else: 62 | basicblocks[moduleName][bb] += 1 63 | line = f.readline() 64 | f.close() 65 | 66 | 67 | #Modifying basicblocks 68 | if not os.path.isdir(outputDir): 69 | os.makedirs(outputDir) 70 | for fname in os.listdir(inputDir): 71 | f = open(inputDir + "/" + fname) 72 | 73 | moduleLine = f.readline() 74 | module = moduleLine.strip().lower() 75 | if len(basicblocks[module]) == 0: 76 | print "File %s remains unchanged" % fname 77 | f.close() 78 | shutil.copy2(inputDir + "/" + fname, outputDir + "/" + fname) 79 | continue 80 | 81 | print "Modifying %s" % fname 82 | #basicblock 83 | fout = open(outputDir + "/" + fname, "w") 84 | fout.write(moduleLine) 85 | line = f.readline() 86 | while line.strip() != "": 87 | bb = line[0:8] 88 | if bb not in basicblocks[module]: 89 | fout.write(line) 90 | line = f.readline() 91 | f.close() 92 | fout.close() -------------------------------------------------------------------------------- /Utilities/Download.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import time 3 | import string 4 | import os.path 5 | import urllib2 6 | import sys 7 | import getopt 8 | from time import gmtime, strftime 9 | 10 | 11 | #variables 12 | class Downloader: 13 | extension = "pdf" 14 | signature = [0x25, 0x50, 0x44, 0x46] 15 | searchChars = ['a', 'a'] 16 | outputDir = "downloaded_" 17 | downloaded = [] 18 | successCount = 0 19 | maxPerSearch = 500 20 | last = 0 21 | lastStatus = 0 22 | 23 | 24 | def loadArguments(self, argv): 25 | options, rem = getopt.getopt(argv, 'x:s:q:o:m:', ['extension=', 'signature=', 'search=', 'output=', 'max=']) 26 | for opt, arg in options: 27 | if opt in ('-x'): 28 | self.extension = arg 29 | elif opt in ('-s'): 30 | self.signature=[] 31 | for x in range(len(arg)/2): 32 | self.signature.append(int(arg[(x*2):(x*2+2)], 16)) 33 | elif opt in ('-q'): 34 | self.searchChars=[] 35 | for x in range(len(arg)): 36 | self.searchChars.append(arg[x]) 37 | if opt in ('-o'): 38 | self.outputDir = arg 39 | if opt in ('-m'): 40 | self.maxPerSearch = int(arg) 41 | 42 | def currentStatusReport(self): 43 | if len(self.downloaded) % 10 != 0 or len(self.downloaded) == self.lastStatus: 44 | return 45 | self.lastStatus = len(self.downloaded) 46 | if not os.path.isdir(self.outputDir + self.extension): 47 | print strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " --- TOTAL: " + str(len(self.downloaded))+ " DOWNLOADED: 0" 48 | else: 49 | print strftime("%Y-%m-%d %H:%M:%S", gmtime()) + " --- TOTAL: " + str(len(self.downloaded))+ " DOWNLOADED: " + str(len(os.listdir(self.outputDir + self.extension))) 50 | 51 | 52 | def loadList(self): 53 | if os.path.isfile("list_" + self.extension + ".txt"): 54 | with open("list_" + self.extension + ".txt") as f: 55 | for line in f: 56 | self.downloaded.append(line.strip()) 57 | if os.path.isdir(self.outputDir + self.extension): 58 | self.successCount = len(os.listdir(self.outputDir + self.extension)) 59 | 60 | def readStatus(self): 61 | if os.path.isfile("status" + self.extension + "_" + str(len(self.searchChars)) + ".txt"): 62 | with open("status" + self.extension + "_" + str(len(self.searchChars)) + ".txt") as f: 63 | x = 0 64 | for line in f: 65 | if x 0: 73 | file.write("%s: %d\n" % (x, len(basicblocks[x]))) 74 | file.close() 75 | --------------------------------------------------------------------------------