├── .gitignore ├── LICENSE ├── README.md ├── deferred_driller ├── __init__.py ├── config.py ├── core.py ├── exploration.py ├── externals.py ├── instrumentation │ ├── PinTool.cpp │ ├── README.md │ ├── build.py │ ├── makefile │ └── makefile.rules ├── runner.py └── tracer.py ├── driller_init.s ├── example ├── Makefile ├── drill.py ├── inputs │ └── dumb ├── test1.c ├── test1_afl └── test1_driller └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, Andrea Fioraldi 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deferred_driller 2 | My version of driller using Intel PIN and angrgdb. In "theory" can work with AFL in deferred and persistent mode. 3 | 4 | This is hugely based on the real [driller](https://github.com/shellphish/driller) so they deserve the majority of the credits. 5 | 6 | What's new? 7 | 8 | The tracer is based on a Pintool that forks from the deferred starting point and collects the trace of the children. 9 | So no more aslr slides and avoiding to explore in angr the code before the deferred starting point may be an huge improvement when such code is complex. 10 | 11 | ## howto 12 | 13 | Look at the example. All the files, also the Makefile. 14 | 15 | This will not work on your code. Or at least can work with a low probability. This is a work in progress PoC of a bored student in the middle of the exams session. 16 | 17 | ### dependencies 18 | 19 | All the following dependencies must be installed from the respective git repo and not from pip at the moment. 20 | 21 | + [archinfo](https://github.com/angr/archinfo) 22 | + [pyvex](https://github.com/angr/pyvex) 23 | + [ailment](https://github.com/angr/ailment) 24 | + [claripy](https://github.com/angr/claripy) 25 | + [cle](https://github.com/angr/cle) 26 | + [angr](https://github.com/angr/angr) 27 | 28 | + [angrdbg](https://github.com/andreafioraldi/angrdbg) 29 | + [angrgdb](https://github.com/andreafioraldi/angrgdb) 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /deferred_driller/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import PinRunner 2 | from .core import Driller 3 | -------------------------------------------------------------------------------- /deferred_driller/config.py: -------------------------------------------------------------------------------- 1 | ### Redis Options 2 | REDIS_HOST=None 3 | REDIS_PORT=None 4 | REDIS_DB=None 5 | 6 | ### Celery Options 7 | BROKER_URL= None 8 | 9 | CELERY_ROUTES = None 10 | 11 | ### Environment Options 12 | 13 | # directory contain driller-qemu versions, relative to the directoy node.py is invoked in 14 | QEMU_DIR=None 15 | 16 | # directory containing the binaries, used by the driller node to find binaries 17 | BINARY_DIR=None 18 | # directory containing the pcap corpus 19 | PCAP_DIR=None 20 | # directory containing the seeds for fuzzing 21 | SEED_DIR=None 22 | 23 | ### Driller options 24 | # how long to drill before giving up in seconds 25 | DRILL_TIMEOUT=None 26 | 27 | MEM_LIMIT=None 28 | 29 | # where to write a debug file that contains useful debugging information like 30 | # AFL's fuzzing bitmap, input used, binary path, time started. 31 | # Uses following naming convention: 32 | # _.py 33 | DEBUG_DIR = None 34 | 35 | ### Fuzzer options 36 | 37 | # how often to check for crashes in seconds 38 | CRASH_CHECK_INTERVAL=None 39 | 40 | # how long to fuzz before giving up in seconds 41 | FUZZ_TIMEOUT=None 42 | 43 | # how long before we kill a dictionary creation process 44 | DICTIONARY_TIMEOUT=None 45 | 46 | # how many fuzzers should be spun up when a fuzzing job is received 47 | FUZZER_INSTANCES=None 48 | 49 | # where the fuzzer should place it's results on the filesystem 50 | FUZZER_WORK_DIR=None 51 | -------------------------------------------------------------------------------- /deferred_driller/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import signal 4 | import hashlib 5 | import resource 6 | import pickle 7 | import logging 8 | import binascii 9 | 10 | import angr 11 | import angrgdb 12 | 13 | import progressbar 14 | 15 | from .tracer import Tracer 16 | from .exploration import DrillerCore 17 | from . import config 18 | 19 | l = logging.getLogger("deferred_driller.core") 20 | l.setLevel(logging.INFO) 21 | 22 | class Driller(object): 23 | """ 24 | Driller object, symbolically follows an input looking for new state transitions. 25 | """ 26 | 27 | def __init__(self, runner, input_str, fuzz_bitmap=None, tag=None, redis=None, hooks=None, exclude_simprocs=[], stdin_bound=True, sync_brk=True, sync_fs=True, explore_found=True, zero_fill=False): 28 | """ 29 | :param runner : The PinRunner instance. 30 | :param input_str : Input string to feed to the binary. 31 | :param fuzz_bitmap : AFL's bitmap of state transitions (defaults to empty). 32 | :param redis : redis.Redis instance for coordinating multiple Driller instances. 33 | :param hooks : Dictionary of addresses to simprocedures. 34 | :param use_simprocs : Use simprocedures 35 | :param exclude_simprocs : List of names of imports to exclude from simprocedures 36 | :param stdin_bound : Bound read stdin 37 | :param sync_brk : Synchronize brk value 38 | :param sync_fs : Synchronize fs base value 39 | :param explore_found : Explore the deferred state to some extent 40 | """ 41 | 42 | self.runner = runner 43 | # Redis channel identifier. 44 | self.identifier = os.path.basename(runner.binary) 45 | self.input = input_str 46 | self.fuzz_bitmap = fuzz_bitmap 47 | self.tag = tag 48 | self.redis = redis 49 | self.exclude_simprocs = exclude_simprocs 50 | self.stdin_bound = stdin_bound 51 | self.sync_brk = sync_brk 52 | self.sync_fs = sync_fs 53 | self.explore_found = explore_found 54 | self.zero_fill = zero_fill 55 | self.base = os.path.join(os.path.dirname(__file__), "..") 56 | 57 | # The simprocedures. 58 | self._hooks = {} if hooks is None else hooks 59 | 60 | # The driller core, which is now an exploration technique in angr. 61 | self._core = None 62 | 63 | # Start time, set by drill method. 64 | self.start_time = time.time() 65 | 66 | # Set of all the generated inputs. 67 | self._generated = set() 68 | 69 | # Set the memory limit specified in the config. 70 | if config.MEM_LIMIT is not None: 71 | resource.setrlimit(resource.RLIMIT_AS, (config.MEM_LIMIT, config.MEM_LIMIT)) 72 | 73 | l.debug("[%s] drilling started on %s.", self.identifier, time.ctime(self.start_time)) 74 | 75 | ### DRILLING 76 | 77 | def drill(self): 78 | """ 79 | Perform the drilling, finding more code coverage based off our existing input base. 80 | """ 81 | 82 | # Don't re-trace the same input. 83 | if self.redis and self.redis.sismember(self.identifier + '-traced', self.input): 84 | return -1 85 | 86 | # Write out debug info if desired. 87 | if l.level == logging.DEBUG and config.DEBUG_DIR: 88 | self._write_debug_info() 89 | elif l.level == logging.DEBUG and not config.DEBUG_DIR: 90 | l.warning("Debug directory is not set. Will not log fuzzing bitmap.") 91 | 92 | # Update traced. 93 | if self.redis: 94 | self.redis.sadd(self.identifier + '-traced', self.input) 95 | 96 | list(self._drill_input()) 97 | 98 | if self.redis: 99 | return len(self._generated) 100 | else: 101 | return self._generated 102 | 103 | def drill_generator(self): 104 | """ 105 | A generator interface to the actual drilling. 106 | """ 107 | 108 | # Set up alarm for timeouts. 109 | if config.DRILL_TIMEOUT is not None: 110 | signal.alarm(config.DRILL_TIMEOUT) 111 | 112 | for i in self._drill_input(): 113 | yield i 114 | 115 | def _drill_input(self): 116 | """ 117 | Symbolically step down a path with a tracer, trying to concretize inputs for unencountered 118 | state transitions. 119 | """ 120 | 121 | p = angrgdb.load_project() 122 | 123 | trace, crash_addr = self.runner.tracer(self.input) 124 | start_addr = self.runner.get_start_addr(trace) 125 | 126 | for bb in self.runner.main_return_blocks(): 127 | try: 128 | idx = trace.index(bb) 129 | except ValueError: 130 | continue 131 | trace = trace[:idx +1] 132 | 133 | for addr, proc in self._hooks.items(): 134 | p.hook(addr, proc) 135 | l.debug("Hooking %#x -> %s...", addr, proc.display_name) 136 | 137 | s = angrgdb.StateShot(sync_brk=False, concrete_imports=self.exclude_simprocs, stdin=angr.SimFileStream) 138 | 139 | if self.zero_fill: 140 | s.options.add(angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY) 141 | s.options.add(angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS) 142 | 143 | if self.sync_brk: # don't use angrdbg brk but ask for it to the runner 144 | s.posix.set_brk(s.solver.BVV(self.runner.brk(), p.arch.bits)) 145 | if self.sync_fs: 146 | s.regs.fs = self.runner.fs() 147 | s.regs.rax = 0xabadcafe #flag for exit driller_init 148 | 149 | s.preconstrainer.preconstrain_file(self.input, s.posix.stdin, self.stdin_bound) 150 | 151 | simgr = p.factory.simulation_manager(s, save_unsat=True, hierarchy=False, save_unconstrained=True) 152 | 153 | start_addr = self.runner.get_start_addr(trace) 154 | 155 | t = Tracer(start_addr, trace=trace, crash_addr=crash_addr) 156 | self._core = DrillerCore(trace=trace, fuzz_bitmap=self.fuzz_bitmap) 157 | 158 | simgr.use_technique(t) 159 | simgr.use_technique(angr.exploration_techniques.Oppologist()) 160 | simgr.use_technique(self._core) 161 | 162 | self._set_concretizations(simgr.one_active) 163 | 164 | l.info("Drilling into %r.", self.input) 165 | l.debug("Input is %r.", self.input) 166 | 167 | start_addr_idx = trace.index(start_addr) 168 | with progressbar.ProgressBar(max_value=(len(trace) - start_addr_idx)) as bar: 169 | while simgr.active and simgr.one_active.globals['trace_idx'] < len(trace) - 1: 170 | simgr.step() 171 | #print("RIP", simgr.one_active.regs.rip) 172 | #print("TRACE", simgr.one_active.globals['trace_idx'], hex(trace[simgr.one_active.globals['trace_idx']])) 173 | bar.update(simgr.one_active.globals['trace_idx'] - start_addr_idx) 174 | l.debug("stepped to " + str(simgr.one_active.regs.rip)) 175 | 176 | if len(simgr.unconstrained) > 0: 177 | while len(simgr.unconstrained) > 0: 178 | state = simgr.unconstrained.pop(0) 179 | l.debug("Found a unconstrained state, exploring to some extent.") 180 | w = self._writeout(state.history.bbl_addrs[-1], state) 181 | if w is not None: 182 | yield w 183 | 184 | # Check here to see if a crash has been found. 185 | if self.redis and self.redis.sismember(self.identifier + '-finished', True): 186 | return 187 | 188 | if 'diverted' not in simgr.stashes: 189 | continue 190 | 191 | while simgr.diverted: 192 | state = simgr.diverted.pop(0) 193 | l.debug("Found a diverted state, exploring to some extent.") 194 | w = self._writeout(state.history.bbl_addrs[-1], state) 195 | if w is not None: 196 | yield w 197 | if self.explore_found: 198 | for i in self._symbolic_explorer_stub(state): 199 | yield i 200 | 201 | ### EXPLORER 202 | 203 | def _symbolic_explorer_stub(self, state): 204 | # Create a new simulation manager and step it forward up to 1024 205 | # accumulated active states or steps. 206 | steps = 0 207 | accumulated = 1 208 | 209 | p = state.project 210 | state = state.copy() 211 | try: 212 | state.options.remove(angr.options.LAZY_SOLVES) 213 | except KeyError: 214 | pass 215 | simgr = p.factory.simulation_manager(state, hierarchy=False) 216 | 217 | l.info("[%s] started symbolic exploration at %s.", self.identifier, time.ctime()) 218 | 219 | while len(simgr.active) and accumulated < 1024: 220 | simgr.step() 221 | steps += 1 222 | 223 | # Dump all inputs. 224 | accumulated = steps * (len(simgr.active) + len(simgr.deadended)) 225 | 226 | l.info("[%s] stopped symbolic exploration at %s.", self.identifier, time.ctime()) 227 | 228 | for dumpable in simgr.deadended: 229 | try: 230 | if dumpable.satisfiable(): 231 | w = self._writeout(dumpable.history.bbl_addrs[-1], dumpable) 232 | if w is not None: 233 | yield w 234 | 235 | # If the state we're trying to dump wasn't actually satisfiable. 236 | except IndexError: 237 | pass 238 | 239 | ### UTILS 240 | 241 | @staticmethod 242 | def _set_concretizations(state): 243 | if state.project.loader.main_object.os == 'cgc': 244 | flag_vars = set() 245 | for b in state.cgc.flag_bytes: 246 | flag_vars.update(b.variables) 247 | 248 | state.unicorn.always_concretize.update(flag_vars) 249 | 250 | # Let's put conservative thresholds for now. 251 | state.unicorn.concretization_threshold_memory = 50000 252 | state.unicorn.concretization_threshold_registers = 50000 253 | 254 | def _in_catalogue(self, length, prev_addr, next_addr): 255 | """ 256 | Check if a generated input has already been generated earlier during the run or by another 257 | thread. 258 | 259 | :param length : Length of the input. 260 | :param prev_addr: The source address in the state transition. 261 | :param next_addr: The destination address in the state transition. 262 | 263 | :return: boolean describing whether or not the input generated is redundant. 264 | """ 265 | 266 | key = '%x,%x,%x\n' % (length, prev_addr, next_addr) 267 | 268 | if self.redis: 269 | return self.redis.sismember(self.identifier + '-catalogue', key) 270 | 271 | # No redis means no coordination, so no catalogue. 272 | else: 273 | return False 274 | 275 | def _add_to_catalogue(self, length, prev_addr, next_addr): 276 | if self.redis: 277 | key = '%x,%x,%x\n' % (length, prev_addr, next_addr) 278 | self.redis.sadd(self.identifier + '-catalogue', key) 279 | # No redis = no catalogue. 280 | 281 | def _writeout(self, prev_addr, state): 282 | generated = state.posix.stdin.load(0, state.posix.stdin.pos) 283 | generated = state.solver.eval(generated, cast_to=bytes) 284 | 285 | key = (len(generated), prev_addr, state.addr) 286 | 287 | # Checks here to see if the generation is worth writing to disk. 288 | # If we generate too many inputs which are not really different we'll seriously slow down AFL. 289 | if self._in_catalogue(*key): 290 | self._core.encounters.remove((prev_addr, state.addr)) 291 | return None 292 | 293 | else: 294 | self._add_to_catalogue(*key) 295 | 296 | l.info("[%s] dumping input for %#x -> %#x.", self.identifier, prev_addr, state.addr) 297 | 298 | self._generated.add((key, generated)) 299 | 300 | if self.redis: 301 | # Publish it out in real-time so that inputs get there immediately. 302 | channel = self.identifier + '-generated' 303 | 304 | self.redis.publish(channel, pickle.dumps({'meta': key, 'data': generated, "tag": self.tag})) 305 | 306 | else: 307 | l.info("Generated: %s", binascii.hexlify(generated)) 308 | 309 | return (key, generated) 310 | 311 | def _write_debug_info(self): 312 | m = hashlib.md5() 313 | m.update(self.input) 314 | f_name = os.path.join(config.DEBUG_DIR, self.identifier + '_' + m.hexdigest() + '.py') 315 | 316 | with open(f_name, 'w+') as f: 317 | l.debug("Debug log written to %s.", f_name) 318 | f.write("binary = %r\n" % self.binary 319 | + "started = '%s'\n" % time.ctime(self.start_time) 320 | + "input = %r\n" % self.input 321 | + "fuzz_bitmap = %r" % self.fuzz_bitmap) 322 | -------------------------------------------------------------------------------- /deferred_driller/exploration.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from itertools import islice 3 | 4 | from angr.exploration_techniques import ExplorationTechnique 5 | 6 | l = logging.getLogger("deferred_driller.exploration") 7 | l.setLevel(logging.DEBUG) 8 | 9 | class DrillerCore(ExplorationTechnique): 10 | """ 11 | An exploration technique that symbolically follows an input looking for new 12 | state transitions. 13 | 14 | It has to be used with Tracer exploration technique. Results are put in 15 | 'diverted' stash. 16 | """ 17 | 18 | def __init__(self, trace, fuzz_bitmap=None): 19 | """ 20 | :param trace : The basic block trace. 21 | :param fuzz_bitmap: AFL's bitmap of state transitions. Defaults to saying every transition is worth satisfying. 22 | """ 23 | 24 | super(DrillerCore, self).__init__() 25 | self.trace = trace 26 | self.fuzz_bitmap = fuzz_bitmap or b"\xff" * 65536 27 | 28 | # Set of encountered basic block transitions. 29 | self.encounters = set() 30 | 31 | def setup(self, simgr): 32 | self.project = simgr._project 33 | 34 | # Update encounters with known state transitions. 35 | self.encounters.update(zip(self.trace, islice(self.trace, 1, None))) 36 | 37 | def step(self, simgr, stash='active', **kwargs): 38 | simgr.step(stash=stash, **kwargs) 39 | 40 | # Mimic AFL's indexing scheme. 41 | if 'missed' in simgr.stashes and simgr.missed: 42 | # A bit ugly, might be replaced by tracer.predecessors[-1] or crash_monitor.last_state. 43 | prev_addr = simgr.one_missed.history.bbl_addrs[-1] 44 | prev_loc = prev_addr 45 | prev_loc = (prev_loc >> 4) ^ (prev_loc << 8) 46 | prev_loc &= len(self.fuzz_bitmap) - 1 47 | prev_loc = prev_loc >> 1 48 | 49 | for state in simgr.missed: 50 | cur_loc = state.addr 51 | cur_loc = (cur_loc >> 4) ^ (cur_loc << 8) 52 | cur_loc &= len(self.fuzz_bitmap) - 1 53 | 54 | hit = bool(self.fuzz_bitmap[cur_loc ^ prev_loc] ^ 0xff) 55 | 56 | transition = (prev_addr, state.addr) 57 | mapped_to = self.project.loader.find_object_containing(state.addr) 58 | mapped_to = "" if mapped_to is None else mapped_to.binary 59 | 60 | l.debug("Found %#x -> %#x transition.", transition[0], transition[1]) 61 | 62 | if not hit and transition not in self.encounters and not self._has_false(state) and mapped_to != 'cle##externs': 63 | state.preconstrainer.remove_preconstraints() 64 | 65 | if state.satisfiable(): 66 | # A completely new state transition. 67 | l.debug("Found a completely new transition, putting into 'diverted' stash.") 68 | simgr.stashes['diverted'].append(state) 69 | self.encounters.add(transition) 70 | else: 71 | l.debug("State at %#x is not satisfiable.", transition[1]) 72 | 73 | elif self._has_false(state): 74 | l.debug("State at %#x is not satisfiable even remove preconstraints.", transition[1]) 75 | 76 | else: 77 | l.debug("%#x -> %#x transition has already been encountered.", transition[0], transition[1]) 78 | 79 | return simgr 80 | 81 | # 82 | # Private methods 83 | # 84 | 85 | @staticmethod 86 | def _has_false(state): 87 | # Check if the state is unsat even if we remove preconstraints. 88 | claripy_false = state.solver.false 89 | if state.scratch.guard.cache_key == claripy_false.cache_key: 90 | return True 91 | 92 | for c in state.solver.constraints: 93 | if c.cache_key == claripy_false.cache_key: 94 | return True 95 | 96 | return False 97 | -------------------------------------------------------------------------------- /deferred_driller/externals.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | from collections import defaultdict 4 | 5 | import angr 6 | import angrgdb 7 | 8 | l = logging.getLogger("deferred_driller.externals") 9 | l.setLevel(logging.INFO) 10 | 11 | def get_objects(p): 12 | vmmap = angrgdb.get_debugger()._get_vmmap() 13 | objs = defaultdict(lambda: [0xffffffffffffffff, 0]) 14 | paths = [] 15 | for dep in p.loader.main_object.deps: 16 | paths += p.loader._possible_paths(dep) 17 | 18 | for start, end, mapperm, mapname in vmmap: 19 | if not os.path.exists(mapname): 20 | continue 21 | if mapname == p.loader.main_object.binary: 22 | continue 23 | if mapname not in paths: #skip pinbin PinTool.so etc... 24 | continue 25 | objs[mapname][0] = min(objs[mapname][0], start) 26 | objs[mapname][1] = max(objs[mapname][1], end) 27 | return objs 28 | 29 | 30 | def get_got(p): 31 | s = list(filter(lambda x: x.name == ".got.plt", p.loader.main_object.sections))[0] 32 | return (s.vaddr, s.vaddr + s.memsize) 33 | 34 | def get_plt(p): 35 | s = list(filter(lambda x: x.name == ".plt", p.loader.main_object.sections))[0] 36 | return (s.vaddr, s.vaddr + s.memsize) 37 | 38 | def process_got(proj): 39 | debugger = angrgdb.get_debugger() 40 | target_proj = angrgdb.load_project() 41 | 42 | got_start, got_end = get_got(proj) 43 | plt_start, plt_end = get_plt(proj) 44 | 45 | entry_len = proj.arch.bits // 8 46 | get_mem = debugger.get_dword if entry_len == 4 else debugger.get_qword 47 | 48 | got_start += 3 * entry_len # skip first 3 entries 49 | empty_state = proj.factory.blank_state() 50 | 51 | for a in range(got_start, got_end, entry_len): 52 | state_val = empty_state.solver.eval(getattr(empty_state.mem[a], "uint%d_t" % proj.arch.bits).resolved) 53 | 54 | if state_val in proj._sim_procedures: 55 | dbg_val = get_mem(a) 56 | name = proj._sim_procedures[state_val].display_name 57 | 58 | if proj._sim_procedures[state_val].is_stub: 59 | l.debug("Skipping re-hooking of %s cause is a stub" % name) 60 | elif not target_proj.is_hooked(dbg_val): 61 | l.info("Re-hooking %s (got: 0x%x) to 0x%x" % (name, a, dbg_val)) 62 | target_proj.hook_symbol(dbg_val, proj._sim_procedures[state_val]) 63 | 64 | 65 | def apply_external_simprocs(): 66 | objs = get_objects(angrgdb.load_project()) 67 | for o in objs: 68 | l.info("Applying simprocs to " + o) 69 | try: 70 | p = angr.Project(o, main_opts={ 'base_addr': objs[o][0] , 'force_rebase': True}, load_options={ "auto_load_libs": False }) 71 | process_got(p) 72 | except Exception as ee: 73 | l.warning("Failed to apply simprocs to " + o + ": " + str(ee)) 74 | process_got(angrgdb.load_project()) 75 | 76 | 77 | -------------------------------------------------------------------------------- /deferred_driller/instrumentation/PinTool.cpp: -------------------------------------------------------------------------------- 1 | #include "pin.H" 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | ostream* out = &cerr; 13 | 14 | INT32 Usage() 15 | { 16 | cerr << "This tool prints out the number of dynamically executed " << endl << 17 | "instructions, basic blocks and threads in the application." << endl << endl; 18 | 19 | return -1; 20 | } 21 | 22 | 23 | int input_fd; 24 | string trace_filename; 25 | 26 | ADDRINT current_brk; 27 | 28 | //-------------------------------------------------------------------------------------------- 29 | 30 | VOID SysCallEntry(THREADID threadIndex, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) 31 | { 32 | ADDRINT sys_id = PIN_GetSyscallNumber(ctxt, std); 33 | if(sys_id == SYS_read) { 34 | ADDRINT fd = PIN_GetSyscallArgument(ctxt, std, 0); 35 | if(fd == 0) { //change stdin fileno to our input 36 | PIN_SetSyscallArgument(ctxt, std, 0, input_fd); 37 | } 38 | } 39 | } 40 | 41 | VOID LogBbl(ADDRINT addr) 42 | { 43 | *out << addr << endl; 44 | } 45 | 46 | VOID Trace(TRACE trace, VOID *v) 47 | { 48 | for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) { 49 | BBL_InsertCall(bbl, IPOINT_BEFORE, (AFUNPTR)LogBbl, IARG_ADDRINT, BBL_Address(bbl), IARG_END); 50 | } 51 | } 52 | 53 | VOID Fini(INT32 code, VOID *v) 54 | { 55 | *out << "END_OF_TRACE" << endl; 56 | if(out != &cerr) { 57 | delete static_cast(out); 58 | } 59 | } 60 | 61 | VOID ContextChange(THREADID threadIndex, CONTEXT_CHANGE_REASON reason, const CONTEXT *from, CONTEXT *to, INT32 info, VOID *v) 62 | { 63 | if(reason == CONTEXT_CHANGE_REASON_FATALSIGNAL) { 64 | //TODO crash addr for x86 65 | *out << "END_OF_TRACE " << PIN_GetContextReg(from, REG_RIP) << endl; 66 | if(out != &cerr) { 67 | static_cast(out)->flush(); 68 | } 69 | } 70 | } 71 | 72 | //-------------------------------------------------------------------------------------------- 73 | 74 | VOID SysCallExit(THREADID threadIndex, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v) 75 | { 76 | ADDRINT sys_id = PIN_GetSyscallNumber(ctxt, std); 77 | if(sys_id == SYS_brk) { 78 | current_brk = PIN_GetSyscallReturn(ctxt, std); 79 | } 80 | } 81 | 82 | 83 | BOOL DebugInterpreter(THREADID tid, CONTEXT *ctxt, const string &cmd, string* result, VOID *); 84 | 85 | VOID InChildFork(THREADID threadid, const CONTEXT *ctxt, VOID *v) 86 | { 87 | cerr << "[in child]\n"; 88 | 89 | input_fd = open(trace_filename.c_str(), O_RDONLY); 90 | 91 | PIN_RemoveDebugInterpreter(DebugInterpreter); 92 | 93 | PIN_AddSyscallEntryFunction(SysCallEntry, 0); 94 | 95 | TRACE_AddInstrumentFunction(Trace, 0); 96 | 97 | PIN_AddFiniFunction(Fini, 0); 98 | 99 | PIN_AddContextChangeFunction(ContextChange, 0); 100 | } 101 | 102 | BOOL DebugInterpreter(THREADID tid, CONTEXT *ctxt, const string &cmd, string* result, VOID *) 103 | { 104 | if(cmd == "getpid") { 105 | INT pid = PIN_GetPid(); 106 | cerr << "[pid = " << pid << "]\n"; 107 | std::ostringstream ss; 108 | ss << pid << endl; 109 | *result = ss.str(); 110 | return TRUE; 111 | } 112 | else if(cmd == "enable_fork") { 113 | PIN_AddForkFunction(FPOINT_AFTER_IN_CHILD, InChildFork, 0); 114 | cerr << "[fork hook enabled]\n"; 115 | return TRUE; 116 | } 117 | else if(cmd == "fs") { 118 | ADDRINT regval; 119 | PIN_GetContextRegval(ctxt, REG_SEG_FS_BASE, reinterpret_cast(®val)); 120 | cerr << "[fs base = " << (void*)regval << "]\n"; 121 | std::ostringstream ss; 122 | ss << regval << endl; 123 | *result = ss.str(); 124 | return TRUE; 125 | } 126 | else if(cmd == "brk") { 127 | cerr << "[brk = " << current_brk << "]\n"; 128 | std::ostringstream ss; 129 | ss << current_brk << endl; 130 | *result = ss.str(); 131 | return TRUE; 132 | } 133 | else if(cmd.rfind("input ", 0) == 0) { 134 | trace_filename = cmd.substr(6); 135 | cerr << "[input file = " << trace_filename << "]\n"; 136 | return TRUE; 137 | } 138 | else if(cmd.rfind("out ", 0) == 0) { 139 | string filename = cmd.substr(4); 140 | cerr << "[out file = " << filename << "]\n"; 141 | out = new ofstream(filename.c_str()); 142 | return TRUE; 143 | } 144 | return FALSE; 145 | } 146 | 147 | 148 | //env LD_BIND_NOW=1 ../pin-3.7/pin -appdebug -t obj-intel64/PinTool.so -- ../test1 149 | 150 | int main(int argc, char *argv[]) 151 | { 152 | PIN_InitSymbols(); 153 | if(PIN_Init(argc,argv)) 154 | return Usage(); 155 | 156 | PIN_AddSyscallExitFunction(SysCallExit, 0); 157 | 158 | PIN_AddDebugInterpreter(DebugInterpreter, 0); 159 | 160 | PIN_StartProgram(); 161 | 162 | return 0; 163 | } 164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /deferred_driller/instrumentation/README.md: -------------------------------------------------------------------------------- 1 | ### howto 2 | 3 | Download and place here a build of Intel Pin (I use the 3.7). 4 | 5 | Then run `build.py`. 6 | -------------------------------------------------------------------------------- /deferred_driller/instrumentation/build.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import glob 4 | import os 5 | 6 | pin_path = glob.glob('./pin-*') 7 | assert len(pin_path) == 1 8 | pin_path = pin_path[0] 9 | 10 | os.system("make PIN_ROOT=%s" % pin_path) 11 | -------------------------------------------------------------------------------- /deferred_driller/instrumentation/makefile: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # DO NOT EDIT THIS FILE! 4 | # 5 | ############################################################## 6 | 7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root. 8 | ifdef PIN_ROOT 9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config 10 | else 11 | CONFIG_ROOT := ../Config 12 | endif 13 | include $(CONFIG_ROOT)/makefile.config 14 | include makefile.rules 15 | include $(TOOLS_ROOT)/Config/makefile.default.rules 16 | 17 | ############################################################## 18 | # 19 | # DO NOT EDIT THIS FILE! 20 | # 21 | ############################################################## 22 | -------------------------------------------------------------------------------- /deferred_driller/instrumentation/makefile.rules: -------------------------------------------------------------------------------- 1 | ############################################################## 2 | # 3 | # This file includes all the test targets as well as all the 4 | # non-default build rules and test recipes. 5 | # 6 | ############################################################## 7 | 8 | 9 | ############################################################## 10 | # 11 | # Test targets 12 | # 13 | ############################################################## 14 | 15 | ###### Place all generic definitions here ###### 16 | 17 | # This defines tests which run tools of the same name. This is simply for convenience to avoid 18 | # defining the test name twice (once in TOOL_ROOTS and again in TEST_ROOTS). 19 | # Tests defined here should not be defined in TOOL_ROOTS and TEST_ROOTS. 20 | TEST_TOOL_ROOTS := PinTool 21 | 22 | # This defines the tests to be run that were not already defined in TEST_TOOL_ROOTS. 23 | TEST_ROOTS := 24 | 25 | # This defines the tools which will be run during the the tests, and were not already defined in 26 | # TEST_TOOL_ROOTS. 27 | TOOL_ROOTS := 28 | 29 | # This defines the static analysis tools which will be run during the the tests. They should not 30 | # be defined in TEST_TOOL_ROOTS. If a test with the same name exists, it should be defined in 31 | # TEST_ROOTS. 32 | # Note: Static analysis tools are in fact executables linked with the Pin Static Analysis Library. 33 | # This library provides a subset of the Pin APIs which allows the tool to perform static analysis 34 | # of an application or dll. Pin itself is not used when this tool runs. 35 | SA_TOOL_ROOTS := 36 | 37 | # This defines all the applications that will be run during the tests. 38 | APP_ROOTS := 39 | 40 | # This defines any additional object files that need to be compiled. 41 | OBJECT_ROOTS := 42 | 43 | # This defines any additional dlls (shared objects), other than the pintools, that need to be compiled. 44 | DLL_ROOTS := 45 | 46 | # This defines any static libraries (archives), that need to be built. 47 | LIB_ROOTS := 48 | 49 | ###### Define the sanity subset ###### 50 | 51 | # This defines the list of tests that should run in sanity. It should include all the tests listed in 52 | # TEST_TOOL_ROOTS and TEST_ROOTS excluding only unstable tests. 53 | SANITY_SUBSET := $(TEST_TOOL_ROOTS) $(TEST_ROOTS) 54 | 55 | 56 | ############################################################## 57 | # 58 | # Test recipes 59 | # 60 | ############################################################## 61 | 62 | # This section contains recipes for tests other than the default. 63 | # See makefile.default.rules for the default test rules. 64 | # All tests in this section should adhere to the naming convention: .test 65 | 66 | 67 | ############################################################## 68 | # 69 | # Build rules 70 | # 71 | ############################################################## 72 | 73 | # This section contains the build rules for all binaries that have special build rules. 74 | # See makefile.default.rules for the default build rules. 75 | -------------------------------------------------------------------------------- /deferred_driller/runner.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import tempfile 3 | import logging 4 | import glob 5 | import time 6 | import gdb 7 | import os 8 | 9 | import angr 10 | import angrgdb 11 | 12 | from .externals import apply_external_simprocs 13 | 14 | l = logging.getLogger("deferred_driller.runner") 15 | l.setLevel(logging.DEBUG) 16 | 17 | class PinRunner: 18 | def __init__(self, binary, argv=None, pin_path=None, pintool_path=None, use_simprocs=True): 19 | if pin_path is None: 20 | pin_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "instrumentation", "pin-*", "pin") 21 | pin_path = glob.glob(pin_path) 22 | assert len(pin_path) == 1 23 | pin_path = pin_path[0] 24 | if pintool_path is None: 25 | pintool_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "instrumentation", "obj-intel64", "PinTool.so") 26 | 27 | self._main_returns = None 28 | self.binary = binary 29 | self.cfg = None 30 | self.objs = None 31 | 32 | args = [ 33 | pin_path, 34 | "-appdebug", 35 | "-t", 36 | pintool_path, 37 | "--", 38 | binary, 39 | ] 40 | if argv is not None: 41 | args += argv 42 | 43 | os.environ["LD_BIND_NOW"] = "1" 44 | self.process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 45 | 46 | '''Application stopped until continued from debugger. 47 | Start GDB, then issue this command at the (gdb) prompt: 48 | target remote :56991''' 49 | self.process.stdout.readline() 50 | self.process.stdout.readline() 51 | cmd = self.process.stdout.readline().strip() 52 | 53 | gdb.execute(str(cmd, "utf-8")) 54 | 55 | self.pid = int(gdb.execute("monitor getpid", to_string=True).strip()) 56 | angrgdb.get_debugger().pid = self.pid 57 | 58 | gdb.execute("b *driller_init") 59 | gdb.execute("continue") 60 | gdb.execute("monitor enable_fork") 61 | 62 | if use_simprocs: 63 | apply_external_simprocs() 64 | angrgdb.set_memory_type(angrgdb.GET_ALL_DISCARD_CLE) 65 | 66 | def brk(self): 67 | return int(gdb.execute("monitor brk", to_string=True)) 68 | 69 | def fs(self): 70 | return int(gdb.execute("monitor fs", to_string=True)) 71 | 72 | def tracer(self, concrete_input): 73 | input_path = tempfile.mkstemp(dir="/dev/shm/", prefix="pin-tracer-input-")[1] 74 | with open(input_path, "wb") as f: 75 | f.write(concrete_input) 76 | output_path = tempfile.mkstemp(dir="/dev/shm/", prefix="pin-tracer-log-")[1] 77 | 78 | l.debug("tracing with input file: " + input_path) 79 | l.debug("tracing with output file: " + output_path) 80 | 81 | gdb.execute("monitor input " + input_path) 82 | gdb.execute("monitor out " + output_path) 83 | 84 | st = time.time() 85 | gdb.execute("continue") 86 | 87 | l.debug("traced in %f minutes" % ((time.time() -st)/ 60.0)) 88 | 89 | trace = [] 90 | crash_addr = None 91 | with open(output_path, "r") as f: 92 | while True: 93 | line = f.readline().strip() 94 | if len(line) == 0: 95 | continue 96 | if line.startswith("END_OF_TRACE"): 97 | if len(line) > len("END_OF_TRACE"): 98 | crash_addr = int(line.split()[1]) 99 | break 100 | trace.append(int(line)) 101 | 102 | return trace, crash_addr 103 | 104 | def _driller_init_bounds(self): 105 | project = angrgdb.load_project() 106 | if self.cfg is None: 107 | self.cfg = project.analyses.CFGFast() 108 | begin = 0xffffffffffffffff 109 | end = 0 110 | for bb in project.kb.functions["driller_init"].graph: 111 | begin = min(begin, bb.addr) 112 | end = max(end, bb.addr + bb.size) 113 | return begin, end 114 | 115 | def main_return_blocks(self): 116 | if self._main_returns is None: 117 | project = angrgdb.load_project() 118 | if self.cfg is None: 119 | self.cfg = project.analyses.CFGFast() 120 | self._main_returns = set() 121 | for bb in project.kb.functions["main"].blocks: 122 | for i in bb.capstone.insns: 123 | if i.mnemonic == "ret": 124 | self._main_returns.add(bb.addr) 125 | return self._main_returns 126 | 127 | def get_start_addr(self, trace): 128 | begin, end = self._driller_init_bounds() 129 | cnt = 0 130 | in_init = False 131 | for addr in trace: 132 | if addr >= begin and addr < end: 133 | in_init = True 134 | elif in_init: 135 | return addr 136 | 137 | -------------------------------------------------------------------------------- /deferred_driller/tracer.py: -------------------------------------------------------------------------------- 1 | import angr 2 | from typing import List 3 | import logging 4 | 5 | from angr.exploration_techniques import ExplorationTechnique 6 | from angr import BP_BEFORE, BP_AFTER, sim_options 7 | from angr.errors import AngrTracerError 8 | 9 | l = logging.getLogger("deferred_driller.tracer") 10 | 11 | 12 | class Tracer(ExplorationTechnique): 13 | def __init__(self, 14 | start_addr, 15 | trace=None, 16 | resiliency=False, 17 | keep_predecessors=1, 18 | crash_addr=None): 19 | super(Tracer, self).__init__() 20 | self._trace = trace 21 | self._resiliency = resiliency 22 | self._crash_addr = crash_addr 23 | 24 | # keep track of the last basic block we hit 25 | self.predecessors = [None] * keep_predecessors # type: List[angr.SimState] 26 | self.last_state = None 27 | self.start_addr = start_addr 28 | 29 | # whether we should follow the trace 30 | self._no_follow = self._trace is None 31 | 32 | def setup(self, simgr): 33 | simgr.populate('missed', []) 34 | simgr.populate('traced', []) 35 | simgr.populate('crashed', []) 36 | 37 | self.project = simgr._project 38 | if len(simgr.active) != 1: 39 | raise AngrTracerError("Tracer is being invoked on a SimulationManager without exactly one active state") 40 | 41 | for idx, addr in enumerate(self._trace): 42 | if addr == self.start_addr: 43 | break 44 | 45 | # step to start addr 46 | while self._trace and self._trace[idx] != simgr.one_active.addr: 47 | simgr.step(extra_stop_points={self._trace[idx]}) 48 | if len(simgr.active) == 0: 49 | raise AngrTracerError("Could not step to the first address of the trace - simgr is empty") 50 | elif len(simgr.active) > 1: 51 | raise AngrTracerError("Could not step to the first address of the trace - state split") 52 | 53 | # initialize the state info 54 | simgr.one_active.globals['trace_idx'] = idx 55 | simgr.one_active.globals['sync_idx'] = None 56 | 57 | # enable lazy solves - don't touch z3 unless I tell you so 58 | simgr.one_active.options.add(sim_options.LAZY_SOLVES) 59 | 60 | def complete(self, simgr): 61 | return bool(simgr.traced) 62 | 63 | def filter(self, simgr, state, **kwargs): 64 | # check completion 65 | if state.globals['trace_idx'] >= len(self._trace) - 1: 66 | # do crash windup if necessary 67 | if self._crash_addr is not None: 68 | simgr.populate('crashed', [self._crash_windup(state)]) 69 | 70 | return 'traced' 71 | 72 | return simgr.filter(state, **kwargs) 73 | 74 | def step(self, simgr, stash='active', **kwargs): 75 | simgr.drop(stash='missed') 76 | return simgr.step(stash=stash, **kwargs) 77 | 78 | def step_state(self, simgr, state, **kwargs): 79 | # maintain the predecessors list 80 | self.predecessors.append(state) 81 | self.predecessors.pop(0) 82 | 83 | # perform the step. ask qemu to stop at the termination point. 84 | stops = set(kwargs.pop('extra_stop_points', ())) | {self._trace[-1]} 85 | succs_dict = simgr.step_state(state, extra_stop_points=stops, **kwargs) 86 | succs = succs_dict[None] 87 | 88 | # follow the trace 89 | if len(succs) == 1: 90 | self._update_state_tracking(succs[0]) 91 | elif len(succs) == 0: 92 | raise Exception("All states disappeared!") 93 | else: 94 | succ = self._pick_correct_successor(succs) 95 | succs_dict[None] = [succ] 96 | succs_dict['missed'] = [s for s in succs if s is not succ] 97 | 98 | assert len(succs_dict[None]) == 1 99 | return succs_dict 100 | 101 | def _pick_correct_successor(self, succs): 102 | # there's been a branch of some sort. Try to identify which state stayed on the trace. 103 | assert len(succs) > 0 104 | idx = succs[0].globals['trace_idx'] 105 | 106 | res = [] 107 | for succ in succs: 108 | if succ.addr == self._trace[idx + 1]: 109 | res.append(succ) 110 | 111 | if not res: 112 | raise Exception("No states followed the trace?") 113 | 114 | if len(res) > 1: 115 | raise Exception("The state split but several successors have the same (correct) address?") 116 | 117 | self._update_state_tracking(res[0]) 118 | return res[0] 119 | 120 | def _update_state_tracking(self, state: 'angr.SimState'): 121 | idx = state.globals['trace_idx'] 122 | sync = state.globals['sync_idx'] 123 | 124 | if state.history.recent_block_count > 1: 125 | # multiple blocks were executed this step. they should follow the trace *perfectly* 126 | # or else something is up 127 | # "something else" so far only includes concrete transmits, or... 128 | # TODO: https://github.com/unicorn-engine/unicorn/issues/874 129 | # ^ this means we will see desyncs of the form unicorn suddenly skips a bunch of qemu blocks 130 | assert state.history.recent_block_count == len(state.history.recent_bbl_addrs) 131 | 132 | if sync is not None: 133 | raise Exception("TODO") 134 | 135 | for addr in state.history.recent_bbl_addrs: 136 | if addr == state.unicorn.transmit_addr: 137 | continue 138 | 139 | if self._compare_addr(self._trace[idx], addr): 140 | idx += 1 141 | else: 142 | raise Exception('BUG! Please investivate the claim in the comment above me') 143 | 144 | idx -= 1 # use normal code to do the last synchronization 145 | 146 | if sync is not None: 147 | if self._compare_addr(self._trace[sync], state.addr): 148 | state.globals['trace_idx'] = sync 149 | state.globals['sync_idx'] = None 150 | else: 151 | raise Exception("Trace did not sync after 1 step, you knew this would happen") 152 | 153 | elif self._compare_addr(self._trace[idx + 1], state.addr): 154 | # normal case 155 | state.globals['trace_idx'] = idx + 1 156 | elif self.project.loader._extern_object is not None and self.project.loader.extern_object.contains_addr(state.addr): 157 | # externs 158 | proc = self.project.hooked_by(state.addr) 159 | if proc is None: 160 | raise Exception("Extremely bad news: we're executing an unhooked address in the externs space") 161 | if proc.is_continuation: 162 | orig_addr = self.project.loader.find_symbol(proc.display_name).rebased_addr 163 | orig_trace_addr = orig_addr 164 | if 0 <= self._trace[idx + 1] - orig_trace_addr <= 0x10000: 165 | # this is fine. we do nothing and then next round it'll get handled by the is_hooked(state.history.addr) case 166 | pass 167 | else: 168 | # this may also be triggered as a consequence of the unicorn issue linked above 169 | raise Exception("BUG: State is returning to a continuation that isn't its own???") 170 | else: 171 | # see above 172 | pass 173 | elif state.history.jumpkind.startswith('Ijk_Sys'): 174 | # syscalls 175 | state.globals['sync_idx'] = idx + 1 176 | elif state.history.jumpkind.startswith('Ijk_Exit'): 177 | # termination! 178 | state.globals['trace_idx'] = len(self._trace) - 1 179 | elif self.project.is_hooked(state.history.addr): 180 | # simprocedures - is this safe..? 181 | self._fast_forward(state) 182 | elif self._analyze_misfollow(state, idx): 183 | # misfollow analysis will set a sync point somewhere if it succeeds 184 | pass 185 | else: 186 | raise AngrTracerError("Oops! angr did not follow the trace.") 187 | 188 | l.debug("Trace: %d/%d", state.globals['trace_idx'], len(self._trace)) 189 | 190 | def _compare_addr(self, trace_addr, state_addr): 191 | return trace_addr == state_addr 192 | 193 | def _analyze_misfollow(self, state, idx): 194 | angr_addr = state.addr 195 | trace_addr = self._trace[idx + 1] 196 | l.info("Misfollow: angr says %#x, trace says %#x", angr_addr, trace_addr) 197 | 198 | # TODO: add rep handling 199 | 200 | if 'IRSB' in state.history.recent_description: 201 | last_block = state.block(state.history.bbl_addrs[-1]) 202 | if self._trace[idx + 1] in last_block.instruction_addrs: 203 | # we have disparate block sizes! 204 | # specifically, the angr block size is larger than the trace's. 205 | # allow the trace to catch up. 206 | while self._trace[idx + 1] in last_block.instruction_addrs: 207 | idx += 1 208 | 209 | if self._trace[idx + 1] == state.addr: 210 | state.globals['trace_idx'] = idx + 1 211 | return True 212 | else: 213 | state.globals['trace_idx'] = idx 214 | #state.globals['trace_desync'] = True 215 | return True 216 | 217 | prev_addr = state.history.bbl_addrs[-1] 218 | prev_obj = self.project.loader.find_object_containing(prev_addr) 219 | 220 | if state.block(prev_addr).vex.jumpkind == 'Ijk_Call': 221 | l.info('...trying to sync at callsite') 222 | return self._sync_callsite(state, idx, prev_addr) 223 | 224 | if prev_addr in getattr(prev_obj, 'reverse_plt', ()): 225 | prev_prev_addr = state.history.bbl_addrs[-2] 226 | if not prev_obj.contains_addr(prev_prev_addr) or state.block(prev_prev_addr).vex.jumpkind != 'Ijk_Call': 227 | l.info('...weird interaction with PLT stub, aborting analysis') 228 | return False 229 | l.info('...trying to sync at PLT callsite') 230 | return self._sync_callsite(state, idx, prev_prev_addr) 231 | 232 | l.info('...all analyses failed.') 233 | return False 234 | 235 | def _sync_callsite(self, state, idx, callsite_addr): 236 | retsite_addr = state.block(callsite_addr).size + callsite_addr 237 | try: 238 | retsite_idx = self._trace.index(retsite_addr, idx) 239 | except ValueError: 240 | l.error("Trying to fix desync at callsite but return address does not appear in trace") 241 | return False 242 | 243 | state.globals['sync_idx'] = retsite_idx 244 | state.globals['trace_idx'] = idx 245 | return True 246 | 247 | def _fast_forward(self, state): 248 | target_addr = state.addr 249 | try: 250 | target_idx = self._trace.index(target_addr, state.globals['trace_idx'] + 1) 251 | except ValueError: 252 | raise AngrTracerError("Trace failed to synchronize during fast forward? You might want to unhook %s." % (self.project.hooked_by(state.history.addr).display_name)) 253 | else: 254 | state.globals['trace_idx'] = target_idx 255 | 256 | def _crash_windup(self, state): 257 | # first check: are we just executing user-controlled code? 258 | if not state.ip.symbolic and state.mem[state.ip].char.resolved.symbolic: 259 | l.debug("executing input-related code") 260 | return state 261 | 262 | # before we step through and collect the actions we have to set 263 | # up a special case for address concretization in the case of a 264 | # controlled read or write vulnerability. 265 | bp1 = state.inspect.b( 266 | 'address_concretization', 267 | BP_BEFORE, 268 | action=self._check_add_constraints) 269 | 270 | bp2 = state.inspect.b( 271 | 'address_concretization', 272 | BP_AFTER, 273 | action=self._grab_concretization_results) 274 | 275 | # step to the end of the crashing basic block, 276 | # to capture its actions with those breakpoints 277 | # TODO should this be using simgr.successors? 278 | state.step() 279 | 280 | # Add the constraints from concretized addrs back 281 | for var, concrete_vals in state.preconstrainer.address_concretization: 282 | if len(concrete_vals) > 0: 283 | l.debug("constraining addr to be %#x", concrete_vals[0]) 284 | state.add_constraints(var == concrete_vals[0]) 285 | 286 | # then we step again up to the crashing instruction 287 | inst_addrs = state.block().instruction_addrs 288 | inst_cnt = len(inst_addrs) 289 | 290 | if inst_cnt == 0: 291 | insts = 0 292 | elif self._crash_addr in inst_addrs: 293 | insts = inst_addrs.index(self._crash_addr) + 1 294 | else: 295 | insts = inst_cnt - 1 296 | 297 | l.debug("windup step...") 298 | succs = state.step(num_inst=insts).flat_successors 299 | 300 | if len(succs) > 0: 301 | if len(succs) > 1: 302 | succs = [s for s in succs if s.solver.satisfiable()] 303 | state = succs[0] 304 | self.last_state = state 305 | 306 | # remove the preconstraints 307 | l.debug("removing preconstraints") 308 | state.preconstrainer.remove_preconstraints() 309 | 310 | l.debug("reconstraining... ") 311 | state.preconstrainer.reconstrain() 312 | 313 | # now remove our breakpoints since other people might not want them 314 | state.inspect.remove_breakpoint("address_concretization", bp1) 315 | state.inspect.remove_breakpoint("address_concretization", bp2) 316 | 317 | l.debug("final step...") 318 | succs = state.step() 319 | successors = succs.flat_successors + succs.unconstrained_successors 320 | return successors[0] 321 | 322 | # the below are utility functions for crash windup 323 | 324 | def _grab_concretization_results(self, state): 325 | """ 326 | Grabs the concretized result so we can add the constraint ourselves. 327 | """ 328 | # only grab ones that match the constrained addrs 329 | if self._should_add_constraints(state): 330 | addr = state.inspect.address_concretization_expr 331 | result = state.inspect.address_concretization_result 332 | if result is None: 333 | l.warning("addr concretization result is None") 334 | return 335 | state.preconstrainer.address_concretization.append((addr, result)) 336 | 337 | def _check_add_constraints(self, state): 338 | """ 339 | Obnoxious way to handle this, should ONLY be called from crash monitor. 340 | """ 341 | # for each constrained addrs check to see if the variables match, 342 | # if so keep the constraints 343 | state.inspect.address_concretization_add_constraints = self._should_add_constraints(state) 344 | 345 | def _should_add_constraints(self, state): 346 | """ 347 | Check to see if the current address concretization variable is any of the registered 348 | constrained_addrs we want to allow concretization for 349 | """ 350 | expr = state.inspect.address_concretization_expr 351 | hit_indices = self._to_indices(state, expr) 352 | 353 | for action in state.preconstrainer._constrained_addrs: 354 | var_indices = self._to_indices(state, action.addr) 355 | if var_indices == hit_indices: 356 | return True 357 | return False 358 | 359 | @staticmethod 360 | def _to_indices(state, expr): 361 | indices = [] 362 | for descr in state.solver.describe_variables(expr): 363 | if descr[0] == 'file' and descr[1] == state.posix.stdin.ident: 364 | if descr[2] == 'packet': 365 | indices.append(descr[3]) 366 | elif type(descr[2]) is int: 367 | indices.append(descr[2]) 368 | 369 | return sorted(indices) 370 | -------------------------------------------------------------------------------- /driller_init.s: -------------------------------------------------------------------------------- 1 | .text 2 | .globl driller_init 3 | driller_init: 4 | pushq %rax 5 | start_driller_loop: 6 | callq fork 7 | testl %eax, %eax 8 | je exit_driller_loop 9 | int3 10 | cmpl $0xabadcafe, %eax 11 | jne start_driller_loop 12 | exit_driller_loop: 13 | popq %rax 14 | retq 15 | -------------------------------------------------------------------------------- /example/Makefile: -------------------------------------------------------------------------------- 1 | AFL_PATH=~/afl/ 2 | 3 | all: 4 | clang -no-pie -fno-stack-protector -D__DRILLER test1.c ../driller_init.s -o test1_driller 5 | $(AFL_PATH)/afl-clang-fast -no-pie -fno-stack-protector test1.c -o test1_afl 6 | 7 | musl: 8 | musl-gcc -no-pie -fno-stack-protector -D__DRILLER test1.c ../driller_init.s -o test1_driller 9 | $(AFL_PATH)/afl-clang-fast -no-pie -fno-stack-protector test1.c -o test1_afl 10 | 11 | -------------------------------------------------------------------------------- /example/drill.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is a very dirty example. 3 | 4 | Commands to run in two different terminals: 5 | 6 | ~/afl/afl-fuzz -M master -i inputs -o ./output -t 20000 -- ./test1_afl 7 | 8 | gdb -q -nh --batch -x drill.py ./test1_driller 9 | 10 | Wait AFL to start fuzzing before to run the second command. 11 | 12 | ''' 13 | 14 | import sys, os 15 | cwd = os.path.dirname(os.path.realpath(__file__)) 16 | sys.path.append(cwd + "/../") 17 | 18 | import json 19 | import time 20 | import base64 21 | import tempfile 22 | import hashlib 23 | import logging 24 | 25 | from deferred_driller import * 26 | 27 | l = logging.getLogger("drill_this_shit") 28 | l.setLevel(logging.INFO) 29 | 30 | BINARY = "./test1_driller" 31 | STDIN_BOUND = True 32 | EXPLORE_FOUND = False 33 | MINIMIZE = True 34 | 35 | os.system("mkdir -p %s/output/driller/queue" % cwd) 36 | 37 | if os.path.exists(os.path.basename(BINARY) + "-deferred-driller-data.json"): 38 | restore = json.load(open(os.path.basename(BINARY) + "-deferred-driller-data.json")) 39 | processed = restore["processed"] 40 | transitions = restore["transitions"] 41 | index = restore["index"] 42 | else: 43 | processed = [] 44 | transitions = [] 45 | index = 0 46 | 47 | runner = PinRunner(BINARY, use_simprocs=True) 48 | 49 | while True: 50 | for subd in os.listdir(cwd + "/output/"): 51 | if not os.path.isdir(cwd + "/output/" + subd): 52 | continue 53 | 54 | stats = open(cwd + "/output/%s/fuzzer_stats"%subd) 55 | pfavs = None 56 | for line in stats: 57 | if line.startswith("pending_favs"): 58 | pfavs = line.split(":")[1].strip() 59 | break 60 | if pfavs != "0": 61 | l.debug("%s pending_favs != 0" % subd) 62 | break 63 | 64 | for f in os.listdir(cwd + "/output/%s/queue/" % subd): 65 | if not os.path.exists(cwd + "/output/%s/queue/" % subd + f) or os.path.isdir(cwd + "/output/%s/queue/" % subd + f): 66 | continue 67 | target = f[:len("id:......")] 68 | l.debug("targetting " + cwd + "/output/%s/queue/" % subd + f) 69 | 70 | if MINIMIZE: 71 | minimized = tempfile.mkstemp(dir="/tmp/", prefix="driller-minimized-")[1] 72 | r = os.system("~/afl/afl-tmin -t 20000 -i '%s' -o %s -- %s" % (cwd + "/output/%s/queue/" % subd + f, minimized, BINARY.replace("driller", "afl"))) 73 | l.debug("afl-tmin ret val:", r) 74 | if r == 0: 75 | input_data = open(minimized, "rb").read() 76 | else: 77 | input_data = open(cwd + "/output/%s/queue/" % subd + f, "rb").read() 78 | os.unlink(minimized) 79 | else: 80 | input_data = open(cwd + "/output/%s/queue/" % subd + f, "rb").read() 81 | 82 | inp_hash = hashlib.md5(input_data).hexdigest() 83 | if inp_hash in processed: 84 | l.debug(f + " already processed (md5 = %s)" % inp_hash) 85 | continue 86 | processed.append(inp_hash) 87 | 88 | ### hack for strncmp not satisfiable diverted state, see https://github.com/shellphish/driller/issues/70 89 | if len(input_data) < 100: 90 | input_data = input_data.ljust(100, b"\x00") # this is shit cause this can alter the behaviour of a generic program and slowdown the exploration 91 | 92 | bmap = open(cwd + "/output/master/fuzz_bitmap", "rb").read() 93 | d = Driller(runner, input_data, bmap, explore_found=EXPLORE_FOUND, stdin_bound=STDIN_BOUND) 94 | 95 | try: 96 | for o in d.drill_generator(): 97 | if o[0] in transitions: 98 | continue 99 | index += 1 100 | out = open(cwd + "/output/driller/queue/id:%06d,src:%s" % (index, target), "wb") 101 | out.write(o[1]) 102 | out.close() 103 | transitions.append(o[0]) 104 | except Exception as ee: 105 | l.warning("!!! ERROR !!!") 106 | import traceback 107 | traceback.print_exc() 108 | 109 | l.info("saving stuffs...") 110 | with open(os.path.basename(BINARY) + "-deferred-driller-data.json", "w") as dmp: 111 | json.dump({"processed": processed, "transitions": transitions, "index": index}, dmp) 112 | 113 | 114 | -------------------------------------------------------------------------------- /example/inputs/dumb: -------------------------------------------------------------------------------- 1 | a 2 | -------------------------------------------------------------------------------- /example/test1.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef __DRILLER 8 | void driller_init(); 9 | #endif 10 | 11 | int main(int argc, char** argv) { 12 | 13 | char buf[100]; 14 | memset(buf, 0, 100); 15 | 16 | sleep(10); 17 | 18 | #ifdef __AFL_HAVE_MANUAL_CONTROL 19 | __AFL_INIT(); 20 | #endif 21 | #ifdef __DRILLER 22 | driller_init(); 23 | #endif 24 | 25 | read(0, buf, 100); 26 | 27 | fprintf(stderr, "%s\n", buf); 28 | 29 | if(strncmp(buf, "pippo", 5) == 0) { 30 | if(strncmp((char*)buf +6, "franco", 6) == 0) { 31 | int i; 32 | int s = 0; 33 | for(i = 0; i < 12; ++i) 34 | s += buf[i]; 35 | if(s == 1217) 36 | abort(); 37 | } 38 | } 39 | 40 | return 0; 41 | 42 | } 43 | -------------------------------------------------------------------------------- /example/test1_afl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andreafioraldi/deferred_driller/92e7d773c78c0f791118984741cb998f3f9ad626/example/test1_afl -------------------------------------------------------------------------------- /example/test1_driller: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andreafioraldi/deferred_driller/92e7d773c78c0f791118984741cb998f3f9ad626/example/test1_driller -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | __author__ = "Andrea Fioraldi" 4 | __copyright__ = "Copyright 2019, Andrea Fioraldi" 5 | __license__ = "BSD 2-Clause" 6 | __email__ = "andreafioraldi@gmail.com" 7 | 8 | from setuptools import setup 9 | 10 | VER = "1.0.0" 11 | 12 | setup( 13 | name='deferred_driller', 14 | version=VER, 15 | license=__license__, 16 | description='My version of driller using Intel PIN and angrgdb. In "theory" can work with AFL in deferred and persistent mode.', 17 | author=__author__, 18 | author_email=__email__, 19 | url='https://github.com/andreafioraldi/deferred_driller', 20 | download_url = 'https://github.com/andreafioraldi/deferred_driller/archive/' + VER + '.tar.gz', 21 | package_dir={'deferred_driller': 'deferred_driller'}, 22 | packages=['deferred_driller'], 23 | install_requires=[ 24 | 'angrgdb' 25 | ], 26 | ) 27 | --------------------------------------------------------------------------------