├── .gitignore
├── LICENSE
├── README.md
├── deferred_driller
    ├── __init__.py
    ├── config.py
    ├── core.py
    ├── exploration.py
    ├── externals.py
    ├── instrumentation
    │   ├── PinTool.cpp
    │   ├── README.md
    │   ├── build.py
    │   ├── makefile
    │   └── makefile.rules
    ├── runner.py
    └── tracer.py
├── driller_init.s
├── example
    ├── Makefile
    ├── drill.py
    ├── inputs
    │   └── dumb
    ├── test1.c
    ├── test1_afl
    └── test1_driller
└── setup.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 2-Clause License
 2 | 
 3 | Copyright (c) 2019, Andrea Fioraldi
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # deferred_driller
 2 | My version of driller using Intel PIN and angrgdb. In "theory" can work with AFL in deferred and persistent mode.
 3 | 
 4 | This is hugely based on the real [driller](https://github.com/shellphish/driller) so they deserve the majority of the credits.
 5 | 
 6 | What's new?
 7 | 
 8 | The tracer is based on a Pintool that forks from the deferred starting point and collects the trace of the children.
 9 | So no more aslr slides and avoiding to explore in angr the code before the deferred starting point may be an huge improvement when such code is complex.
10 | 
11 | ## howto
12 | 
13 | Look at the example. All the files, also the Makefile.
14 | 
15 | This will not work on your code. Or at least can work with a low probability. This is a work in progress PoC of a bored student in the middle of the exams session.
16 | 
17 | ### dependencies
18 | 
19 | All the following dependencies must be installed from the respective git repo and not from pip at the moment.
20 | 
21 | + [archinfo](https://github.com/angr/archinfo)
22 | + [pyvex](https://github.com/angr/pyvex)
23 | + [ailment](https://github.com/angr/ailment)
24 | + [claripy](https://github.com/angr/claripy)
25 | + [cle](https://github.com/angr/cle)
26 | + [angr](https://github.com/angr/angr)
27 | 
28 | + [angrdbg](https://github.com/andreafioraldi/angrdbg)
29 | + [angrgdb](https://github.com/andreafioraldi/angrgdb)
30 | 
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/deferred_driller/__init__.py:
--------------------------------------------------------------------------------
1 | from .runner import PinRunner
2 | from .core import Driller
3 | 


--------------------------------------------------------------------------------
/deferred_driller/config.py:
--------------------------------------------------------------------------------
 1 | ### Redis Options
 2 | REDIS_HOST=None
 3 | REDIS_PORT=None
 4 | REDIS_DB=None
 5 | 
 6 | ### Celery Options
 7 | BROKER_URL= None
 8 | 
 9 | CELERY_ROUTES = None
10 | 
11 | ### Environment Options
12 | 
13 | # directory contain driller-qemu versions, relative to the directoy node.py is invoked in
14 | QEMU_DIR=None
15 | 
16 | # directory containing the binaries, used by the driller node to find binaries
17 | BINARY_DIR=None
18 | # directory containing the pcap corpus
19 | PCAP_DIR=None
20 | # directory containing the seeds for fuzzing
21 | SEED_DIR=None
22 | 
23 | ### Driller options
24 | # how long to drill before giving up in seconds
25 | DRILL_TIMEOUT=None
26 | 
27 | MEM_LIMIT=None
28 | 
29 | # where to write a debug file that contains useful debugging information like
30 | # AFL's fuzzing bitmap, input used, binary path, time started.
31 | # Uses following naming convention:
32 | #   <binary_basename>_<input_str_md5>.py
33 | DEBUG_DIR = None
34 | 
35 | ### Fuzzer options
36 | 
37 | # how often to check for crashes in seconds
38 | CRASH_CHECK_INTERVAL=None
39 | 
40 | # how long to fuzz before giving up in seconds
41 | FUZZ_TIMEOUT=None
42 | 
43 | # how long before we kill a dictionary creation process
44 | DICTIONARY_TIMEOUT=None
45 | 
46 | # how many fuzzers should be spun up when a fuzzing job is received
47 | FUZZER_INSTANCES=None
48 | 
49 | # where the fuzzer should place it's results on the filesystem
50 | FUZZER_WORK_DIR=None
51 | 


--------------------------------------------------------------------------------
/deferred_driller/core.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | import signal
  4 | import hashlib
  5 | import resource
  6 | import pickle
  7 | import logging
  8 | import binascii
  9 | 
 10 | import angr
 11 | import angrgdb
 12 | 
 13 | import progressbar
 14 | 
 15 | from .tracer import Tracer
 16 | from .exploration import DrillerCore
 17 | from . import config
 18 | 
 19 | l = logging.getLogger("deferred_driller.core")
 20 | l.setLevel(logging.INFO)
 21 | 
 22 | class Driller(object):
 23 |     """
 24 |     Driller object, symbolically follows an input looking for new state transitions.
 25 |     """
 26 | 
 27 |     def __init__(self, runner, input_str, fuzz_bitmap=None, tag=None, redis=None, hooks=None, exclude_simprocs=[], stdin_bound=True, sync_brk=True, sync_fs=True, explore_found=True, zero_fill=False):
 28 |         """
 29 |         :param runner           : The PinRunner instance.
 30 |         :param input_str        : Input string to feed to the binary.
 31 |         :param fuzz_bitmap      : AFL's bitmap of state transitions (defaults to empty).
 32 |         :param redis            : redis.Redis instance for coordinating multiple Driller instances.
 33 |         :param hooks            : Dictionary of addresses to simprocedures.
 34 |         :param use_simprocs     : Use simprocedures
 35 |         :param exclude_simprocs : List of names of imports to exclude from simprocedures
 36 |         :param stdin_bound      : Bound read stdin
 37 |         :param sync_brk         : Synchronize brk value
 38 |         :param sync_fs          : Synchronize fs base value
 39 |         :param explore_found    : Explore the deferred state to some extent 
 40 |         """
 41 | 
 42 |         self.runner           = runner
 43 |         # Redis channel identifier.
 44 |         self.identifier       = os.path.basename(runner.binary)
 45 |         self.input            = input_str
 46 |         self.fuzz_bitmap      = fuzz_bitmap
 47 |         self.tag              = tag
 48 |         self.redis            = redis
 49 |         self.exclude_simprocs = exclude_simprocs
 50 |         self.stdin_bound      = stdin_bound
 51 |         self.sync_brk         = sync_brk
 52 |         self.sync_fs          = sync_fs
 53 |         self.explore_found    = explore_found
 54 |         self.zero_fill        = zero_fill
 55 |         self.base = os.path.join(os.path.dirname(__file__), "..")
 56 | 
 57 |         # The simprocedures.
 58 |         self._hooks = {} if hooks is None else hooks
 59 | 
 60 |         # The driller core, which is now an exploration technique in angr.
 61 |         self._core = None
 62 | 
 63 |         # Start time, set by drill method.
 64 |         self.start_time = time.time()
 65 | 
 66 |         # Set of all the generated inputs.
 67 |         self._generated = set()
 68 | 
 69 |         # Set the memory limit specified in the config.
 70 |         if config.MEM_LIMIT is not None:
 71 |             resource.setrlimit(resource.RLIMIT_AS, (config.MEM_LIMIT, config.MEM_LIMIT))
 72 | 
 73 |         l.debug("[%s] drilling started on %s.", self.identifier, time.ctime(self.start_time))
 74 | 
 75 | ### DRILLING
 76 | 
 77 |     def drill(self):
 78 |         """
 79 |         Perform the drilling, finding more code coverage based off our existing input base.
 80 |         """
 81 | 
 82 |         # Don't re-trace the same input.
 83 |         if self.redis and self.redis.sismember(self.identifier + '-traced', self.input):
 84 |             return -1
 85 | 
 86 |         # Write out debug info if desired.
 87 |         if l.level == logging.DEBUG and config.DEBUG_DIR:
 88 |             self._write_debug_info()
 89 |         elif l.level == logging.DEBUG and not config.DEBUG_DIR:
 90 |             l.warning("Debug directory is not set. Will not log fuzzing bitmap.")
 91 | 
 92 |         # Update traced.
 93 |         if self.redis:
 94 |             self.redis.sadd(self.identifier + '-traced', self.input)
 95 | 
 96 |         list(self._drill_input())
 97 | 
 98 |         if self.redis:
 99 |             return len(self._generated)
100 |         else:
101 |             return self._generated
102 | 
103 |     def drill_generator(self):
104 |         """
105 |         A generator interface to the actual drilling.
106 |         """
107 | 
108 |         # Set up alarm for timeouts.
109 |         if config.DRILL_TIMEOUT is not None:
110 |             signal.alarm(config.DRILL_TIMEOUT)
111 | 
112 |         for i in self._drill_input():
113 |             yield i
114 | 
115 |     def _drill_input(self):
116 |         """
117 |         Symbolically step down a path with a tracer, trying to concretize inputs for unencountered
118 |         state transitions.
119 |         """
120 |         
121 |         p = angrgdb.load_project()
122 |         
123 |         trace, crash_addr = self.runner.tracer(self.input)
124 |         start_addr = self.runner.get_start_addr(trace)
125 |         
126 |         for bb in self.runner.main_return_blocks():
127 |             try:
128 |                 idx = trace.index(bb)
129 |             except ValueError:
130 |                 continue
131 |             trace = trace[:idx +1]
132 |         
133 |         for addr, proc in self._hooks.items():
134 |             p.hook(addr, proc)
135 |             l.debug("Hooking %#x -> %s...", addr, proc.display_name)
136 |         
137 |         s = angrgdb.StateShot(sync_brk=False, concrete_imports=self.exclude_simprocs, stdin=angr.SimFileStream)
138 |         
139 |         if self.zero_fill:
140 |             s.options.add(angr.options.ZERO_FILL_UNCONSTRAINED_MEMORY)
141 |             s.options.add(angr.options.ZERO_FILL_UNCONSTRAINED_REGISTERS)
142 |         
143 |         if self.sync_brk: # don't use angrdbg brk but ask for it to the runner
144 |             s.posix.set_brk(s.solver.BVV(self.runner.brk(), p.arch.bits))
145 |         if self.sync_fs:
146 |             s.regs.fs = self.runner.fs()
147 |         s.regs.rax = 0xabadcafe #flag for exit driller_init
148 |         
149 |         s.preconstrainer.preconstrain_file(self.input, s.posix.stdin, self.stdin_bound)
150 | 
151 |         simgr = p.factory.simulation_manager(s, save_unsat=True, hierarchy=False, save_unconstrained=True)
152 |         
153 |         start_addr = self.runner.get_start_addr(trace)
154 |         
155 |         t = Tracer(start_addr, trace=trace, crash_addr=crash_addr)
156 |         self._core = DrillerCore(trace=trace, fuzz_bitmap=self.fuzz_bitmap)
157 | 
158 |         simgr.use_technique(t)
159 |         simgr.use_technique(angr.exploration_techniques.Oppologist())
160 |         simgr.use_technique(self._core)
161 | 
162 |         self._set_concretizations(simgr.one_active)
163 | 
164 |         l.info("Drilling into %r.", self.input)
165 |         l.debug("Input is %r.", self.input)
166 |         
167 |         start_addr_idx = trace.index(start_addr)
168 |         with progressbar.ProgressBar(max_value=(len(trace) - start_addr_idx)) as bar:
169 |             while simgr.active and simgr.one_active.globals['trace_idx'] < len(trace) - 1:
170 |                 simgr.step()
171 |                 #print("RIP", simgr.one_active.regs.rip)
172 |                 #print("TRACE", simgr.one_active.globals['trace_idx'], hex(trace[simgr.one_active.globals['trace_idx']]))
173 |                 bar.update(simgr.one_active.globals['trace_idx'] - start_addr_idx)
174 |                 l.debug("stepped to " + str(simgr.one_active.regs.rip))
175 |                 
176 |                 if len(simgr.unconstrained) > 0:
177 |                     while len(simgr.unconstrained) > 0:
178 |                         state = simgr.unconstrained.pop(0)
179 |                         l.debug("Found a unconstrained state, exploring to some extent.")
180 |                         w = self._writeout(state.history.bbl_addrs[-1], state)
181 |                         if w is not None:
182 |                             yield w
183 |                 
184 |                 # Check here to see if a crash has been found.
185 |                 if self.redis and self.redis.sismember(self.identifier + '-finished', True):
186 |                     return
187 | 
188 |                 if 'diverted' not in simgr.stashes:
189 |                     continue
190 | 
191 |                 while simgr.diverted:
192 |                     state = simgr.diverted.pop(0)
193 |                     l.debug("Found a diverted state, exploring to some extent.")
194 |                     w = self._writeout(state.history.bbl_addrs[-1], state)
195 |                     if w is not None:
196 |                         yield w
197 |                     if self.explore_found:
198 |                         for i in self._symbolic_explorer_stub(state):
199 |                             yield i
200 | 
201 | ### EXPLORER
202 | 
203 |     def _symbolic_explorer_stub(self, state):
204 |         # Create a new simulation manager and step it forward up to 1024
205 |         # accumulated active states or steps.
206 |         steps = 0
207 |         accumulated = 1
208 | 
209 |         p = state.project
210 |         state = state.copy()
211 |         try:
212 |             state.options.remove(angr.options.LAZY_SOLVES)
213 |         except KeyError:
214 |             pass
215 |         simgr = p.factory.simulation_manager(state, hierarchy=False)
216 | 
217 |         l.info("[%s] started symbolic exploration at %s.", self.identifier, time.ctime())
218 | 
219 |         while len(simgr.active) and accumulated < 1024:
220 |             simgr.step()
221 |             steps += 1
222 | 
223 |             # Dump all inputs.
224 |             accumulated = steps * (len(simgr.active) + len(simgr.deadended))
225 | 
226 |         l.info("[%s] stopped symbolic exploration at %s.", self.identifier, time.ctime())
227 | 
228 |         for dumpable in simgr.deadended:
229 |             try:
230 |                 if dumpable.satisfiable():
231 |                     w = self._writeout(dumpable.history.bbl_addrs[-1], dumpable)
232 |                     if w is not None:
233 |                         yield w
234 | 
235 |             # If the state we're trying to dump wasn't actually satisfiable.
236 |             except IndexError:
237 |                 pass
238 | 
239 | ### UTILS
240 | 
241 |     @staticmethod
242 |     def _set_concretizations(state):
243 |         if state.project.loader.main_object.os == 'cgc':
244 |             flag_vars = set()
245 |             for b in state.cgc.flag_bytes:
246 |                 flag_vars.update(b.variables)
247 | 
248 |             state.unicorn.always_concretize.update(flag_vars)
249 | 
250 |         # Let's put conservative thresholds for now.
251 |         state.unicorn.concretization_threshold_memory = 50000
252 |         state.unicorn.concretization_threshold_registers = 50000
253 | 
254 |     def _in_catalogue(self, length, prev_addr, next_addr):
255 |         """
256 |         Check if a generated input has already been generated earlier during the run or by another
257 |         thread.
258 | 
259 |         :param length   : Length of the input.
260 |         :param prev_addr: The source address in the state transition.
261 |         :param next_addr: The destination address in the state transition.
262 | 
263 |         :return: boolean describing whether or not the input generated is redundant.
264 |         """
265 | 
266 |         key = '%x,%x,%x\n' % (length, prev_addr, next_addr)
267 | 
268 |         if self.redis:
269 |             return self.redis.sismember(self.identifier + '-catalogue', key)
270 | 
271 |         # No redis means no coordination, so no catalogue.
272 |         else:
273 |             return False
274 | 
275 |     def _add_to_catalogue(self, length, prev_addr, next_addr):
276 |         if self.redis:
277 |             key = '%x,%x,%x\n' % (length, prev_addr, next_addr)
278 |             self.redis.sadd(self.identifier + '-catalogue', key)
279 |         # No redis = no catalogue.
280 | 
281 |     def _writeout(self, prev_addr, state):
282 |         generated = state.posix.stdin.load(0, state.posix.stdin.pos)
283 |         generated = state.solver.eval(generated, cast_to=bytes)
284 | 
285 |         key = (len(generated), prev_addr, state.addr)
286 | 
287 |         # Checks here to see if the generation is worth writing to disk.
288 |         # If we generate too many inputs which are not really different we'll seriously slow down AFL.
289 |         if self._in_catalogue(*key):
290 |             self._core.encounters.remove((prev_addr, state.addr))
291 |             return None
292 | 
293 |         else:
294 |             self._add_to_catalogue(*key)
295 | 
296 |         l.info("[%s] dumping input for %#x -> %#x.", self.identifier, prev_addr, state.addr)
297 | 
298 |         self._generated.add((key, generated))
299 | 
300 |         if self.redis:
301 |             # Publish it out in real-time so that inputs get there immediately.
302 |             channel = self.identifier + '-generated'
303 | 
304 |             self.redis.publish(channel, pickle.dumps({'meta': key, 'data': generated, "tag": self.tag}))
305 | 
306 |         else:
307 |             l.info("Generated: %s", binascii.hexlify(generated))
308 | 
309 |         return (key, generated)
310 | 
311 |     def _write_debug_info(self):
312 |         m = hashlib.md5()
313 |         m.update(self.input)
314 |         f_name = os.path.join(config.DEBUG_DIR, self.identifier + '_' + m.hexdigest() + '.py')
315 | 
316 |         with open(f_name, 'w+') as f:
317 |             l.debug("Debug log written to %s.", f_name)
318 |             f.write("binary = %r\n" % self.binary
319 |                     + "started = '%s'\n" % time.ctime(self.start_time)
320 |                     + "input = %r\n" % self.input
321 |                     + "fuzz_bitmap = %r" % self.fuzz_bitmap)
322 | 


--------------------------------------------------------------------------------
/deferred_driller/exploration.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from itertools import islice
 3 | 
 4 | from angr.exploration_techniques import ExplorationTechnique
 5 | 
 6 | l = logging.getLogger("deferred_driller.exploration")
 7 | l.setLevel(logging.DEBUG)
 8 | 
 9 | class DrillerCore(ExplorationTechnique):
10 |     """
11 |     An exploration technique that symbolically follows an input looking for new
12 |     state transitions.
13 | 
14 |     It has to be used with Tracer exploration technique. Results are put in
15 |     'diverted' stash.
16 |     """
17 | 
18 |     def __init__(self, trace, fuzz_bitmap=None):
19 |         """
20 |         :param trace      : The basic block trace.
21 |         :param fuzz_bitmap: AFL's bitmap of state transitions. Defaults to saying every transition is worth satisfying.
22 |         """
23 | 
24 |         super(DrillerCore, self).__init__()
25 |         self.trace = trace
26 |         self.fuzz_bitmap = fuzz_bitmap or b"\xff" * 65536
27 | 
28 |         # Set of encountered basic block transitions.
29 |         self.encounters = set()
30 | 
31 |     def setup(self, simgr):
32 |         self.project = simgr._project
33 | 
34 |         # Update encounters with known state transitions.
35 |         self.encounters.update(zip(self.trace, islice(self.trace, 1, None)))
36 | 
37 |     def step(self, simgr, stash='active', **kwargs):
38 |         simgr.step(stash=stash, **kwargs)
39 | 
40 |         # Mimic AFL's indexing scheme.
41 |         if 'missed' in simgr.stashes and simgr.missed:
42 |             # A bit ugly, might be replaced by tracer.predecessors[-1] or crash_monitor.last_state.
43 |             prev_addr = simgr.one_missed.history.bbl_addrs[-1]
44 |             prev_loc = prev_addr
45 |             prev_loc = (prev_loc >> 4) ^ (prev_loc << 8)
46 |             prev_loc &= len(self.fuzz_bitmap) - 1
47 |             prev_loc = prev_loc >> 1
48 | 
49 |             for state in simgr.missed:
50 |                 cur_loc = state.addr
51 |                 cur_loc = (cur_loc >> 4) ^ (cur_loc << 8)
52 |                 cur_loc &= len(self.fuzz_bitmap) - 1
53 | 
54 |                 hit = bool(self.fuzz_bitmap[cur_loc ^ prev_loc] ^ 0xff)
55 | 
56 |                 transition = (prev_addr, state.addr)
57 |                 mapped_to = self.project.loader.find_object_containing(state.addr)
58 |                 mapped_to = "" if mapped_to is None else mapped_to.binary
59 |                 
60 |                 l.debug("Found %#x -> %#x transition.", transition[0], transition[1])
61 | 
62 |                 if not hit and transition not in self.encounters and not self._has_false(state) and mapped_to != 'cle##externs':
63 |                     state.preconstrainer.remove_preconstraints()
64 | 
65 |                     if state.satisfiable():
66 |                         # A completely new state transition.
67 |                         l.debug("Found a completely new transition, putting into 'diverted' stash.")
68 |                         simgr.stashes['diverted'].append(state)
69 |                         self.encounters.add(transition)
70 |                     else:
71 |                         l.debug("State at %#x is not satisfiable.", transition[1])
72 | 
73 |                 elif self._has_false(state):
74 |                     l.debug("State at %#x is not satisfiable even remove preconstraints.", transition[1])
75 | 
76 |                 else:
77 |                     l.debug("%#x -> %#x transition has already been encountered.", transition[0], transition[1])
78 | 
79 |         return simgr
80 | 
81 |     #
82 |     # Private methods
83 |     #
84 | 
85 |     @staticmethod
86 |     def _has_false(state):
87 |         # Check if the state is unsat even if we remove preconstraints.
88 |         claripy_false = state.solver.false
89 |         if state.scratch.guard.cache_key == claripy_false.cache_key:
90 |             return True
91 | 
92 |         for c in state.solver.constraints:
93 |             if c.cache_key == claripy_false.cache_key:
94 |                 return True
95 | 
96 |         return False
97 | 


--------------------------------------------------------------------------------
/deferred_driller/externals.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | from collections import defaultdict
 4 | 
 5 | import angr
 6 | import angrgdb
 7 | 
 8 | l = logging.getLogger("deferred_driller.externals")
 9 | l.setLevel(logging.INFO)
10 | 
11 | def get_objects(p):
12 |     vmmap = angrgdb.get_debugger()._get_vmmap()
13 |     objs = defaultdict(lambda: [0xffffffffffffffff, 0])
14 |     paths = []
15 |     for dep in p.loader.main_object.deps:
16 |         paths += p.loader._possible_paths(dep)
17 |     
18 |     for start, end, mapperm, mapname in vmmap:
19 |         if not os.path.exists(mapname):
20 |             continue
21 |         if mapname == p.loader.main_object.binary:
22 |             continue
23 |         if mapname not in paths: #skip pinbin PinTool.so etc...
24 |             continue
25 |         objs[mapname][0] = min(objs[mapname][0], start)
26 |         objs[mapname][1] = max(objs[mapname][1], end)
27 |     return objs
28 | 
29 | 
30 | def get_got(p):
31 |     s = list(filter(lambda x: x.name == ".got.plt", p.loader.main_object.sections))[0]
32 |     return (s.vaddr, s.vaddr + s.memsize)
33 | 
34 | def get_plt(p):
35 |     s = list(filter(lambda x: x.name == ".plt", p.loader.main_object.sections))[0]
36 |     return (s.vaddr, s.vaddr + s.memsize)
37 | 
38 | def process_got(proj):
39 |     debugger = angrgdb.get_debugger()
40 |     target_proj = angrgdb.load_project()
41 |     
42 |     got_start, got_end = get_got(proj)
43 |     plt_start, plt_end = get_plt(proj)
44 |     
45 |     entry_len = proj.arch.bits // 8
46 |     get_mem = debugger.get_dword if entry_len == 4 else debugger.get_qword
47 | 
48 |     got_start += 3 * entry_len  # skip first 3 entries
49 |     empty_state = proj.factory.blank_state()
50 | 
51 |     for a in range(got_start, got_end, entry_len):
52 |         state_val = empty_state.solver.eval(getattr(empty_state.mem[a], "uint%d_t" % proj.arch.bits).resolved)
53 |         
54 |         if state_val in proj._sim_procedures:
55 |             dbg_val = get_mem(a)
56 |             name = proj._sim_procedures[state_val].display_name
57 |             
58 |             if proj._sim_procedures[state_val].is_stub:
59 |                 l.debug("Skipping re-hooking of %s cause is a stub" % name)
60 |             elif not target_proj.is_hooked(dbg_val):
61 |                 l.info("Re-hooking %s (got: 0x%x) to 0x%x" % (name, a, dbg_val))
62 |                 target_proj.hook_symbol(dbg_val, proj._sim_procedures[state_val])
63 | 
64 | 
65 | def apply_external_simprocs():
66 |     objs = get_objects(angrgdb.load_project())
67 |     for o in objs:
68 |         l.info("Applying simprocs to " + o)
69 |         try:
70 |             p = angr.Project(o, main_opts={ 'base_addr': objs[o][0] , 'force_rebase': True}, load_options={ "auto_load_libs": False })
71 |             process_got(p)
72 |         except Exception as ee:
73 |             l.warning("Failed to apply simprocs to " + o + ": " + str(ee))
74 |     process_got(angrgdb.load_project())
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/deferred_driller/instrumentation/PinTool.cpp:
--------------------------------------------------------------------------------
  1 | #include "pin.H"
  2 | #include <iostream>
  3 | #include <sstream>
  4 | #include <fstream>
  5 | 
  6 | #include <unistd.h>
  7 | #include <fcntl.h>
  8 | #include <sys/syscall.h>
  9 | 
 10 | using namespace std;
 11 | 
 12 | ostream* out = &cerr;
 13 | 
 14 | INT32 Usage()
 15 | {
 16 |     cerr << "This tool prints out the number of dynamically executed " << endl <<
 17 |             "instructions, basic blocks and threads in the application." << endl << endl;
 18 | 
 19 |     return -1;
 20 | }
 21 | 
 22 | 
 23 | int input_fd;
 24 | string trace_filename;
 25 | 
 26 | ADDRINT current_brk;
 27 | 
 28 | //--------------------------------------------------------------------------------------------
 29 | 
 30 | VOID SysCallEntry(THREADID threadIndex, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v)
 31 | {
 32 |     ADDRINT sys_id = PIN_GetSyscallNumber(ctxt, std);
 33 |     if(sys_id == SYS_read) {
 34 |         ADDRINT fd = PIN_GetSyscallArgument(ctxt, std, 0);
 35 |         if(fd == 0) { //change stdin fileno to our input
 36 |             PIN_SetSyscallArgument(ctxt, std, 0, input_fd);
 37 |         }
 38 |     }
 39 | }
 40 | 
 41 | VOID LogBbl(ADDRINT addr)
 42 | {
 43 |     *out << addr << endl;
 44 | }
 45 | 
 46 | VOID Trace(TRACE trace, VOID *v)
 47 | {
 48 |     for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) {
 49 |         BBL_InsertCall(bbl, IPOINT_BEFORE, (AFUNPTR)LogBbl, IARG_ADDRINT, BBL_Address(bbl), IARG_END);
 50 |     }
 51 | }
 52 | 
 53 | VOID Fini(INT32 code, VOID *v)
 54 | {
 55 |     *out << "END_OF_TRACE" << endl;
 56 |     if(out != &cerr) {
 57 |         delete static_cast<ofstream*>(out);
 58 |     }
 59 | }
 60 | 
 61 | VOID ContextChange(THREADID threadIndex, CONTEXT_CHANGE_REASON reason, const CONTEXT *from, CONTEXT *to, INT32 info, VOID *v)
 62 | {
 63 |     if(reason == CONTEXT_CHANGE_REASON_FATALSIGNAL) {
 64 |         //TODO crash addr for x86
 65 |         *out << "END_OF_TRACE " << PIN_GetContextReg(from, REG_RIP) << endl;
 66 |         if(out != &cerr) {
 67 |             static_cast<ofstream*>(out)->flush();
 68 |         }
 69 |     }
 70 | }
 71 | 
 72 | //--------------------------------------------------------------------------------------------
 73 | 
 74 | VOID SysCallExit(THREADID threadIndex, CONTEXT *ctxt, SYSCALL_STANDARD std, VOID *v)
 75 | {
 76 |     ADDRINT sys_id = PIN_GetSyscallNumber(ctxt, std);
 77 |     if(sys_id == SYS_brk) {
 78 |         current_brk = PIN_GetSyscallReturn(ctxt, std);
 79 |     }
 80 | }
 81 | 
 82 | 
 83 | BOOL DebugInterpreter(THREADID tid, CONTEXT *ctxt, const string &cmd, string* result, VOID *);
 84 | 
 85 | VOID InChildFork(THREADID threadid, const CONTEXT *ctxt, VOID *v)
 86 | {
 87 |     cerr << "[in child]\n";
 88 |     
 89 |     input_fd = open(trace_filename.c_str(), O_RDONLY);
 90 |     
 91 |     PIN_RemoveDebugInterpreter(DebugInterpreter);
 92 |     
 93 |     PIN_AddSyscallEntryFunction(SysCallEntry, 0);
 94 |     
 95 |     TRACE_AddInstrumentFunction(Trace, 0);
 96 | 
 97 |     PIN_AddFiniFunction(Fini, 0);
 98 |     
 99 |     PIN_AddContextChangeFunction(ContextChange, 0);
100 | }
101 | 
102 | BOOL DebugInterpreter(THREADID tid, CONTEXT *ctxt, const string &cmd, string* result, VOID *)
103 | {
104 | 	if(cmd == "getpid") {
105 | 	    INT pid = PIN_GetPid();
106 | 	    cerr << "[pid = " << pid << "]\n";
107 | 		std::ostringstream ss;
108 | 	    ss << pid << endl;
109 | 	    *result = ss.str();
110 | 	    return TRUE;
111 | 	}
112 | 	else if(cmd == "enable_fork") {
113 |         PIN_AddForkFunction(FPOINT_AFTER_IN_CHILD, InChildFork, 0);
114 |         cerr << "[fork hook enabled]\n";
115 |         return TRUE;
116 | 	}
117 | 	else if(cmd == "fs") {
118 | 	    ADDRINT regval;
119 | 	    PIN_GetContextRegval(ctxt, REG_SEG_FS_BASE, reinterpret_cast<UINT8*>(&regval));
120 | 	    cerr << "[fs base = " << (void*)regval << "]\n";
121 | 	    std::ostringstream ss;
122 | 	    ss << regval << endl;
123 | 	    *result = ss.str();
124 | 	    return TRUE;
125 | 	}
126 | 	else if(cmd == "brk") {
127 | 	    cerr << "[brk = " << current_brk << "]\n";
128 | 	    std::ostringstream ss;
129 | 	    ss << current_brk << endl;
130 | 	    *result = ss.str();
131 | 	    return TRUE;
132 | 	}
133 | 	else if(cmd.rfind("input ", 0) == 0) {
134 | 		trace_filename = cmd.substr(6);
135 | 		cerr << "[input file = " << trace_filename << "]\n";
136 | 	    return TRUE;
137 | 	}
138 | 	else if(cmd.rfind("out ", 0) == 0) {
139 | 		string filename = cmd.substr(4);
140 | 		cerr << "[out file = " << filename << "]\n";
141 | 		out = new ofstream(filename.c_str());
142 | 	    return TRUE;
143 | 	}
144 | 	return FALSE;
145 | }
146 | 
147 | 
148 | //env LD_BIND_NOW=1 ../pin-3.7/pin -appdebug -t obj-intel64/PinTool.so -- ../test1
149 | 
150 | int main(int argc, char *argv[])
151 | {
152 |     PIN_InitSymbols();
153 |     if(PIN_Init(argc,argv))
154 |         return Usage();
155 |     
156 |     PIN_AddSyscallExitFunction(SysCallExit, 0);
157 |     
158 |     PIN_AddDebugInterpreter(DebugInterpreter, 0);
159 |     
160 |     PIN_StartProgram();
161 |     
162 |     return 0;
163 | }
164 | 
165 | 
166 | 
167 | 
168 | 
169 | 
170 | 


--------------------------------------------------------------------------------
/deferred_driller/instrumentation/README.md:
--------------------------------------------------------------------------------
1 | ### howto
2 | 
3 | Download and place here a build of Intel Pin (I use the 3.7).
4 | 
5 | Then run `build.py`.
6 | 


--------------------------------------------------------------------------------
/deferred_driller/instrumentation/build.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | import glob
 4 | import os
 5 | 
 6 | pin_path = glob.glob('./pin-*')
 7 | assert len(pin_path) == 1
 8 | pin_path = pin_path[0]
 9 | 
10 | os.system("make PIN_ROOT=%s" % pin_path)
11 | 


--------------------------------------------------------------------------------
/deferred_driller/instrumentation/makefile:
--------------------------------------------------------------------------------
 1 | ##############################################################
 2 | #
 3 | #                   DO NOT EDIT THIS FILE!
 4 | #
 5 | ##############################################################
 6 | 
 7 | # If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root.
 8 | ifdef PIN_ROOT
 9 | CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config
10 | else
11 | CONFIG_ROOT := ../Config
12 | endif
13 | include $(CONFIG_ROOT)/makefile.config
14 | include makefile.rules
15 | include $(TOOLS_ROOT)/Config/makefile.default.rules
16 | 
17 | ##############################################################
18 | #
19 | #                   DO NOT EDIT THIS FILE!
20 | #
21 | ##############################################################
22 | 


--------------------------------------------------------------------------------
/deferred_driller/instrumentation/makefile.rules:
--------------------------------------------------------------------------------
 1 | ##############################################################
 2 | #
 3 | # This file includes all the test targets as well as all the
 4 | # non-default build rules and test recipes.
 5 | #
 6 | ##############################################################
 7 | 
 8 | 
 9 | ##############################################################
10 | #
11 | # Test targets
12 | #
13 | ##############################################################
14 | 
15 | ###### Place all generic definitions here ######
16 | 
17 | # This defines tests which run tools of the same name.  This is simply for convenience to avoid
18 | # defining the test name twice (once in TOOL_ROOTS and again in TEST_ROOTS).
19 | # Tests defined here should not be defined in TOOL_ROOTS and TEST_ROOTS.
20 | TEST_TOOL_ROOTS := PinTool
21 | 
22 | # This defines the tests to be run that were not already defined in TEST_TOOL_ROOTS.
23 | TEST_ROOTS :=
24 | 
25 | # This defines the tools which will be run during the the tests, and were not already defined in
26 | # TEST_TOOL_ROOTS.
27 | TOOL_ROOTS :=
28 | 
29 | # This defines the static analysis tools which will be run during the the tests. They should not
30 | # be defined in TEST_TOOL_ROOTS. If a test with the same name exists, it should be defined in
31 | # TEST_ROOTS.
32 | # Note: Static analysis tools are in fact executables linked with the Pin Static Analysis Library.
33 | # This library provides a subset of the Pin APIs which allows the tool to perform static analysis
34 | # of an application or dll. Pin itself is not used when this tool runs.
35 | SA_TOOL_ROOTS :=
36 | 
37 | # This defines all the applications that will be run during the tests.
38 | APP_ROOTS :=
39 | 
40 | # This defines any additional object files that need to be compiled.
41 | OBJECT_ROOTS :=
42 | 
43 | # This defines any additional dlls (shared objects), other than the pintools, that need to be compiled.
44 | DLL_ROOTS :=
45 | 
46 | # This defines any static libraries (archives), that need to be built.
47 | LIB_ROOTS :=
48 | 
49 | ###### Define the sanity subset ######
50 | 
51 | # This defines the list of tests that should run in sanity. It should include all the tests listed in
52 | # TEST_TOOL_ROOTS and TEST_ROOTS excluding only unstable tests.
53 | SANITY_SUBSET := $(TEST_TOOL_ROOTS) $(TEST_ROOTS)
54 | 
55 | 
56 | ##############################################################
57 | #
58 | # Test recipes
59 | #
60 | ##############################################################
61 | 
62 | # This section contains recipes for tests other than the default.
63 | # See makefile.default.rules for the default test rules.
64 | # All tests in this section should adhere to the naming convention: <testname>.test
65 | 
66 | 
67 | ##############################################################
68 | #
69 | # Build rules
70 | #
71 | ##############################################################
72 | 
73 | # This section contains the build rules for all binaries that have special build rules.
74 | # See makefile.default.rules for the default build rules.
75 | 


--------------------------------------------------------------------------------
/deferred_driller/runner.py:
--------------------------------------------------------------------------------
  1 | import subprocess
  2 | import tempfile
  3 | import logging
  4 | import glob
  5 | import time
  6 | import gdb
  7 | import os
  8 | 
  9 | import angr
 10 | import angrgdb
 11 | 
 12 | from .externals import apply_external_simprocs
 13 | 
 14 | l = logging.getLogger("deferred_driller.runner")
 15 | l.setLevel(logging.DEBUG)
 16 | 
 17 | class PinRunner:
 18 |     def __init__(self, binary, argv=None, pin_path=None, pintool_path=None, use_simprocs=True):
 19 |         if pin_path is None:
 20 |             pin_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "instrumentation", "pin-*", "pin")
 21 |             pin_path = glob.glob(pin_path)
 22 |             assert len(pin_path) == 1
 23 |             pin_path = pin_path[0]
 24 |         if pintool_path is None:
 25 |             pintool_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "instrumentation", "obj-intel64", "PinTool.so")
 26 |         
 27 |         self._main_returns = None
 28 |         self.binary = binary
 29 |         self.cfg = None
 30 |         self.objs = None
 31 |         
 32 |         args = [
 33 |             pin_path,
 34 |             "-appdebug",
 35 |             "-t",
 36 |             pintool_path,
 37 |             "--",
 38 |             binary,
 39 |         ]
 40 |         if argv is not None:
 41 |             args += argv
 42 | 
 43 |         os.environ["LD_BIND_NOW"] = "1"
 44 |         self.process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 45 |         
 46 |         '''Application stopped until continued from debugger.
 47 |         Start GDB, then issue this command at the (gdb) prompt:
 48 |           target remote :56991'''
 49 |         self.process.stdout.readline()
 50 |         self.process.stdout.readline()
 51 |         cmd = self.process.stdout.readline().strip()
 52 |         
 53 |         gdb.execute(str(cmd, "utf-8"))
 54 |         
 55 |         self.pid = int(gdb.execute("monitor getpid", to_string=True).strip())
 56 |         angrgdb.get_debugger().pid = self.pid
 57 |         
 58 |         gdb.execute("b *driller_init")
 59 |         gdb.execute("continue")
 60 |         gdb.execute("monitor enable_fork")
 61 |         
 62 |         if use_simprocs:
 63 |             apply_external_simprocs()
 64 |         angrgdb.set_memory_type(angrgdb.GET_ALL_DISCARD_CLE)
 65 |     
 66 |     def brk(self):
 67 |         return int(gdb.execute("monitor brk", to_string=True))
 68 |     
 69 |     def fs(self):
 70 |         return int(gdb.execute("monitor fs", to_string=True))
 71 |     
 72 |     def tracer(self, concrete_input):
 73 |         input_path = tempfile.mkstemp(dir="/dev/shm/", prefix="pin-tracer-input-")[1]
 74 |         with open(input_path, "wb") as f:
 75 |             f.write(concrete_input)
 76 |         output_path = tempfile.mkstemp(dir="/dev/shm/", prefix="pin-tracer-log-")[1]      
 77 |         
 78 |         l.debug("tracing with input file: " + input_path)
 79 |         l.debug("tracing with output file: " + output_path)
 80 |         
 81 |         gdb.execute("monitor input " + input_path)
 82 |         gdb.execute("monitor out " + output_path)
 83 |         
 84 |         st = time.time()
 85 |         gdb.execute("continue")
 86 |         
 87 |         l.debug("traced in %f minutes" % ((time.time() -st)/ 60.0))
 88 |         
 89 |         trace = []
 90 |         crash_addr = None
 91 |         with open(output_path, "r") as f:
 92 |             while True:
 93 |                 line = f.readline().strip()
 94 |                 if len(line) == 0:
 95 |                     continue
 96 |                 if line.startswith("END_OF_TRACE"):
 97 |                     if len(line) > len("END_OF_TRACE"):
 98 |                         crash_addr = int(line.split()[1])
 99 |                     break
100 |                 trace.append(int(line))
101 |         
102 |         return trace, crash_addr
103 | 
104 |     def _driller_init_bounds(self):
105 |         project = angrgdb.load_project()
106 |         if self.cfg is None:
107 |             self.cfg = project.analyses.CFGFast()
108 |         begin = 0xffffffffffffffff
109 |         end = 0
110 |         for bb in project.kb.functions["driller_init"].graph:
111 |             begin = min(begin, bb.addr)
112 |             end = max(end, bb.addr + bb.size)
113 |         return begin, end
114 |     
115 |     def main_return_blocks(self):
116 |         if self._main_returns is None:
117 |             project = angrgdb.load_project()
118 |             if self.cfg is None:
119 |                 self.cfg = project.analyses.CFGFast()
120 |             self._main_returns = set()
121 |             for bb in project.kb.functions["main"].blocks: 
122 |                 for i in bb.capstone.insns: 
123 |                     if i.mnemonic == "ret":
124 |                         self._main_returns.add(bb.addr)
125 |         return self._main_returns
126 |     
127 |     def get_start_addr(self, trace):
128 |         begin, end = self._driller_init_bounds()
129 |         cnt = 0
130 |         in_init = False
131 |         for addr in trace:
132 |             if addr >= begin and addr < end:
133 |                 in_init = True
134 |             elif in_init:
135 |                 return addr
136 | 
137 | 


--------------------------------------------------------------------------------
/deferred_driller/tracer.py:
--------------------------------------------------------------------------------
  1 | import angr
  2 | from typing import List
  3 | import logging
  4 | 
  5 | from angr.exploration_techniques import ExplorationTechnique
  6 | from angr import BP_BEFORE, BP_AFTER, sim_options
  7 | from angr.errors import AngrTracerError
  8 | 
  9 | l = logging.getLogger("deferred_driller.tracer")
 10 | 
 11 | 
 12 | class Tracer(ExplorationTechnique):
 13 |     def __init__(self,
 14 |             start_addr,
 15 |             trace=None,
 16 |             resiliency=False,
 17 |             keep_predecessors=1,
 18 |             crash_addr=None):
 19 |         super(Tracer, self).__init__()
 20 |         self._trace = trace
 21 |         self._resiliency = resiliency
 22 |         self._crash_addr = crash_addr
 23 | 
 24 |         # keep track of the last basic block we hit
 25 |         self.predecessors = [None] * keep_predecessors # type: List[angr.SimState]
 26 |         self.last_state = None
 27 |         self.start_addr = start_addr
 28 | 
 29 |         # whether we should follow the trace
 30 |         self._no_follow = self._trace is None
 31 | 
 32 |     def setup(self, simgr):
 33 |         simgr.populate('missed', [])
 34 |         simgr.populate('traced', [])
 35 |         simgr.populate('crashed', [])
 36 | 
 37 |         self.project = simgr._project
 38 |         if len(simgr.active) != 1:
 39 |             raise AngrTracerError("Tracer is being invoked on a SimulationManager without exactly one active state")
 40 | 
 41 |         for idx, addr in enumerate(self._trace):
 42 |             if addr == self.start_addr:
 43 |                 break
 44 | 
 45 |         # step to start addr
 46 |         while self._trace and self._trace[idx] != simgr.one_active.addr:
 47 |             simgr.step(extra_stop_points={self._trace[idx]})
 48 |             if len(simgr.active) == 0:
 49 |                 raise AngrTracerError("Could not step to the first address of the trace - simgr is empty")
 50 |             elif len(simgr.active) > 1:
 51 |                 raise AngrTracerError("Could not step to the first address of the trace - state split")
 52 | 
 53 |         # initialize the state info
 54 |         simgr.one_active.globals['trace_idx'] = idx
 55 |         simgr.one_active.globals['sync_idx'] = None
 56 | 
 57 |         # enable lazy solves - don't touch z3 unless I tell you so
 58 |         simgr.one_active.options.add(sim_options.LAZY_SOLVES)
 59 | 
 60 |     def complete(self, simgr):
 61 |         return bool(simgr.traced)
 62 | 
 63 |     def filter(self, simgr, state, **kwargs):
 64 |         # check completion
 65 |         if state.globals['trace_idx'] >= len(self._trace) - 1:
 66 |             # do crash windup if necessary
 67 |             if self._crash_addr is not None:
 68 |                 simgr.populate('crashed', [self._crash_windup(state)])
 69 | 
 70 |             return 'traced'
 71 | 
 72 |         return simgr.filter(state, **kwargs)
 73 | 
 74 |     def step(self, simgr, stash='active', **kwargs):
 75 |         simgr.drop(stash='missed')
 76 |         return simgr.step(stash=stash, **kwargs)
 77 | 
 78 |     def step_state(self, simgr, state, **kwargs):
 79 |         # maintain the predecessors list
 80 |         self.predecessors.append(state)
 81 |         self.predecessors.pop(0)
 82 | 
 83 |         # perform the step. ask qemu to stop at the termination point.
 84 |         stops = set(kwargs.pop('extra_stop_points', ())) | {self._trace[-1]}
 85 |         succs_dict = simgr.step_state(state, extra_stop_points=stops, **kwargs)
 86 |         succs = succs_dict[None]
 87 | 
 88 |         # follow the trace
 89 |         if len(succs) == 1:
 90 |             self._update_state_tracking(succs[0])
 91 |         elif len(succs) == 0:
 92 |             raise Exception("All states disappeared!")
 93 |         else:
 94 |             succ = self._pick_correct_successor(succs)
 95 |             succs_dict[None] = [succ]
 96 |             succs_dict['missed'] = [s for s in succs if s is not succ]
 97 | 
 98 |         assert len(succs_dict[None]) == 1
 99 |         return succs_dict
100 | 
101 |     def _pick_correct_successor(self, succs):
102 |         # there's been a branch of some sort. Try to identify which state stayed on the trace.
103 |         assert len(succs) > 0
104 |         idx = succs[0].globals['trace_idx']
105 | 
106 |         res = []
107 |         for succ in succs:
108 |             if succ.addr == self._trace[idx + 1]:
109 |                 res.append(succ)
110 | 
111 |         if not res:
112 |             raise Exception("No states followed the trace?")
113 | 
114 |         if len(res) > 1:
115 |             raise Exception("The state split but several successors have the same (correct) address?")
116 | 
117 |         self._update_state_tracking(res[0])
118 |         return res[0]
119 | 
120 |     def _update_state_tracking(self, state: 'angr.SimState'):
121 |         idx = state.globals['trace_idx']
122 |         sync = state.globals['sync_idx']
123 | 
124 |         if state.history.recent_block_count > 1:
125 |             # multiple blocks were executed this step. they should follow the trace *perfectly*
126 |             # or else something is up
127 |             # "something else" so far only includes concrete transmits, or...
128 |             # TODO: https://github.com/unicorn-engine/unicorn/issues/874
129 |             # ^ this means we will see desyncs of the form unicorn suddenly skips a bunch of qemu blocks
130 |             assert state.history.recent_block_count == len(state.history.recent_bbl_addrs)
131 | 
132 |             if sync is not None:
133 |                 raise Exception("TODO")
134 | 
135 |             for addr in state.history.recent_bbl_addrs:
136 |                 if addr == state.unicorn.transmit_addr:
137 |                     continue
138 | 
139 |                 if self._compare_addr(self._trace[idx], addr):
140 |                     idx += 1
141 |                 else:
142 |                     raise Exception('BUG! Please investivate the claim in the comment above me')
143 | 
144 |             idx -= 1 # use normal code to do the last synchronization
145 | 
146 |         if sync is not None:
147 |             if self._compare_addr(self._trace[sync], state.addr):
148 |                 state.globals['trace_idx'] = sync
149 |                 state.globals['sync_idx'] = None
150 |             else:
151 |                 raise Exception("Trace did not sync after 1 step, you knew this would happen")
152 | 
153 |         elif self._compare_addr(self._trace[idx + 1], state.addr):
154 |             # normal case
155 |             state.globals['trace_idx'] = idx + 1
156 |         elif self.project.loader._extern_object is not None and self.project.loader.extern_object.contains_addr(state.addr):
157 |             # externs
158 |             proc = self.project.hooked_by(state.addr)
159 |             if proc is None:
160 |                 raise Exception("Extremely bad news: we're executing an unhooked address in the externs space")
161 |             if proc.is_continuation:
162 |                 orig_addr = self.project.loader.find_symbol(proc.display_name).rebased_addr
163 |                 orig_trace_addr = orig_addr
164 |                 if 0 <= self._trace[idx + 1] - orig_trace_addr <= 0x10000:
165 |                     # this is fine. we do nothing and then next round it'll get handled by the is_hooked(state.history.addr) case
166 |                     pass
167 |                 else:
168 |                     # this may also be triggered as a consequence of the unicorn issue linked above
169 |                     raise Exception("BUG: State is returning to a continuation that isn't its own???")
170 |             else:
171 |                 # see above
172 |                 pass
173 |         elif state.history.jumpkind.startswith('Ijk_Sys'):
174 |             # syscalls
175 |             state.globals['sync_idx'] = idx + 1
176 |         elif state.history.jumpkind.startswith('Ijk_Exit'):
177 |             # termination!
178 |             state.globals['trace_idx'] = len(self._trace) - 1
179 |         elif self.project.is_hooked(state.history.addr):
180 |             # simprocedures - is this safe..?
181 |             self._fast_forward(state)
182 |         elif self._analyze_misfollow(state, idx):
183 |             # misfollow analysis will set a sync point somewhere if it succeeds
184 |             pass
185 |         else:
186 |             raise AngrTracerError("Oops! angr did not follow the trace.")
187 | 
188 |         l.debug("Trace: %d/%d", state.globals['trace_idx'], len(self._trace))
189 | 
190 |     def _compare_addr(self, trace_addr, state_addr):
191 |         return trace_addr == state_addr
192 | 
193 |     def _analyze_misfollow(self, state, idx):
194 |         angr_addr = state.addr
195 |         trace_addr = self._trace[idx + 1]
196 |         l.info("Misfollow: angr says %#x, trace says %#x", angr_addr, trace_addr)
197 | 
198 |         # TODO: add rep handling
199 | 
200 |         if 'IRSB' in state.history.recent_description:
201 |             last_block = state.block(state.history.bbl_addrs[-1])
202 |             if self._trace[idx + 1] in last_block.instruction_addrs:
203 |                 # we have disparate block sizes!
204 |                 # specifically, the angr block size is larger than the trace's.
205 |                 # allow the trace to catch up.
206 |                 while self._trace[idx + 1] in last_block.instruction_addrs:
207 |                     idx += 1
208 | 
209 |                 if self._trace[idx + 1] == state.addr:
210 |                     state.globals['trace_idx'] = idx + 1
211 |                     return True
212 |                 else:
213 |                     state.globals['trace_idx'] = idx
214 |                     #state.globals['trace_desync'] = True
215 |                     return True
216 | 
217 |         prev_addr = state.history.bbl_addrs[-1]
218 |         prev_obj = self.project.loader.find_object_containing(prev_addr)
219 | 
220 |         if state.block(prev_addr).vex.jumpkind == 'Ijk_Call':
221 |             l.info('...trying to sync at callsite')
222 |             return self._sync_callsite(state, idx, prev_addr)
223 | 
224 |         if prev_addr in getattr(prev_obj, 'reverse_plt', ()):
225 |             prev_prev_addr = state.history.bbl_addrs[-2]
226 |             if not prev_obj.contains_addr(prev_prev_addr) or state.block(prev_prev_addr).vex.jumpkind != 'Ijk_Call':
227 |                 l.info('...weird interaction with PLT stub, aborting analysis')
228 |                 return False
229 |             l.info('...trying to sync at PLT callsite')
230 |             return self._sync_callsite(state, idx, prev_prev_addr)
231 | 
232 |         l.info('...all analyses failed.')
233 |         return False
234 | 
235 |     def _sync_callsite(self, state, idx, callsite_addr):
236 |         retsite_addr = state.block(callsite_addr).size + callsite_addr
237 |         try:
238 |             retsite_idx = self._trace.index(retsite_addr, idx)
239 |         except ValueError:
240 |             l.error("Trying to fix desync at callsite but return address does not appear in trace")
241 |             return False
242 | 
243 |         state.globals['sync_idx'] = retsite_idx
244 |         state.globals['trace_idx'] = idx
245 |         return True
246 | 
247 |     def _fast_forward(self, state):
248 |         target_addr = state.addr
249 |         try:
250 |             target_idx = self._trace.index(target_addr, state.globals['trace_idx'] + 1)
251 |         except ValueError:
252 |             raise AngrTracerError("Trace failed to synchronize during fast forward? You might want to unhook %s." % (self.project.hooked_by(state.history.addr).display_name))
253 |         else:
254 |             state.globals['trace_idx'] = target_idx
255 | 
256 |     def _crash_windup(self, state):
257 |         # first check: are we just executing user-controlled code?
258 |         if not state.ip.symbolic and state.mem[state.ip].char.resolved.symbolic:
259 |             l.debug("executing input-related code")
260 |             return state
261 | 
262 |         # before we step through and collect the actions we have to set
263 |         # up a special case for address concretization in the case of a
264 |         # controlled read or write vulnerability.
265 |         bp1 = state.inspect.b(
266 |             'address_concretization',
267 |             BP_BEFORE,
268 |             action=self._check_add_constraints)
269 | 
270 |         bp2 = state.inspect.b(
271 |             'address_concretization',
272 |             BP_AFTER,
273 |             action=self._grab_concretization_results)
274 | 
275 |         # step to the end of the crashing basic block,
276 |         # to capture its actions with those breakpoints
277 |         # TODO should this be using simgr.successors?
278 |         state.step()
279 | 
280 |         # Add the constraints from concretized addrs back
281 |         for var, concrete_vals in state.preconstrainer.address_concretization:
282 |             if len(concrete_vals) > 0:
283 |                 l.debug("constraining addr to be %#x", concrete_vals[0])
284 |                 state.add_constraints(var == concrete_vals[0])
285 | 
286 |         # then we step again up to the crashing instruction
287 |         inst_addrs = state.block().instruction_addrs
288 |         inst_cnt = len(inst_addrs)
289 | 
290 |         if inst_cnt == 0:
291 |             insts = 0
292 |         elif self._crash_addr in inst_addrs:
293 |             insts = inst_addrs.index(self._crash_addr) + 1
294 |         else:
295 |             insts = inst_cnt - 1
296 | 
297 |         l.debug("windup step...")
298 |         succs = state.step(num_inst=insts).flat_successors
299 | 
300 |         if len(succs) > 0:
301 |             if len(succs) > 1:
302 |                 succs = [s for s in succs if s.solver.satisfiable()]
303 |             state = succs[0]
304 |             self.last_state = state
305 | 
306 |         # remove the preconstraints
307 |         l.debug("removing preconstraints")
308 |         state.preconstrainer.remove_preconstraints()
309 | 
310 |         l.debug("reconstraining... ")
311 |         state.preconstrainer.reconstrain()
312 | 
313 |         # now remove our breakpoints since other people might not want them
314 |         state.inspect.remove_breakpoint("address_concretization", bp1)
315 |         state.inspect.remove_breakpoint("address_concretization", bp2)
316 | 
317 |         l.debug("final step...")
318 |         succs = state.step()
319 |         successors = succs.flat_successors + succs.unconstrained_successors
320 |         return successors[0]
321 | 
322 |     # the below are utility functions for crash windup
323 | 
324 |     def _grab_concretization_results(self, state):
325 |         """
326 |         Grabs the concretized result so we can add the constraint ourselves.
327 |         """
328 |         # only grab ones that match the constrained addrs
329 |         if self._should_add_constraints(state):
330 |             addr = state.inspect.address_concretization_expr
331 |             result = state.inspect.address_concretization_result
332 |             if result is None:
333 |                 l.warning("addr concretization result is None")
334 |                 return
335 |             state.preconstrainer.address_concretization.append((addr, result))
336 | 
337 |     def _check_add_constraints(self, state):
338 |         """
339 |         Obnoxious way to handle this, should ONLY be called from crash monitor.
340 |         """
341 |         # for each constrained addrs check to see if the variables match,
342 |         # if so keep the constraints
343 |         state.inspect.address_concretization_add_constraints = self._should_add_constraints(state)
344 | 
345 |     def _should_add_constraints(self, state):
346 |         """
347 |         Check to see if the current address concretization variable is any of the registered
348 |         constrained_addrs we want to allow concretization for
349 |         """
350 |         expr = state.inspect.address_concretization_expr
351 |         hit_indices = self._to_indices(state, expr)
352 | 
353 |         for action in state.preconstrainer._constrained_addrs:
354 |             var_indices = self._to_indices(state, action.addr)
355 |             if var_indices == hit_indices:
356 |                 return True
357 |         return False
358 | 
359 |     @staticmethod
360 |     def _to_indices(state, expr):
361 |         indices = []
362 |         for descr in state.solver.describe_variables(expr):
363 |             if descr[0] == 'file' and descr[1] == state.posix.stdin.ident:
364 |                 if descr[2] == 'packet':
365 |                     indices.append(descr[3])
366 |                 elif type(descr[2]) is int:
367 |                     indices.append(descr[2])
368 | 
369 |         return sorted(indices)
370 | 


--------------------------------------------------------------------------------
/driller_init.s:
--------------------------------------------------------------------------------
 1 | .text
 2 | .globl    driller_init
 3 | driller_init:
 4 |     pushq %rax
 5 | start_driller_loop:
 6 |     callq fork
 7 |     testl %eax, %eax
 8 |     je exit_driller_loop
 9 |     int3
10 |     cmpl $0xabadcafe, %eax
11 |     jne start_driller_loop
12 | exit_driller_loop:
13 |     popq %rax
14 |     retq
15 | 


--------------------------------------------------------------------------------
/example/Makefile:
--------------------------------------------------------------------------------
 1 | AFL_PATH=~/afl/
 2 | 
 3 | all:
 4 | 	clang -no-pie -fno-stack-protector -D__DRILLER test1.c ../driller_init.s -o test1_driller
 5 | 	$(AFL_PATH)/afl-clang-fast -no-pie -fno-stack-protector test1.c -o test1_afl
 6 | 
 7 | musl:
 8 | 	musl-gcc -no-pie -fno-stack-protector -D__DRILLER test1.c ../driller_init.s -o test1_driller
 9 | 	$(AFL_PATH)/afl-clang-fast -no-pie -fno-stack-protector test1.c -o test1_afl
10 | 
11 | 


--------------------------------------------------------------------------------
/example/drill.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | This is a very dirty example.
  3 | 
  4 | Commands to run in two different terminals:
  5 | 
  6 | ~/afl/afl-fuzz -M master -i inputs -o ./output -t 20000 -- ./test1_afl
  7 | 
  8 | gdb -q -nh --batch -x drill.py ./test1_driller
  9 | 
 10 | Wait AFL to start fuzzing before to run the second command.
 11 | 
 12 | '''
 13 | 
 14 | import sys, os
 15 | cwd = os.path.dirname(os.path.realpath(__file__))
 16 | sys.path.append(cwd + "/../")
 17 | 
 18 | import json
 19 | import time
 20 | import base64
 21 | import tempfile
 22 | import hashlib
 23 | import logging
 24 | 
 25 | from deferred_driller import *
 26 | 
 27 | l = logging.getLogger("drill_this_shit")
 28 | l.setLevel(logging.INFO)
 29 | 
 30 | BINARY = "./test1_driller"
 31 | STDIN_BOUND = True
 32 | EXPLORE_FOUND = False
 33 | MINIMIZE = True
 34 | 
 35 | os.system("mkdir -p %s/output/driller/queue" % cwd)
 36 | 
 37 | if os.path.exists(os.path.basename(BINARY) + "-deferred-driller-data.json"):
 38 |     restore = json.load(open(os.path.basename(BINARY) + "-deferred-driller-data.json"))
 39 |     processed = restore["processed"]
 40 |     transitions = restore["transitions"]
 41 |     index = restore["index"]
 42 | else:
 43 |     processed = []
 44 |     transitions = []
 45 |     index = 0
 46 | 
 47 | runner = PinRunner(BINARY, use_simprocs=True)
 48 | 
 49 | while True:
 50 |     for subd in os.listdir(cwd + "/output/"):
 51 |         if not os.path.isdir(cwd + "/output/" + subd):
 52 |             continue
 53 |         
 54 |         stats = open(cwd + "/output/%s/fuzzer_stats"%subd)
 55 |         pfavs = None
 56 |         for line in stats:
 57 |             if line.startswith("pending_favs"):
 58 |                 pfavs = line.split(":")[1].strip()
 59 |                 break
 60 |         if pfavs != "0":
 61 |             l.debug("%s pending_favs != 0" % subd)
 62 |             break
 63 |         
 64 |         for f in os.listdir(cwd + "/output/%s/queue/" % subd):
 65 |             if not os.path.exists(cwd + "/output/%s/queue/" % subd + f) or os.path.isdir(cwd + "/output/%s/queue/" % subd + f):
 66 |                 continue
 67 |             target = f[:len("id:......")]
 68 |             l.debug("targetting " + cwd + "/output/%s/queue/" % subd + f)
 69 | 
 70 |             if MINIMIZE:
 71 |                 minimized = tempfile.mkstemp(dir="/tmp/", prefix="driller-minimized-")[1]
 72 |                 r = os.system("~/afl/afl-tmin -t 20000 -i '%s' -o %s -- %s" % (cwd + "/output/%s/queue/" % subd + f, minimized, BINARY.replace("driller", "afl")))
 73 |                 l.debug("afl-tmin ret val:", r)
 74 |                 if r == 0:
 75 |                     input_data = open(minimized, "rb").read()
 76 |                 else:
 77 |                     input_data = open(cwd + "/output/%s/queue/" % subd + f, "rb").read()
 78 |                 os.unlink(minimized)
 79 |             else:
 80 |                 input_data = open(cwd + "/output/%s/queue/" % subd + f, "rb").read()
 81 |             
 82 |             inp_hash = hashlib.md5(input_data).hexdigest()
 83 |             if inp_hash in processed:
 84 |                 l.debug(f + " already processed (md5 = %s)" % inp_hash)
 85 |                 continue
 86 |             processed.append(inp_hash)
 87 |             
 88 |             ### hack for strncmp not satisfiable diverted state, see https://github.com/shellphish/driller/issues/70
 89 |             if len(input_data) < 100:
 90 |                 input_data = input_data.ljust(100, b"\x00") # this is shit cause this can alter the behaviour of a generic program and slowdown the exploration
 91 |             
 92 |             bmap = open(cwd + "/output/master/fuzz_bitmap", "rb").read()
 93 |             d = Driller(runner, input_data, bmap, explore_found=EXPLORE_FOUND, stdin_bound=STDIN_BOUND)
 94 |             
 95 |             try:
 96 |                 for o in d.drill_generator():
 97 |                     if o[0] in transitions:
 98 |                         continue
 99 |                     index += 1
100 |                     out = open(cwd + "/output/driller/queue/id:%06d,src:%s" % (index, target), "wb")
101 |                     out.write(o[1])
102 |                     out.close()
103 |                     transitions.append(o[0])
104 |             except Exception as ee:
105 |                 l.warning("!!! ERROR !!!")
106 |                 import traceback
107 |                 traceback.print_exc()
108 |     
109 |             l.info("saving stuffs...")
110 |             with open(os.path.basename(BINARY) + "-deferred-driller-data.json", "w") as dmp:
111 |                 json.dump({"processed": processed, "transitions": transitions, "index": index}, dmp)
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/example/inputs/dumb:
--------------------------------------------------------------------------------
1 | a
2 | 


--------------------------------------------------------------------------------
/example/test1.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <unistd.h>
 4 | #include <signal.h>
 5 | #include <string.h>
 6 | 
 7 | #ifdef __DRILLER
 8 | void driller_init();
 9 | #endif
10 | 
11 | int main(int argc, char** argv) {
12 | 
13 |     char buf[100]; 
14 |     memset(buf, 0, 100);
15 | 
16 |     sleep(10);
17 |     
18 | #ifdef __AFL_HAVE_MANUAL_CONTROL
19 |     __AFL_INIT();
20 | #endif
21 | #ifdef __DRILLER
22 |     driller_init();
23 | #endif
24 |     
25 |     read(0, buf, 100);
26 |     
27 |     fprintf(stderr, "%s\n", buf);
28 |     
29 |     if(strncmp(buf, "pippo", 5) == 0) {
30 |         if(strncmp((char*)buf +6, "franco", 6) == 0) {
31 |             int i;
32 |             int s = 0;
33 |             for(i = 0; i < 12; ++i)
34 |                 s += buf[i];
35 |             if(s == 1217)
36 |                 abort();
37 |         }
38 |     }
39 | 
40 |   return 0;
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/example/test1_afl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andreafioraldi/deferred_driller/92e7d773c78c0f791118984741cb998f3f9ad626/example/test1_afl


--------------------------------------------------------------------------------
/example/test1_driller:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/andreafioraldi/deferred_driller/92e7d773c78c0f791118984741cb998f3f9ad626/example/test1_driller


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | __author__ = "Andrea Fioraldi"
 4 | __copyright__ = "Copyright 2019, Andrea Fioraldi"
 5 | __license__ = "BSD 2-Clause"
 6 | __email__ = "andreafioraldi@gmail.com"
 7 | 
 8 | from setuptools import setup
 9 | 
10 | VER = "1.0.0"
11 | 
12 | setup(
13 |     name='deferred_driller',
14 |     version=VER,
15 |     license=__license__,
16 |     description='My version of driller using Intel PIN and angrgdb. In "theory" can work with AFL in deferred and persistent mode.',
17 |     author=__author__,
18 |     author_email=__email__,
19 |     url='https://github.com/andreafioraldi/deferred_driller',
20 |     download_url = 'https://github.com/andreafioraldi/deferred_driller/archive/' + VER + '.tar.gz',
21 |     package_dir={'deferred_driller': 'deferred_driller'},
22 |     packages=['deferred_driller'],
23 |     install_requires=[
24 |         'angrgdb'
25 |     ],
26 | )
27 | 


--------------------------------------------------------------------------------