├── .gitignore ├── README.md ├── fuzzing ├── README.md ├── config │ ├── fts-bug-regexs.toml │ └── targets.toml ├── extract-from-container.sh ├── fts │ ├── afl.Dockerfile │ ├── aflpp.Dockerfile │ ├── aflpp_driver_GNUmakefile │ ├── base.Dockerfile │ ├── build.sh │ ├── common.sh │ ├── coverage.Dockerfile │ ├── libarchive-2017-01-04-build.sh │ ├── libjpeg-turbo-07-2017-build.sh │ └── libxml2-v2.9.2-build.sh ├── magma │ ├── clean_corpora.sh │ ├── log-execs.patch │ ├── setup.sh │ ├── survival_analysis.py │ └── v1.1.patch ├── readelf │ ├── Dockerfile │ ├── afl_llvm_mode.Makefile │ ├── data │ │ ├── aflfast-ascii-cov.csv.gz │ │ ├── aflfast-cmin-cov.csv.gz │ │ ├── aflfast-singleton-cov.csv.gz │ │ ├── aflplusplus-ascii-cov.csv.gz │ │ ├── aflplusplus-cmin-cov.csv.gz │ │ ├── aflplusplus-singleton-cov.csv.gz │ │ ├── honggfuzz-ascii-cov.csv.gz │ │ ├── honggfuzz-cmin-cov.csv.gz │ │ ├── honggfuzz-singleton-cov.csv.gz │ │ └── readelf-experiment.csv.gz │ ├── scripts │ │ ├── fuzz.sh │ │ ├── get_afl_cov.sh │ │ ├── get_hfuzz_cov.sh │ │ ├── merge_cov.py │ │ ├── plot_cov.py │ │ └── requirements.txt │ └── seeds │ │ ├── cmin-seeds.tar.xz │ │ └── uninformed-seed └── real-world │ ├── freetype2 │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── librsvg │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── libtiff │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── libxml2 │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── poppler │ ├── afl-toolchain-llvm.cmake │ ├── afl.Dockerfile │ ├── base.Dockerfile │ ├── coverage.Dockerfile │ └── toolchain.cmake │ └── sox │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile ├── optimin ├── .gitignore ├── CMakeLists.txt ├── Dockerfile ├── LICENSE.jsoncpp ├── optimin.py └── src │ ├── AFLShowmapMaxSat.cpp │ ├── AFLShowmapZ3.cpp │ ├── CMakeLists.txt │ ├── Common.cpp │ ├── Common.h │ ├── LLVMCovZ3.cpp │ ├── ProgressBar.h │ ├── Z3Common.cpp │ ├── Z3Common.h │ └── jsoncpp │ ├── json │ ├── json-forwards.h │ ├── json.h │ └── jsoncpp.cpp │ └── jsoncpp.cpp └── scripts ├── README.md ├── bin ├── afl_cmin.py ├── afl_coverage_merge.py ├── afl_coverage_pca.py ├── coverage_auc.py ├── eval_maxsat.py ├── expand_hdf5_coverage.py ├── fuzz.py ├── get_corpus.py ├── get_libs.py ├── llvm_cov_merge.py ├── llvm_cov_stats.py ├── qminset.py ├── replay_seeds.py ├── timestamp_afl.py ├── timestamp_honggfuzz.py ├── triage_crashes.py └── visualize_corpora.py ├── seed_selection ├── __init__.py ├── afl.py ├── argparse.py ├── coverage.py ├── datastore.py ├── istarmap.py ├── log.py └── seeds.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Seed Selection for Successful Fuzzing 2 | 3 | The artifact associated with our ISSTA 2021 paper "[Seed Selection for 4 | Successful Fuzzing](https://hexhive.epfl.ch/publications/files/21ISSTA2.pdf)". 5 | While our primary artifact is the OptiMin corpus minimizer, we also provide the 6 | necessary infrastructure to reproduce our fuzzing experiments. 7 | 8 | ## Getting Started 9 | 10 | ### Setup your environment 11 | 12 | Set up your environment (assumes a modern Ubuntu OS, `>= 18.04 && <= 20.04`, 13 | and Python, `>= 3.6 && <= 3.8`): 14 | 15 | ```bash 16 | # Install prerequisites 17 | sudo apt update 18 | sudo apt install -y git docker.io python3-venv 19 | 20 | # Add yourself to the docker group (don't forget to log out and log back in so 21 | # that the group changes take effect) 22 | sudo usermod -aG docker $USER 23 | 24 | # Setup virtualenv 25 | python3 -m venv seed_selection 26 | source seed_selection/bin/activate 27 | pip3 install wheel 28 | 29 | # Get this repo 30 | git clone https://github.com/HexHive/fuzzing-seed-selection 31 | pip3 install fuzzing-seed-selection/scripts 32 | ``` 33 | 34 | ### Build OptiMin 35 | 36 | OptiMin is our SAT-based corpus minimization tool. 
It supports coverage
37 | generated by both [AFL](https://github.com/google/AFL) and
38 | [llvm-cov](https://llvm.org/docs/CommandGuide/llvm-cov.html) (only AFL is used
39 | in the paper). Similarly, OptiMin can use either
40 | [Z3](https://github.com/Z3Prover/z3) or
41 | [EvalMaxSAT](https://github.com/FlorentAvellaneda/EvalMaxSAT) as its backend solver (only EvalMaxSAT
42 | is used in the paper). To build:
43 | 
44 | ```bash
45 | docker build -t seed-selection/optimin fuzzing-seed-selection/optimin
46 | ```
47 | 
48 | ### Run OptiMin
49 | 
50 | OptiMin takes a large "collection corpus" and selects a subset of seeds that are
51 | used for fuzzing. This is based on the _code coverage_ for each seed in the
52 | collection corpus.
53 | 
54 | While we provide tools to generate code coverage information for a given corpus
55 | (based on [`afl-showmap`](https://github.com/google/AFL/blob/master/afl-showmap.c)),
56 | this can be time-consuming (depending on the size of the corpus). Thus, we
57 | provide seed traces in [HDF5](https://en.wikipedia.org/wiki/Hierarchical_Data_Format)
58 | archives.
59 | 
60 | For example, to perform a corpus minimization based on Google FTS FreeType2
61 | coverage:
62 | 
63 | 1. Download the coverage HDF5 from
64 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/afl-showmap-coverage/fts/freetype2.hdf5).
65 | 
66 | ```bash
67 | wget https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/afl-showmap-coverage/fts/freetype2.hdf5
68 | ```
69 | 1. Expand the HDF5 using the
70 | [`expand_hdf5_coverage.py`](scripts/bin/expand_hdf5_coverage.py) script
71 | 
72 | ```bash
73 | expand_hdf5_coverage.py -i freetype2.hdf5 -o /tmp/freetype2
74 | 
75 | # Expected output:
76 | #
77 | # 466 seeds to extract
78 | # Expanding freetype2.hdf5: 100%
79 | ```
80 | 1. Perform an unweighted minimization based on edges only (not hit counts)
81 | 
82 | ```bash
83 | docker run -v /tmp/freetype2:/tmp/freetype2 \
84 | seed-selection/optimin -e /tmp/freetype2
85 | 
86 | # Expected output:
87 | #
88 | # afl-showmap corpus minimization
89 | #
90 | # [############################################################] 100% Reading seed coverage
91 | # [############################################################] 100% Generating clauses
92 | # [*] Running Optimin on /tmp/freetype2
93 | # [*] Running EvalMaxSAT on WCNF
94 | # [+] EvalMaxSAT completed
95 | # [*] Parsing EvalMaxSAT output
96 | # [+] Solution found for /tmp/freetype2
97 | #
98 | # [+] Total time: 0.01 sec
99 | # [+] Num. seeds: 37
100 | #
101 | # ...
102 | ```
103 | 1. Perform an unweighted minimization including edge hit counts
104 | 
105 | ```bash
106 | docker run -v /tmp/freetype2:/tmp/freetype2 \
107 | seed-selection/optimin /tmp/freetype2
108 | 
109 | # Expected output:
110 | #
111 | # afl-showmap corpus minimization
112 | #
113 | # [############################################################] 100% Reading seed coverage
114 | # [############################################################] 100% Generating clauses
115 | # [*] Running Optimin on /tmp/freetype2
116 | # [*] Running EvalMaxSAT on WCNF
117 | # [+] EvalMaxSAT completed
118 | # [*] Parsing EvalMaxSAT output
119 | # [+] Solution found for /tmp/freetype2
120 | #
121 | # [+] Total time: 0.01 sec
122 | # [+] Num. seeds: 53
123 | #
124 | # ...
125 | ```
126 | 1. Download the file weights (i.e., sizes) from
127 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/ttf.csv).
128 | 
129 | ```bash
130 | wget https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/ttf.csv
131 | ```
132 | 1. Perform a weighted minimization based on file size and edges only
133 | 
134 | ```bash
135 | docker run -v /tmp/freetype2:/tmp/freetype2 -v $(pwd):/tmp \
136 | seed-selection/optimin -e -w /tmp/ttf.csv /tmp/freetype2
137 | 
138 | # Expected output:
139 | #
140 | # afl-showmap corpus minimization
141 | #
142 | # [*] Reading weights from `/tmp/ttf.csv`... 0s
143 | # [############################################################] 100% Calculating top
144 | # [############################################################] 100% Reading seed coverage
145 | # [############################################################] 100% Generating clauses
146 | # [*] Running Optimin on /tmp/freetype2
147 | # [*] Running EvalMaxSAT on WCNF
148 | # [+] EvalMaxSAT completed
149 | # [*] Parsing EvalMaxSAT output
150 | # [+] Solution found for /tmp/freetype2
151 | #
152 | # [+] Total time: 0.01 sec
153 | # [+] Num. seeds: 37
154 | #
155 | # ...
156 | ```
157 | 
158 | ## Detailed Description
159 | 
160 | ### Additional Files
161 | 
162 | The sizes of our collection corpora mean that we cannot store them in a Git
163 | repo. Instead, we store ancillary data at ANU's DataCommons repository,
164 | available [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/).
165 | 
166 | ### Tracing Code Coverage
167 | 
168 | Corpus minimization is typically based on some notion of "code coverage". To
169 | ensure a fair and uniform comparison across the three corpus minimization tools
170 | (`afl-cmin`, MinSet, and OptiMin), we use AFL's notion of _edge coverage_. This
171 | coverage information can be generated as follows:
172 | 
173 | 1. Compile your target with AFL instrumentation. See the AFL
174 | [documentation](https://lcamtuf.coredump.cx/afl/) for instructions on how to
175 | do this.
176 | 1. Run [`replay_seeds.py`](scripts/bin/replay_seeds.py) with your target program
177 | and your collection corpus. This will generate an HDF5 archive containing
178 | coverage information that can then be minimized.
179 | 
180 | ### Corpus Minimization
181 | 
182 | Our paper surveys a number of corpus minimization tools: OptiMin, `afl-cmin`,
183 | and MinSet. A more detailed explanation of how to use these tools and reproduce
184 | our results is given below.
185 | 
186 | #### OptiMin
187 | 
188 | Instructions for running OptiMin are given above. As described previously, a
189 | weighted minimization can be performed by supplying a weights CSV file to
190 | OptiMin's `-w` option. This weights file has the following format:
191 | 
192 | ```
193 | FILE_1,WEIGHT
194 | FILE_2,WEIGHT
195 | FILE_3,WEIGHT
196 | FILE_4,WEIGHT
197 | FILE_5,WEIGHT
198 | ```
199 | 
200 | Where `FILE_1`, `FILE_2`, ... correspond to the names of files within the
201 | corpus directory (only the filename needs to be provided: the corpus directory
202 | path should **not** be provided), and `WEIGHT` is an unsigned integer >= 1. We
203 | provide weights for our collection corpora
204 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/).
205 | 
206 | #### `afl-cmin`
207 | 
208 | [`afl-cmin`](https://github.com/google/AFL/blob/master/afl-cmin) is AFL's
209 | inbuilt corpus minimization tool. [`afl_cmin.py`](scripts/bin/afl_cmin.py) wraps
210 | `afl-cmin` so that it outputs the names of the seeds in the minimized corpus
211 | (rather than copying the seeds and wasting storage).
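For reference, the underlying `afl-cmin` invocation driven by the wrapper has the general form below. This is a sketch only: the corpus paths and target binary are illustrative, and the memory limit/timeout should match the values used when fuzzing the target (see [`targets.toml`](fuzzing/config/targets.toml)).

```bash
# afl-cmin traces every seed in -i against the AFL-instrumented target and
# keeps a subset of seeds that preserves all observed coverage tuples;
# @@ is replaced with the path of each seed at run time
afl-cmin -i collection-corpus/ -o minimized-corpus/ \
    -m 800 -t 5000 -- ./target-afl @@
```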
212 | 213 | #### MinSet 214 | 215 | MinSet is the tool developed by Rebert et al. in their paper [Optimizing Seed 216 | Selection for Fuzzing](https://www.usenix.org/system/files/conference/usenixsecurity14/sec14-paper-rebert.pdf). 217 | While we were able to obtain the tool from the authors, it is not open source 218 | and thus we are unable to provide it here. Please contact the authors if you 219 | would like to obtain the source code. 220 | 221 | If you have access to the source code, you can perform a MinSet minimization by: 222 | 223 | 1. Generate code coverage as described [here](#tracing-code-coverage) 224 | 1. Expand the generated HDF5 archive using 225 | [`expand_hdf5_coverage.py`](scripts/bin/expand_hdf5_coverage.py) 226 | 1. Convert the expanded coverage to a set of bitvector traces using 227 | [MoonBeam](https://gitlab.anu.edu.au/lunar/moonbeam) 228 | 1. Run the [`qminset.py`](scripts/bin/qminset.py) wrapper on the bitvector 229 | traces 230 | 231 | ### Fuzzing Experiments 232 | 233 | In addition to the OptiMin tool, we also provide the necessary infrastructure 234 | to reproduce our fuzzing experiments. Detailed instructions are provided 235 | [here](fuzzing/README.md). 236 | -------------------------------------------------------------------------------- /fuzzing/README.md: -------------------------------------------------------------------------------- 1 | # Fuzzing Targets 2 | 3 | This directory contains build scripts for building the targets fuzzed in the 4 | paper. Like in the paper, we group the targets into three benchmarks: Magma, 5 | the Google Fuzzer Test Suite (FTS), and a set of real-world programs. 6 | 7 | Note: AFL typically requires that coredumps be disabled: 8 | 9 | ```bash 10 | sudo bash -c 'echo core >/proc/sys/kernel/core_pattern' 11 | sudo systemctl disable apport.service 12 | ``` 13 | 14 | ## Magma 15 | 16 | [Magma](https://hexhive.epfl.ch/magma) is a ground-truth fuzzing benchmark. To 17 | build: 18 | 19 | 1. Install dependencies, as described [here](https://hexhive.epfl.ch/magma/docs/getting-started.html) 20 | 1. Run `./magma/setup.sh /magma/benchmark/dir` 21 | 1. Clean out the default corpora `./magma/clean_corpora.sh /magma/benchmark/dir` 22 | 1. Copy the relevant `TARGET` corpus into 23 | `/magma/benchmark/dir/targets/TARGET/corpus/PROGRAM`. You can either distill 24 | your own corpus or use one that we have already prepared. For the former, 25 | see the [Run OptiMin](../README.md#run-optimin) instructions. For the latter, 26 | use the [`get_corpus.py`](../scripts/bin/get_corpus.py) script. E.g., to 27 | download the `afl-cmin`-minimized libpng corpus (this can take up to 15-20 28 | mins): 29 | 30 | ```bash 31 | get_corpus.py --benchmark magma --corpus cmin --log info --target libpng \ 32 | /magma/benchmark/dir/targets/libpng/corpus/libpng_read_fuzzer 33 | ``` 34 | 1. Set `WORKDIR` in `/magma/benchmark/dir/tools/captain/captainrc` to something 35 | appropriate. If you only want to fuzz a single target (e.g., libpng), edit 36 | the `afl_TARGETS`/`aflplusplus_TARGETS` entry in `captainrc` 37 | 1. Start fuzzing! 38 | 39 | ```bash 40 | cd /magma/benchmark/dir/tools/captain 41 | ./run.sh 42 | ``` 43 | 1. [This](https://github.com/HexHive/magma/blob/dev/tools/benchd/survival_analysis.py) 44 | Magma script can be used to perform the survival analysis on the results 45 | 46 | ## FTS 47 | 48 | The [Google Fuzzer Test Suite](https://github.com/google/fuzzer-test-suite) is 49 | a widely-used fuzzing benchmark. 50 | 51 | 1. 
Build the base image
52 | 
53 | ```bash
54 | docker build -t seed-selection/fts/base -f fts/base.Dockerfile fts
55 | ```
56 | 1. Build the FTS targets with the required `$INSTRUMENTATION` (one of `afl`,
57 | `aflpp`, or `coverage`)
58 | 
59 | ```bash
60 | docker build -t seed-selection/fts/$INSTRUMENTATION \
61 | -f fts/$INSTRUMENTATION.Dockerfile fts
62 | ```
63 | 1. Extract the relevant files for fuzzing, as instructed at the end of the
64 | previous step. E.g., for AFL++ (`$INSTRUMENTATION` set to `aflpp`)
65 | 
66 | ```bash
67 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /aflplusplus .
68 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /build-aflpp .
69 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /build-cmplog .
70 | ```
71 | 1. Create a fuzzing corpus using the
72 | [`get_corpus.py`](../scripts/bin/get_corpus.py) script
73 | 1. Start fuzzing. The runtime fuzzer configurations (e.g., timeouts and memory
74 | limits) that we used are stored [here](config/targets.toml). The `fuzz.py`
75 | script (in `scripts/bin`) can be used to launch multiple campaigns in
76 | parallel. For example, to fuzz FreeType2 with AFL++ and the provided seeds:
77 | 
78 | ```bash
79 | LD_LIBRARY_PATH=$(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/lib \
80 | fuzz.py -i $(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/seeds \
81 | -o fuzz-out -n2 --num-trials 30 --trial-len $((18*60*60)) \
82 | --cmp-log $(pwd)/build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/freetype2-2017-aflpp_cmplog \
83 | $(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/freetype2-2017-aflpp
84 | ```
85 | 1. We use the regexes [here](config/fts-bug-regexs.toml) to determine each
86 | crash's root cause.
87 | 
88 | ## Real-world Targets
89 | 
90 | A set of real-world programs.
91 | 
92 | 1. Build the base image for a given `$TARGET` (e.g., sox, freetype)
93 | 
94 | ```bash
95 | docker build -t seed-selection/real-world/$TARGET/base \
96 | -f real-world/$TARGET/base.Dockerfile real-world/$TARGET
97 | ```
98 | 1. Build the target with the required `$INSTRUMENTATION`
99 | 
100 | ```bash
101 | docker build -t seed-selection/real-world/$TARGET/$INSTRUMENTATION \
102 | -f real-world/$TARGET/$INSTRUMENTATION.Dockerfile \
103 | real-world/$TARGET
104 | ```
105 | 1. Extract the relevant files for fuzzing, using the `extract-from-container.sh`
106 | script
107 | 1. Create a fuzzing corpus using the
108 | [`get_corpus.py`](../scripts/bin/get_corpus.py) script
109 | 1. Start fuzzing. Again, the `fuzz.py` script can be used.
110 | 
111 | ## `readelf`
112 | 
113 | To reproduce the `readelf` experiment (Section 3.1 of the paper):
114 | 
115 | 1. Build the Docker image
116 | 
117 | ```bash
118 | docker build -t seed-selection/readelf readelf
119 | ```
120 | 1. Start the container, run the fuzzers, and process the results
121 | 
122 | ```bash
123 | docker run -ti --rm seed-selection/readelf
124 | 
125 | # Execute the following commands inside the Docker container
126 | 
127 | ./fuzz.sh
128 | 
129 | ./get_afl_cov.sh
130 | ./get_hfuzz_cov.sh
131 | 
132 | ./merge_cov.py
133 | ./plot_cov.py
134 | ```
135 | 
136 | ## Generating LLVM Code Coverage
137 | 
138 | We use LLVM's [source-code-level
139 | coverage](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html) in our
140 | evaluation. To generate LLVM coverage after a fuzzing campaign:
141 | 
142 | 1. Build the target with LLVM's coverage instrumentation. For Magma, this
143 | requires building with the `llvm_cov` fuzzer. For the FTS and real-world
144 | targets, build with the `coverage` Dockerfile.
145 | 1. Replay the final fuzzing queue (in AFL, this is the `queue` output directory)
146 | using the [`llvm_cov_merge`](../scripts/bin/llvm_cov_merge.py) script
147 | 1. Summarize the results using
148 | [`llvm_cov_stats`](../scripts/bin/llvm_cov_stats.py)
149 | 
-------------------------------------------------------------------------------- /fuzzing/config/fts-bug-regexs.toml: -------------------------------------------------------------------------------- 
1 | # Regular expressions for triaging/deduplicating Google FTS bugs. The regexes
2 | # basically match on ASan reports.
3 | #
4 | # Author: Adrian Herrera
5 | 
6 | [guetzli]
7 | a = "output_image\.cc:398.*?Assertion \`coeff % quant == 0\' failed\."
8 | 
9 | [json]
10 | a = "fuzzer-parse_json\.cpp:50.*?Assertion \`s1 == s2\' failed\."
11 | 
12 | [libarchive]
13 | a = "heap-buffer-overflow"
14 | 
15 | [libxml2]
16 | a = "READ of size .+? xmlParseXMLDecl .+? xmlParseDocument .+? xmlDoRead"
17 | b = "READ of size .+? xmlDictComputeFastQKey .+? xmlDictQLookup .+? xmlSAX2StartElementNs .+? xmlParseStartTag2 .+? xmlParseElement"
18 | c = "READ of size .+? xmlDictComputeFastKey .+? xmlDictLookup .+? xmlParseNCNameComplex"
19 | 
20 | [pcre2]
21 | a = "READ of size .+? match .+?pcre2_match\.c:5968:11.+? pcre2_match_8 .+? regexec"
22 | b = "READ of size .+? match .+?pcre2_match\.c:1426:16"
23 | 
24 | [re2]
25 | b = "WRITE of size .+? re2::NFA::Search\(.+? re2::Prog::SearchNFA\(.+? re2::RE2::Match\(.+? re2::RE2::DoMatch\("
26 | 
27 | [vorbis]
28 | a = "READ of size .+? vorbis_book_decodevv_add .+? res2_inverse .+? mapping0_inverse"
29 | b = "READ of size .+? vorbis_book_decodev_add .+? _01inverse .+? res1_inverse .+? mapping0_inverse"
30 | c = "SEGV on unknown address .+? _01inverse .+? res1_inverse .+? mapping0_inverse"
31 | 
-------------------------------------------------------------------------------- /fuzzing/config/targets.toml: -------------------------------------------------------------------------------- 
1 | # This file describes the AFL arguments for the targets in two of our three
2 | # benchmarks (Google FTS, and a set of real-world programs). Magma is not
3 | # included because we just use the default run configuration.
4 | #
5 | # Author: Adrian Herrera
6 | 
7 | [fts]
8 | 
9 | [fts.freetype2]
10 | memory = 600
11 | args = "-1"
12 | 
13 | [fts.guetzli]
14 | memory = 1024
15 | timeout = 5000
16 | args = "-1"
17 | 
18 | [fts.json]
19 | memory = 800
20 | args = "-1"
21 | 
22 | [fts.libarchive]
23 | memory = 800
24 | args = "-1"
25 | 
26 | [fts.libjpeg-turbo]
27 | memory = 800
28 | args = "-1"
29 | 
30 | [fts.libpng]
31 | memory = 800
32 | args = "-1"
33 | 
34 | [fts.libxml2]
35 | memory = 800
36 | args = "-1"
37 | 
38 | [fts.pcre2]
39 | memory = 800
40 | args = "-1"
41 | 
42 | [fts.re2]
43 | memory = 800
44 | args = "-1"
45 | 
46 | [fts.vorbis]
47 | memory = 800
48 | args = "-1"
49 | 
50 | [real-world]
51 | 
52 | [real-world.freetype2]
53 | driver = "char2svg"
54 | memory = 600
55 | args = "@@ @"
56 | 
57 | [real-world.librsvg]
58 | driver = "rsvg-convert"
59 | memory = 800
60 | timeout = 3500
61 | args = "-o /dev/null @@"
62 | 
63 | [real-world.libtiff]
64 | driver = "tiff2pdf"
65 | memory = 800
66 | args = "-o /dev/null @@"
67 | 
68 | [real-world.libxml2]
69 | driver = "xmllint"
70 | memory = 600
71 | args = "-o /dev/null @@"
72 | 
73 | [real-world.poppler]
74 | driver = "pdftotext"
75 | memory = 850
76 | timeout = 3500
77 | args = "@@ /dev/null"
78 | 
79 | [real-world.sox.mp3]
80 | driver = "sox"
81 | memory = 800
82 | args = "--single-threaded @@ -b 16 -t aiff /dev/null channels 1 rate 16k fade 3 norm"
83 | 
84 | [real-world.sox.wav]
85 | driver = "sox"
86 | memory = 800
87 | args = "--single-threaded @@ -b 16 -t aiff /dev/null channels 1 rate 16k fade 3 norm"
88 | 
-------------------------------------------------------------------------------- /fuzzing/extract-from-container.sh: -------------------------------------------------------------------------------- 
1 | #!/bin/bash
2 | 
3 | set -e
4 | 
5 | if [ "$#" -ne 3 ]; then
6 | echo "Usage: $0 <DOCKER_IMAGE> <CONTAINER_PATH> <HOST_DIR>"
7 | echo ""
8 | echo "DOCKER_IMAGE: Name of the Docker image to extract a directory from"
9 | echo "CONTAINER_PATH: The directory in the Docker container to extract"
10 | echo "HOST_DIR: The location of the host directory to extract to"
11 | exit 1
12 | fi
13 | 
14 | DOCKER_IMAGE=$1
15 | CONTAINER_PATH=$2
16 | HOST_DIR=$(mkdir -p $3 && cd $3 && pwd)
17 | 
18 | docker_container=$(docker run -d ${DOCKER_IMAGE} sleep 1000)
19 | docker cp ${docker_container}:${CONTAINER_PATH} ${HOST_DIR}
20 | docker kill ${docker_container} > /dev/null
21 | docker rm ${docker_container} > /dev/null
22 | 
-------------------------------------------------------------------------------- /fuzzing/fts/afl.Dockerfile: -------------------------------------------------------------------------------- 
1 | FROM seed-selection/fts/base
2 | 
3 | # Get and build AFL
4 | ENV AFL_CC=clang-8
5 | ENV AFL_CXX=clang++-8
6 | 
7 | RUN git clone --no-checkout https://github.com/google/afl && \
8 | git -C afl checkout v2.56b
9 | RUN cd afl && \
10 | export LLVM_CONFIG=llvm-config-8 && \
11 | export CC=$AFL_CC && \
12 | export CXX=$AFL_CXX && \
13 | make -j && \
14 | make -j -C llvm_mode
15 | 
16 | # Build AFL FTS
17 | ENV AFL_SRC="/afl"
18 | ENV FUZZING_ENGINE="afl"
19 | RUN mkdir /build-afl
20 | 
21 | RUN cd /build-afl && /fuzzer-test-suite/build.sh freetype2-2017
22 | RUN get_libs.py -o /build-afl/RUNDIR-afl-freetype2-2017/lib \
23 | /build-afl/RUNDIR-afl-freetype2-2017/freetype2-2017-afl
24 | 
25 | RUN cd /build-afl && /fuzzer-test-suite/build.sh guetzli-2017-3-30
26 | RUN get_libs.py -o /build-afl/RUNDIR-afl-guetzli-2017-3-30/lib \
27 | /build-afl/RUNDIR-afl-guetzli-2017-3-30/guetzli-2017-3-30-afl
28 | 
29 | RUN cd /build-afl && 
/fuzzer-test-suite/build.sh json-2017-02-12 30 | RUN get_libs.py -o /build-afl/RUNDIR-afl-json-2017-02-12/lib \ 31 | /build-afl/RUNDIR-afl-json-2017-02-12/json-2017-02-12-afl 32 | 33 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libarchive-2017-01-04 34 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libarchive-2017-01-04/lib \ 35 | /build-afl/RUNDIR-afl-libarchive-2017-01-04/libarchive-2017-01-04-afl 36 | 37 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 38 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libjpeg-turbo-07-2017/lib \ 39 | /build-afl/RUNDIR-afl-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-afl 40 | 41 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libpng-1.2.56 42 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libpng-1.2.56/lib \ 43 | /build-afl/RUNDIR-afl-libpng-1.2.56/libpng-1.2.56-afl 44 | 45 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libxml2-v2.9.2 46 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libxml2-v2.9.2/lib \ 47 | /build-afl/RUNDIR-afl-libxml2-v2.9.2/libxml2-v2.9.2-afl 48 | 49 | RUN cd /build-afl && /fuzzer-test-suite/build.sh pcre2-10.00 50 | RUN get_libs.py -o /build-afl/RUNDIR-afl-pcre2-10.00/lib \ 51 | /build-afl/RUNDIR-afl-pcre2-10.00/pcre2-10.00-afl 52 | 53 | RUN cd /build-afl && /fuzzer-test-suite/build.sh re2-2014-12-09 54 | RUN get_libs.py -o /build-afl/RUNDIR-afl-re2-2014-12-09/lib \ 55 | /build-afl/RUNDIR-afl-re2-2014-12-09/re2-2014-12-09-afl 56 | 57 | RUN cd /build-afl && /fuzzer-test-suite/build.sh vorbis-2017-12-11 58 | RUN get_libs.py -o /build-afl/RUNDIR-afl-vorbis-2017-12-11/lib \ 59 | /build-afl/RUNDIR-afl-vorbis-2017-12-11/vorbis-2017-12-11-afl 60 | 61 | RUN echo "\033[0;33m * Extract the '/build-afl' and '/afl' directories\033[0m" 62 | -------------------------------------------------------------------------------- /fuzzing/fts/aflpp.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/fts/base 2 | 3 | # Get and build AFL++ 4 | ENV AFL_CC=clang-8 5 | ENV AFL_CXX=clang++-8 6 | 7 | RUN git clone --no-checkout https://github.com/aflplusplus/aflplusplus && \ 8 | git -C aflplusplus checkout 5ee63a6e6267e448342ccb28cc8d3c0d34ffc1cd 9 | ADD aflpp_driver_GNUmakefile /aflplusplus/examples/aflpp_driver/GNUmakefile 10 | RUN cd aflplusplus && \ 11 | export LLVM_CONFIG="llvm-config-8" && \ 12 | make -j && \ 13 | make -j -C llvm_mode 14 | RUN cd aflplusplus && \ 15 | export LLVM_CONFIG="llvm-config-8" && \ 16 | export CFLAGS="-m32" && \ 17 | make -j -C examples/aflpp_driver libAFLDriver.a 18 | 19 | # Build AFL++ FTS 20 | ENV AFL_SRC="/aflplusplus" 21 | ENV FUZZING_ENGINE="aflpp" 22 | RUN mkdir /build-aflpp 23 | 24 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh freetype2-2017 25 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-freetype2-2017/lib \ 26 | /build-aflpp/RUNDIR-aflpp-freetype2-2017/freetype2-2017-aflpp 27 | 28 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh guetzli-2017-3-30 29 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-guetzli-2017-3-30/lib \ 30 | /build-aflpp/RUNDIR-aflpp-guetzli-2017-3-30/guetzli-2017-3-30-aflpp 31 | 32 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh json-2017-02-12 33 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-json-2017-02-12/lib \ 34 | /build-aflpp/RUNDIR-aflpp-json-2017-02-12/json-2017-02-12-aflpp 35 | 36 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libarchive-2017-01-04 37 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libarchive-2017-01-04/lib \ 38 | 
/build-aflpp/RUNDIR-aflpp-libarchive-2017-01-04/libarchive-2017-01-04-aflpp 39 | 40 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 41 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libjpeg-turbo-07-2017/lib \ 42 | /build-aflpp/RUNDIR-aflpp-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-aflpp 43 | 44 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libpng-1.2.56 45 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libpng-1.2.56/lib \ 46 | /build-aflpp/RUNDIR-aflpp-libpng-1.2.56/libpng-1.2.56-aflpp 47 | 48 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libxml2-v2.9.2 49 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libxml2-v2.9.2/lib \ 50 | /build-aflpp/RUNDIR-aflpp-libxml2-v2.9.2/libxml2-v2.9.2-aflpp 51 | 52 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh pcre2-10.00 53 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-pcre2-10.00/lib \ 54 | /build-aflpp/RUNDIR-aflpp-pcre2-10.00/pcre2-10.00-aflpp 55 | 56 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh re2-2014-12-09 57 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-re2-2014-12-09/lib \ 58 | /build-aflpp/RUNDIR-aflpp-re2-2014-12-09/re2-2014-12-09-aflpp 59 | 60 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh vorbis-2017-12-11 61 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-vorbis-2017-12-11/lib \ 62 | /build-aflpp/RUNDIR-aflpp-vorbis-2017-12-11/vorbis-2017-12-11-aflpp 63 | 64 | # Build AFL++ FTS in CmpLog mode 65 | ENV FUZZING_ENGINE="aflpp_cmplog" 66 | RUN mkdir /build-cmplog 67 | 68 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh freetype2-2017 69 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/lib \ 70 | /build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/freetype2-2017-aflpp_cmplog 71 | 72 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh guetzli-2017-3-30 73 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-guetzli-2017-3-30/lib \ 74 | /build-cmplog/RUNDIR-aflpp_cmplog-guetzli-2017-3-30/guetzli-2017-3-30-aflpp_cmplog 75 | 76 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh json-2017-02-12 77 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-json-2017-02-12/lib \ 78 | /build-cmplog/RUNDIR-aflpp_cmplog-json-2017-02-12/json-2017-02-12-aflpp_cmplog 79 | 80 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libarchive-2017-01-04 81 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libarchive-2017-01-04/lib \ 82 | /build-cmplog/RUNDIR-aflpp_cmplog-libarchive-2017-01-04/libarchive-2017-01-04-aflpp_cmplog 83 | 84 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 85 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libjpeg-turbo-07-2017/lib \ 86 | /build-cmplog/RUNDIR-aflpp_cmplog-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-aflpp_cmplog 87 | 88 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libpng-1.2.56 89 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libpng-1.2.56/lib \ 90 | /build-cmplog/RUNDIR-aflpp_cmplog-libpng-1.2.56/libpng-1.2.56-aflpp_cmplog 91 | 92 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libxml2-v2.9.2 93 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libxml2-v2.9.2/lib \ 94 | /build-cmplog/RUNDIR-aflpp_cmplog-libxml2-v2.9.2/libxml2-v2.9.2-aflpp_cmplog 95 | 96 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh pcre2-10.00 97 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-pcre2-10.00/lib \ 98 | /build-cmplog/RUNDIR-aflpp_cmplog-pcre2-10.00/pcre2-10.00-aflpp_cmplog 99 | 100 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh re2-2014-12-09 101 | RUN get_libs.py 
-o /build-cmplog/RUNDIR-aflpp_cmplog-re2-2014-12-09/lib \ 102 | /build-cmplog/RUNDIR-aflpp_cmplog-re2-2014-12-09/re2-2014-12-09-aflpp_cmplog 103 | 104 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh vorbis-2017-12-11 105 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-vorbis-2017-12-11/lib \ 106 | /build-cmplog/RUNDIR-aflpp_cmplog-vorbis-2017-12-11/vorbis-2017-12-11-aflpp_cmplog 107 | 108 | # The `build` directory can be extracted to the host machine 109 | RUN echo "\033[0;33m * Extract the '/build-aflpp', '/build-cmplog', and '/aflplusplus' directories\033[0m" 110 | -------------------------------------------------------------------------------- /fuzzing/fts/aflpp_driver_GNUmakefile: -------------------------------------------------------------------------------- 1 | ifeq "" "$(LLVM_CONFIG)" 2 | LLVM_CONFIG=llvm-config 3 | endif 4 | 5 | LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) 6 | ifneq "" "$(LLVM_BINDIR)" 7 | LLVM_BINDIR := $(LLVM_BINDIR)/ 8 | endif 9 | 10 | CFLAGS += -O3 -funroll-loops -g 11 | 12 | all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so 13 | 14 | aflpp_driver.o: aflpp_driver.c 15 | $(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c 16 | 17 | libAFLDriver.a: aflpp_driver.o 18 | ar ru libAFLDriver.a aflpp_driver.o 19 | cp -vf libAFLDriver.a ../../ 20 | 21 | debug: 22 | $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c 23 | $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c 24 | #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c 25 | #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c 26 | ar ru libAFLDriver.a afl-performance.o aflpp_driver.o 27 | 28 | aflpp_qemu_driver.o: aflpp_qemu_driver.c 29 | $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c 30 | 31 | libAFLQemuDriver.a: aflpp_qemu_driver.o 32 | ar ru libAFLQemuDriver.a aflpp_qemu_driver.o 33 | cp -vf libAFLQemuDriver.a ../../ 34 | 35 | aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o 36 | $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so 37 | 38 | aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c 39 | $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c 40 | 41 | test: debug 42 | #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c 43 | afl-clang-fast -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test aflpp_driver_test.c libAFLDriver.a afl-performance.o 44 | 45 | clean: 46 | rm -f *.o libAFLDriver*.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so *~ core aflpp_driver_test 47 | -------------------------------------------------------------------------------- /fuzzing/fts/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git subversion build-essential autoconf libtool \ 8 | cmake gcc-multilib g++-multilib pkg-config libarchive-dev:i386 \ 9 | zlib1g-dev:i386 libbz2-dev:i386 libxml2-dev:i386 libssl-dev:i386 \ 10 | liblzma-dev:i386 libexpat-dev:i386 nasm python3-pip 
wget 11 | 12 | # Install LLVM 8 13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 14 | RUN apt-get install -y llvm-8 clang-8 15 | 16 | # Get helper scripts 17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 18 | pip3 install fuzzing-seed-selection/scripts 19 | 20 | # Get LLVM compiler-rt 21 | RUN wget -O - http://releases.llvm.org/8.0.0/compiler-rt-8.0.0.src.tar.xz | tar xJ 22 | 23 | # Get Google FTS 24 | RUN git clone https://github.com/google/fuzzer-test-suite 25 | ADD build.sh /fuzzer-test-suite 26 | ADD common.sh /fuzzer-test-suite 27 | ADD libarchive-2017-01-04-build.sh /fuzzer-test-suite/libarchive-2017-01-04/build.sh 28 | ADD libjpeg-turbo-07-2017-build.sh /fuzzer-test-suite/libjpeg-turbo-07-2017/build.sh 29 | ADD libxml2-v2.9.2-build.sh /fuzzer-test-suite/libxml2-v2.9.2/build.sh 30 | 31 | ENV LIBFUZZER_SRC="/compiler-rt-8.0.0.src/lib/fuzzer" 32 | -------------------------------------------------------------------------------- /fuzzing/fts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $(realpath -s $0))/common.sh 5 | 6 | if [ $# -ne 1 ]; then 7 | echo "usage: $0 TARGET" 8 | exit 1 9 | fi 10 | 11 | BUILD=$SCRIPT_DIR/$1/build.sh 12 | 13 | [ ! -e $BUILD ] && echo "NO SUCH FILE: $BUILD" && exit 1 14 | 15 | RUNDIR="RUNDIR-${FUZZING_ENGINE}-$1" 16 | mkdir -p $RUNDIR 17 | cd $RUNDIR 18 | $BUILD 19 | 20 | -------------------------------------------------------------------------------- /fuzzing/fts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | 5 | # Don't allow to call these scripts from their directories. 6 | [ -e $(basename $0) ] && echo "PLEASE USE THIS SCRIPT FROM ANOTHER DIR" && exit 1 7 | 8 | # Ensure that fuzzing engine, if defined, is valid 9 | FUZZING_ENGINE=${FUZZING_ENGINE:-"afl"} 10 | POSSIBLE_FUZZING_ENGINE="afl aflpp aflpp_cmplog coverage" 11 | !(echo "$POSSIBLE_FUZZING_ENGINE" | grep -w "$FUZZING_ENGINE" > /dev/null) && \ 12 | echo "USAGE: Error: If defined, FUZZING_ENGINE should be one of the following: 13 | $POSSIBLE_FUZZING_ENGINE. 
However, it was defined as $FUZZING_ENGINE" && exit 1 14 | 15 | SCRIPT_DIR=$(dirname $(realpath -s $0)) 16 | EXECUTABLE_NAME_BASE=$(basename $SCRIPT_DIR)-${FUZZING_ENGINE} 17 | LIBFUZZER_SRC=${LIBFUZZER_SRC:-$(dirname $(dirname $SCRIPT_DIR))/Fuzzer} 18 | STANDALONE_TARGET=0 19 | AFL_SRC=${AFL_SRC:-$(dirname $(dirname $SCRIPT_DIR))/AFL} 20 | CORPUS=CORPUS-$EXECUTABLE_NAME_BASE 21 | JOBS=${JOBS:-"8"} 22 | 23 | export LIB_FUZZING_ENGINE="libFuzzingEngine-${FUZZING_ENGINE}.a" 24 | 25 | if [[ $FUZZING_ENGINE == "afl" ]]; then 26 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 27 | 28 | export CC="${AFL_PATH}/afl-clang-fast" 29 | export CXX="${AFL_PATH}/afl-clang-fast++" 30 | 31 | export AFL_CC="clang-8" 32 | export AFL_CXX="clang++-8" 33 | 34 | export AFL_USE_ASAN="1" 35 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 36 | export CXXFLAGS="${CFLAGS}" 37 | export LDFLAGS="-m32" 38 | elif [[ $FUZZING_ENGINE == "aflpp" ]]; then 39 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 40 | 41 | export CC="${AFL_PATH}/afl-clang-fast" 42 | export CXX="${AFL_PATH}/afl-clang-fast++" 43 | export AS="llvm-as-8" 44 | 45 | export AFL_CC="clang-8" 46 | export AFL_CXX="clang++-8" 47 | 48 | export AFL_USE_ASAN="1" 49 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 50 | export CXXFLAGS="${CFLAGS}" 51 | export LDFLAGS="-m32" 52 | elif [[ $FUZZING_ENGINE == "aflpp_cmplog" ]]; then 53 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 54 | 55 | export CC="${AFL_PATH}/afl-clang-fast" 56 | export CXX="${AFL_PATH}/afl-clang-fast++" 57 | export AS="llvm-as-8" 58 | 59 | export AFL_CC="clang-8" 60 | export AFL_CXX="clang++-8" 61 | 62 | export AFL_LLVM_CMPLOG=1 63 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 64 | export CXXFLAGS="${CFLAGS}" 65 | export LDFLAGS="-m32" 66 | elif [[ $FUZZING_ENGINE == "coverage" ]]; then 67 | export CC="clang-8" 68 | export CXX="clang++-8" 69 | 70 | export CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" 71 | export CXXFLAGS="${CFLAGS}" 72 | export LDFLAGS="-m32" 73 | fi 74 | 75 | export CPPFLAGS=${CPPFLAGS:-"-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"} 76 | 77 | get_git_revision() { 78 | GIT_REPO="$1" 79 | GIT_REVISION="$2" 80 | TO_DIR="$3" 81 | [ ! -e $TO_DIR ] && git clone $GIT_REPO $TO_DIR && (cd $TO_DIR && git reset --hard $GIT_REVISION) 82 | } 83 | 84 | get_git_tag() { 85 | GIT_REPO="$1" 86 | GIT_TAG="$2" 87 | TO_DIR="$3" 88 | [ ! -e $TO_DIR ] && git clone $GIT_REPO $TO_DIR && (cd $TO_DIR && git checkout $GIT_TAG) 89 | } 90 | 91 | get_svn_revision() { 92 | SVN_REPO="$1" 93 | SVN_REVISION="$2" 94 | TO_DIR="$3" 95 | [ ! 
-e $TO_DIR ] && svn co -r$SVN_REVISION $SVN_REPO $TO_DIR 96 | } 97 | 98 | build_afl() { 99 | $CXX $CXXFLAGS -std=c++11 -m32 -c ${LIBFUZZER_SRC}/afl/afl_driver.cpp -I$LIBFUZZER_SRC 100 | ar r $LIB_FUZZING_ENGINE afl_driver.o 101 | rm *.o 102 | } 103 | 104 | build_aflpp() { 105 | cp ${AFL_SRC}/examples/aflpp_driver/libAFLDriver.a $LIB_FUZZING_ENGINE 106 | } 107 | 108 | build_aflpp_cmplog() { 109 | build_aflpp 110 | } 111 | 112 | # This provides a build with no fuzzing engine, just to measure coverage 113 | build_coverage () { 114 | STANDALONE_TARGET=1 115 | $CC -m32 -c $LIBFUZZER_SRC/standalone/StandaloneFuzzTargetMain.c 116 | ar rc $LIB_FUZZING_ENGINE StandaloneFuzzTargetMain.o 117 | rm *.o 118 | } 119 | 120 | build_fuzzer() { 121 | echo "Building with $FUZZING_ENGINE" 122 | build_${FUZZING_ENGINE} 123 | } 124 | 125 | -------------------------------------------------------------------------------- /fuzzing/fts/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/fts/base 2 | 3 | # Build coverage FTS 4 | ENV FUZZING_ENGINE="coverage" 5 | RUN mkdir /build-cov 6 | 7 | RUN cd /build-cov && /fuzzer-test-suite/build.sh freetype2-2017 8 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-freetype2-2017/lib \ 9 | /build-cov/RUNDIR-coverage-freetype2-2017/freetype2-2017-coverage 10 | 11 | RUN cd /build-cov && /fuzzer-test-suite/build.sh guetzli-2017-3-30 12 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-guetzli-2017-3-30/lib \ 13 | /build-cov/RUNDIR-coverage-guetzli-2017-3-30/guetzli-2017-3-30-coverage 14 | 15 | RUN cd /build-cov && /fuzzer-test-suite/build.sh json-2017-02-12 16 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-json-2017-02-12/lib \ 17 | /build-cov/RUNDIR-coverage-json-2017-02-12/json-2017-02-12-coverage 18 | 19 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libarchive-2017-01-04 20 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libarchive-2017-01-04/lib \ 21 | /build-cov/RUNDIR-coverage-libarchive-2017-01-04/libarchive-2017-01-04-coverage 22 | 23 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 24 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libjpeg-turbo-07-2017/lib \ 25 | /build-cov/RUNDIR-coverage-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-coverage 26 | 27 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libpng-1.2.56 28 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libpng-1.2.56/lib \ 29 | /build-cov/RUNDIR-coverage-libpng-1.2.56/libpng-1.2.56-coverage 30 | 31 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libxml2-v2.9.2 32 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libxml2-v2.9.2/lib \ 33 | /build-cov/RUNDIR-coverage-libxml2-v2.9.2/libxml2-v2.9.2-coverage 34 | 35 | RUN cd /build-cov && /fuzzer-test-suite/build.sh pcre2-10.00 36 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-pcre2-10.00/lib \ 37 | /build-cov/RUNDIR-coverage-pcre2-10.00/pcre2-10.00-coverage 38 | 39 | RUN cd /build-cov && /fuzzer-test-suite/build.sh re2-2014-12-09 40 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-re2-2014-12-09/lib \ 41 | /build-cov/RUNDIR-coverage-re2-2014-12-09/re2-2014-12-09-coverage 42 | 43 | RUN cd /build-cov && /fuzzer-test-suite/build.sh vorbis-2017-12-11 44 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-vorbis-2017-12-11/lib \ 45 | /build-cov/RUNDIR-coverage-vorbis-2017-12-11/vorbis-2017-12-11-coverage 46 | 47 | RUN echo "\033[0;33m * Extract the 'build-cov' directory\033[0m" 48 | -------------------------------------------------------------------------------- 
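As with the AFL and AFL++ images above, the `/build-cov` directory can be copied out of the coverage container with the repository's extraction script. A minimal sketch, assuming the image was tagged `seed-selection/fts/coverage` per the build instructions in `fuzzing/README.md`:

```bash
# Copy the coverage-instrumented builds (and the shared libraries
# collected by get_libs.py) from the container to the current directory
./extract-from-container.sh seed-selection/fts/coverage /build-cov .
```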
/fuzzing/fts/libarchive-2017-01-04-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD/build && ./autogen.sh && cd .. && ./configure --disable-shared --without-nettle && make -j $JOBS) 11 | } 12 | 13 | get_git_revision https://github.com/libarchive/libarchive.git 51d7afd3644fdad725dd8faa7606b864fd125f88 SRC 14 | build_lib 15 | build_fuzzer 16 | 17 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 18 | # Link ASan runtime so we can hook memcmp et al. 19 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 20 | fi 21 | set -x 22 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/libarchive_fuzzer.cc -I BUILD/libarchive BUILD/.libs/libarchive.a $LIB_FUZZING_ENGINE -lz -lbz2 -lxml2 -lcrypto -lssl -llzma -lexpat -o $EXECUTABLE_NAME_BASE 23 | -------------------------------------------------------------------------------- /fuzzing/fts/libjpeg-turbo-07-2017-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD && autoreconf -fiv && ./configure --disable-shared --host=i386-linux && make -j $JOBS) 11 | } 12 | 13 | get_git_revision https://github.com/libjpeg-turbo/libjpeg-turbo.git b0971e47d76fdb81270e93bbf11ff5558073350d SRC 14 | build_lib 15 | build_fuzzer 16 | 17 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 18 | # Link ASan runtime so we can hook memcmp et al. 19 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 20 | fi 21 | set -x 22 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/libjpeg_turbo_fuzzer.cc -I BUILD BUILD/.libs/libturbojpeg.a $LIB_FUZZING_ENGINE -o $EXECUTABLE_NAME_BASE 23 | -------------------------------------------------------------------------------- /fuzzing/fts/libxml2-v2.9.2-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD && ./autogen.sh && CCLD="$CXX $CXXFLAGS" ./configure --disable-shared --without-python && make -j $JOBS) 11 | } 12 | 13 | get_git_tag https://gitlab.gnome.org/GNOME/libxml2.git v2.9.2 SRC 14 | get_git_revision https://github.com/google/afl e9be6bce2282e8db95221c9a17fd10aba9e901bc afl 15 | build_lib 16 | build_fuzzer 17 | 18 | cp afl/dictionaries/xml.dict . 19 | 20 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 21 | # Link ASan runtime so we can hook memcmp et al. 
22 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 23 | fi 24 | set -x 25 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/target.cc -I BUILD/include BUILD/.libs/libxml2.a $LIB_FUZZING_ENGINE -lz -llzma -o $EXECUTABLE_NAME_BASE 26 | -------------------------------------------------------------------------------- /fuzzing/magma/clean_corpora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "usage: $0 /path/to/magma" 5 | exit 1 6 | fi 7 | 8 | MAGMA_DIR=$1 9 | 10 | set -x 11 | rm -rf ${MAGMA_DIR}/targets/libpng/corpus/libpng_read_fuzzer/* 12 | rm -rf ${MAGMA_DIR}/targets/libtiff/corpus/tiff_read_rgba_fuzzer/* 13 | rm -rf ${MAGMA_DIR}/targets/libxml2/corpus/libxml2_xml_reader_for_file_fuzzer/* 14 | rm -rf ${MAGMA_DIR}/targets/php/corpus/{exif,json,parser}/* 15 | rm -rf ${MAGMA_DIR}/targets/poppler/corpus/pdf_fuzzer/* 16 | -------------------------------------------------------------------------------- /fuzzing/magma/log-execs.patch: -------------------------------------------------------------------------------- 1 | diff --git a/afl-fuzz.c b/afl-fuzz.c 2 | index 21918df..64e7b35 100644 3 | --- a/afl-fuzz.c 4 | +++ b/afl-fuzz.c 5 | @@ -3543,10 +3543,10 @@ static void maybe_update_plot_file(double bitmap_cvg, double eps) { 6 | execs_per_sec */ 7 | 8 | fprintf(plot_file, 9 | - "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %0.02f\n", 10 | + "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %llu, %0.02f\n", 11 | get_cur_time() / 1000, queue_cycle - 1, current_entry, queued_paths, 12 | pending_not_fuzzed, pending_favored, bitmap_cvg, unique_crashes, 13 | - unique_hangs, max_depth, eps); /* ignore errors */ 14 | + unique_hangs, max_depth, total_execs, eps); /* ignore errors */ 15 | 16 | fflush(plot_file); 17 | 18 | @@ -7266,7 +7266,7 @@ EXP_ST void setup_dirs_fds(void) { 19 | 20 | fprintf(plot_file, "# unix_time, cycles_done, cur_path, paths_total, " 21 | "pending_total, pending_favs, map_size, unique_crashes, " 22 | - "unique_hangs, max_depth, execs_per_sec\n"); 23 | + "unique_hangs, max_depth, execs, execs_per_sec\n"); 24 | /* ignore errors */ 25 | 26 | } 27 | -------------------------------------------------------------------------------- /fuzzing/magma/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "usage: $0 /dest/path" 5 | exit 1 6 | fi 7 | 8 | THIS_DIR=$(dirname $(realpath -s $0)) 9 | DEST_DIR=$1 10 | 11 | rm -rf ${DEST_DIR} 12 | git clone --branch v1.1 --depth 1 https://github.com/HexHive/magma ${DEST_DIR} 13 | git -C ${DEST_DIR} apply "${THIS_DIR}/v1.1.patch" 14 | cp "${THIS_DIR}/log-execs.patch" ${DEST_DIR}/fuzzers/afl/src/ 15 | cp "${THIS_DIR}/../../scripts/bin/timestamp_afl.py" ${DEST_DIR}/fuzzers/afl/src/timestamp_findings.py 16 | 17 | # Create php corpus directories 18 | mkdir -p ${DEST_DIR}/targets/php/corpus/{exif,json,parser} 19 | -------------------------------------------------------------------------------- /fuzzing/magma/survival_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute a survival analysis for each bug reported in the given JSON file. This 5 | JSON file is generated by running `magma/tools/benchd/exp2json.py` on the Magma 6 | work directory. 
7 | 8 | Author: Adrian Herrera 9 | """ 10 | 11 | 12 | from argparse import ArgumentParser, Namespace 13 | from collections import defaultdict 14 | from math import sqrt 15 | from pathlib import Path 16 | from typing import Tuple 17 | import json 18 | import warnings 19 | 20 | from lifelines import KaplanMeierFitter 21 | from lifelines.utils import restricted_mean_survival_time as rmst 22 | import pandas as pd 23 | 24 | 25 | NUM_TRIALS = 30 26 | TRIAL_LEN = 18 * 60 * 60 27 | 28 | ddr = lambda: defaultdict(ddr) 29 | 30 | 31 | def parse_args() -> Namespace: 32 | """Parse command-line arguments.""" 33 | parser = ArgumentParser(description='Magma survival analysis') 34 | parser.add_argument('json', type=Path, nargs='+', 35 | help='Magma-generated JSON file (containing bug data)') 36 | return parser.parse_args() 37 | 38 | 39 | def get_time_to_bug(data: dict) -> dict: 40 | """Get time-to-bug data from Magma JSON dictionary.""" 41 | for fuzzer, f_data in data.items(): 42 | for target, t_data in f_data.items(): 43 | for program, p_data in t_data.items(): 44 | bugs = ddr() 45 | for run, r_data in p_data.items(): 46 | for metric, m_data in r_data.items(): 47 | for bug, time in m_data.items(): 48 | if metric not in bugs[bug]: 49 | bugs[bug][metric] = [None] * NUM_TRIALS 50 | bugs[bug][metric][int(run)] = time 51 | for bug, b_data in bugs.items(): 52 | yield dict( 53 | target=target, 54 | program=program, 55 | fuzzer=fuzzer, 56 | bug=bug, 57 | **b_data, 58 | ) 59 | 60 | 61 | def calc_survival(data: dict) -> Tuple[float, float]: 62 | """Do the survival analysis.""" 63 | df = pd.DataFrame(data) 64 | T = df.fillna(TRIAL_LEN) / 60 / 60 65 | E = df.notnull() 66 | 67 | kmf = KaplanMeierFitter() 68 | kmf.fit(T, E) 69 | 70 | # Compute the restricted mean survival time and 95% confidence interval 71 | surv_time_mean, surv_time_var = rmst(kmf, t=TRIAL_LEN / 60 / 60, 72 | return_variance=True) 73 | surv_time_var = abs(surv_time_var) 74 | surv_time_ci = 1.96 * (sqrt(surv_time_var) / 75 | sqrt(len(kmf.survival_function_))) 76 | 77 | return surv_time_mean, surv_time_ci 78 | 79 | 80 | def main(): 81 | """The main function.""" 82 | args = parse_args() 83 | 84 | # Ignore warnings 85 | warnings.simplefilter("ignore") 86 | 87 | survival_times = dict(target=[], 88 | program=[], 89 | bug=[], 90 | src=[], 91 | fuzzer=[], 92 | survival_time=[], 93 | survival_ci=[]) 94 | 95 | for magma_json in args.json: 96 | # Read Magma JSON data 97 | with magma_json.open() as inf: 98 | json_data = json.load(inf).get('results', {}) 99 | 100 | # Do survival analysis on 'triggered' results 101 | for ttb in get_time_to_bug(json_data): 102 | if 'triggered' not in ttb: 103 | surv_time_mean = None 104 | surv_time_ci = None 105 | else: 106 | triggered_data = ttb['triggered'] 107 | surv_time_mean, surv_time_ci = calc_survival(triggered_data) 108 | 109 | # Save table data 110 | survival_times['target'].append(ttb['target']) 111 | survival_times['program'].append(ttb['program']) 112 | survival_times['bug'].append(ttb['bug']) 113 | survival_times['src'].append(magma_json.stem) 114 | survival_times['fuzzer'].append('afl') 115 | survival_times['survival_time'].append(surv_time_mean) 116 | survival_times['survival_ci'].append(surv_time_ci) 117 | 118 | # Write to CSV 119 | print(pd.DataFrame.from_dict(survival_times).sort_values(by='bug').to_csv(index=False)) 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /fuzzing/magma/v1.1.patch: 
-------------------------------------------------------------------------------- 
1 | diff --git a/fuzzers/afl/fetch.sh b/fuzzers/afl/fetch.sh
2 | index c2ca3be3..ca1cb7a6 100755
3 | --- a/fuzzers/afl/fetch.sh
4 | +++ b/fuzzers/afl/fetch.sh
5 | @@ -11,3 +11,5 @@ git -C "$FUZZER/repo" checkout fab1ca5ed7e3552833a18fc2116d33a9241699bc
6 | #wget -O "$FUZZER/repo/afl_driver.cpp" \
7 | # "https://cs.chromium.org/codesearch/f/chromium/src/third_party/libFuzzer/src/afl/afl_driver.cpp"
8 | cp "$FUZZER/src/afl_driver.cpp" "$FUZZER/repo/afl_driver.cpp"
9 | +
10 | +git -C "$FUZZER/repo" apply "$FUZZER/src/log-execs.patch"
11 | diff --git a/fuzzers/afl/run.sh b/fuzzers/afl/run.sh
12 | index 077ba44b..f20c37aa 100755
13 | --- a/fuzzers/afl/run.sh
14 | +++ b/fuzzers/afl/run.sh
15 | @@ -15,5 +15,7 @@ mkdir -p "$SHARED/findings"
16 | 
17 | export AFL_SKIP_CPUFREQ=1
18 | export AFL_NO_AFFINITY=1
19 | -"$FUZZER/repo/afl-fuzz" -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
20 | - $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1
21 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
22 | + -M fuzzer-01 $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1 &
23 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
24 | + -S fuzzer-02 $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1 &
25 | diff --git a/fuzzers/aflplusplus/run.sh b/fuzzers/aflplusplus/run.sh
26 | index c2f1c100..2e814eb2 100755
27 | --- a/fuzzers/aflplusplus/run.sh
28 | +++ b/fuzzers/aflplusplus/run.sh
29 | @@ -21,6 +21,9 @@ export AFL_NO_UI=1
30 | export AFL_MAP_SIZE=256000
31 | export AFL_DRIVER_DONT_DEFER=1
32 | 
33 | -"$FUZZER/repo/afl-fuzz" -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
34 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
35 | "${flag_cmplog[@]}" -d \
36 | - $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1
37 | + -M fuzzer-01 $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1 &
38 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
39 | + "${flag_cmplog[@]}" -d \
40 | + -S fuzzer-02 $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1 &
41 | diff --git a/magma/run.sh b/magma/run.sh
42 | index d6fcdd53..50f3cf52 100755
43 | --- a/magma/run.sh
44 | +++ b/magma/run.sh
45 | @@ -25,15 +25,17 @@ mkdir -p "$MONITOR"
46 | cd "$SHARED"
47 | 
48 | # prune the seed corpus for any fault-triggering test-cases
49 | -for seed in "$TARGET/corpus/$PROGRAM"/*; do
50 | - out="$("$MAGMA"/runonce.sh "$seed")"
51 | - code=$?
52 | +if [ -z $NO_PRUNE ]; then
53 | + for seed in "$TARGET/corpus/$PROGRAM"/*; do
54 | + out="$("$MAGMA"/runonce.sh "$seed")"
55 | + code=$?
56 | 57 | - if [ $code -ne 0 ]; then 58 | - echo "$seed: $out" 59 | - rm "$seed" 60 | - fi 61 | -done 62 | + if [ $code -ne 0 ]; then 63 | + echo "$seed: $out" 64 | + rm "$seed" 65 | + fi 66 | + done 67 | +fi 68 | 69 | shopt -s nullglob 70 | seeds=("$1"/*) 71 | @@ -68,8 +70,10 @@ done & 72 | 73 | echo "Campaign launched at $(date '+%F %R')" 74 | 75 | -timeout $TIMEOUT "$FUZZER/run.sh" | \ 76 | - multilog n2 s$LOGSIZE "$SHARED/log" 77 | +timeout $TIMEOUT bash -c ' 78 | + "$FUZZER/run.sh" | \ 79 | + multilog n2 s$LOGSIZE "$SHARED/log" 80 | +' 81 | 82 | if [ -f "$SHARED/log/current" ]; then 83 | cat "$SHARED/log/current" 84 | @@ -77,4 +81,10 @@ fi 85 | 86 | echo "Campaign terminated at $(date '+%F %R')" 87 | 88 | +# XXX This should be in a fuzzer-specific post-processing script 89 | +python3 "$FUZZER/src/timestamp_findings.py" -o "$SHARED/fuzzer-01-timestamps.csv" -- \ 90 | + "$SHARED/findings/fuzzer-01" 91 | +python3 "$FUZZER/src/timestamp_findings.py" -o "$SHARED/fuzzer-02-timestamps.csv" -- \ 92 | + "$SHARED/findings/fuzzer-02" 93 | + 94 | kill $(jobs -p) 95 | diff --git a/targets/php/build.sh b/targets/php/build.sh 96 | index 26f2e2d6..131d83fd 100755 97 | --- a/targets/php/build.sh 98 | +++ b/targets/php/build.sh 99 | @@ -47,16 +47,7 @@ popd 100 | 101 | make -j$(nproc) 102 | 103 | -# Generate seed corpora 104 | -sapi/cli/php sapi/fuzzer/generate_unserialize_dict.php 105 | -sapi/cli/php sapi/fuzzer/generate_parser_corpus.php 106 | - 107 | FUZZERS="php-fuzz-json php-fuzz-exif php-fuzz-mbstring php-fuzz-unserialize php-fuzz-parser" 108 | for fuzzerName in $FUZZERS; do 109 | cp sapi/fuzzer/$fuzzerName "$OUT/${fuzzerName/php-fuzz-/}" 110 | done 111 | - 112 | -for fuzzerName in `ls sapi/fuzzer/corpus`; do 113 | - mkdir -p "$TARGET/corpus/${fuzzerName}" 114 | - cp sapi/fuzzer/corpus/${fuzzerName}/* "$TARGET/corpus/${fuzzerName}/" 115 | -done 116 | diff --git a/tools/captain/captainrc b/tools/captain/captainrc 117 | index 466d2164..098931fe 100644 118 | --- a/tools/captain/captainrc 119 | +++ b/tools/captain/captainrc 120 | @@ -10,7 +10,7 @@ 121 | WORKDIR=./workdir 122 | 123 | # REPEAT: number of campaigns to run per program (per fuzzer) 124 | -REPEAT=3 125 | +REPEAT=30 126 | 127 | # [WORKER_MODE]: defines the type of CPU resources to allocate (default: 1) 128 | # - 1: logical cores (possibly SMT-enabled) 129 | @@ -25,22 +25,22 @@ REPEAT=3 130 | # WORKER_POOL="1 3 5 7 9" 131 | 132 | # [CAMPAIGN_WORKERS]: number of workers to allocate for a campaign (default: 1) 133 | -# CAMPAIGN_WORKERS=1 134 | +CAMPAIGN_WORKERS=2 135 | 136 | # [TIMEOUT]: time to run each campaign. This variable supports one-letter 137 | # suffixes to indicate duration (s: seconds, m: minutes, h: hours, d: days) 138 | # (default: 1m) 139 | -TIMEOUT=24h 140 | +TIMEOUT=18h 141 | 142 | # [POLL]: time (in seconds) between polls (default: 5) 143 | POLL=5 144 | 145 | # [CACHE_ON_DISK]: if set, the cache workdir is mounted on disk instead of 146 | # in-memory (default: unset) 147 | -# CACHE_ON_DISK=1 148 | +CACHE_ON_DISK=1 149 | 150 | # [NO_ARCHIVE]: if set, campaign workdirs will not be tarballed (default: unset) 151 | -# NO_ARCHIVE=1 152 | +NO_ARCHIVE=1 153 | 154 | # [TMPFS_SIZE]: the size of the tmpfs mounted volume. 
This only applies when
155 |  # CACHE_ON_DISK is not set (default: 50g)
156 | @@ -65,23 +65,40 @@ POLL=5
157 |  # (default: unset)
158 |  # POC_EXTRACT=1
159 | 
160 | +# [NO_PRUNE]: if set, do not prune the seed corpus for any fault-triggering
161 | +# test-cases (default: unset)
162 | +NO_PRUNE=1
163 | +
164 | 
165 |  ###
166 |  ## Campaigns to run
167 |  ###
168 | 
169 |  # FUZZERS: an array of fuzzer names (from magma/fuzzers/*) to evaluate
170 | -FUZZERS=(afl aflfast moptafl aflplusplus fairfuzz honggfuzz)
171 | +FUZZERS=(afl aflplusplus)
172 | 
173 |  # [fuzzer_TARGETS]: an array of target names (from magma/targets/*) to fuzz with
174 |  # `fuzzer`. The `fuzzer` prefix is a fuzzer listed in the FUZZERS array
175 |  # (default: all targets)
176 |  # afl_TARGETS=(libpng libtiff libxml2)
177 | +afl_TARGETS=(libpng libtiff libxml2 php poppler)
178 | +aflplusplus_TARGETS=(libpng libtiff libxml2 php poppler)
179 | 
180 |  # [fuzzer_target_PROGRAMS]: an array of program names (from
181 |  # magma/targets/target/configrc) to use as execution drivers when fuzzing the
182 |  # `target`
183 |  # afl_libtiff_PROGRAMS=(tiffcp)
184 | +afl_libpng_PROGRAMS=(libpng_read_fuzzer)
185 | +afl_libtiff_PROGRAMS=(tiff_read_rgba_fuzzer)
186 | +afl_libxml2_PROGRAMS=(libxml2_xml_reader_for_file_fuzzer)
187 | +afl_php_PROGRAMS=(exif json parser)
188 | +afl_poppler_PROGRAMS=(pdf_fuzzer)
189 | +
190 | +aflplusplus_libpng_PROGRAMS=(libpng_read_fuzzer)
191 | +aflplusplus_libtiff_PROGRAMS=(tiff_read_rgba_fuzzer)
192 | +aflplusplus_libxml2_PROGRAMS=(libxml2_xml_reader_for_file_fuzzer)
193 | +aflplusplus_php_PROGRAMS=(exif json parser)
194 | +aflplusplus_poppler_PROGRAMS=(pdf_fuzzer)
195 | 
196 |  # [fuzzer_target_FUZZARGS]: a string containing fuzzer/target-specific arguments
197 |  # when fuzzing `target` with `fuzzer`
198 | -------------------------------------------------------------------------------- /fuzzing/readelf/Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN export DEBIAN_FRONTEND=noninteractive && \
5 |     apt-get update && \
6 |     apt-get -y install git build-essential wget binutils-dev libunwind-dev \
7 |         parallel python3 python3-pip
8 | 
9 | # Install LLVM 8
10 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
11 | RUN apt-get install -y llvm-8 clang-8
12 | 
13 | # Get AFLFast
14 | RUN git clone --no-checkout https://github.com/mboehme/aflfast && \
15 |     git -C aflfast checkout 11ec1828448d27bdcc54fdeb91bf3215d4d8c583
16 | ADD afl_llvm_mode.Makefile /aflfast/llvm_mode/Makefile
17 | RUN make -C aflfast -j && \
18 |     CC=clang-8 CXX=clang++-8 LLVM_CONFIG=llvm-config-8 make -C aflfast/llvm_mode
19 | 
20 | # Get AFL++
21 | RUN git clone --no-checkout https://github.com/AFLplusplus/AFLplusplus aflplusplus && \
22 |     git -C aflplusplus checkout fa933036a7bdbf9a59a9b1b7669d6ec7db64a202 && \
23 |     CC=clang-8 CXX=clang++-8 LLVM_CONFIG=llvm-config-8 make -C aflplusplus
24 | 
25 | # Get honggfuzz
26 | RUN git clone --no-checkout https://github.com/google/honggfuzz && \
27 |     git -C honggfuzz checkout 5810856f5381f93c461e3a7ba6452945c0725574 && \
28 |     make -C honggfuzz
29 | 
30 | # Get binutils
31 | RUN wget ftp://sourceware.org/pub/binutils/snapshots/binutils-2.30.0.tar.xz
32 | 
33 | # AFLFast build
34 | RUN rm -rf binutils-2.30.0
35 | RUN tar xJf /binutils-2.30.0.tar.xz
36 | RUN mkdir -p /binutils-aflfast
37 | RUN cd binutils-2.30.0 && \
38 |     export AFL_CC=clang-8 && \
39 |     export AFL_CXX=clang++-8 && \
40 |     export
CC=/aflfast/afl-clang-fast && \ 41 | export CXX=/aflfast/afl-clang-fast++ && \ 42 | ./configure --prefix=/binutils-aflfast --disable-shared --disable-werror\ 43 | --disable-ld --disable-gdb && \ 44 | make -j && make install 45 | 46 | # AFL++ build 47 | RUN rm -rf binutils-2.30.0 48 | RUN tar xJf /binutils-2.30.0.tar.xz 49 | RUN mkdir -p /binutils-aflplusplus/afl 50 | RUN cd binutils-2.30.0 && \ 51 | export AFL_CC=clang-8 && \ 52 | export AFL_CXX=clang++-8 && \ 53 | export CC=/aflplusplus/afl-clang-fast && \ 54 | export CXX=/aflplusplus/afl-clang-fast++ && \ 55 | ./configure --prefix=/binutils-aflplusplus/afl --disable-shared \ 56 | --disable-werror --disable-ld --disable-gdb && \ 57 | make -j && make install 58 | 59 | RUN rm -rf binutils-2.30.0 60 | RUN tar xJf /binutils-2.30.0.tar.xz 61 | RUN mkdir -p /binutils-aflplusplus/cmplog 62 | RUN cd binutils-2.30.0 && \ 63 | export AFL_CC=clang-8 && \ 64 | export AFL_CXX=clang++-8 && \ 65 | export CC=/aflplusplus/afl-clang-fast && \ 66 | export CXX=/aflplusplus/afl-clang-fast++ && \ 67 | export AFL_LLVM_CMPLOG=1 && \ 68 | ./configure --prefix=/binutils-aflplusplus/cmplog --disable-shared \ 69 | --disable-werror --disable-ld --disable-gdb && \ 70 | make -j && make install 71 | 72 | # honggfuzz build 73 | RUN rm -rf binutils-2.30.0 74 | RUN tar xJf /binutils-2.30.0.tar.xz 75 | RUN mkdir -p /binutils-honggfuzz 76 | RUN cd binutils-2.30.0 && \ 77 | export CC=/honggfuzz/hfuzz_cc/hfuzz-clang && \ 78 | export CXX=/honggfuzz/hfuzz_cc/hfuzz-clang++ && \ 79 | ./configure --prefix=/binutils-honggfuzz --disable-shared \ 80 | --disable-werror --disable-ld --disable-gdb && \ 81 | make -j && make install 82 | 83 | # Coverage build 84 | RUN rm -rf binutils-2.30.0 85 | RUN tar xJf /binutils-2.30.0.tar.xz 86 | RUN mkdir -p /binutils-coverage 87 | RUN cd binutils-2.30.0 && \ 88 | export CC=clang-8 && \ 89 | export CXX=clang++-8 && \ 90 | export CFLAGS="-fprofile-instr-generate -fcoverage-mapping" && \ 91 | export CXXFLAGS="-fprofile-instr-generate -fcoverage-mapping" && \ 92 | ./configure --prefix=/binutils-coverage --disable-shared \ 93 | --disable-werror --disable-ld --disable-gdb && \ 94 | make -j && make install 95 | 96 | # Add fuzzer files 97 | RUN mkdir /uninformed-seed 98 | ADD seeds/uninformed-seed /uninformed-seed/seed 99 | ADD seeds/cmin-seeds.tar.xz / 100 | 101 | # Add scripts 102 | ADD scripts /scripts 103 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 104 | pip3 install fuzzing-seed-selection/scripts 105 | RUN pip3 install -r /scripts/requirements.txt 106 | -------------------------------------------------------------------------------- /fuzzing/readelf/afl_llvm_mode.Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # american fuzzy lop - LLVM instrumentation 3 | # ----------------------------------------- 4 | # 5 | # Written by Laszlo Szekeres and 6 | # Michal Zalewski 7 | # 8 | # LLVM integration design comes from Laszlo Szekeres. 9 | # 10 | # Copyright 2015, 2016 Google Inc. All rights reserved. 11 | # 12 | # Licensed under the Apache License, Version 2.0 (the "License"); 13 | # you may not use this file except in compliance with the License. 
14 | # You may obtain a copy of the License at: 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | 19 | PREFIX ?= /usr/local 20 | HELPER_PATH = $(PREFIX)/lib/afl 21 | BIN_PATH = $(PREFIX)/bin 22 | 23 | VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) 24 | 25 | LLVM_CONFIG ?= llvm-config 26 | 27 | CFLAGS ?= -O3 -funroll-loops 28 | CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ 29 | -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ 30 | -DVERSION=\"$(VERSION)\" 31 | ifdef AFL_TRACE_PC 32 | CFLAGS += -DUSE_TRACE_PC=1 33 | endif 34 | 35 | CXXFLAGS ?= -O3 -funroll-loops 36 | CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ 37 | -DVERSION=\"$(VERSION)\" -Wno-variadic-macros 38 | 39 | # Mark nodelete to work around unload bug in upstream LLVM 5.0+ 40 | CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS) 41 | CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) 42 | 43 | # User teor2345 reports that this is required to make things work on MacOS X. 44 | 45 | ifeq "$(shell uname)" "Darwin" 46 | CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress 47 | endif 48 | 49 | # We were using llvm-config --bindir to get the location of clang, but 50 | # this seems to be busted on some distros, so using the one in $PATH is 51 | # probably better. 52 | 53 | ifeq "$(origin CC)" "default" 54 | CC = clang 55 | CXX = clang++ 56 | endif 57 | 58 | ifndef AFL_TRACE_PC 59 | PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o 60 | else 61 | PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o 62 | endif 63 | 64 | all: test_deps $(PROGS) test_build all_done 65 | 66 | test_deps: 67 | ifndef AFL_TRACE_PC 68 | @echo "[*] Checking for working 'llvm-config'..." 69 | @which $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 ) 70 | else 71 | @echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)." 72 | endif 73 | @echo "[*] Checking for working '$(CC)'..." 74 | @which $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 ) 75 | @echo "[*] Checking for '../afl-showmap'..." 76 | @test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 ) 77 | @echo "[+] All set and ready to build." 78 | 79 | ../afl-clang-fast: afl-clang-fast.c | test_deps 80 | $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) 81 | ln -sf afl-clang-fast ../afl-clang-fast++ 82 | 83 | ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps 84 | $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) 85 | 86 | ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps 87 | $(CC) $(CFLAGS) -fPIC -c $< -o $@ 88 | 89 | ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps 90 | @printf "[*] Building 32-bit variant of the runtime (-m32)... " 91 | @$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi 92 | 93 | ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps 94 | @printf "[*] Building 64-bit variant of the runtime (-m64)... " 95 | @$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" 
= "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi 96 | 97 | test_build: $(PROGS) 98 | @echo "[*] Testing the CC wrapper and instrumentation output..." 99 | unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) 100 | echo 0 | ../afl-showmap -m none -q -o .test-instr0 ./test-instr 101 | echo 1 | ../afl-showmap -m none -q -o .test-instr1 ./test-instr 102 | @rm -f test-instr 103 | @cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping to troubleshoot the issue."; echo; exit 1; fi 104 | @echo "[+] All right, the instrumentation seems to be working!" 105 | 106 | all_done: test_build 107 | @echo "[+] All done! You can now use '../afl-clang-fast' to compile programs." 108 | 109 | .NOTPARALLEL: clean 110 | 111 | clean: 112 | rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 113 | rm -f $(PROGS) ../afl-clang-fast++ 114 | -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-ascii-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-ascii-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-ascii-cov.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/readelf-experiment.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/readelf-experiment.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | NUM_TRIALS=5 4 | TRIAL_LEN=$((10*60*60)) 5 | NUM_CORES=$(grep -c ^processor /proc/cpuinfo) 6 | SEM_ID="readelf-fuzz" 7 | 8 | export AFL_NO_UI=1 9 | 10 | # AFLFast 11 | mkdir /readelf-aflfast 12 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do 13 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 14 | --halt now,fail=1 \ 15 | /aflfast/afl-fuzz -p fast -i /uninformed-seed \ 16 | -o /readelf-aflfast/uninformed-trial-${TRIAL} -- \ 17 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 18 | sleep 2s 19 | 20 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 21 | --halt now,fail=1 \ 22 | /aflfast/afl-fuzz -p fast -i /aflfast/testcases/others/elf \ 23 | -o /readelf-aflfast/singleton-trial-${TRIAL} -- \ 24 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 25 | sleep 2s 26 | 27 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 28 | --halt now,fail=1 \ 29 | /aflfast/afl-fuzz -p fast -i /cmin-seeds \ 30 | -o /readelf-aflfast/cmin-trial-${TRIAL} -- \ 31 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 32 | sleep 2s 33 | done 34 | 35 | # AFL++ 36 | mkdir /readelf-aflplusplus 37 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do 38 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 39 | --halt now,fail=1 \ 40 | /aflplusplus/afl-fuzz -i /uninformed-seed \ 41 | -o /readelf-aflplusplus/uninformed-trial-${TRIAL} \ 42 | -m none -c /binutils-aflplusplus/cmplog/bin/readelf -- \ 43 | /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null 44 | sleep 2s 45 | 46 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 47 | --halt now,fail=1 \ 48 | /aflplusplus/afl-fuzz -i /aflfast/testcases/others/elf \ 49 | -o /readelf-aflplusplus/singleton-trial-${TRIAL} \ 50 | -m none -c /binutils-aflplusplus/cmplog/bin/readelf -- \ 51 | /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null 52 | sleep 2s 53 | 54 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 55 | --halt now,fail=1 \ 56 | /aflplusplus/afl-fuzz -i /cmin-seeds \ 57 | -o /readelf-aflplusplus/cmin-trial-${TRIAL} \ 58 | -m 
none -c /binutils-aflplusplus/cmplog/bin/readelf -- \
59 |         /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null
60 |     sleep 2s
61 | done
62 | 
63 | # honggfuzz
64 | mkdir /readelf-honggfuzz
65 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do
66 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
67 |         --halt now,fail=1 \
68 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
69 |         -i /uninformed-seed -o /readelf-honggfuzz/uninformed-trial-${TRIAL} \
70 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
71 |     sleep 2s
72 | 
73 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
74 |         --halt now,fail=1 \
75 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
76 |         -i /aflfast/testcases/others/elf \
77 |         -o /readelf-honggfuzz/singleton-trial-${TRIAL} \
78 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
79 |     sleep 2s
80 | 
81 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
82 |         --halt now,fail=1 \
83 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
84 |         -i /cmin-seeds \
85 |         -o /readelf-honggfuzz/cmin-trial-${TRIAL} \
86 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
87 |     sleep 2s
88 | done
89 | 
90 | # Wait for fuzzers to finish
91 | sem --wait --id ${SEM_ID}
92 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/get_afl_cov.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash -u
2 | 
3 | export THIS_DIR=$(dirname $(readlink -f $0))
4 | export TARGET="/binutils-coverage/bin/readelf"
5 | export TIMEOUT="1m"
6 | 
7 | function get_cov() {
8 |     local QUEUE=$(realpath $1)
9 |     local OUT_DIR=$(dirname ${QUEUE})
10 |     local LLVM_COV_DIR=$(realpath "${QUEUE}/../llvm_cov")
11 |     local SEEDS_LIST="${LLVM_COV_DIR}/seeds.txt"
12 | 
13 |     timestamp_afl.py -o ${OUT_DIR}/timestamps.csv ${OUT_DIR}
14 | 
15 |     rm -f ${SEEDS_LIST}
16 |     for SEED in $(ls -rt ${QUEUE}); do
17 |         if [[ $SEED != id:* ]]; then
18 |             continue
19 |         fi
20 | 
21 |         echo "[*] processing ${SEED}"
22 | 
23 |         local SEED_PATH="${QUEUE}/${SEED}"
24 |         export LLVM_PROFILE_FILE="${LLVM_COV_DIR}/${SEED}.profraw"
25 | 
26 |         timeout --preserve-status ${TIMEOUT} ${TARGET} -a ${SEED_PATH} > /dev/null 2>&1
27 | 
28 |         echo "1,${LLVM_PROFILE_FILE}" >> ${SEEDS_LIST}
29 |         llvm-profdata-8 merge --sparse \
30 |             --input-files "${LLVM_COV_DIR}/seeds.txt" \
31 |             --output "${LLVM_PROFILE_FILE%.profraw}.profdata" \
32 |             --num-threads=5
33 | 
34 |         llvm-cov-8 export --summary-only ${TARGET} \
35 |             --instr-profile "${LLVM_PROFILE_FILE%.profraw}.profdata" \
36 |             --format text --num-threads=5 > "${LLVM_PROFILE_FILE%.profraw}.json"
37 |     done
38 | }
39 | 
40 | export -f get_cov
41 | 
42 | find ${THIS_DIR} -maxdepth 4 -name 'queue' -type d -print0 | parallel -0 -u get_cov {}
43 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/get_hfuzz_cov.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash -u
2 | 
3 | export THIS_DIR=$(dirname $(readlink -f $0))
4 | export TARGET="/binutils-coverage/bin/readelf"
5 | export TIMEOUT="1m"
6 | 
7 | function get_cov() {
8 |     local QUEUE=$(realpath $1)
9 |     local OUT_DIR=${QUEUE}
10 |     local LLVM_COV_DIR=$(realpath "${QUEUE}/llvm_cov")
11 |     local SEEDS_LIST="${LLVM_COV_DIR}/seeds.txt"
12 | 
13 |     timestamp_honggfuzz.py -o ${OUT_DIR}/timestamps.csv ${OUT_DIR}
14 | 
15 |     rm -f ${SEEDS_LIST}
16 |     for SEED in $(ls -rt ${QUEUE}); do
17 |         if [[ ${SEED} != *.honggfuzz.cov ]];
then 18 | continue 19 | fi 20 | 21 | echo "[*] processing ${SEED}" 22 | 23 | local SEED_PATH="${QUEUE}/${SEED}" 24 | export LLVM_PROFILE_FILE="${LLVM_COV_DIR}/${SEED}.profraw" 25 | 26 | timeout --preserve-status ${TIMEOUT} ${TARGET} -a ${SEED_PATH} > /dev/null 2>&1 27 | 28 | echo "1,${LLVM_PROFILE_FILE}" >> ${SEEDS_LIST} 29 | llvm-profdata-8 merge --sparse \ 30 | --input-files "${LLVM_COV_DIR}/seeds.txt" \ 31 | --output "${LLVM_PROFILE_FILE%.profraw}.profdata" \ 32 | --num-threads=5 33 | 34 | llvm-cov-8 export --summary-only ${TARGET} \ 35 | --instr-profile "${LLVM_PROFILE_FILE%.profraw}.profdata"\ 36 | --format text --num-threads=5 > "${LLVM_PROFILE_FILE%.profraw}.json" 37 | done 38 | } 39 | 40 | export -f get_cov 41 | 42 | find . -maxdepth 2 -path '*-honggfuzz/*-trial-*' -type d -print0 | parallel -0 -u get_cov {} 43 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/merge_cov.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | from functools import partial, reduce 5 | from itertools import product 6 | from pathlib import Path 7 | import json 8 | import logging 9 | import multiprocessing.pool as mpp 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | 15 | THIS_DIR = Path(__file__).parent 16 | 17 | FUZZERS = ('aflfast', 'aflplusplus', 'honggfuzz') 18 | TRIAL_LEN = 10 # Hours 19 | NUM_TRIALS = 5 20 | SEEDS = ('ascii', 'singleton', 'cmin') 21 | FORMATTER = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s') 22 | NUM_JOBS = NUM_TRIALS 23 | 24 | logger = logging.getLogger() 25 | 26 | 27 | def get_cov(fuzzer: str, seed: str, trial: int) -> pd.DataFrame: 28 | cov_dir = THIS_DIR / ('readelf-%s' % fuzzer) / ('%s-trial-%d' % (seed, trial)) / 'llvm_cov' 29 | assert cov_dir.exists() 30 | 31 | count_col = 'region_count_%d' % trial 32 | percent_col = 'region_percent_%d' % trial 33 | 34 | df = pd.read_csv(cov_dir.parent / 'timestamps.csv') 35 | df['time'] = df.unix_time - df.unix_time.iloc[0] 36 | df['dir'] = df.seed.apply(lambda x: Path(x).parent.name) 37 | df['seed'] = df.seed.apply(lambda x: Path(x).name) 38 | df[count_col] = np.nan 39 | df[percent_col] = np.nan 40 | 41 | # Drop crashes 42 | df = df.drop(df[df.dir == 'crashes'].index) 43 | 44 | for cov_file in sorted(list(cov_dir.glob('*.json'))): 45 | with cov_file.open() as inf: 46 | try: 47 | region_data = json.load(inf)['data'][0]['totals']['regions'] 48 | except json.JSONDecodeError: 49 | print('unable to read %s. 
Skipping' % cov_file)
50 |                 continue
51 |             reg_covered = region_data['covered']
52 |             reg_count = region_data['count']
53 |             df.loc[df.seed == cov_file.stem, count_col] = reg_covered
54 |             df.loc[df.seed == cov_file.stem, percent_col] = reg_covered * 100.0 / reg_count
55 | 
56 |     return df.set_index('time')[[count_col, percent_col]]
57 | 
58 | 
59 | def main():
60 |     """The main function."""
61 |     # Configure logger
62 |     handler = logging.StreamHandler()
63 |     handler.setFormatter(FORMATTER)
64 |     logger.addHandler(handler)
65 | 
66 |     logger.setLevel(logging.INFO)
67 | 
68 |     with mpp.Pool(processes=NUM_JOBS) as pool:
69 |         for fuzzer, seed in product(FUZZERS, SEEDS):
70 |             # Get raw trial data
71 |             logger.info('Getting %s-%s coverage', fuzzer, seed)
72 |             cov_func = partial(get_cov, fuzzer, seed)
73 |             trials = range(1, NUM_TRIALS + 1)
74 |             cov_data = pool.map(cov_func, trials)
75 | 
76 |             # Merge trial data and extend to trial length
77 |             logger.info('Merging coverage')
78 |             df = reduce(lambda x, y: x.join(y, how='outer'), cov_data)
79 |             df.loc[TRIAL_LEN * 60 * 60] = np.nan
80 |             df = df.ffill().cummax()
81 | 
82 |             # Save merged data
83 |             out_path = Path('%s-%s-cov.csv' % (fuzzer, seed))
84 |             logger.info('Saving coverage data to %s', out_path)
85 |             df.to_csv(out_path)
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     main()
90 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/plot_cov.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | 
4 | from itertools import product
5 | from pathlib import Path
6 | import gzip
7 | 
8 | from matplotlib import rc, rcParams
9 | import matplotlib.pyplot as plt
10 | import pandas as pd
11 | import seaborn as sns
12 | 
13 | 
14 | DATA_DIR = Path(__file__).parent.parent / 'data'
15 | 
16 | 
17 | TRIAL_LEN = 10  # Hours
18 | PLOT_STEP = 10  # Seconds
19 | NUM_TRIALS = 5
20 | NUM_BOOTS = 2000
21 | FUZZERS = ('aflfast', 'aflplusplus', 'honggfuzz')
22 | SEEDS = ('ascii', 'singleton', 'cmin')
23 | 
24 | FUZZER_LABELS = dict(aflfast='AFLFast',
25 |                      aflplusplus='AFL++',
26 |                      honggfuzz='honggfuzz')
27 | SEED_LABELS = dict(ascii='Uninformed',
28 |                    singleton='Valid',
29 |                    cmin='Corpus')
30 | 
31 | rc('pdf', fonttype=42)
32 | rc('ps', fonttype=42)
33 | 
34 | rc_fonts = {
35 |     'font.family': 'serif',
36 |     'text.usetex': True,
37 |     'text.latex.preamble':
38 |         r"""
39 |         \RequirePackage[T1]{fontenc}
40 |         \RequirePackage[tt=false, type1=true]{libertine}
41 |         \RequirePackage[varqu]{zi4}
42 |         \RequirePackage[libertine]{newtxmath}
43 |         """,
44 | }
45 | rcParams.update(rc_fonts)
46 | 
47 | 
48 | def gen_plot_data() -> pd.DataFrame:
49 |     """Generate the data to plot."""
50 |     dfs = []
51 |     trials = range(1, NUM_TRIALS + 1)
52 |     cols = ['region_percent_%d' % trial for trial in trials]
53 | 
54 |     for fuzzer, seed in product(FUZZERS, SEEDS):
55 |         csv_path = DATA_DIR / f'{fuzzer}-{seed}-cov.csv.gz'
56 | 
57 |         print(f'Parsing {csv_path}...')
58 |         with gzip.open(csv_path, 'rb') as inf:
59 |             df = pd.read_csv(inf).set_index('time')
60 | 
61 |         df = df.loc[~df.index.duplicated(keep='first')]
62 |         new_idx = pd.RangeIndex(start=0, stop=TRIAL_LEN * 60 * 60,
63 |                                 step=PLOT_STEP)
64 |         df = df.reindex(new_idx, method='ffill')
65 |         df.index = df.index / 60 / 60
66 |         df['time'] = df.index
67 |         df = df.melt(id_vars='time',
68 |                      value_name='region_percent',
69 |                      value_vars=cols)
70 |         df['Fuzzer'] = FUZZER_LABELS[fuzzer]
71 |         df['Seed'] = SEED_LABELS[seed]
72 | 
73 |         dfs.append(df)
74 | 
75 |     return pd.concat(dfs)
76 | 
77
| 78 | def main(): 79 | """The main function.""" 80 | 81 | print('Generating plot data...') 82 | plot_data = gen_plot_data() 83 | 84 | # Do the actual plotting 85 | print('plotting results...') 86 | sns.set_theme(style='ticks') 87 | fig = plt.figure() 88 | ax = fig.add_subplot(111) 89 | 90 | ax = sns.lineplot(ax=ax, data=plot_data, x='time', y='region_percent', 91 | hue='Seed', style='Fuzzer', ci=95, n_boot=NUM_BOOTS) 92 | 93 | # Tidy up plot 94 | xticks = [0, 1, 2, 5, 10] # Hours 95 | ax.set(xlabel='Time (h)', 96 | ylabel='Regions (\%)', 97 | xscale='symlog', 98 | xticks=xticks, 99 | xticklabels=[f'{x}' for x in xticks]) 100 | ax.set_ylim(bottom=0) 101 | ax.set_xlim(left=0, right=TRIAL_LEN) 102 | ax.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.5, 1.3)) 103 | sns.despine() 104 | 105 | # Save plot 106 | fig.savefig('readelf-experiment.pdf', bbox_inches='tight') 107 | 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | seaborn 4 | -------------------------------------------------------------------------------- /fuzzing/readelf/seeds/cmin-seeds.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/seeds/cmin-seeds.tar.xz -------------------------------------------------------------------------------- /fuzzing/readelf/seeds/uninformed-seed: -------------------------------------------------------------------------------- 1 | ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/freetype2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build instrumented freetype for AFL 19 | RUN mkdir /build 20 | RUN cd freetype-2.5.3 && \ 21 | CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \ 22 | CFLAGS="-m32 -fsanitize=address" \ 23 | CXXFLAGS="-m32 -fsanitize=address" \ 24 | LDFLAGS="-m32 -fsanitize=address" \ 25 | ./configure --prefix=/build --host=i386-linux --without-png 26 | RUN cd freetype-2.5.3 && \ 27 | make clean && \ 28 | make -j && \ 29 | make install 30 | RUN cd ttf_bin && rm -f char2svg && \ 31 | /afl/afl-clang-fast++ -m32 -fsanitize=address \ 32 | -I/build/include/freetype2 -o char2svg char2svg.cpp \ 33 | -L/build/lib/ -lfreetype 34 | 35 | RUN cp /ttf_bin/char2svg /build/bin/ 36 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 37 | /build/bin/char2svg 38 | 39 | # The `build` directory can be extracted to the host machine 40 | -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # 
Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git build-essential gcc-multilib g++-multilib wget \ 8 | python3-pip 9 | 10 | # Install LLVM 8 11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 12 | RUN apt-get install -y llvm-8 clang-8 13 | 14 | # Get helper scripts 15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 16 | pip3 install fuzzing-seed-selection/scripts 17 | 18 | # Get the freetype source 19 | RUN wget -O - https://download.savannah.gnu.org/releases/freetype/freetype-2.5.3.tar.gz | tar xz 20 | 21 | # Create custom driver for freetype, taken from 22 | # https://www.freetype.org/freetype2/docs/tutorial/example5.cpp 23 | RUN mkdir ttf_bin 24 | ADD https://www.freetype.org/freetype2/docs/tutorial/example5.cpp /ttf_bin/char2svg.cpp 25 | -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/freetype2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage freetype 6 | RUN mkdir /build 7 | RUN cd freetype-2.5.3 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32" \ 12 | ./configure --prefix=/build --host=i386-linux --without-png 13 | RUN cd freetype-2.5.3 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN cd ttf_bin && rm -f char2svg && \ 18 | clang++-8 -m32 -fprofile-instr-generate -fcoverage-mapping \ 19 | -I/build/include/freetype2 -o char2svg \ 20 | char2svg.cpp -L/build/lib/ -lfreetype 21 | 22 | RUN cp /ttf_bin/char2svg /build/bin/ 23 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 24 | /build/bin/char2svg 25 | 26 | # The `build` directory can be extracted to the host machine 27 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/librsvg/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build AFL librsvg 19 | RUN mkdir /build 20 | RUN cd librsvg-2.40.20 && \ 21 | PKG_CONFIG_PATH=/usr/lib/i386-linux-gnu/pkgconfig \ 22 | CC=/afl/afl-clang-fast \ 23 | CXX=/afl/afl-clang-fast++ \ 24 | CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \ 25 | LDFLAGS="-L/usr/lib/i386-linux-gnu -m32 -fsanitize=address" \ 26 | ./autogen.sh --prefix=/build \ 27 | --host=i386-linux-gnu --enable-introspection=no 28 | RUN cd librsvg-2.40.20 && \ 29 | make clean && \ 30 | make -j && \ 31 | make install 32 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 33 | /build/bin/rsvg-convert 34 | 35 | # The `build` directory can be extracted to the host machine 36 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/base.Dockerfile: 
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Install dependencies
6 | RUN dpkg --add-architecture i386
7 | RUN export DEBIAN_FRONTEND=noninteractive && \
8 |     apt-get update && \
9 |     apt-get -y install git build-essential gcc-multilib g++-multilib \
10 |         libc6-dev:i386 autoconf pkg-config libtool libgirepository1.0-dev \
11 |         gtk-doc-tools libgdk-pixbuf2.0-dev:i386 libglib2.0-dev:i386 \
12 |         libgio2.0-cil-dev libxml2-dev:i386 libpango1.0-dev:i386 \
13 |         libpangocairo-1.0.0:i386 libpangoft2-1.0.0:i386 libcairo2-dev:i386 \
14 |         libcroco3-dev:i386 wget python3-pip
15 | 
16 | # Install LLVM 8
17 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
18 | RUN apt-get install -y llvm-8 clang-8
19 | 
20 | # Get helper scripts
21 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
22 |     pip3 install fuzzing-seed-selection/scripts
23 | 
24 | # Get the librsvg source
25 | RUN wget -O - https://gitlab.gnome.org/GNOME/librsvg/-/archive/2.40.20/librsvg-2.40.20.tar.gz | tar xz
26 | 
27 | # We have to use 'autoreconf -i' or 'autogen.sh' because the tarball does not
28 | # ship a configure script.
29 | 
30 | # For whatever reason, autoreconf (autogen.sh) couldn't find gtk-doc.make
31 | RUN ln -s /usr/share/gtk-doc/data/gtk-doc.make librsvg-2.40.20
32 | 
33 | # gobject-introspection ultimately has to be disabled because we can't install
34 | # the 32-bit version of it without breaking dependencies. There is also a weird
35 | # case where it tries to link against the 64-bit version of libfreetype.so.
36 | # Hence the extra -L flag in the LDFLAGS
37 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/librsvg/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Build coverage librsvg
6 | RUN mkdir /build
7 | RUN cd librsvg-2.40.20 && \
8 |     PKG_CONFIG_PATH=/usr/lib/i386-linux-gnu/pkgconfig \
9 |     CC=clang-8 CXX=clang++-8 \
10 |     CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
11 |     CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
12 |     LDFLAGS="-L/usr/lib/i386-linux-gnu -m32" \
13 |     ./autogen.sh --prefix=/build --enable-introspection=no \
14 |         --host=i386-linux
15 | RUN cd librsvg-2.40.20 && \
16 |     make clean && \
17 |     make -j && \
18 |     make install
19 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \
20 |     -o /build/lib /build/bin/rsvg-convert
21 | 
22 | # The `build` directory can be extracted to the host machine
23 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/afl.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/libtiff/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Get and build AFL
6 | ENV AFL_CC=clang-8
7 | ENV AFL_CXX=clang++-8
8 | 
9 | RUN git clone --no-checkout https://github.com/google/afl && \
10 |     git -C afl checkout v256b
11 | RUN cd afl && \
12 |     export LLVM_CONFIG=llvm-config-8 && \
13 |     export CC=$AFL_CC && \
14 |     export CXX=$AFL_CXX && \
15 |     make -j && \
16 |     make -j -C llvm_mode
17 | 
18 | # Build AFL libtiff
19 | RUN mkdir /build
20 | RUN cd tiff-4.0.9 && \
21 |     CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \
22 |     CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \
23 |     LDFLAGS="-m32
-fsanitize=address" \ 24 | ./configure --prefix=/build --host=i386-linux-gnu 25 | RUN cd tiff-4.0.9 && \ 26 | make clean && \ 27 | make -j && \ 28 | make install 29 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 30 | /build/bin/tiff2pdf 31 | 32 | # The `build` directory can be extracted to the host machine 33 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git wget build-essential gcc-multilib g++-multilib \ 8 | libc6-dev:i386 python3-pip 9 | 10 | # Install LLVM 8 11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 12 | RUN apt-get install -y llvm-8 clang-8 13 | 14 | # Get helper scripts 15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 16 | pip3 install fuzzing-seed-selection/scripts 17 | 18 | # Get the libtiff source 19 | RUN wget -O - http://download.osgeo.org/libtiff/tiff-4.0.9.tar.gz | tar xz 20 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libtiff/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage libtiff 6 | RUN mkdir /build 7 | RUN cd tiff-4.0.9 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32 -fno-stack-protector" \ 12 | ./configure --prefix=/build --host=i386-linux 13 | RUN cd tiff-4.0.9 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \ 18 | -o /build/lib /build/bin/tiff2pdf 19 | 20 | # The `build` directory can be extracted to the host machine 21 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libxml2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build AFL libxml2 19 | RUN mkdir /build 20 | RUN cd libxml2-2.9.0 && \ 21 | CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \ 22 | CFLAGS="-m32 -fsanitize=address" \ 23 | CXXFLAGS="-m32 -fsanitize=address" \ 24 | LDFLAGS="-m32 -fsanitize=address" \ 25 | ./configure --prefix=/build --host=i386-linux 26 | RUN cd libxml2-2.9.0 && \ 27 | make clean && \ 28 | make -j && \ 29 | make install 30 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 31 | /build/bin/xmllint 32 | 33 | # The `build` directory can be extracted to the host machine 34 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | MAINTAINER 
Adrian Herrera 4 | 5 | # Install dependencies 6 | RUN dpkg --add-architecture i386 7 | RUN export DEBIAN_FRONTEND=noninteractive && \ 8 | apt-get update && \ 9 | apt-get -y install git wget build-essential gcc-multilib g++-multilib \ 10 | libc6-dev:i386 python3-pip 11 | 12 | # Install LLVM 8 13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 14 | RUN apt-get install -y llvm-8 clang-8 15 | 16 | # Get helper scripts 17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 18 | pip3 install fuzzing-seed-selection/scripts 19 | 20 | # Get the libxml2 source 21 | RUN wget -O - ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz | tar xz 22 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libxml2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage libxml2 6 | RUN mkdir /build 7 | RUN cd libxml2-2.9.0 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32" \ 12 | ./configure --prefix=/build --host=i386-linux 13 | RUN cd libxml2-2.9.0 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \ 18 | -o /build/lib /build/bin/xmllint 19 | 20 | # The `build` directory can be extracted to the host machine 21 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/afl-toolchain-llvm.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER /afl/afl-clang-fast) 2 | set(CMAKE_CXX_COMPILER /afl/afl-clang-fast++) 3 | set(CMAKE_BUILD_TYPE debug) 4 | set(BUILD_GTK_TESTS off) 5 | set(BUILD_QT5_TESTS off) 6 | set(BUILD_CPP_TESTS off) 7 | set(ENABLE_GTK_DOC off) 8 | set(ENABLE_QT5 off) 9 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 -fsanitize=address") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 -fsanitize=address") 11 | set(CMAKE_SYSTEM_PROCESSOR "i386") 12 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/poppler/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Configure AFL poppler 19 | RUN mkdir /build 20 | RUN mkdir poppler-0.64.0-afl/build && \ 21 | cd poppler-0.64.0-afl/build && \ 22 | cmake .. 
\
23 |         -DCMAKE_TOOLCHAIN_FILE=/afl-toolchain-llvm.cmake \
24 |         -DCMAKE_INSTALL_PREFIX=/build \
25 |         -DCMAKE_C_FLAGS="-m32 -fsanitize=address" \
26 |         -DCMAKE_CXX_FLAGS="-m32 -fsanitize=address" \
27 |         -DCMAKE_SYSTEM_PROCESSOR="i386"
28 | 
29 | # For the 32-bit build it is necessary to reinstall libopenjp2, as the 32-bit
30 | # and 64-bit versions are mutually exclusive
31 | RUN apt-get -y install libopenjp2-7-dev:i386
32 | 
33 | # Actually make the AFL poppler
34 | RUN cd poppler-0.64.0-afl/build && \
35 |     make clean && make -j && make install
36 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
37 |     /build/bin/pdftotext
38 | 
39 | # The `build` directory can be extracted to the host machine
40 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/base.Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN dpkg --add-architecture i386
5 | RUN export DEBIAN_FRONTEND=noninteractive && \
6 |     apt-get update && \
7 |     apt-get -y install git build-essential wget gcc-multilib g++-multilib \
8 |         libc6-dev:i386 xz-utils pkg-config libfreetype6-dev libfontconfig-dev \
9 |         libjpeg-dev libopenjp2-7-dev cmake libfreetype6-dev:i386 \
10 |         libfontconfig-dev:i386 libjpeg-dev:i386 python3-pip
11 | 
12 | # Install LLVM 8
13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
14 | RUN apt-get install -y llvm-8 clang-8
15 | 
16 | # Get helper scripts
17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
18 |     pip3 install fuzzing-seed-selection/scripts
19 | 
20 | # Get the poppler source
21 | RUN wget -O - https://poppler.freedesktop.org/poppler-0.64.0.tar.xz | tar xJ
22 | 
23 | # Make a copy of the source so that we don't have to reinstall libopenjp2-7
24 | # again
25 | RUN cp -r poppler-0.64.0 poppler-0.64.0-afl
26 | 
27 | # Add the poppler build toolchain files
28 | ADD toolchain.cmake /
29 | ADD afl-toolchain-llvm.cmake /
30 | 
31 | # It seems that the compiler flags set in toolchain.cmake are not used by cmake, hence the explicit -DCMAKE_*_FLAGS passed in the afl and coverage builds
32 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/poppler/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Configure coverage poppler
6 | RUN mkdir /build
7 | RUN mkdir poppler-0.64.0/build && \
8 |     cd poppler-0.64.0/build && \
9 |     cmake .. -DCMAKE_TOOLCHAIN_FILE=/toolchain.cmake \
10 |         -DCMAKE_INSTALL_PREFIX=/build \
11 |         -DCMAKE_C_FLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
12 |         -DCMAKE_CXX_FLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
13 |         -DCMAKE_SYSTEM_PROCESSOR="i386"
14 | 
15 | # For the 32-bit build it is necessary to reinstall libopenjp2, as the 32-bit
16 | # and 64-bit versions are mutually exclusive
17 | RUN apt-get -y install libopenjp2-7-dev:i386
18 | 
19 | # Actually make the coverage poppler
20 | RUN cd poppler-0.64.0/build && \
21 |     make clean && make -j && make install
22 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
23 |     /build/bin/pdftotext
24 | 
25 | # The `build` directory can be extracted to the host machine
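# A minimal sketch of that extraction (the image/container names here are
# assumptions; fuzzing/extract-from-container.sh in this repo automates the
# same steps):
#
#   docker build -t seed-selection/real-world/poppler/coverage \
#       -f coverage.Dockerfile .
#   docker create --name poppler-cov seed-selection/real-world/poppler/coverage
#   docker cp poppler-cov:/build ./poppler-coverage-build
#   docker rm poppler-cov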
26 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/toolchain.cmake: --------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER clang-8)
2 | set(CMAKE_CXX_COMPILER clang++-8)
3 | set(CMAKE_BUILD_TYPE debug)
4 | set(BUILD_GTK_TESTS off)
5 | set(BUILD_QT5_TESTS off)
6 | set(BUILD_CPP_TESTS off)
7 | set(ENABLE_GTK_DOC off)
8 | set(ENABLE_QT5 off)
9 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 -fprofile-instr-generate -fcoverage-mapping")
10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 -fprofile-instr-generate -fcoverage-mapping")
11 | set(CMAKE_SYSTEM_PROCESSOR "i386")
12 | -------------------------------------------------------------------------------- /fuzzing/real-world/sox/afl.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/sox/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Get and build AFL
6 | ENV AFL_CC=clang-8
7 | ENV AFL_CXX=clang++-8
8 | 
9 | RUN git clone --no-checkout https://github.com/google/afl && \
10 |     git -C afl checkout v256b
11 | RUN cd afl && \
12 |     export LLVM_CONFIG=llvm-config-8 && \
13 |     export CC=$AFL_CC && \
14 |     export CXX=$AFL_CXX && \
15 |     make -j && \
16 |     make -j -C llvm_mode
17 | 
18 | # Build AFL SoX
19 | RUN mkdir /build
20 | RUN cd sox-14.4.2 && \
21 |     CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ LD=ldd \
22 |     CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \
23 |     LDFLAGS="-m32 -fsanitize=address" \
24 |     ./configure --prefix=/build --host=i386-linux
25 | RUN cd sox-14.4.2 && \
26 |     make clean && \
27 |     make -s && \
28 |     make install
29 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
30 |     /build/bin/sox
31 | 
32 | # The `build` directory can be extracted to the host machine
33 | -------------------------------------------------------------------------------- /fuzzing/real-world/sox/base.Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN dpkg --add-architecture i386
5 | RUN export DEBIAN_FRONTEND=noninteractive && \
6 |     apt-get update && \
7 |     apt-get -y install git build-essential gcc-multilib g++-multilib \
8 |         libmad0-dev:i386 libc6-dev:i386 wget python3-pip
9 | 
10 | # Install LLVM 8
11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
12 | RUN apt-get install -y llvm-8 clang-8
13 | 
14 | # Get helper scripts
15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
16 |     pip3 install fuzzing-seed-selection/scripts
17 | 
18 | # Get the SoX source
19 | RUN wget -O - https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz | tar xz
20 |
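# The afl and coverage images in this directory build FROM this image under
# the tag seed-selection/real-world/sox/base, so build it first. For example
# (the afl tag below is an arbitrary choice):
#
#   docker build -t seed-selection/real-world/sox/base -f base.Dockerfile .
#   docker build -t seed-selection/real-world/sox/afl -f afl.Dockerfile .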
-------------------------------------------------------------------------------- /fuzzing/real-world/sox/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/sox/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Build coverage SoX
6 | RUN mkdir /build
7 | RUN cd sox-14.4.2 && \
8 |     CC=clang-8 CXX=clang++-8 \
9 |     CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
10 |     CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
11 |     LDFLAGS="-m32" \
12 |     ./configure --prefix=/build --host=i386-linux
13 | RUN cd sox-14.4.2 && \
14 |     make clean && \
15 |     make -s && \
16 |     make install
17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \
18 |     -o /build/lib /build/bin/sox
19 | 
20 | # The `build` directory can be extracted to the host machine
21 | -------------------------------------------------------------------------------- /optimin/.gitignore: --------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 | 
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | -------------------------------------------------------------------------------- /optimin/CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.7)
2 | 
3 | project(optimin
4 |     LANGUAGES CXX
5 |     DESCRIPTION "SAT-based fuzzing corpus minimizer"
6 | )
7 | 
8 | set(CMAKE_CXX_STANDARD 17)
9 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
10 | set(CMAKE_CXX_EXTENSIONS OFF)
11 | 
12 | if(USE_Z3)
13 |     find_package(Z3 REQUIRED config)
14 |     message(STATUS "Found Z3 ${Z3_VERSION_STRING}")
15 |     include_directories(${Z3_CXX_INCLUDE_DIRS})
16 |     link_libraries(${Z3_LIBRARIES})
17 | endif(USE_Z3)
18 | 
19 | find_package(Boost COMPONENTS container REQUIRED)
20 | message(STATUS "Found Boost ${Boost_VERSION_STRING}")
21 | 
22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3")
23 | 
24 | include_directories(${Boost_INCLUDE_DIR})
25 | 
26 | add_subdirectory(src)
27 | -------------------------------------------------------------------------------- /optimin/Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:latest
2 | 
3 | # Install prerequisites
4 | RUN export DEBIAN_FRONTEND=noninteractive && \
5 |     apt update && \
6 |     apt install -y git build-essential cmake \
7 |         libboost-container-dev libz-dev python3
8 | 
9 | # Add OptiMin source
10 | ADD CMakeLists.txt /optimin/
11 | ADD src /optimin/src
12 | 
13 | # Build OptiMin
14 | RUN mkdir -p /optimin/build
15 | RUN cd /optimin/build && \
16 |     cmake .. && \
17 |     make -j && \
18 |     make install
19 | 
20 | # Build EvalMaxSAT
21 | RUN git clone https://github.com/FlorentAvellaneda/EvalMaxSAT
22 | RUN mkdir -p EvalMaxSAT/build
23 | RUN cd EvalMaxSAT/build && \
24 |     cmake ..
&& \ 25 | make -j && \ 26 | make install 27 | 28 | # Add OptiMin wrapper 29 | ADD optimin.py /optimin/ 30 | ENTRYPOINT ["/optimin/optimin.py"] 31 | -------------------------------------------------------------------------------- /optimin/LICENSE.jsoncpp: -------------------------------------------------------------------------------- 1 | The JsonCpp library's source code, including accompanying documentation, 2 | tests and demonstration applications, are licensed under the following 3 | conditions... 4 | 5 | Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all 6 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 7 | this software is released into the Public Domain. 8 | 9 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of 10 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and 11 | The JsonCpp Authors, and is released under the terms of the MIT License (see below). 12 | 13 | In jurisdictions which recognize Public Domain property, the user of this 14 | software may choose to accept it either as 1) Public Domain, 2) under the 15 | conditions of the MIT License (see below), or 3) under the terms of dual 16 | Public Domain/MIT License conditions described here, as they choose. 17 | 18 | The MIT License is about as close to Public Domain as a license can get, and is 19 | described in clear, concise terms at: 20 | 21 | http://en.wikipedia.org/wiki/MIT_License 22 | 23 | The full text of the MIT License follows: 24 | 25 | ======================================================================== 26 | Copyright (c) 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 27 | 28 | Permission is hereby granted, free of charge, to any person 29 | obtaining a copy of this software and associated documentation 30 | files (the "Software"), to deal in the Software without 31 | restriction, including without limitation the rights to use, copy, 32 | modify, merge, publish, distribute, sublicense, and/or sell copies 33 | of the Software, and to permit persons to whom the Software is 34 | furnished to do so, subject to the following conditions: 35 | 36 | The above copyright notice and this permission notice shall be 37 | included in all copies or substantial portions of the Software. 38 | 39 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 40 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 41 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 42 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 43 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 44 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 45 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 46 | SOFTWARE. 47 | ======================================================================== 48 | (END LICENSE TEXT) 49 | 50 | The MIT license is compatible with both the GPL and commercial 51 | software, affording one all of the rights of Public Domain with the 52 | minor nuisance of being required to keep the above copyright notice 53 | and license text in the source code. Note also that by accepting the 54 | Public Domain "license" you can re-license your copy using whatever 55 | license you like. 
56 | -------------------------------------------------------------------------------- /optimin/optimin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around OptiMin. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from shutil import which 13 | from tempfile import NamedTemporaryFile 14 | from typing import Dict, List, Optional, TextIO, Tuple 15 | import re 16 | import subprocess 17 | 18 | 19 | WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$') 20 | 21 | 22 | def parse_args() -> Namespace: 23 | """Parse command-line arguments.""" 24 | parser = ArgumentParser(description='Run OptiMin to produce a minimized corpus') 25 | parser.add_argument('-j', '--jobs', type=int, default=0, 26 | help='Number of minimization threads') 27 | parser.add_argument('-e', '--edge-only', action='store_true', 28 | help='Use edge coverage only, ignore hit counts') 29 | parser.add_argument('-w', '--weights', metavar='CSV', type=Path, 30 | help='Path to weights CSV') 31 | parser.add_argument('corpus', type=Path, help='Path to input corpus') 32 | return parser.parse_args() 33 | 34 | 35 | def get_seed_mapping(inf: TextIO) -> Dict[int, str]: 36 | """ 37 | Retrieve the mapping of literal identifiers (integers) to seed names 38 | (strings) from the WCNF file. 39 | """ 40 | mapping = {} 41 | for line in inf: 42 | # This starts the constraint listing 43 | if line.startswith('p wcnf '): 44 | break 45 | 46 | match = WCNF_SEED_MAP_RE.match(line.strip()) 47 | if not match: 48 | continue 49 | 50 | mapping[int(match.group(1))] = match.group(2) 51 | 52 | return mapping 53 | 54 | 55 | def parse_maxsat_out(out: List[str], mapping: Dict[int, str]) -> Tuple[Optional[List[str]], Optional[float]]: 56 | """ 57 | Parse the output from EvalMaxSat. 58 | 59 | Returns a tuple containing: 60 | 61 | 1. The list of seeds that make up the solution, or `None` if a solution 62 | could not be found. 63 | 2. The execution time. 64 | """ 65 | solution = None 66 | exec_time = None 67 | 68 | for line in out: 69 | # Solution status 70 | if line.startswith('s ') and 'OPTIMUM FOUND' not in line: 71 | # No optimum solution found 72 | break 73 | 74 | # Solution values 75 | if line.startswith('v '): 76 | vals = [int(v) for v in line[2:].split(' ')] 77 | solution = [mapping[v] for v in vals if v > 0] 78 | 79 | # Execution time 80 | if line.startswith('c Total time: '): 81 | toks = line.split(' ') 82 | exec_time = float(toks[3]) 83 | units = toks[4] 84 | 85 | # TODO other units to worry about? 86 | if units == 'ms': 87 | exec_time = exec_time / 1000 88 | 89 | return solution, exec_time 90 | 91 | 92 | def main(): 93 | """The main function.""" 94 | args = parse_args() 95 | 96 | # Check binaries are available 97 | optimin = which('afl-showmap-maxsat') 98 | if not optimin: 99 | raise Exception('Cannot find OptiMin. Check PATH') 100 | eval_max_sat = which('EvalMaxSAT_bin') 101 | if not eval_max_sat: 102 | raise Exception('Cannot find EvalMaxSAT. 
Check PATH') 103 | 104 | # Configure optimin 105 | optimin_args = [optimin, '-p'] 106 | if args.edge_only: 107 | optimin_args.append('-e') 108 | if args.weights: 109 | optimin_args.extend(['-w', str(args.weights)]) 110 | 111 | with NamedTemporaryFile() as wcnf: 112 | print(f'[*] Running OptiMin on {args.corpus}') 113 | optimin_args.extend(['-o', wcnf.name, '--', str(args.corpus)]) 114 | subprocess.run(optimin_args, check=True) 115 | 116 | with open(wcnf.name, 'r') as inf: 117 | seed_map = get_seed_mapping(inf) 118 | 119 | print('[*] Running EvalMaxSAT on WCNF') 120 | proc = subprocess.run([eval_max_sat, wcnf.name, '-p', f'{args.jobs}'], 121 | check=True, stdout=subprocess.PIPE, 122 | encoding='utf-8') 123 | print('[+] EvalMaxSAT completed') 124 | maxsat_out = [line.strip() for line in proc.stdout.split('\n')] 125 | 126 | print('[*] Parsing EvalMaxSAT output') 127 | solution, exec_time = parse_maxsat_out(maxsat_out, seed_map) 128 | if not solution: 129 | raise Exception(f'Unable to find optimum solution for {args.corpus}') 130 | 131 | print(f'[+] Solution found for {args.corpus}\n') 132 | print('[+] Total time: %.02f sec' % exec_time) 133 | print(f'[+] Num. seeds: {len(solution)}\n') 134 | 135 | print('\n'.join(solution)) 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 |
-------------------------------------------------------------------------------- /optimin/src/AFLShowmapZ3.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Perform an optimal (if possible) fuzzing corpus minimization based on 3 | * afl-showmap's edge coverage. 4 | * 5 | * Author: Adrian Herrera 6 | */ 7 | 8 | #include <algorithm> 9 | #include <chrono> 10 | #include <cstdlib> 11 | #include <fstream> 12 | #include <iostream> 13 | #include <iterator> 14 | #include <numeric> 15 | #include <string> 16 | #include <vector> 17 | #include <unistd.h> 18 | 19 | #include "Common.h" 20 | #include "ProgressBar.h" 21 | #include "Z3Common.h" 22 | 23 | #include <z3++.h> 24 | 25 | // This is based on the human class count in `count_class_human[256]` in 26 | // `afl-showmap.c` 27 | static constexpr uint32_t MAX_EDGE_FREQ = 8; 28 | 29 | static void Usage(const char *Argv0) { 30 | std::cerr << '\n' << Argv0 << " [ options ] -- /path/to/corpus_dir\n\n"; 31 | std::cerr << "Optional parameters:\n\n"; 32 | std::cerr << " -p - Show progress bar\n"; 33 | std::cerr << " -e - Use edge coverage only, ignore hit counts\n"; 34 | std::cerr << " -h - Print this message\n"; 35 | std::cerr << " -s smt2 - Save SMT2\n"; 36 | std::cerr << " -w weights - CSV containing seed weights (see README)\n\n"; 37 | std::cerr << std::endl; 38 | 39 | std::exit(1); 40 | } 41 | 42 | int main(int Argc, char *Argv[]) { 43 | bool ShowProg = false; 44 | bool EdgesOnly = false; 45 | std::string SMTOutFile = ""; 46 | std::string WeightsFile; 47 | WeightsMap Weights; 48 | int Opt; 49 | ProgressBar Prog; 50 | 51 | std::chrono::time_point<std::chrono::steady_clock> StartTime, EndTime; 52 | std::chrono::seconds Duration; 53 | 54 | std::cout << "afl-showmap corpus minimization\n\n"; 55 | 56 | // Parse command-line options 57 | while ((Opt = getopt(Argc, Argv, "+pehs:w:")) > 0) { 58 | switch (Opt) { 59 | case 'p': 60 | // Show progress bar 61 | ShowProg = true; 62 | break; 63 | case 'e': 64 | // Solve for edge coverage only (not frequency of edge coverage) 65 | EdgesOnly = true; 66 | break; 67 | case 'h': 68 | // Help 69 | Usage(Argv[0]); 70 | break; 71 | case 's': 72 | // SMT2 file 73 | SMTOutFile = optarg; 74 | break; 75 | case 'w': 76 | // Weights file 77 | WeightsFile = optarg; 78 | break; 79 | default: 80 | Usage(Argv[0]); 81 | } 82 | } 83 | 84 | if (optind >= Argc) { 85 | Usage(Argv[0]); 86 | } 87 | 88 | const char *CorpusDir = Argv[optind]; 89 | 90 | // Parse weights 91 | // 92 | // Weights are stored in CSV file mapping a seed file name to an integer 93 | // greater than zero. 94 | if (!WeightsFile.empty()) { 95 | std::cout << "[*] Reading weights from `" << WeightsFile << "`... " 96 | << std::flush; 97 | 98 | StartTime = std::chrono::steady_clock::now(); 99 | std::ifstream IFS(WeightsFile); 100 | GetZ3Weights(IFS, Weights); 101 | IFS.close(); 102 | EndTime = std::chrono::steady_clock::now(); 103 | Duration = 104 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 105 | 106 | std::cout << Duration.count() << 's' << std::endl; 107 | } 108 | 109 | // Get seed coverage 110 | // 111 | // Iterate over the corpus directory, which should contain afl-showmap-style 112 | // output files. Read each of these files and store them in the appropriate 113 | // data structures. 114 | struct dirent *DP; 115 | DIR *DirFD; 116 | AFLCoverageVector Cov; 117 | 118 | Z3ExprSet SeedExprs; 119 | Z3CoverageMap SeedCoverage; 120 | 121 | z3::context Ctx; 122 | z3::optimize Optimizer(Ctx); 123 | 124 | if (!ShowProg) { 125 | std::cout << "[*] Reading coverage in `" << CorpusDir << "`... " 126 | << std::flush; 127 | } 128 | StartTime = std::chrono::steady_clock::now(); 129 | 130 | if ((DirFD = opendir(CorpusDir)) == nullptr) { 131 | std::cerr << "[-] Unable to open corpus directory" << std::endl; 132 | return 1; 133 | } 134 | 135 | size_t SeedCount = 0; 136 | const size_t NumSeeds = GetNumSeeds(DirFD); 137 | 138 | while ((DP = readdir(DirFD)) != nullptr) { 139 | if (DP->d_type == DT_DIR) { 140 | continue; 141 | } 142 | 143 | // Get seed coverage 144 | std::ifstream IFS(std::string(CorpusDir) + '/' + DP->d_name); 145 | Cov.clear(); 146 | GetAFLCoverage(IFS, Cov); 147 | IFS.close(); 148 | 149 | // Create a variable (a boolean) to represent the seed 150 | z3::expr SeedExpr = Ctx.bool_const(MakeZ3ExprName(DP->d_name).c_str()); 151 | SeedExprs.insert(SeedExpr); 152 | 153 | // Record the set of seeds that cover a particular edge 154 | for (const auto &[Edge, Freq] : Cov) { 155 | if (EdgesOnly) { 156 | // Ignore edge frequency 157 | SeedCoverage[Edge].insert(SeedExpr); 158 | } else { 159 | // Executing edge `E` `N` times means that it was executed `N - 1` times 160 | for (unsigned I = 0; I < Freq; ++I) 161 | SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(SeedExpr); 162 | } 163 | } 164 | 165 | if ((++SeedCount % 10 == 0) && ShowProg) { 166 | Prog.Update(SeedCount * 100 / NumSeeds, "Reading seed coverage"); 167 | } 168 | } 169 | 170 | closedir(DirFD); 171 | 172 | EndTime = std::chrono::steady_clock::now(); 173 | Duration = 174 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 175 | if (ShowProg) { 176 | std::cout << std::endl; 177 | } else { 178 | std::cout << Duration.count() << 's' << std::endl; 179 | } 180 | 181 | // Ensure that at least one seed is selected that covers a particular edge 182 | if (!ShowProg) { 183 | std::cout << "[*] Generating constraints for " << SeedCoverage.size() 184 | << " seeds... " << std::flush; 185 | } 186 | StartTime = std::chrono::steady_clock::now(); 187 | 188 | SeedCount = 0; 189 | 190 | for (const auto &[_, Seeds] : SeedCoverage) { 191 | if (Seeds.empty()) { 192 | continue; 193 | } 194 | 195 | z3::expr EdgeDisjunc = std::accumulate( 196 | Seeds.begin(), Seeds.end(), Ctx.bool_val(false), 197 | [](const z3::expr &E1, const z3::expr &E2) { return E1 || E2; }); 198 | Optimizer.add(EdgeDisjunc); 199 | 200 | if ((++SeedCount % 10 == 0) && ShowProg) { 201 | Prog.Update(SeedCount * 100 / SeedCoverage.size(), 202 | "Generating seed constraints"); 203 | } 204 | } 205 | 206 | // Select the minimum number of seeds that cover a particular set of edges 207 | for (const auto &E : SeedExprs) { 208 | Optimizer.add(!E, Weights[E.to_string()]); 209 | } 210 | 211 | EndTime = std::chrono::steady_clock::now(); 212 | Duration = 213 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 214 | if (ShowProg) { 215 | std::cout << std::endl; 216 | } else { 217 | std::cout << Duration.count() << 's' << std::endl; 218 | } 219 | 220 | // Dump constraints to SMT2 221 | if (!SMTOutFile.empty()) { 222 | std::cout << "[*] Writing SMT2 to `" << SMTOutFile << "`... " << std::flush; 223 | StartTime = std::chrono::steady_clock::now(); 224 | 225 | std::ofstream OFS(SMTOutFile); 226 | OFS << Optimizer; 227 | OFS.close(); 228 | 229 | EndTime = std::chrono::steady_clock::now(); 230 | Duration = 231 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 232 | std::cout << Duration.count() << 's' << std::endl; 233 | } 234 | 235 | // Check if an optimal solution exists 236 | std::cout << "[*] Solving constraints... " << std::flush; 237 | StartTime = std::chrono::steady_clock::now(); 238 | 239 | z3::check_result Result = Optimizer.check(); 240 | 241 | EndTime = std::chrono::steady_clock::now(); 242 | Duration = 243 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 244 | std::cout << Duration.count() << 's' << std::endl; 245 | 246 | // Get the resulting coverset 247 | if (Result == z3::sat) { 248 | std::cout << "[+] Optimal corpus found\n"; 249 | 250 | z3::model Model = Optimizer.get_model(); 251 | std::vector<std::string> SelectedSeeds; 252 | for (const auto &SeedExpr : SeedExprs) { 253 | if (Model.eval(SeedExpr).is_true()) { 254 | SelectedSeeds.push_back(GetSeed(SeedExpr)); 255 | } 256 | } 257 | 258 | // Compute some interesting statistics 259 | size_t NumSelectedSeeds = SelectedSeeds.size(); 260 | float PercentSelected = (float)NumSelectedSeeds / SeedExprs.size() * 100.0; 261 | 262 | std::cout << "\nNum. seeds: " << NumSelectedSeeds << " (" << PercentSelected 263 | << "%)\n\n"; 264 | std::copy(SelectedSeeds.begin(), SelectedSeeds.end(), 265 | std::ostream_iterator<std::string>(std::cout, "\n")); 266 | std::cout << std::endl; 267 | } else { 268 | std::cerr << "[-] Unable to find optimal minimized corpus" << std::endl; 269 | return 1; 270 | } 271 | 272 | return 0; 273 | } 274 |
-------------------------------------------------------------------------------- /optimin/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(afl-showmap-maxsat AFLShowmapMaxSat.cpp Common.cpp) 2 | 3 | install(TARGETS afl-showmap-maxsat RUNTIME DESTINATION bin) 4 | 5 | if(USE_Z3) 6 | add_executable(afl-showmap-z3 AFLShowmapZ3.cpp 7 | Common.cpp 8 | Z3Common.cpp) 9 | add_executable(llvm-cov-z3 LLVMCovZ3.cpp 10 | Common.cpp 11 | Z3Common.cpp 12 | jsoncpp/jsoncpp.cpp) 13 | target_include_directories(llvm-cov-z3 PRIVATE jsoncpp) 14 | 15 | install(TARGETS afl-showmap-z3 RUNTIME DESTINATION bin) 16 | install(TARGETS llvm-cov-z3 RUNTIME DESTINATION bin) 17 | endif(USE_Z3)
-------------------------------------------------------------------------------- /optimin/src/Common.cpp: -------------------------------------------------------------------------------- 1 | #include "Common.h" 2 | 3 | void GetAFLCoverage(std::istream &IS, AFLCoverageVector &Cov) { 4 | std::string Line; 5 | 6 | while (std::getline(IS, Line, '\n')) { 7 | const size_t DelimPos = Line.find(':'); 8 | const uint32_t E = std::stoul(Line.substr(0, DelimPos)); 9 | const unsigned Freq = std::stoul(Line.substr(DelimPos + 1)); 10 | 11 | Cov.push_back({E, Freq}); 12 | } 13 | } 14 | 15 | void GetWeights(std::istream &IS, WeightsMap &Weights) { 16 | std::string Line; 17 | 18 | while (std::getline(IS, Line, '\n')) { 19 | const size_t DelimPos = Line.find(','); 20 | const std::string Seed = Line.substr(0, DelimPos).c_str(); 21 | const unsigned Weight = std::stoul(Line.substr(DelimPos + 1)); 22 | 23 | Weights.emplace(Seed, Weight); 24 | } 25 | } 26 | 27 | size_t GetNumSeeds(DIR *FD) { 28 | struct dirent *DP; 29 | size_t SeedCount = 0; 30 | 31 | while ((DP = readdir(FD)) != nullptr) { 32 | if (DP->d_type == DT_REG) { 33 | ++SeedCount; 34 | } 35 | } 36 | 37 | rewinddir(FD); 38 | 39 | return SeedCount; 40 | }
-------------------------------------------------------------------------------- /optimin/src/Common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include <cstdint> 5 | #include <istream> 6 | #include <map> 7 | #include <string> 8 | #include <vector> 9 | #include <dirent.h> 10 | 11 | /// Seed weights default to 1 12 | class WeightT { 13 | public: 14 | WeightT() : WeightT(1){}; 15 | WeightT(uint32_t V) : Value(V){}; 16 | 17 | operator unsigned() const { return Value; } 18 | 19 | private: 20 | const unsigned Value; 21 | }; 22 | 23 | /// Pair of tuple (edge) ID and hit count 24 | using AFLTuple = 25 | std::pair<std::uint32_t, unsigned>; 26 | 27 | /// Coverage for a given seed file 28 | using AFLCoverageVector = std::vector<AFLTuple>; 29 | 30 | /// Maps seed file paths to a weight 31 | using WeightsMap = 32 | std::map<std::string, WeightT>; 33 | 34 | /// Read AFL coverage as produced by `afl-showmap` 35 | void GetAFLCoverage(std::istream &, AFLCoverageVector &); 36 | 37 | /// Read a CSV file containing seed weights 38 | void GetWeights(std::istream &, WeightsMap &); 39 | 40 | /// Get the number of seeds in a directory 41 | size_t GetNumSeeds(DIR *); 42 | 43 | #endif // COMMON_H 44 |
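For readers skimming the C++ above: `GetAFLCoverage` (Common.cpp) parses afl-showmap-style output, one `edge:count` pair per line, and `AFLShowmapZ3.cpp` expands each hit count into up to `MAX_EDGE_FREQ` frequency buckets, so the MaxSAT constraints cover hit counts as well as raw edges. The following is a minimal Python sketch of that parsing and bucketing logic (the file path and seed name are hypothetical, not part of the artifact):

```python
# Sketch of Common.cpp's GetAFLCoverage and AFLShowmapZ3.cpp's frequency
# bucketing. Input format: afl-showmap's "<edge>:<count>" lines.
from collections import defaultdict

MAX_EDGE_FREQ = 8  # mirrors the human class count used by afl-showmap


def get_afl_coverage(path):
    """Parse an afl-showmap output file into (edge, freq) pairs."""
    cov = []
    with open(path) as inf:
        for line in inf:
            edge, _, freq = line.strip().partition(':')
            cov.append((int(edge), int(freq)))
    return cov


def bucket_coverage(seed, cov, seed_coverage, edges_only=False):
    """Record which seeds cover each (edge, frequency-class) bucket."""
    for edge, freq in cov:
        if edges_only:
            seed_coverage[edge].add(seed)
        else:
            # A seed hitting an edge with frequency class `freq` also covers
            # every lower frequency class, hence one bucket per class
            for i in range(freq):
                seed_coverage[MAX_EDGE_FREQ * edge + i].add(seed)


seed_coverage = defaultdict(set)
bucket_coverage('seed-0001', get_afl_coverage('showmap/seed-0001'), seed_coverage)
```

Each resulting bucket then becomes one hard "at least one of these seeds is selected" disjunction in the optimizer, exactly as in the `EdgeDisjunc` accumulation above.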
-------------------------------------------------------------------------------- /optimin/src/ProgressBar.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Progress bar. 3 | * 4 | * Adapted from https://www.bfilipek.com/2020/02/inidicators.html 5 | */ 6 | #ifndef PROGRESS_BAR_H 7 | #define PROGRESS_BAR_H 8 | 9 | #include <iostream> 10 | 11 | /// Display a progress bar in the terminal 12 | class ProgressBar { 13 | private: 14 | const size_t BarWidth; 15 | const std::string Fill; 16 | const std::string Remainder; 17 | 18 | public: 19 | ProgressBar() : ProgressBar(60, "#", " ") {} 20 | 21 | ProgressBar(size_t Width, const std::string &F, const std::string &R) 22 | : BarWidth(Width), Fill(F), Remainder(R) {} 23 | 24 | void Update(float Progress, const std::string Status = "", 25 | std::ostream &OS = std::cout) { 26 | // No need to write once progress is 100% 27 | if (Progress > 100.0f) { 28 | return; 29 | } 30 | 31 | // Move cursor to the first position on the same line and flush 32 | OS << '\r' << std::flush; 33 | 34 | // Start bar 35 | OS << '['; 36 | 37 | const auto Completed = 38 | static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0); 39 | for (size_t I = 0; I < BarWidth; ++I) { 40 | if (I <= Completed) { 41 | OS << Fill; 42 | } else { 43 | OS << Remainder; 44 | } 45 | } 46 | 47 | // End bar 48 | OS << ']'; 49 | 50 | // Write progress percentage 51 | OS << ' ' << std::min(static_cast<size_t>(Progress), size_t(100)) << '%'; 52 | 53 | // Write status text 54 | OS << " " << Status; 55 | } 56 | }; 57 | 58 | #endif 59 |
-------------------------------------------------------------------------------- /optimin/src/Z3Common.cpp: -------------------------------------------------------------------------------- 1 | #include "Z3Common.h" 2 | 3 | std::string GetSeed(const z3::expr &E) { 4 | std::string Name = E.to_string(); 5 | Name.pop_back(); 6 | 7 | return Name.erase(0, 1); 8 | } 9 | 10 | std::string MakeZ3ExprName(const std::string &S) { 11 | // Z3 "quotes" strings with a pipe if they begin with a numeric value. So we 12 | // just quote everything 13 | std::string Name("|"); 14 | Name.append(S); 15 | Name.append("|"); 16 | 17 | return Name; 18 | } 19 | 20 | void GetZ3Weights(std::istream &IS, WeightsMap &Weights) { 21 | std::string Line; 22 | 23 | while (std::getline(IS, Line, '\n')) { 24 | const size_t DelimPos = Line.find(','); 25 | const std::string Seed = Line.substr(0, DelimPos).c_str(); 26 | const unsigned Weight = std::stoul(Line.substr(DelimPos + 1)); 27 | 28 | Weights.emplace(MakeZ3ExprName(Seed), Weight); 29 | } 30 | }
-------------------------------------------------------------------------------- /optimin/src/Z3Common.h: -------------------------------------------------------------------------------- 1 | #ifndef Z3_COMMON_H 2 | #define Z3_COMMON_H 3 | 4 | #include <istream> 5 | #include <string> 6 | #include <unordered_set> 7 | 8 | #include <boost/container/flat_map.hpp> 9 | #include <z3++.h> 10 | 11 | #include "Common.h" 12 | 13 | /// Hash structure so a `z3::expr` can be stored in an `std::unordered_set` 14 | struct Z3ExprHash { 15 | size_t operator()(const z3::expr &E) const noexcept { 16 | return std::hash<unsigned>()(E.hash()); 17 | } 18 | }; 19 | 20 | /// Set of Z3 expressions 21 | using Z3ExprSet = std::unordered_set<z3::expr, Z3ExprHash>; 22 | 23 | /// Maps tuple IDs to Z3 expressions that "cover" that tuple 24 | using Z3CoverageMap = 25 | boost::container::flat_map<std::uint32_t, Z3ExprSet>; 26 | 27 | /// Get the seed name from a Z3 expression 28 | std::string GetSeed(const z3::expr &); 29 | 30 | /// Read a CSV file containing seed weights 31 | void GetZ3Weights(std::istream &, WeightsMap &); 32 | 33 | /// Create a name for a Z3 expression 34 | std::string MakeZ3ExprName(const std::string &); 35 | 36 | #endif // Z3_COMMON_H 37 |
-------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Seed Selection Tools 2 | 3 | A collection of scripts to help with analyzing fuzzing seed selection practices. 4 | 5 | ## afl_cmin.py 6 | 7 | A wrapper around [`afl-cmin`](https://github.com/google/AFL/blob/master/afl-cmin) 8 | that prints the seeds selected (but does not copy them). 9 | 10 | ## afl_coverage_merge.py 11 | 12 | Merge the [final coverage bitmaps](https://github.com/google/AFL/blob/master/afl-fuzz.c#L863) 13 | from multiple AFL parallel nodes. 14 | 15 | ## afl_coverage_pca.py 16 | 17 | Generate a [PCA](https://en.wikipedia.org/wiki/Principal_component_analysis) 18 | plot for a given seed set (stored in an HDF5 file, such as those stored 19 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 20 | 21 | ## coverage_auc.py 22 | 23 | Compute the area under curve (AUC) of AFL coverage data (stored in `plot_data` 24 | files). 25 | 26 | ## eval_maxsat.py 27 | 28 | Run [EvalMaxSAT](https://github.com/FlorentAvellaneda/EvalMaxSAT) over a WCNF 29 | produced by `afl-showmap-maxsat` to compute an optimum corpus (a sample WCNF 30 | fragment is sketched below). 31 | 32 | ## expand_hdf5_coverage.py 33 | 34 | Extract [`afl-showmap`](https://github.com/google/AFL/blob/master/afl-showmap.c) 35 | style bitmaps from an HDF5 file containing AFL coverage (as stored 36 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 37 | 38 | ## fuzz.py 39 | 40 | Run multiple AFL campaigns in parallel. Ensures that CPU usage is properly 41 | managed and optionally provides a watchdog that timestamps artifacts created by 42 | AFL (e.g., crashes, queue entries).
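The WCNF files exchanged between `afl-showmap-maxsat` and `eval_maxsat.py` prefix the clause listing with comment lines that map MaxSAT literals back to seed names; `get_seed_mapping` recovers this map. The sketch below uses a hypothetical two-seed fragment (the `p wcnf <vars> <clauses> <top>` header and weighted clauses follow the standard DIMACS WCNF layout, assumed here rather than taken from the artifact):

```python
# Hypothetical WCNF fragment: 'c <literal> : <seed>' comment lines carry the
# seed mapping; the 'p wcnf' header starts the hard/soft clause listing.
import re

SAMPLE_WCNF = """\
c 1 : id:000000,orig:a.pdf
c 2 : id:000001,orig:b.pdf
p wcnf 2 3 100
100 1 2 0
100 2 0
1 -1 0
"""

WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$')

mapping = {}
for line in SAMPLE_WCNF.splitlines():
    if line.startswith('p wcnf '):
        break  # constraints follow; the seed map is complete
    match = WCNF_SEED_MAP_RE.match(line.strip())
    if match:
        mapping[int(match.group(1))] = match.group(2)

assert mapping == {1: 'id:000000,orig:a.pdf', 2: 'id:000001,orig:b.pdf'}
```

The weight-100 ("top") clauses are the hard edge-coverage constraints; the weight-1 clause softly penalizes selecting a seed, which is what drives the solver toward a minimal corpus.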
42 | 43 | ## get_corpus.py 44 | 45 | Download a corpus of seeds from our [datastore](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/) 46 | based on a given minimization technique (e.g., optimal, afl-cmin). 47 | 48 | ## get_libs.py 49 | 50 | Extract all shared libraries that a given program depends on and copy these 51 | libraries to a particular directory. 52 | 53 | ## llvm_cov_merge.py 54 | 55 | Merge LLVM [SanitizerCoverage](https://clang.llvm.org/docs/SanitizerCoverage.html) coverage data. 56 | 57 | ## qminset.py 58 | 59 | Wraps the MinSet tool as proposed in the [Optimizing Seed Selection for 60 | Fuzzing](https://www.usenix.org/conference/usenixsecurity14/technical-sessions/presentation/rebert) 61 | paper. Prints the selected seeds. 62 | 63 | ## replay_seeds.py 64 | 65 | Replay a directory of input seeds and generate coverage information. This 66 | coverage information is stored in an HDF5 file (as stored 67 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 68 | 69 | ## triage_crashes.py 70 | 71 | Replay AFL's `crashes` directory and match crash outputs to a regex (such as 72 | those provided [here](../fuzzing/config/fts-bug-regexs.toml)). 73 | 74 | ## visualize_corpora.py 75 | 76 | Plot a "Venn diagram" (it's not really a Venn diagram) of different minimized 77 | corpora. 78 |
-------------------------------------------------------------------------------- /scripts/bin/afl_cmin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around `afl-cmin`. Only keeps the names of the files in the minimized 5 | corpus. 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from getopt import getopt 12 | from pathlib import Path 13 | from shutil import which 14 | from subprocess import run 15 | from tempfile import TemporaryDirectory 16 | import os 17 | import sys 18 | 19 | 20 | def main(): 21 | """The main function.""" 22 | opts, args = getopt(sys.argv[1:], '+i:f:m:t:eQC') 23 | 24 | cmin = which('afl-cmin') 25 | if not cmin: 26 | raise Exception('afl-cmin not found. Check PATH') 27 | 28 | env = os.environ.copy() 29 | env['AFL_ALLOW_TMP'] = '1' 30 | 31 | ret = 1 32 | 33 | with TemporaryDirectory() as temp_dir: 34 | cmin_args = [cmin, *[val for vals in opts for val in vals], 35 | '-o', temp_dir, '--', *args] 36 | proc = run(cmin_args, check=False, env=env) 37 | 38 | seeds = list(Path(temp_dir).iterdir()) 39 | 40 | print('\nSeeds (%d):' % len(seeds)) 41 | for seed in seeds: 42 | print(seed.name) 43 | 44 | ret = proc.returncode 45 | 46 | sys.exit(ret) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 |
-------------------------------------------------------------------------------- /scripts/bin/afl_coverage_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Merge AFL coverage across multiple parallel nodes.
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | 12 | from seed_selection.argparse import path_exists 13 | 14 | 15 | # Taken from AFL's config.h 16 | MAP_SIZE_POW2 = 16 17 | MAP_SIZE = 1 << MAP_SIZE_POW2 18 | 19 | 20 | def parse_args() -> Namespace: 21 | """Parse command-line arguments.""" 22 | parser = ArgumentParser(description='Merge AFL coverage') 23 | parser.add_argument('output', metavar='AFL_OUT', nargs='+', type=path_exists, 24 | help='AFL output directory') 25 | return parser.parse_args() 26 | 27 | 28 | def main(): 29 | """The main function.""" 30 | args = parse_args() 31 | 32 | # Read and merge bitmaps. The merged bitmap only indicates (via a boolean) 33 | # whether an edge tuple was hit or not (i.e., edge counts are discarded) 34 | merged_bitmap = [False] * MAP_SIZE 35 | for out in args.output: 36 | with open(out / 'fuzz_bitmap', 'rb') as inf: 37 | bitmap = inf.read() 38 | for i, byte in enumerate(bitmap): 39 | if byte != 255: 40 | merged_bitmap[i] = True 41 | 42 | # Calculate merged coverage 43 | bitmap_cvg = (sum(merged_bitmap) * 100.0) / MAP_SIZE 44 | print('bitmap_cvg: %.02f%%' % bitmap_cvg) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /scripts/bin/afl_coverage_pca.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate PCA plot for the given coverage HDF5 file. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | from argparse import ArgumentParser, Namespace 10 | from pathlib import Path 11 | import sys 12 | 13 | from h5py import File as H5File 14 | from matplotlib import rc 15 | from sklearn.decomposition import PCA 16 | from sklearn.preprocessing import StandardScaler 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import pandas as pd 20 | 21 | from seed_selection.argparse import path_exists 22 | 23 | 24 | # From afl/config.h 25 | MAP_SIZE_POW2 = 16 26 | MAP_SIZE = 1 << MAP_SIZE_POW2 27 | 28 | 29 | def parse_args() -> Namespace: 30 | """Parse command-line arguments.""" 31 | parser = ArgumentParser(description='PCA plots of coverage data') 32 | parser.add_argument('-i', '--input', metavar='HDF5', type=path_exists, 33 | required=True, help='Input HDF5 file') 34 | parser.add_argument('-o', '--output', metavar='PDF', type=Path, 35 | required=True, help='Path to output PDF') 36 | return parser.parse_args() 37 | 38 | 39 | def main(): 40 | """The main function.""" 41 | args = parse_args() 42 | 43 | in_hdf5 = args.input 44 | out_pdf = args.output 45 | 46 | print('Reading %s...' 
% in_hdf5) 47 | cov_data = {} 48 | with H5File(in_hdf5, 'r') as h5_file: 49 | for cov_file, cov in h5_file.items(): 50 | df_cov = np.zeros(MAP_SIZE, dtype=np.uint8) 51 | if len(cov.shape) == 0: 52 | edge, count = cov[()] 53 | df_cov[edge] = count 54 | else: 55 | for edge, count in cov: 56 | df_cov[edge] = count 57 | cov_data[cov_file] = list(df_cov) 58 | 59 | df = pd.DataFrame.from_dict(cov_data, orient='index') 60 | x = StandardScaler().fit_transform(df) 61 | if len(df) <= 1: 62 | sys.stderr.write('Not enough seeds to perform PCA\n') 63 | sys.exit(1) 64 | 65 | # Compute PCA 66 | # TODO determine the number of components 67 | print('Computing PCA...') 68 | pca = PCA(n_components=2) 69 | pca_scores = pd.DataFrame(pca.fit_transform(x), 70 | columns=['PCA 1', 'PCA 2']).set_index(df.index) 71 | 72 | # Configure plot 73 | rc('pdf', fonttype=42) 74 | rc('ps', fonttype=42) 75 | plt.style.use('ggplot') 76 | 77 | # Plot PCA 78 | print('Plotting...') 79 | fig = plt.figure() 80 | ax = fig.add_subplot(1, 1, 1) 81 | ax.scatter(pca_scores['PCA 1'], pca_scores['PCA 2'], marker='x', alpha=0.5) 82 | ax.set_xlabel('Component 1') 83 | ax.set_ylabel('Component 2') 84 | 85 | fig.savefig(out_pdf, bbox_inches='tight') 86 | print('%s coverage plotted at %s' % (in_hdf5, out_pdf)) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 |
-------------------------------------------------------------------------------- /scripts/bin/coverage_auc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute AUC for AFL coverage files. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from typing import TextIO 13 | 14 | from sklearn import metrics 15 | import bootstrapped.bootstrap as bs 16 | import bootstrapped.stats_functions as bs_stats 17 | import numpy as np 18 | import pandas as pd 19 | 20 | 21 | def parse_args() -> Namespace: 22 | """Parse command-line arguments.""" 23 | parser = ArgumentParser(description='Calculate AUC of AFL coverage') 24 | parser.add_argument('-p', '--percentile', type=float, default=1.0, 25 | help='Coverage percentile (as fraction 0 < p <= 1)') 26 | parser.add_argument('plot_data', nargs='+', type=Path, 27 | help='Path to AFL plot_data file(s)') 28 | return parser.parse_args() 29 | 30 | 31 | def read_plot_data(in_file: TextIO) -> pd.DataFrame: 32 | """Read an AFL `plot_data` file.""" 33 | def fix_map_size(x): 34 | if isinstance(x, str): 35 | return float(x.split('%')[0]) 36 | return x 37 | 38 | # Skip the opening '# ' (if it exists) 39 | pos = in_file.tell() 40 | first_chars = in_file.read(2) 41 | if first_chars != '# ': 42 | in_file.seek(pos) 43 | 44 | # Read the data 45 | df = pd.read_csv(in_file, index_col=False, skipinitialspace=True) 46 | df.map_size = df.map_size.apply(fix_map_size) 47 | 48 | return df 49 | 50 | 51 | def main(): 52 | """The main function.""" 53 | args = parse_args() 54 | plot_data_paths = args.plot_data 55 | num_plot_datas = len(plot_data_paths) 56 | 57 | aucs = [] 58 | 59 | for plot_data_path in plot_data_paths: 60 | if plot_data_path.stat().st_size == 0: 61 | continue 62 | 63 | with plot_data_path.open() as inf: 64 | df = read_plot_data(inf) 65 | if df.empty: 66 | continue 67 | 68 | df['unix_time'] = df.unix_time - df.unix_time.iloc[0] 69 | 70 | total_cov = df.map_size.iloc[-1] 71 | percentile_cov = total_cov * args.percentile 72 | df_percentile = df[df.map_size <= percentile_cov] 73 | if len(df_percentile) < 2:
74 | df_percentile = df[0:2] 75 | 76 | auc = metrics.auc(df_percentile.unix_time, df_percentile.map_size) 77 | aucs.append(auc) 78 | 79 | # Compute the mean AUC and confidence intervals 80 | auc_ci = bs.bootstrap(np.array(aucs), stat_func=bs_stats.mean) 81 | print(f'mean AUC ({num_plot_datas} plot_data files)') 82 | print(f' {auc_ci.value:.02f} +/- {auc_ci.error_width() / 2:.02f}') 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /scripts/bin/eval_maxsat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around EvalMaxSAT. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from shutil import which 12 | from typing import Dict, Optional, List, TextIO, Tuple 13 | import logging 14 | import re 15 | import subprocess 16 | import sys 17 | 18 | from seed_selection.argparse import log_level, path_exists, positive_int 19 | from seed_selection.log import get_logger 20 | 21 | 22 | WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$') 23 | 24 | logger = get_logger('run_maxsat') 25 | 26 | 27 | def parse_args() -> Namespace: 28 | """Parse command-line arguments.""" 29 | parser = ArgumentParser(description='Run EvalMaxSAT on a corpus WCNF') 30 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 31 | help='Logging level') 32 | parser.add_argument('-j', '--jobs', type=positive_int, default=0, 33 | help='Number of minimization threads') 34 | parser.add_argument('input', metavar='WCNF', type=path_exists, 35 | help='Path to input WCNF') 36 | return parser.parse_args() 37 | 38 | 39 | def get_seed_mapping(inf: TextIO) -> Dict[int, str]: 40 | """ 41 | Retrieve the mapping of literal identifiers (integers) to seed names 42 | (strings) from the WCNF file. 43 | """ 44 | mapping = {} 45 | for line in inf: 46 | # This starts the constraint listing 47 | if line.startswith('p wcnf '): 48 | break 49 | 50 | match = WCNF_SEED_MAP_RE.match(line.strip()) 51 | if not match: 52 | continue 53 | 54 | mapping[int(match.group(1))] = match.group(2) 55 | 56 | return mapping 57 | 58 | 59 | def parse_maxsat_out(out: List[str], mapping: Dict[int, str]) -> Tuple[Optional[List[str]], Optional[float]]: 60 | """ 61 | Parse the output from EvalMaxSat. 62 | 63 | Returns a tuple containing: 64 | 65 | 1. The list of seeds that make up the solution, or `None` if a solution 66 | could not be found. 67 | 2. The execution time. 68 | """ 69 | solution = None 70 | exec_time = None 71 | 72 | for line in out: 73 | # Solution status 74 | if line.startswith('s ') and 'OPTIMUM FOUND' not in line: 75 | # No optimum solution found 76 | break 77 | 78 | # Solution values 79 | if line.startswith('v '): 80 | vals = [int(v) for v in line[2:].split(' ')] 81 | solution = [mapping[v] for v in vals if v > 0] 82 | 83 | # Execution time 84 | if line.startswith('c Total time: '): 85 | toks = line.split(' ') 86 | exec_time = float(toks[3]) 87 | units = toks[4] 88 | 89 | # TODO other units to worry about? 90 | if units == 'ms': 91 | exec_time = exec_time / 1000 92 | 93 | return solution, exec_time 94 | 95 | 96 | def main(): 97 | """The main function.""" 98 | args = parse_args() 99 | in_file = args.input 100 | 101 | eval_max_sat = which('EvalMaxSAT_bin') 102 | if not eval_max_sat: 103 | raise Exception('Cannot find EvalMaxSAT_bin. 
Check PATH') 104 | 105 | # Initialize logging 106 | logger.setLevel(args.log) 107 | 108 | logger.debug('Retrieving literal/seed mapping from %s', in_file) 109 | with open(in_file, 'r') as inf: 110 | seed_map = get_seed_mapping(inf) 111 | 112 | logger.debug('Running EvalMaxSAT on %s', in_file) 113 | proc = subprocess.run([eval_max_sat, in_file, '-p', '%d' % args.jobs], 114 | check=True, stdout=subprocess.PIPE, encoding='utf-8') 115 | logger.debug('EvalMaxSAT completed') 116 | maxsat_out = [line.strip() for line in proc.stdout.split('\n')] 117 | 118 | logger.debug('Parsing EvalMaxSAT output') 119 | solution, exec_time = parse_maxsat_out(maxsat_out, seed_map) 120 | if not solution: 121 | raise Exception('Unable to find optimum solution for %s' % in_file) 122 | 123 | print('[+] Solution found for %s' % in_file, file=sys.stderr) 124 | print('[+] Total time: %.02f sec' % exec_time, file=sys.stderr) 125 | print('[+] Num. seeds: %d\n' % len(solution), file=sys.stderr) 126 | 127 | print('\n'.join(solution)) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 |
-------------------------------------------------------------------------------- /scripts/bin/expand_hdf5_coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Extract AFL coverage from an HDF5 file. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from typing import Set, TextIO 13 | import logging 14 | 15 | from h5py import File 16 | 17 | from seed_selection.argparse import log_level, path_exists, positive_int 18 | from seed_selection.coverage import expand_hdf5 19 | from seed_selection.log import get_logger 20 | 21 | 22 | logger = get_logger('extract_coverage') 23 | 24 | 25 | def parse_args() -> Namespace: 26 | """Parse command-line arguments.""" 27 | parser = ArgumentParser(description='Create a directory of AFL coverage ' 28 | 'data from an HDF5 file') 29 | parser.add_argument('-j', '--jobs', type=positive_int, default=1, 30 | help='Number of parallel jobs') 31 | parser.add_argument('-i', '--input', metavar='HDF5', type=path_exists, 32 | required=True, help='Input HDF5 file') 33 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 34 | help='Logging level') 35 | parser.add_argument('-o', '--output', metavar='DIR', required=True, 36 | type=Path, help='Output directory') 37 | parser.add_argument('-s', '--seeds', type=path_exists, 38 | help='Optional text file containing a list of seeds to ' 39 | 'extract') 40 | return parser.parse_args() 41 | 42 | 43 | def get_seeds(inf: TextIO) -> Set[str]: 44 | """Get a list of seeds (one per line).""" 45 | return {line.strip() for line in inf} 46 | 47 | 48 | def main(): 49 | """The main function.""" 50 | args = parse_args() 51 | in_file = args.input 52 | out_dir = args.output 53 | 54 | # Initialize logging 55 | logger.setLevel(args.log) 56 | 57 | # Determine the specific seeds to extract 58 | seeds = None 59 | if args.seeds: 60 | with open(args.seeds, 'r') as inf: 61 | seeds = get_seeds(inf) 62 | 63 | # Extract the seed coverage from the HDF5 file 64 | out_dir.mkdir(exist_ok=True) 65 | extracted_seeds = set() 66 | 67 | logger.info('Getting seed coverage (%d jobs)', args.jobs) 68 | with File(in_file, 'r') as h5f: 69 | for seed in expand_hdf5(h5f, out_dir, seeds, jobs=args.jobs, 70 | progress=True): 71 | extracted_seeds.add(seed) 72 | logger.info('Extracted coverage for %d seeds', len(extracted_seeds)) 73 |
74 | 75 | if __name__ == '__main__': 76 | main() 77 | -------------------------------------------------------------------------------- /scripts/bin/fuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Run multiple fuzzing campaigns in parallel. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from concurrent.futures import ProcessPoolExecutor as Executor 12 | from csv import DictWriter 13 | from datetime import datetime 14 | from pathlib import Path 15 | from shutil import which 16 | from subprocess import PIPE, run 17 | from time import sleep 18 | from typing import TextIO 19 | import gzip 20 | import logging 21 | import os 22 | import re 23 | 24 | from watchdog.observers import Observer 25 | from watchdog.events import PatternMatchingEventHandler 26 | 27 | from seed_selection.log import FORMATTER as LOG_FORMATTER 28 | from seed_selection.argparse import mem_limit, path_exists, positive_int 29 | 30 | 31 | AFL_SEED_RE = re.compile(r'''^id[:_]''') 32 | TIMESTAMP_FIELDNAMES = ('seed', 'size', 'unix_time', 'time_offset') 33 | 34 | 35 | class FuzzEventHandler(PatternMatchingEventHandler): 36 | """Only capture testcase creation events.""" 37 | 38 | def __init__(self, logger: logging.Logger) -> None: 39 | patterns = [ 40 | str(Path('*') / 'crashes' / 'id:*'), 41 | str(Path('*') / 'hangs' / 'id:*'), 42 | str(Path('*') / 'queue' / 'id:*'), 43 | ] 44 | super().__init__(patterns=patterns) 45 | self._logger = logger 46 | 47 | def on_created(self, event) -> None: 48 | super().on_created(event) 49 | self._logger.info(event.src_path) 50 | 51 | 52 | def parse_args() -> Namespace: 53 | """Parse command-line arguments.""" 54 | parser = ArgumentParser(description='Run a fuzzer experiment') 55 | parser.add_argument('-j', '--jobs', type=positive_int, 56 | default=os.cpu_count() // 3, 57 | help='Number of concurrent fuzz campaigns') 58 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 59 | required=True, 60 | help='Path to the input corpus directory') 61 | parser.add_argument('-o', '--output', metavar='DIR', type=Path, 62 | required=True, 63 | help='Path to the output results directory') 64 | parser.add_argument('-t', '--timeout', default=None, type=positive_int, 65 | help='Timeout for each run') 66 | parser.add_argument('-m', '--memory', type=mem_limit, default=None, 67 | help='Memory limit for child process') 68 | parser.add_argument('-n', '--nodes', type=positive_int, default=2, 69 | help='Number of fuzzer nodes') 70 | parser.add_argument('-w', '--watch', action='store_true', 71 | help='Watch the output directory and generate timestamps') 72 | parser.add_argument('--num-trials', type=positive_int, default=30, 73 | help='The number of repeated trials to perform') 74 | parser.add_argument('--trial-len', type=positive_int, default=18 * 60 * 60, 75 | help='The length of an individual trial (in seconds)') 76 | parser.add_argument('--cmp-log', metavar='BIN', type=Path, 77 | help='Path to cmp-log instrumented binary (if fuzzing ' 78 | 'with AFL++)') 79 | parser.add_argument('target', metavar='TARGET', type=path_exists, 80 | help='Target program') 81 | parser.add_argument('target_args', metavar='ARG', nargs='*', 82 | help='Target program arguments') 83 | return parser.parse_args() 84 | 85 | 86 | def get_logger(log_file: TextIO) -> logging.Logger: 87 | """Create a logger for recording file creation events.""" 88 | handler = logging.StreamHandler(log_file) 89 | 
handler.setFormatter(LOG_FORMATTER) 90 | 91 | name = '.'.join(log_file.name.split(os.sep)[-3:]) 92 | logger = logging.getLogger(name) 93 | logger.setLevel(logging.INFO) 94 | logger.addHandler(handler) 95 | 96 | return logger 97 | 98 | 99 | def create_watchdog(logger: logging.Logger, fuzz_dir: Path) -> Observer: 100 | """Create a watchdog observer for recording file creation events.""" 101 | handler = FuzzEventHandler(logger) 102 | observer = Observer() 103 | observer.schedule(handler, fuzz_dir, recursive=True) 104 | 105 | return observer 106 | 107 | 108 | def timestamp_results(out_dir: Path, start_time: datetime) -> list: 109 | """Timestamp the results of AFL.""" 110 | stats = [] 111 | 112 | for root, dirs, files in os.walk(out_dir): 113 | # Ignore hidden directories 114 | dirs[:] = [d for d in dirs if not d[0] == '.'] 115 | 116 | for name in files: 117 | if not AFL_SEED_RE.match(name): 118 | continue 119 | 120 | seed = Path(root) / name 121 | ctime = seed.stat().st_ctime 122 | 123 | stat_dict = dict(seed=str(seed), unix_time=ctime, 124 | time_offset=ctime - start_time.timestamp(), 125 | size=seed.stat().st_size) 126 | stats.append(stat_dict) 127 | 128 | return stats 129 | 130 | 131 | def run_fuzzer(afl: Path, out_dir: Path, node: int, **kwargs) -> int: 132 | """Run a fuzzer, and log testcases as they are created.""" 133 | # Create AFL command-line. We use `timeout` because a timeout in 134 | # subprocess.run causes us to lose our `CompletedProcess` object 135 | args = ['timeout', '%ds' % kwargs['trial_len'], 136 | afl, '-i', str(kwargs['input']), '-o', str(out_dir.parent)] 137 | 138 | if kwargs['timeout']: 139 | args.extend(['-t', str(kwargs['timeout'])]) 140 | if kwargs['memory'] and not kwargs['cmp_log']: 141 | args.extend(['-m', kwargs['memory']]) 142 | 143 | if node == 1: 144 | args.extend(['-M', out_dir.name]) 145 | else: 146 | args.extend(['-S', out_dir.name]) 147 | 148 | if kwargs['cmp_log']: 149 | args.extend(['-m', 'none', '-c', str(kwargs['cmp_log'])]) 150 | if 'fuzzer_args' in kwargs: 151 | args.extend(kwargs['fuzzer_args']) 152 | 153 | args.extend(['--', str(kwargs['target']), *kwargs['target_args']]) 154 | 155 | # Create AFL environment 156 | env = os.environ.copy() 157 | env['AFL_NO_UI'] = '1' 158 | 159 | # Create watchdog.
Testcase creation times are logged to a compressed file 160 | if kwargs['watch']: 161 | log_file = gzip.open(out_dir / 'watchdog.log.gz', 'wt') 162 | logger = get_logger(log_file) 163 | watchdog = create_watchdog(logger, out_dir) 164 | watchdog.start() 165 | 166 | # Start the fuzzer 167 | try: 168 | start_time = datetime.now() 169 | print('[%s] %s' % (start_time, ' '.join(args))) 170 | proc = run(args, stdout=PIPE, stderr=PIPE, env=env, check=False) 171 | 172 | # Save fuzzer output 173 | with open(out_dir / 'stdout.log', 'wb') as outf: 174 | outf.write(proc.stdout) 175 | with open(out_dir / 'stderr.log', 'wb') as outf: 176 | outf.write(proc.stderr) 177 | 178 | # Timestamp everything produced by the fuzzer 179 | stats = [] 180 | for name in ('queue', 'crashes', 'hangs'): 181 | stats.extend(timestamp_results(out_dir / name, start_time)) 182 | stats.sort(key=lambda d: d['unix_time']) 183 | with open(out_dir / 'timestamps.csv', 'w') as outf: 184 | writer = DictWriter(outf, fieldnames=TIMESTAMP_FIELDNAMES) 185 | writer.writeheader() 186 | writer.writerows(stats) 187 | 188 | return proc.returncode 189 | finally: 190 | # Cleanup (the watchdog and log file only exist in `--watch` mode) 191 | if kwargs['watch']: 192 | watchdog.stop() 193 | watchdog.join() 194 | log_file.close() 195 | 196 | 197 | 198 | def main(): 199 | """The main function.""" 200 | args = parse_args() 201 | 202 | afl = which('afl-fuzz') 203 | if not afl: 204 | raise Exception('Cannot find `afl-fuzz`. Check PATH') 205 | 206 | num_jobs = args.jobs 207 | num_nodes = args.nodes 208 | 209 | if num_jobs % num_nodes != 0: 210 | raise Exception('The number of jobs (%d) must be divisible by the ' 211 | 'number of nodes (%d)' % (num_jobs, num_nodes)) 212 | 213 | out_dir = args.output 214 | out_dir.mkdir(exist_ok=True) 215 | 216 | with Executor(max_workers=num_jobs) as executor: 217 | for trial in range(1, args.num_trials + 1): 218 | # Create output directory 219 | trial_dir = args.output / f'trial-{trial:02d}' 220 | trial_dir.mkdir(exist_ok=True) 221 | 222 | # Run the fuzzer node 223 | for node in range(1, 1 + args.nodes): 224 | node_dir = trial_dir / f'fuzzer-{node:02d}' 225 | node_dir.mkdir(exist_ok=True) 226 | 227 | # Sleep to avoid races when AFL attempts to bind to a core 228 | executor.submit(run_fuzzer, afl, node_dir, node, **vars(args)) 229 | sleep(1.5) 230 | 231 | 232 | if __name__ == '__main__': 233 | main() 234 |
-------------------------------------------------------------------------------- /scripts/bin/get_corpus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Download a specific seed corpus.
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from io import BytesIO, StringIO 12 | from pathlib import Path 13 | from tarfile import TarFile 14 | from tempfile import TemporaryDirectory 15 | from typing import Set 16 | import logging 17 | import shutil 18 | 19 | from tqdm import tqdm 20 | 21 | from seed_selection import BENCHMARKS, CORPORA, TARGET_FILE_TYPES 22 | from seed_selection.argparse import log_level, path_exists 23 | from seed_selection.log import get_logger 24 | from seed_selection import datastore 25 | 26 | 27 | logger = get_logger('get_corpus') 28 | 29 | 30 | def parse_args() -> Namespace: 31 | """Parse command-line arguments.""" 32 | parser = ArgumentParser(description='Create a corpus for a given ' 33 | 'benchmark\'s target') 34 | parser.add_argument('-b', '--benchmark', choices=BENCHMARKS, required=True, 35 | help='The benchmark') 36 | parser.add_argument('-c', '--corpus', choices=CORPORA, default='full', 37 | help='The corpus type to download') 38 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 39 | help='Logging level') 40 | parser.add_argument('-t', '--target', type=str, required=True, 41 | help='The benchmark target') 42 | parser.add_argument('output', metavar='DIR', type=path_exists, 43 | help='Path to output directory') 44 | return parser.parse_args() 45 | 46 | 47 | def get_seeds(benchmark: str, target: str, corpus: str) -> Set[Path]: 48 | """Get the list of seeds for the given corpus.""" 49 | logger.info('Getting seed list from datastore') 50 | 51 | # Get the corpus file 52 | seed_path = Path('corpora') / benchmark / target / f'{corpus}.txt' 53 | seed_data = datastore.get_file(seed_path).decode('utf-8') 54 | logger.debug('Downloaded corpus seed file') 55 | 56 | # Now create the paths to the seeds 57 | filetype = TARGET_FILE_TYPES[benchmark][target] 58 | seeds = set() 59 | with StringIO(seed_data) as inf: 60 | for line in inf: 61 | seed = line.strip() 62 | seeds.add(Path(filetype) / seed) 63 | logger.info('Read %d seeds from seed list', len(seeds)) 64 | 65 | return seeds 66 | 67 | 68 | def main(): 69 | """The main function.""" 70 | args = parse_args() 71 | benchmark = args.benchmark 72 | corpus = args.corpus 73 | target = args.target 74 | out_dir = args.output 75 | 76 | # Validate that the target is in the benchmark 77 | if target not in TARGET_FILE_TYPES[benchmark]: 78 | raise Exception('Target `%s` is not valid for the `%s` benchmark' % 79 | (target, benchmark)) 80 | filetype = TARGET_FILE_TYPES[benchmark][target] 81 | 82 | # Initialize logging 83 | logger.setLevel(args.log) 84 | 85 | # Get the list of seeds for the given benchmark target. 
Need special 86 | # handling for the empty corpus :( 87 | if corpus == 'empty': 88 | seeds = [Path('empty') / f'empty.{filetype}'] 89 | archive_name = 'empty.tar.xz' 90 | else: 91 | seeds = get_seeds(benchmark, target, corpus) 92 | archive_name = '%s.tar.xz' % filetype 93 | 94 | # Download seeds 95 | logger.info('Downloading %s', archive_name) 96 | data = datastore.get_file(Path('seeds') / archive_name, progbar=True) 97 | with BytesIO(data) as bio: 98 | logger.info('Extracting seeds from %s', archive_name) 99 | with TarFile.open(fileobj=bio, mode='r:xz') as tf, \ 100 | TemporaryDirectory() as td: 101 | logger.info('Extract all seeds to temp dir %s', td) 102 | tf.extractall(td) 103 | 104 | for seed in tqdm(seeds, desc=f'Copying seeds to {out_dir}', 105 | unit='seeds'): 106 | shutil.copy(Path(td) / seed, out_dir) 107 | 108 | logger.info('Successfully created %s corpus for %s - %s at %s', corpus, 109 | benchmark, target, out_dir) 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /scripts/bin/get_libs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Extract the shared library dependencies for a given binary. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from shutil import copyfile 13 | from typing import Set 14 | import re 15 | import subprocess 16 | 17 | from elftools.elf.elffile import ELFFile 18 | from elftools.elf.segments import InterpSegment 19 | 20 | LDD_RE = re.compile(r'.*.so.* => (/.*\.so[^ ]*)') 21 | LDD_NOT_FOUND_RE = re.compile(r'(.*.so.*) => not found') 22 | 23 | 24 | def parse_args() -> Namespace: 25 | """Parse command-line arguments.""" 26 | parser = ArgumentParser(description='Extract a binary\'s library ' 27 | 'dependencies') 28 | parser.add_argument('-o', '--output', metavar='DIR', type=Path, 29 | required=True, help='Output directory') 30 | parser.add_argument('binary', metavar='FILE', nargs='+', type=Path, 31 | help='Path to binary') 32 | return parser.parse_args() 33 | 34 | 35 | def get_interpreter(prog: Path) -> Path: 36 | """Extract the binary's interpreter.""" 37 | with open(prog, 'rb') as inf: 38 | elf = ELFFile(inf) 39 | for seg in elf.iter_segments(): 40 | if isinstance(seg, InterpSegment): 41 | return Path(seg.get_interp_name()) 42 | 43 | raise Exception('Could not find binary interpreter in %s' % prog) 44 | 45 | 46 | def get_library_deps(prog: Path) -> Set[Path]: 47 | """Use `ldd` to determine the given program's library dependencies.""" 48 | deps = set() 49 | 50 | ldd = subprocess.run(['ldd', str(prog)], check=True, encoding='utf-8', 51 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 52 | for line in ldd.stdout.split('\n'): 53 | match = LDD_NOT_FOUND_RE.search(line) 54 | if match: 55 | missing_lib = match.group(1).strip() 56 | raise Exception('Could not find %s - check LD_LIBRARY_PATH' % 57 | missing_lib) 58 | match = LDD_RE.search(line) 59 | if not match: 60 | continue 61 | deps.add(Path(match.group(1))) 62 | 63 | return deps 64 | 65 | 66 | def main(): 67 | """The main function.""" 68 | args = parse_args() 69 | 70 | progs = args.binary 71 | out_dir = args.output 72 | 73 | libs = set() 74 | 75 | for prog in progs: 76 | if not prog.exists(): 77 | print('WARN: %s does not exist. Skipping...' 
% prog) 78 | continue 79 | 80 | # Get loader 81 | libs.add(get_interpreter(prog)) 82 | 83 | # Determine all library dependencies 84 | libs.update(get_library_deps(prog)) 85 | 86 | if not out_dir.exists(): 87 | out_dir.mkdir(exist_ok=True) 88 | 89 | for lib in libs: 90 | # Skip if they are the same file 91 | if lib.parent.samefile(out_dir): 92 | print('WARN: %s already exists in %s. Skipping...' % (lib.name, out_dir)) 93 | continue 94 | copyfile(lib, out_dir / lib.name) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 |
-------------------------------------------------------------------------------- /scripts/bin/llvm_cov_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate and merge llvm-cov coverage from an AFL trial. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from functools import partial 12 | from pathlib import Path 13 | from random import randint 14 | from tempfile import TemporaryDirectory, gettempdir 15 | from typing import List, Optional 16 | import json 17 | import logging 18 | import multiprocessing.pool as mpp 19 | import os 20 | import subprocess 21 | 22 | from seed_selection.afl import replace_atat 23 | from seed_selection.argparse import log_level, path_exists, positive_int 24 | from seed_selection.log import get_logger 25 | 26 | 27 | logger = get_logger('llvm_cov_merge') 28 | 29 | 30 | def parse_args() -> Namespace: 31 | """Parse command-line arguments.""" 32 | parser = ArgumentParser(description='Generate and merge llvm-cov coverage') 33 | parser.add_argument('-j', '--jobs', type=positive_int, default=1, 34 | help='Number of parallel jobs') 35 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 36 | required=True, help='AFL output directory') 37 | parser.add_argument('-o', '--output', metavar='JSON', type=Path, 38 | help='Output JSON') 39 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 40 | help='Logging level') 41 | parser.add_argument('-t', '--timeout', type=positive_int, default=None, 42 | help='Timeout (seconds)') 43 | parser.add_argument('--summary-only', action='store_true', 44 | help='Export only summary information for each source file') 45 | parser.add_argument('target', metavar='TARGET', type=path_exists, 46 | help='LLVM SanitizerCoverage-instrumented target program') 47 | parser.add_argument('target_args', metavar='ARG', nargs='+', 48 | help='Target program arguments') 49 | return parser.parse_args() 50 | 51 | 52 | def get_seed_profraw(seed: Path, outdir: Path, target: Path, 53 | target_args: List[str], timeout: Optional[int] = None) -> Path: 54 | """ 55 | Generate the raw coverage profile by replaying the seed through a 56 | SanitizerCoverage-instrumented target.
57 | """ 58 | if seed.stat().st_size == 0: 59 | logger.warning('%s is empty', seed) 60 | 61 | rand_id = randint(0, 99999) 62 | profraw = outdir / f'{rand_id}-{seed.stem}.profraw' 63 | 64 | env = os.environ.copy() 65 | env['LLVM_PROFILE_FILE'] = profraw 66 | 67 | target_args_w_seed, found_atat = replace_atat(target_args, seed) 68 | if not found_atat: 69 | raise Exception('No seed placeholder `@@` found in target arguments') 70 | 71 | stderr = '' 72 | try: 73 | proc = subprocess.run([str(target), *target_args_w_seed], check=False, 74 | env=env, timeout=timeout, 75 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 76 | stderr = proc.stderr 77 | if proc.returncode: 78 | logger.debug('%s error: %s', seed, stderr.strip()) 79 | except subprocess.TimeoutExpired: 80 | logger.warning('%s timed out', seed) 81 | if not profraw.exists(): 82 | raise Exception('Failed to create raw coverage profile for `%s`: %s' % 83 | (seed, stderr.strip())) 84 | 85 | return profraw 86 | 87 | 88 | def merge_profraw(seed_list: Path, profdata: Path, jobs : int = 1) -> None: 89 | """ 90 | Run llvm-profdata to merge raw coverage profiles (listed in `seed_list`). 91 | """ 92 | # Find appropriate llvm-profdata 93 | llvm_profdata = 'llvm-profdata' 94 | if 'LLVM_PROFDATA' in os.environ: 95 | llvm_profdata = os.environ['LLVM_PROFDATA'] 96 | 97 | llvm_profdata_args = [llvm_profdata, 'merge', '-sparse', 98 | '-num-threads', '%d' % jobs, 99 | '-f', str(seed_list), '-o', str(profdata)] 100 | proc = subprocess.run(llvm_profdata_args, check=False, encoding='utf-8', 101 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 102 | if proc.returncode: 103 | raise Exception('Failed to merge profile data: %s' % 104 | proc.stderr.strip()) 105 | 106 | 107 | def export_json(target: Path, profdata: Path, 108 | summary_only: bool = False) -> dict: 109 | """Run llvm-cov to export coverage as JSON.""" 110 | # Find appropriate llvm-cov 111 | llvm_cov = 'llvm-cov' 112 | if 'LLVM_COV' in os.environ: 113 | llvm_cov = os.environ['LLVM_COV'] 114 | 115 | llvm_cov_args = [llvm_cov, 'export'] 116 | if summary_only: 117 | llvm_cov_args.append('-summary-only') 118 | llvm_cov_args.extend([str(target), '-instr-profile', str(profdata), 119 | '-format', 'text']) 120 | proc = subprocess.run(llvm_cov_args, check=True, stdout=subprocess.PIPE, 121 | stderr=subprocess.PIPE) 122 | return json.loads(proc.stdout) 123 | 124 | 125 | def get_temp_dir() -> Path: 126 | """Determine temporary directory location. 
Prefer tmpfs if available.""" 127 | root = Path('/') 128 | preferred_dirs = (root / 'dev' / 'shm', root / 'run' / 'shm') 129 | for dir_ in preferred_dirs: 130 | if dir_.exists(): 131 | return dir_ 132 | 133 | return Path(gettempdir()) 134 | 135 | 136 | def main(): 137 | """The main function.""" 138 | args = parse_args() 139 | in_dir = args.input 140 | output = args.output 141 | target = args.target 142 | 143 | # Initialize logging 144 | logger.setLevel(args.log) 145 | 146 | seeds = (seed for queue in in_dir.glob('**/queue') 147 | for seed in queue.iterdir() if seed.is_file()) 148 | 149 | with TemporaryDirectory(dir=get_temp_dir()) as temp_dir: 150 | # Generate raw coverage files 151 | with mpp.Pool(processes=args.jobs) as pool: 152 | logger.info('Generating raw coverage profiles from %s...', in_dir) 153 | get_profraw = partial(get_seed_profraw, outdir=Path(temp_dir), 154 | target=target, target_args=args.target_args, 155 | timeout=args.timeout) 156 | profraws = pool.map(get_profraw, seeds) 157 | logger.info('Generated %d coverage profiles', len(profraws)) 158 | 159 | if not profraws: 160 | logger.warning('No coverage profiles generated') 161 | return 162 | 163 | # Create list of seeds for merging 164 | logger.info('Generating seeds.txt...') 165 | seed_list = Path(temp_dir) / 'seeds.txt' 166 | with open(seed_list, 'w') as outf: 167 | for profraw in profraws: 168 | outf.write('1,%s\n' % profraw)  # `llvm-profdata -f` format: <weight>,<filename> 169 | 170 | # Merge raw coverage 171 | logger.info('Merging raw coverage profiles...') 172 | profdata_file = Path(temp_dir) / 'merged.profdata' 173 | merge_profraw(seed_list, profdata_file, jobs=args.jobs) 174 | 175 | # Generate JSON 176 | logger.info('Generating JSON coverage report...') 177 | summary_only = output is None or args.summary_only  # a full export is only needed when saving the report 178 | prof_data = export_json(target, profdata_file, summary_only) 179 | 180 | # Save/print JSON 181 | if output: 182 | logger.info('Saving JSON report to %s', output) 183 | with open(output, 'w') as outf: 184 | json.dump(prof_data, outf) 185 | 186 | region_data = prof_data['data'][0]['totals']['regions'] 187 | region_cvg = region_data['covered'] / region_data['count'] * 100.0 188 | print('region coverage: %.02f%%' % region_cvg) 189 | 190 | 191 | if __name__ == '__main__': 192 | main() 193 | -------------------------------------------------------------------------------- /scripts/bin/llvm_cov_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate llvm-cov coverage statistics.
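Each input JSON should be an `llvm-cov export` report, such as one produced by llvm_cov_merge.py. Example invocation (file names illustrative): `llvm_cov_stats.py trial-1.json trial-2.json trial-3.json`.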
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | import json 13 | 14 | from bootstrapped import bootstrap as bs 15 | import bootstrapped.stats_functions as bs_stats 16 | import numpy as np 17 | 18 | from seed_selection.argparse import path_exists 19 | 20 | 21 | def parse_args() -> Namespace: 22 | """Parse command-line arguments.""" 23 | parser = ArgumentParser(description='Generate llvm-cov statistics') 24 | parser.add_argument('jsons', metavar='JSON', nargs='+', type=path_exists, 25 | help='llvm-cov-generated JSON coverage file(s)') 26 | return parser.parse_args() 27 | 28 | 29 | def get_region_cov(llvm_cov_json: Path) -> float: 30 | """Get region coverage from the llvm-cov-generated JSON file.""" 31 | with llvm_cov_json.open() as inf: 32 | root = json.load(inf) 33 | data = root['data'][0]['totals']['regions'] 34 | return data['covered'] / data['count'] * 100.0 35 | 36 | 37 | def main(): 38 | """The main function.""" 39 | args = parse_args() 40 | 41 | # Get region coverage 42 | regions = np.array([get_region_cov(p) for p in args.jsons]) 43 | 44 | # Calculate mean and confidence intervals 45 | cov_ci = bs.bootstrap(regions, stat_func=bs_stats.mean) 46 | 47 | # Output 48 | print(f'mean coverage ({len(regions)} trials)') 49 | print(f' {cov_ci.value:.02f} +/- {cov_ci.error_width() / 2:.02f}') 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /scripts/bin/qminset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around `qminset`. Only keeps the names of the files in the minimized 5 | corpus. 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from io import BytesIO 13 | from pathlib import Path 14 | from shutil import copytree, which 15 | from subprocess import PIPE, run 16 | from tarfile import TarInfo 17 | from tempfile import TemporaryDirectory 18 | import re 19 | import tarfile 20 | 21 | from seed_selection.argparse import path_exists 22 | 23 | 24 | SEED_RE = re.compile(r'Adding \d+ instructions \((?P<seed>.+?)\)') 25 | 26 | 27 | def parse_args() -> Namespace: 28 | """Parse command-line arguments.""" 29 | parser = ArgumentParser(description='Wrapper around minset') 30 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 31 | required=True, help='Path to input directory of ' 32 | '`afl-showmap` coverage files') 33 | parser.add_argument('-s', '--bitvectors', metavar='DIR', type=Path, 34 | help='Save the `moonbeam-afl` bitvector traces in the ' 35 | 'given directory') 36 | return parser.parse_args() 37 | 38 | 39 | def main(): 40 | """The main function.""" 41 | args = parse_args() 42 | in_dir = args.input 43 | 44 | moonbeam = which('moonbeam-afl') 45 | if not moonbeam: 46 | raise Exception('`moonbeam-afl` not found. Check PATH') 47 | qminset = which('qminset') 48 | if not qminset: 49 | raise Exception('`qminset` not found.
Check PATH') 50 | 51 | with TemporaryDirectory() as bv_dir, TemporaryDirectory() as mset_dir: 52 | # Generate bitvectors 53 | run([moonbeam, '-i', in_dir, '-o', bv_dir], check=True) 54 | # (a non-zero exit raises CalledProcessError, so no manual check is needed) 55 | 56 | print('') 57 | 58 | output_path = Path('output') 59 | bitvectors = list(Path(bv_dir).glob('*.bv')) 60 | num_bitvectors = len(bitvectors) 61 | 62 | # Save the bitvectors if requested 63 | if args.bitvectors: 64 | copytree(bv_dir, args.bitvectors) 65 | 66 | # Prepare the minset data 67 | print('Preparing minset data...') 68 | for bitvector in bitvectors: 69 | bitvector_size = bitvector.stat().st_size 70 | mset_bv_dir = Path(mset_dir) / bitvector.stem 71 | mset_bv_dir.mkdir() 72 | 73 | # Write size 74 | with open(mset_bv_dir / 'size', 'w') as outf: 75 | outf.write('%d\n' % bitvector_size) 76 | 77 | # Write output.tgz 78 | with tarfile.open(mset_bv_dir / 'output.tgz', 'w:gz') as tar: 79 | # Write imagefilemap.txt 80 | imagefilemap = BytesIO(b'%s,%s\n' % (in_dir.name.encode(), 81 | bitvector.name.encode())) 82 | tarinfo = TarInfo(name=str(output_path / 'imagefilemap.txt')) 83 | tarinfo.size = len(imagefilemap.getvalue()) 84 | tar.addfile(tarinfo=tarinfo, fileobj=imagefilemap) 85 | 86 | # Write info.txt 87 | info = BytesIO(b'0_0_0_0_0_0_0\n0_0_0_0_0_0_WEIGHT}_0\n') 88 | tarinfo = TarInfo(name=str(output_path / 'info.txt')) 89 | tarinfo.size = len(info.getvalue()) 90 | tar.addfile(tarinfo=tarinfo, fileobj=info) 91 | 92 | # Write the bitvector 93 | with open(bitvector, 'rb') as inf: 94 | tarinfo = TarInfo(name=str(output_path / bitvector.name)) 95 | tarinfo.size = bitvector_size 96 | tar.addfile(tarinfo=tarinfo, fileobj=inf) 97 | 98 | # Run qminset 99 | print('Running minset...') 100 | proc = run([qminset, 'q', '%d' % num_bitvectors, str(mset_dir)], 101 | stdout=PIPE, stderr=PIPE, check=True, encoding='utf8') 102 | 103 | # Get the seeds 104 | seeds = [] 105 | for line in proc.stdout.split('\n'): 106 | line = line.strip() 107 | if line == 'DONE': 108 | break 109 | 110 | match = SEED_RE.match(line) 111 | if match: 112 | seeds.append(match.group('seed')) 113 | 114 | print('\nSeeds (%d):' % len(seeds)) 115 | for seed in seeds: 116 | print(seed) 117 | 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /scripts/bin/replay_seeds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Replay a directory of fuzzer inputs and generate coverage information. Store 5 | this coverage (along with size and execution time metadata) in an HDF5 file.
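Coverage is collected by replaying every input through `afl-showmap`, so TARGET must be an AFL-instrumented binary and the target arguments must include the `@@` placeholder that is substituted with each seed. Example invocation (paths illustrative): `replay_seeds.py -i ./seeds -o cov.h5 ./readelf -a @@`.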
6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from pathlib import Path 13 | from shutil import copy, which 14 | from subprocess import run 15 | from tempfile import NamedTemporaryFile 16 | from time import time 17 | from typing import Tuple 18 | 19 | from h5py import File as H5File 20 | from tqdm import tqdm 21 | import numpy as np 22 | 23 | from seed_selection.afl import replace_atat 24 | from seed_selection.argparse import mem_limit, path_exists, positive_int 25 | 26 | 27 | # Coverage records are (edge ID, hit count) pairs, as emitted by afl-showmap 28 | COV_TYPE = np.dtype([('edge', np.uint32), ('count', np.uint8)]) 29 | 30 | 31 | def parse_args() -> Namespace: 32 | """Parse command-line arguments.""" 33 | parser = ArgumentParser(description='Generate AFL coverage from a seed ' 34 | 'directory') 35 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 36 | required=True, help='Path to input directory') 37 | parser.add_argument('-o', '--output', metavar='HDF5', type=Path, 38 | required=True, help='Path to output HDF5') 39 | parser.add_argument('-s', '--traces', metavar='DIR', type=path_exists, 40 | help='Save the traces to the given directory') 41 | parser.add_argument('-t', '--timeout', type=positive_int, default=None, 42 | help='Timeout for each run') 43 | parser.add_argument('-m', '--memory', type=mem_limit, default=None, 44 | help='Memory limit for child process') 45 | parser.add_argument('-q', '--quiet', action='store_true', 46 | help='Sink program output') 47 | parser.add_argument('target', metavar='TARGET', type=path_exists, 48 | help='Target program') 49 | parser.add_argument('target_args', metavar='ARG', nargs='+', 50 | help='Target program arguments') 51 | return parser.parse_args() 52 | 53 | 54 | def run_showmap(afl_showmap: Path, seed: Path, **kwargs) -> Tuple[np.ndarray, float]: 55 | """Run afl-showmap on a given file.""" 56 | cov = np.empty(0, dtype=COV_TYPE) 57 | args = [afl_showmap] 58 | 59 | timeout = kwargs.get('timeout') 60 | memory = kwargs.get('memory') 61 | 62 | if timeout: 63 | args.extend(['-t', str(timeout)]) 64 | if memory: 65 | args.extend(['-m', str(memory)]) 66 | if kwargs['quiet']: 67 | args.append('-q') 68 | 69 | target_args_w_seed, found_atat = replace_atat(kwargs['target_args'], seed) 70 | if not found_atat: 71 | raise Exception('No seed placeholder `@@` found in target arguments') 72 | 73 | with NamedTemporaryFile() as temp: 74 | args.extend(['-o', temp.name]) 75 | args.extend(['--', kwargs['target'], *target_args_w_seed]) 76 | 77 | start_time = time() 78 | run(args, check=False) 79 | end_time = time() 80 | 81 | exec_time_ms = (end_time - start_time) * 1000 82 | 83 | # Successfully generated coverage 84 | if Path(temp.name).stat().st_size != 0: 85 | with open(temp.name, 'r') as trace_data: 86 | cov = np.genfromtxt(trace_data, delimiter=':', dtype=COV_TYPE) 87 | 88 | # Save the seed trace if requested 89 | trace_dir = kwargs['traces'] 90 | if trace_dir: 91 | copy(temp.name, trace_dir / seed.name) 92 | 93 | return cov, exec_time_ms 94 | 95 | 96 | def main(): 97 | """The main function.""" 98 | args = parse_args() 99 | 100 | in_dir = args.input 101 | out_path = args.output 102 | num_seeds = len(list(in_dir.glob('*'))) 103 | 104 | afl_showmap = which('afl-showmap') 105 | if not afl_showmap: 106 | raise Exception('Cannot find `afl-showmap`.
Check PATH') 107 | 108 | with H5File(out_path, 'w') as h5f: 109 | for seed in tqdm(in_dir.iterdir(), 110 | desc='Generating `afl-showmap` coverage', 111 | total=num_seeds, unit='seeds'): 112 | cov, exec_time = run_showmap(afl_showmap, seed, **vars(args)) 113 | if cov.size == 0: 114 | continue 115 | 116 | compression = 'gzip' if cov.size > 1 else None 117 | dset = h5f.create_dataset(str(seed.relative_to(in_dir)), 118 | data=cov, compression=compression) 119 | dset.attrs['time'] = exec_time 120 | dset.attrs['size'] = seed.stat().st_size 121 | 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /scripts/bin/timestamp_afl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Get the timestamps and sizes for all fuzzing testcases in the queue, crashes, 5 | and hangs directories (works for both AFL and Angora). 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from csv import DictWriter 13 | from pathlib import Path 14 | import os 15 | import re 16 | from typing import Any, Dict, List 17 | 18 | 19 | AFL_SEED_RE = re.compile(r'''^id[:_]''') 20 | FIELDNAMES = ('seed', 'size', 'unix_time') 21 | 22 | 23 | def parse_args() -> Namespace: 24 | """Parse command-line arguments.""" 25 | parser = ArgumentParser(description='Get timestamps and file sizes for ' 26 | 'fuzzer results') 27 | parser.add_argument('-o', '--output', type=Path, required=True, 28 | help='Path to output CSV file') 29 | parser.add_argument('out_dir', type=Path, metavar='OUT_DIR', 30 | help='AFL output directory') 31 | return parser.parse_args() 32 | 33 | 34 | def timestamp_results(out_dir: Path) -> List[Dict[str, Any]]: 35 | """Timestamp the results of AFL.""" 36 | stats = [] 37 | 38 | for root, dirs, files in os.walk(out_dir): 39 | # Ignore hidden directories 40 | dirs[:] = [d for d in dirs if not d[0] == '.'] 41 | 42 | for name in files: 43 | if not AFL_SEED_RE.match(name): 44 | continue 45 | 46 | seed = Path(root) / name 47 | ctime = seed.stat().st_ctime 48 | 49 | stat_dict = dict(seed=str(seed), unix_time=ctime, 50 | size=seed.stat().st_size) 51 | stats.append(stat_dict) 52 | 53 | return stats 54 | 55 | 56 | def main(): 57 | """The main function.""" 58 | args = parse_args() 59 | 60 | stats = [] 61 | for name in ('queue', 'crashes', 'hangs'): 62 | stats.extend(timestamp_results(args.out_dir / name)) 63 | stats.sort(key=lambda d: d['unix_time']) 64 | 65 | # Write the results to the output CSV file 66 | with open(args.output, 'w') as outf: 67 | writer = DictWriter(outf, fieldnames=FIELDNAMES) 68 | writer.writeheader() 69 | writer.writerows(stats) 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /scripts/bin/timestamp_honggfuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Get the timestamps and sizes for all honggfuzz-generated files.
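Only files with a `.fuzz` or `.honggfuzz.cov` suffix are recorded. Example invocation (path illustrative): `timestamp_honggfuzz.py -o times.csv ./hfuzz-out`.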
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from csv import DictWriter 12 | from pathlib import Path 13 | import os 14 | from typing import Any, Dict, List 15 | 16 | 17 | FIELDNAMES = ('seed', 'size', 'unix_time') 18 | 19 | 20 | def parse_args() -> Namespace: 21 | """Parse command-line arguments.""" 22 | parser = ArgumentParser(description='Get timestamps and file sizes for ' 23 | 'fuzzer results') 24 | parser.add_argument('-o', '--output', type=Path, required=True, 25 | help='Path to output CSV file') 26 | parser.add_argument('out_dir', type=Path, metavar='OUT_DIR', 27 | help='honggfuzz output directory') 28 | return parser.parse_args() 29 | 30 | def check_suffix(seed: Path) -> bool: 31 | """Check if the file suffix is one that we are interested in.""" 32 | suffixes = seed.suffixes 33 | 34 | if len(suffixes) > 2 and ''.join(suffixes[-2:]) == '.honggfuzz.cov': 35 | return True 36 | if len(suffixes) > 1 and suffixes[-1] == '.fuzz': 37 | return True 38 | 39 | return False 40 | 41 | 42 | def timestamp_results(out_dir: Path) -> List[Dict[str, Any]]: 43 | """Timestamp the results of honggfuzz.""" 44 | stats = [] 45 | 46 | for root, dirs, files in os.walk(out_dir): 47 | # Ignore hidden directories 48 | dirs[:] = [d for d in dirs if not d[0] == '.'] 49 | 50 | for name in files: 51 | seed = Path(root) / name 52 | if not check_suffix(seed): 53 | continue 54 | 55 | ctime = seed.stat().st_ctime 56 | 57 | stat_dict = dict(seed=str(seed), unix_time=ctime, 58 | size=seed.stat().st_size) 59 | stats.append(stat_dict) 60 | 61 | return stats 62 | 63 | 64 | def main(): 65 | """The main function.""" 66 | args = parse_args() 67 | 68 | stats = timestamp_results(args.out_dir) 69 | stats.sort(key=lambda d: d['unix_time']) 70 | 71 | # Write the results to the output CSV file 72 | with open(args.output, 'w') as outf: 73 | writer = DictWriter(outf, fieldnames=FIELDNAMES) 74 | writer.writeheader() 75 | writer.writerows(stats) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /scripts/bin/visualize_corpora.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Visualize the overlap between corpora.
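Each corpus option names a text file listing one seed per line; the FULL corpus is required because it defines the seed-to-integer mapping that the other corpora are matched against. Example invocation (file names illustrative): `visualize_corpora.py -o overlap.pdf --full full.txt --cmin cmin.txt --minset minset.txt`.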
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from collections import defaultdict, OrderedDict 12 | from pathlib import Path 13 | from typing import Set, TextIO 14 | 15 | from matplotlib import rc, rcParams 16 | from supervenn import supervenn 17 | import matplotlib.pyplot as plt 18 | 19 | from seed_selection.argparse import path_exists 20 | 21 | 22 | def parse_args() -> Namespace: 23 | """Parse command-line arguments.""" 24 | parser = ArgumentParser(description='Visualize corpora overlap') 25 | parser.add_argument('-o', '--output', metavar='PDF', type=Path, 26 | required=True) 27 | parser.add_argument('--cmin', metavar='CORPUS', type=path_exists, 28 | default=None) 29 | parser.add_argument('--full', metavar='CORPUS', type=path_exists, 30 | required=True) 31 | parser.add_argument('--minset', metavar='CORPUS', type=path_exists, 32 | default=None) 33 | parser.add_argument('--unweighted-optimal', metavar='CORPUS', 34 | type=path_exists, default=None) 35 | parser.add_argument('--weighted-optimal', metavar='CORPUS', 36 | type=path_exists, default=None) 37 | parser.add_argument('--weighted-max-freq-optimal', metavar='CORPUS', 38 | type=path_exists, default=None) 39 | return parser.parse_args() 40 | 41 | 42 | def read_corpus(inf: TextIO) -> Set[str]: 43 | """Read the given corpus listing as a set of seed names.""" 44 | return {line.strip() for line in inf} 45 | 46 | 47 | def main(): 48 | """The main function.""" 49 | args = parse_args() 50 | 51 | # Read corpora 52 | corpora = dict() 53 | if args.cmin: 54 | with open(args.cmin, 'r') as inf: 55 | corpora['CMIN'] = read_corpus(inf) 56 | if args.full: 57 | with open(args.full, 'r') as inf: 58 | corpora['FULL'] = read_corpus(inf) 59 | if args.minset: 60 | with open(args.minset, 'r') as inf: 61 | corpora['MSET'] = read_corpus(inf) 62 | if args.unweighted_optimal: 63 | with open(args.unweighted_optimal, 'r') as inf: 64 | corpora['UOPT'] = read_corpus(inf) 65 | if args.weighted_optimal: 66 | with open(args.weighted_optimal, 'r') as inf: 67 | corpora['WOPT'] = read_corpus(inf) 68 | if args.weighted_max_freq_optimal: 69 | with open(args.weighted_max_freq_optimal, 'r') as inf: 70 | corpora['WMOPT'] = read_corpus(inf) 71 | 72 | # Represent the corpora as a set of integers, where each integer uniquely 73 | # maps to a seed file 74 | seed_map = {seed: i for i, seed in enumerate(corpora['FULL'])} 75 | 76 | plot_data = defaultdict(set) 77 | plot_data['FULL'] = set(seed_map.values()) 78 | 79 | for corpus in set(corpora.keys()) - set(['FULL']): 80 | for seed in corpora[corpus]: 81 | if seed not in seed_map: 82 | print('WARN: seed `%s` is not in the FULL corpus' % seed) 83 | continue 84 | plot_data[corpus].add(seed_map[seed]) 85 | 86 | # Configure plot 87 | plt.style.use('seaborn-dark') 88 | 89 | x_size, y_size = rcParams['figure.figsize'] 90 | 91 | rc('pdf', fonttype=42) 92 | rc('ps', fonttype=42) 93 | 94 | # Visualize the corpora 95 | fig = plt.figure(figsize=(x_size, y_size * 0.666)) 96 | ax = fig.add_subplot(1, 1, 1) 97 | 98 | supervenn_data = OrderedDict() 99 | for corpus in ('FULL', 'MSET', 'CMIN', 'UOPT', 'WOPT', 'WMOPT'): 100 | if corpus not in plot_data: 101 | continue 102 | supervenn_data[corpus] = plot_data[corpus] 103 | 104 | supervenn(list(supervenn_data.values()), list(supervenn_data.keys()), 105 | ax=ax, side_plots=False, widths_minmax_ratio=0.5) 106 | 107 | ax.set_xlabel('Seeds (#)') 108 | ax.set_ylabel('Corpora') 109 | 110 | fig.savefig(args.output, bbox_inches='tight') 111 | 112 | 113 | if
__name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /scripts/seed_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Useful constants. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | TARGET_FILE_TYPES = { 9 | 'fts': { 10 | 'freetype2': 'ttf', 11 | 'guetzli': 'jpeg', 12 | 'json': 'json', 13 | 'libarchive': 'gzip', 14 | 'libjpeg-turbo': 'jpeg', 15 | 'libpng': 'png', 16 | 'libxml2': 'xml', 17 | 'pcre2': 'regex', 18 | 're2': 'regex', 19 | 'vorbis': 'ogg', 20 | }, 21 | 'magma': { 22 | 'libpng': 'png', 23 | 'libtiff': 'tiff', 24 | 'libxml2': 'xml', 25 | 'php-exif': 'jpeg', 26 | 'php-json': 'json', 27 | 'php-parser': 'php', 28 | 'poppler': 'pdf', 29 | }, 30 | 'real-world': { 31 | 'freetype2': 'ttf', 32 | 'librsvg': 'svg', 33 | 'libtiff': 'tiff', 34 | 'libxml2': 'xml', 35 | 'poppler': 'pdf', 36 | 'sox-mp3': 'mp3', 37 | 'sox-wav': 'wav', 38 | }, 39 | } 40 | 41 | BENCHMARKS = list(TARGET_FILE_TYPES.keys()) 42 | 43 | MINIMIZE_TECHNIQUES = ( 44 | 'cmin', 45 | 'minset', 46 | 'unweighted-optimal', 47 | 'weighted-optimal', 48 | 'weighted-max-freq-optimal') 49 | CORPORA = ('empty', 'full', *MINIMIZE_TECHNIQUES) 50 | -------------------------------------------------------------------------------- /scripts/seed_selection/afl.py: -------------------------------------------------------------------------------- 1 | """ 2 | AFL helper functions. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from getopt import getopt, GetoptError 9 | from pathlib import Path 10 | from typing import List, TextIO, Tuple 11 | import re 12 | 13 | import pandas as pd 14 | 15 | 16 | START_TIME_RE = re.compile(r'^start_time\s*: (?P<start_time>\d+)') 17 | LAST_UPDATE_RE = re.compile(r'^last_update\s*: (?P<last_update>\d+)') 18 | FUZZER_PID_RE = re.compile(r'^fuzzer_pid\s*: (?P<fuzzer_pid>\d+)') 19 | CYCLES_DONE_RE = re.compile(r'^cycles_done\s*: (?P<cycles_done>\d+)') 20 | EXECS_DONE_RE = re.compile(r'^execs_done\s*: (?P<execs_done>\d+)') 21 | EXECS_PER_SEC_RE = re.compile(r'^execs_per_sec\s*: (?P<execs_per_sec>[\d.]+)') 22 | PATHS_TOTAL_RE = re.compile(r'^paths_total\s*: (?P<paths_total>\d+)') 23 | PATHS_FAVORED_RE = re.compile(r'^paths_favored\s*: (?P<paths_favored>\d+)') 24 | PATHS_FOUND_RE = re.compile(r'^paths_found\s*: (?P<paths_found>\d+)') 25 | PATHS_IMPORTED_RE = re.compile(r'^paths_imported\s*: (?P<paths_imported>\d+)') 26 | MAX_DEPTH_RE = re.compile(r'^max_depth\s*: (?P<max_depth>\d+)') 27 | CUR_PATH_RE = re.compile(r'^cur_path\s*: (?P<cur_path>\d+)') 28 | PENDING_FAVS_RE = re.compile(r'^pending_favs\s*: (?P<pending_favs>\d+)') 29 | PENDING_TOTAL_RE = re.compile(r'^pending_total\s*: (?P<pending_total>\d+)') 30 | VARIABLE_PATHS_RE = re.compile(r'^variable_paths\s*: (?P<variable_paths>\d+)') 31 | STABILITY_RE = re.compile(r'^stability\s*: (?P<stability>[\d.]+)%') 32 | BITMAP_CVG_RE = re.compile(r'^bitmap_cvg\s*: (?P<bitmap_cvg>[\d.]+)%') 33 | UNIQUE_CRASHES_RE = re.compile(r'^unique_crashes\s*: (?P<unique_crashes>\d+)') 34 | UNIQUE_HANGS_RE = re.compile(r'^unique_hangs\s*: (?P<unique_hangs>\d+)') 35 | LAST_PATH_RE = re.compile(r'^last_path\s*: (?P<last_path>\d+)') 36 | LAST_CRASH_RE = re.compile(r'^last_crash\s*: (?P<last_crash>\d+)') 37 | LAST_HANG_RE = re.compile(r'^last_hang\s*: (?P<last_hang>\d+)') 38 | EXECS_SINCE_CRASH_RE = re.compile(r'^execs_since_crash\s*: (?P<execs_since_crash>\d+)') 39 | EXECS_TIMEOUT_RE = re.compile(r'^execs_timeout\s*: (?P<execs_timeout>\d+)') 40 | AFL_BANNER_RE = re.compile(r'^afl_banner\s*: (?P<afl_banner>.+)') 41 | AFL_VERSION_RE = re.compile(r'^afl_version\s*: (?P<afl_version>.+)') 42 | TARGET_MODE_RE = re.compile(r'^target_mode\s*: (?P<target_mode>.+)') 43 | COMMAND_LINE_RE = re.compile(r'^command_line\s*: (?P<afl>.*?afl-.+?)\s+(?P<command_line>.+)') 44 | SLOWEST_EXEC_MS_RE = re.compile(r'^slowest_exec_ms\s*: (?P<slowest_exec_ms>\d+)') 45
| PEAK_RSS_MB_RE = re.compile(r'^peak_rss_mb\s*: (?P<peak_rss_mb>\d+)') 46 | 47 | FUZZER_STATS_RES = ( 48 | START_TIME_RE, 49 | LAST_UPDATE_RE, 50 | FUZZER_PID_RE, 51 | CYCLES_DONE_RE, 52 | EXECS_DONE_RE, 53 | EXECS_PER_SEC_RE, 54 | PATHS_TOTAL_RE, 55 | PATHS_FAVORED_RE, 56 | PATHS_FOUND_RE, 57 | PATHS_IMPORTED_RE, 58 | MAX_DEPTH_RE, 59 | CUR_PATH_RE, 60 | PENDING_FAVS_RE, 61 | PENDING_TOTAL_RE, 62 | VARIABLE_PATHS_RE, 63 | STABILITY_RE, 64 | BITMAP_CVG_RE, 65 | UNIQUE_CRASHES_RE, 66 | UNIQUE_HANGS_RE, 67 | LAST_PATH_RE, 68 | LAST_CRASH_RE, 69 | LAST_HANG_RE, 70 | EXECS_SINCE_CRASH_RE, 71 | EXECS_TIMEOUT_RE, 72 | AFL_BANNER_RE, 73 | AFL_VERSION_RE, 74 | TARGET_MODE_RE, 75 | COMMAND_LINE_RE, 76 | SLOWEST_EXEC_MS_RE, 77 | PEAK_RSS_MB_RE, 78 | ) 79 | 80 | AFL_GETOPT = '+i:o:f:m:t:T:dnCB:S:M:x:Q' 81 | AFLPP_GETOPT = '+c:i:I:o:f:m:t:T:dnCB:S:M:x:QNUWe:p:s:V:E:L:hRP:' 82 | 83 | AFL_GETOPTS = (AFL_GETOPT, AFLPP_GETOPT) 84 | 85 | 86 | def replace_atat(args: List[str], seed: Path) -> Tuple[List[str], bool]: 87 | """Replace the seed placeholder `@@`.""" 88 | new_args = [] 89 | found_atat = False 90 | 91 | for arg in args: 92 | if arg == '@@': 93 | new_args.append(str(seed)) 94 | found_atat = True 95 | else: 96 | new_args.append(arg) 97 | 98 | return new_args, found_atat 99 | 100 | 101 | def read_plot_data(in_file: TextIO) -> pd.DataFrame: 102 | """Read an AFL plot_data file.""" 103 | def fix_map_size(x): 104 | if isinstance(x, str): 105 | return float(x.split('%')[0]) 106 | return x 107 | 108 | # Skip the opening '# ' (if it exists) 109 | pos = in_file.tell() 110 | first_chars = in_file.read(2) 111 | if first_chars != '# ': 112 | in_file.seek(pos) 113 | 114 | # Decide on a delimiter and then read the first line of column headers 115 | header = in_file.readline().strip() 116 | names = [] 117 | for delim in (', ', ','): 118 | if delim in header: 119 | names = header.split(delim) 120 | break 121 | 122 | if not names: 123 | raise Exception('Invalid plot_data header') 124 | 125 | df = pd.read_csv(in_file, names=names, header=None, index_col=False)  # header line already consumed above 126 | df.map_size = df.map_size.apply(fix_map_size) 127 | 128 | return df 129 | 130 | 131 | class FuzzerStats: 132 | """Container for AFL fuzzer_stats file.""" 133 | 134 | def __init__(self, stats_file): 135 | """ 136 | Create a fuzzer stats object from a file object (i.e., one created by 137 | `open`ing a fuzzer_stats file).
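For example (path illustrative): `with open('sync/fuzzer01/fuzzer_stats') as inf: stats = FuzzerStats(inf)`.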
138 | """ 139 | stats = dict() 140 | self._stats = dict() 141 | 142 | for line in stats_file: 143 | stat = next((regex.match(line).groupdict() 144 | for regex in FUZZER_STATS_RES if regex.match(line)), 145 | dict()) 146 | stats.update(stat) 147 | 148 | if not stats: 149 | raise Exception('Empty fuzzer_stats file `%s`' % stats_file.name) 150 | 151 | # Automatically create class attributes based on the fuzzer_stats fields 152 | for k, v in stats.items(): 153 | if k == 'command_line': 154 | afl_opts = None 155 | target_args = None 156 | getopt_error = None 157 | 158 | for afl_getopt in AFL_GETOPTS: 159 | try: 160 | afl_opts, target_args = getopt(v.split(), afl_getopt) 161 | break 162 | except GetoptError as e: 163 | getopt_error = e 164 | 165 | if not afl_opts or not target_args: 166 | raise getopt_error 167 | 168 | setattr(self, 'afl_cmdline', afl_opts) 169 | setattr(self, 'target_cmdline', target_args) 170 | else: 171 | # If convertable to a number, treat as a number 172 | try: 173 | v = float(v) 174 | except ValueError: 175 | pass 176 | 177 | setattr(self, k, v) 178 | self._stats[k] = v 179 | 180 | def gen_command_line(self, testcase: Path) -> Tuple[List[str], str]: 181 | """ 182 | Generate the AFL target command-line for the given testcase. 183 | 184 | Replaces '@@' with the given testcase. This can be either a 185 | command-line argument or stdin (depending on whether '@@' was found on 186 | the AFL command-line). A tuple of both command-line and stdin input is 187 | returned. 188 | """ 189 | new_args, found_atat = replace_atat(self.target_cmdline, testcase) 190 | 191 | if found_atat: 192 | stdin = None 193 | else: 194 | with open(testcase, 'rb') as inf: 195 | stdin = inf.read() 196 | 197 | return new_args, stdin 198 | 199 | def __iter__(self): 200 | for k, v in self._stats.items(): 201 | yield k, v 202 | 203 | def __str__(self): 204 | return '%s' % self._stats 205 | -------------------------------------------------------------------------------- /scripts/seed_selection/argparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | argparse type-checking functions. 
3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from argparse import ArgumentTypeError 9 | from pathlib import Path 10 | import logging 11 | import re 12 | 13 | 14 | MEM_LIMIT_RE = re.compile(r'''(\d+)([TGkM]?)''') 15 | 16 | 17 | def log_level(val: str) -> int: 18 | """Ensure that an argument value is a valid log level.""" 19 | numeric_level = getattr(logging, val.upper(), None) 20 | if not isinstance(numeric_level, int): 21 | raise ArgumentTypeError('%r is not a valid log level' % val) 22 | return numeric_level 23 | 24 | 25 | def mem_limit(val: str) -> int: 26 | """Parse the memory limit (based on AFL's format, where sizes default to MB).""" 27 | limit = 0 28 | if val: 29 | match = MEM_LIMIT_RE.match(val) 30 | if not match: 31 | raise ArgumentTypeError('%r is not a valid memory limit' % val) 32 | limit = int(match.group(1))  # the numeric part must be parsed as an integer 33 | suffix = match.group(2) 34 | if suffix == 'T': 35 | limit *= 1024 * 1024 36 | elif suffix == 'G': 37 | limit *= 1024 38 | elif suffix == 'k': 39 | limit /= 1024 40 | return limit 41 | 42 | 43 | def path_exists(val: str) -> Path: 44 | """Ensure that the path argument exists.""" 45 | try: 46 | p = Path(val) 47 | except Exception as e: 48 | raise ArgumentTypeError('%r is not a valid path' % val) from e 49 | if not p.exists(): 50 | raise ArgumentTypeError('%s does not exist' % p) 51 | return p.resolve() 52 | 53 | 54 | def positive_int(val: str) -> int: 55 | """Ensure that an argument value is a positive integer.""" 56 | try: 57 | ival = int(val) 58 | if ival <= 0: 59 | raise Exception 60 | except Exception as e: 61 | raise ArgumentTypeError('%r is not a positive integer' % val) from e 62 | return ival 63 | -------------------------------------------------------------------------------- /scripts/seed_selection/coverage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extract coverage from HDF5 files. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from functools import partial 9 | from itertools import repeat 10 | from pathlib import Path 11 | from typing import Optional, Set 12 | import multiprocessing.pool as mpp 13 | 14 | from h5py import File 15 | from tqdm import tqdm 16 | 17 | # pylint: disable=unused-import 18 | from . import istarmap 19 | 20 | 21 | def _get_seed_cov(h5_path: Path, seed: str, out_dir: Path, 22 | seeds: Optional[Set[str]] = None) -> Optional[str]: 23 | """Extract the given seed from the HDF5 file specified at `h5_path`.""" 24 | if seeds and seed not in seeds: 25 | return None 26 | 27 | with File(h5_path, 'r') as h5f, open(out_dir / seed, 'w') as outf: 28 | for edge, count in h5f[seed]: 29 | outf.write('%d:%d\n' % (edge, count)) 30 | 31 | return seed 32 | 33 | 34 | def expand_hdf5(h5f: File, out_dir: Path, seeds: Optional[Set[str]] = None, 35 | jobs: int = 1, progress: bool = False): 36 | """ 37 | Expand an HDF5 containing code coverage. 38 | 39 | Args: 40 | h5f: h5py file object. 41 | out_dir: Directory to extract seed coverage to. 42 | seeds: An optional seed set. If provided, only these seeds will be 43 | extracted. 44 | jobs: Number of parallel jobs to run. 45 | progress: Set to `True` for progress bar. 46 | 47 | Returns: 48 | Yields each extracted seed.
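Example (illustrative): `for seed in expand_hdf5(h5f, Path('cov'), jobs=4, progress=True): print(seed)`.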
49 | """ 50 | h5_filename = h5f.filename 51 | 52 | with mpp.Pool(processes=jobs) as pool: 53 | get_cov = partial(_get_seed_cov, out_dir=out_dir, seeds=seeds) 54 | h5_iter = zip(repeat(h5_filename), h5f.keys()) 55 | num_seeds = len(seeds) if seeds else len(list(h5f.keys())) 56 | print('%d seeds to extract' % num_seeds) 57 | iter_func = partial(tqdm, desc='Expanding %s' % h5_filename, 58 | total=num_seeds, unit='seeds') if progress else id 59 | for seed in iter_func(pool.istarmap(get_cov, h5_iter)): 60 | if seed: 61 | yield seed 62 | -------------------------------------------------------------------------------- /scripts/seed_selection/datastore.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data store helper functions. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from pathlib import Path 9 | 10 | from tqdm import tqdm 11 | import requests 12 | 13 | 14 | CHUNK_SIZE = 1024 15 | _URL = 'https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/' 16 | 17 | 18 | def get_file(path: Path, progbar: bool = False) -> bytes: 19 | """Download a file from the data store.""" 20 | content = bytearray() 21 | 22 | with requests.get(f'{_URL}/{path}', stream=True) as r: 23 | if r.status_code != 200: 24 | raise Exception('Failed to download %s from datastore' % path) 25 | 26 | total_kb = int(r.headers.get('content-length', 0)) // CHUNK_SIZE 27 | for data in tqdm(r.iter_content(CHUNK_SIZE), total=total_kb, unit='kB', 28 | disable=not progbar): 29 | content.extend(data) 30 | 31 | return bytes(content) 32 | -------------------------------------------------------------------------------- /scripts/seed_selection/istarmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Monkey-patch `multiprocessing.Pool` to support `istarmap`. 3 | 4 | Adapted from https://stackoverflow.com/a/57364423 5 | """ 6 | 7 | 8 | import multiprocessing.pool as mpp 9 | import sys 10 | 11 | 12 | def istarmap(self, func, iterable, chunksize=1): 13 | """starmap-version of imap.""" 14 | if self._state != mpp.RUN: 15 | raise ValueError("Pool not running") 16 | 17 | if chunksize < 1: 18 | raise ValueError("Chunksize must be 1+, not {0:n}".format(chunksize)) 19 | 20 | task_batches = mpp.Pool._get_tasks(func, iterable, chunksize) 21 | if sys.version_info < (3, 8): 22 | result = mpp.IMapIterator(self._cache) 23 | else: 24 | result = mpp.IMapIterator(self) 25 | self._taskqueue.put((self._guarded_task_generation(result._job, 26 | mpp.starmapstar, 27 | task_batches), 28 | result._set_length)) 29 | return (item for chunk in result for item in chunk) 30 | 31 | 32 | mpp.Pool.istarmap = istarmap 33 | -------------------------------------------------------------------------------- /scripts/seed_selection/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging utilities. 
3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | import logging 9 | 10 | 11 | FORMATTER = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s') 12 | 13 | 14 | def get_logger(name: str): 15 | """Get a formatted logger.""" 16 | handler = logging.StreamHandler() 17 | handler.setFormatter(FORMATTER) 18 | 19 | logger = logging.getLogger(name) 20 | logger.addHandler(handler) 21 | 22 | return logger 23 | -------------------------------------------------------------------------------- /scripts/seed_selection/seeds.py: -------------------------------------------------------------------------------- 1 | """ 2 | Seed size utilities. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from io import BytesIO, TextIOWrapper 9 | from pathlib import Path 10 | from typing import Dict, Optional, Set, TextIO 11 | import csv 12 | 13 | from . import datastore 14 | 15 | 16 | def _download_seed_size_csv(): 17 | content = datastore.get_file(Path('seeds') / 'filesizes.csv') 18 | return TextIOWrapper(BytesIO(content), encoding='utf-8')  # wrap the raw bytes in a buffer for text decoding 19 | 20 | 21 | def get_seed_sizes(seeds: Set[str], 22 | csv_file: Optional[TextIO] = None) -> Dict[str, int]: 23 | """ 24 | Get the file sizes for the given seed set. 25 | 26 | If the seed size CSV is provided, use it. Otherwise, download it from the 27 | datastore. 28 | """ 29 | # Download the seed sizes CSV if it was not provided 30 | if not csv_file: 31 | csv_file = _download_seed_size_csv() 32 | 33 | num_seeds = len(seeds) 34 | num_sizes = 0 35 | sizes = dict() 36 | 37 | reader = csv.DictReader(csv_file, fieldnames=('filetype', 'file', 'size')) 38 | for row in reader: 39 | if num_sizes == num_seeds: 40 | break 41 | if row['file'] in seeds: 42 | sizes[row['file']] = int(row['size']) 43 | num_sizes += 1 44 | return sizes 45 | -------------------------------------------------------------------------------- /scripts/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='fuzzing-seed-selection', 6 | description='Scripts supporting "Seed Selection for Successful Fuzzing"', 7 | author='Adrian Herrera', 8 | url='https://github.com/HexHive/fuzzing-seed-selection', 9 | platforms=['linux'], 10 | packages=find_packages(), 11 | include_package_data=True, 12 | scripts=[ 13 | 'bin/afl_cmin.py', 14 | 'bin/afl_coverage_merge.py', 15 | 'bin/afl_coverage_pca.py', 16 | 'bin/coverage_auc.py', 17 | 'bin/expand_hdf5_coverage.py', 18 | 'bin/fuzz.py', 19 | 'bin/get_corpus.py', 20 | 'bin/get_libs.py', 21 | 'bin/llvm_cov_merge.py', 22 | 'bin/llvm_cov_stats.py', 23 | 'bin/qminset.py', 24 | 'bin/replay_seeds.py', 25 | 'bin/eval_maxsat.py', 26 | 'bin/timestamp_afl.py', 27 | 'bin/timestamp_honggfuzz.py', 28 | 'bin/triage_crashes.py', 29 | 'bin/visualize_corpora.py', 30 | ], 31 | python_requires='>=3.6', 32 | install_requires=[ 33 | 'h5py', 34 | 'Jinja2', 35 | 'lifelines', 36 | 'matplotlib', 37 | 'numpy', 38 | 'pandas', 39 | 'pyelftools', 40 | 'bootstrapped', 41 | 'scikit-learn', 42 | 'supervenn', 43 | 'tabulate', 44 | 'toml', 45 | 'tqdm', 46 | 'watchdog', 47 | 'requests', 48 | 'moonbeam @ git+https://gitlab.anu.edu.au/lunar/moonbeam.git#egg=moonbeam', 49 | ], 50 | ) 51 | --------------------------------------------------------------------------------