├── .gitignore ├── README.md ├── fuzzing ├── README.md ├── config │ ├── fts-bug-regexs.toml │ └── targets.toml ├── extract-from-container.sh ├── fts │ ├── afl.Dockerfile │ ├── aflpp.Dockerfile │ ├── aflpp_driver_GNUmakefile │ ├── base.Dockerfile │ ├── build.sh │ ├── common.sh │ ├── coverage.Dockerfile │ ├── libarchive-2017-01-04-build.sh │ ├── libjpeg-turbo-07-2017-build.sh │ └── libxml2-v2.9.2-build.sh ├── magma │ ├── clean_corpora.sh │ ├── log-execs.patch │ ├── setup.sh │ ├── survival_analysis.py │ └── v1.1.patch ├── readelf │ ├── Dockerfile │ ├── afl_llvm_mode.Makefile │ ├── data │ │ ├── aflfast-ascii-cov.csv.gz │ │ ├── aflfast-cmin-cov.csv.gz │ │ ├── aflfast-singleton-cov.csv.gz │ │ ├── aflplusplus-ascii-cov.csv.gz │ │ ├── aflplusplus-cmin-cov.csv.gz │ │ ├── aflplusplus-singleton-cov.csv.gz │ │ ├── honggfuzz-ascii-cov.csv.gz │ │ ├── honggfuzz-cmin-cov.csv.gz │ │ ├── honggfuzz-singleton-cov.csv.gz │ │ └── readelf-experiment.csv.gz │ ├── scripts │ │ ├── fuzz.sh │ │ ├── get_afl_cov.sh │ │ ├── get_hfuzz_cov.sh │ │ ├── merge_cov.py │ │ ├── plot_cov.py │ │ └── requirements.txt │ └── seeds │ │ ├── cmin-seeds.tar.xz │ │ └── uninformed-seed └── real-world │ ├── freetype2 │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── librsvg │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── libtiff │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── libxml2 │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile │ ├── poppler │ ├── afl-toolchain-llvm.cmake │ ├── afl.Dockerfile │ ├── base.Dockerfile │ ├── coverage.Dockerfile │ └── toolchain.cmake │ └── sox │ ├── afl.Dockerfile │ ├── base.Dockerfile │ └── coverage.Dockerfile ├── optimin ├── .gitignore ├── CMakeLists.txt ├── Dockerfile ├── LICENSE.jsoncpp ├── optimin.py └── src │ ├── AFLShowmapMaxSat.cpp │ ├── AFLShowmapZ3.cpp │ ├── CMakeLists.txt │ ├── Common.cpp │ ├── Common.h │ ├── LLVMCovZ3.cpp │ ├── ProgressBar.h │ ├── Z3Common.cpp │ ├── Z3Common.h │ └── jsoncpp │ ├── json │ ├── json-forwards.h │ ├── json.h │ └── jsoncpp.cpp │ └── jsoncpp.cpp └── scripts ├── README.md ├── bin ├── afl_cmin.py ├── afl_coverage_merge.py ├── afl_coverage_pca.py ├── coverage_auc.py ├── eval_maxsat.py ├── expand_hdf5_coverage.py ├── fuzz.py ├── get_corpus.py ├── get_libs.py ├── llvm_cov_merge.py ├── llvm_cov_stats.py ├── qminset.py ├── replay_seeds.py ├── timestamp_afl.py ├── timestamp_honggfuzz.py ├── triage_crashes.py └── visualize_corpora.py ├── seed_selection ├── __init__.py ├── afl.py ├── argparse.py ├── coverage.py ├── datastore.py ├── istarmap.py ├── log.py └── seeds.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Seed Selection for Successful Fuzzing 2 | 3 | The artifact associated with our ISSTA 2021 paper "[Seed Selection for 4 | Successful Fuzzing](https://hexhive.epfl.ch/publications/files/21ISSTA2.pdf)". 5 | While our primary artifact is the OptiMin corpus minimizer, we also provide the 6 | necessary infrastructure to reproduce our fuzzing experiments. 7 | 8 | ## Getting Started 9 | 10 | ### Setup your environment 11 | 12 | Set up your environment (assumes a modern Ubuntu OS, `>= 18.04 && <= 20.04`, 13 | and Python, `>= 3.6 && <= 3.8`): 14 | 15 | ```bash 16 | # Install prerequisites 17 | sudo apt update 18 | sudo apt install -y git docker.io python3-venv 19 | 20 | # Add yourself to the docker group (don't forget to log out and log back in so 21 | # that the group changes take effect) 22 | sudo usermod -aG docker $USER 23 | 24 | # Setup virtualenv 25 | python3 -m venv seed_selection 26 | source seed_selection/bin/activate 27 | pip3 install wheel 28 | 29 | # Get this repo 30 | git clone https://github.com/HexHive/fuzzing-seed-selection 31 | pip3 install fuzzing-seed-selection/scripts 32 | ``` 33 | 34 | ### Build OptiMin 35 | 36 | OptiMin is our SAT-based corpus minimization tool. 
It supports coverage
37 | generated by both [AFL](https://github.com/google/AFL) and
38 | [llvm-cov](https://llvm.org/docs/CommandGuide/llvm-cov.html) (only AFL is used
39 | in the paper). Similarly, OptiMin can use either
40 | [Z3](https://github.com/Z3Prover/z3) or
41 | [EvalMaxSAT](https://github.com/FlorentAvellaneda/EvalMaxSAT) as its backend solver (only EvalMaxSAT
42 | is used in the paper). To build:
43 | 
44 | ```bash
45 | docker build -t seed-selection/optimin fuzzing-seed-selection/optimin
46 | ```
47 | 
48 | ### Run OptiMin
49 | 
50 | OptiMin takes a large "collection corpus" and selects a subset of seeds that are
51 | used for fuzzing. This is based on the _code coverage_ for each seed in the
52 | collection corpus.
53 | 
54 | While we provide tools to generate code coverage information for a given corpus
55 | (based on [`afl-showmap`](https://github.com/google/AFL/blob/master/afl-showmap.c)),
56 | this can be time-consuming (depending on the size of the corpus). Thus, we
57 | provide seed traces in [HDF5](https://en.wikipedia.org/wiki/Hierarchical_Data_Format)
58 | archives.
59 | 
60 | For example, to perform a corpus minimization based on Google FTS FreeType2
61 | coverage:
62 | 
63 | 1. Download the coverage HDF5 from
64 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/afl-showmap-coverage/fts/freetype2.hdf5).
65 | 
66 | ```bash
67 | wget https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/afl-showmap-coverage/fts/freetype2.hdf5
68 | ```
69 | 1. Expand the HDF5 using the
70 | [`expand_hdf5_coverage.py`](scripts/bin/expand_hdf5_coverage.py) script
71 | 
72 | ```bash
73 | expand_hdf5_coverage.py -i freetype2.hdf5 -o /tmp/freetype2
74 | 
75 | # Expected output:
76 | #
77 | # 466 seeds to extract
78 | # Expanding freetype2.hdf5: 100%
79 | ```
80 | 1. Perform an unweighted minimization based on edges only (not hit counts)
81 | 
82 | ```bash
83 | docker run -v /tmp/freetype2:/tmp/freetype2 \
84 | seed-selection/optimin -e /tmp/freetype2
85 | 
86 | # Expected output:
87 | #
88 | # afl-showmap corpus minimization
89 | #
90 | # [############################################################] 100% Reading seed coverage
91 | # [############################################################] 100% Generating clauses
92 | # [*] Running Optimin on /tmp/freetype2
93 | # [*] Running EvalMaxSAT on WCNF
94 | # [+] EvalMaxSAT completed
95 | # [*] Parsing EvalMaxSAT output
96 | # [+] Solution found for /tmp/freetype2
97 | #
98 | # [+] Total time: 0.01 sec
99 | # [+] Num. seeds: 37
100 | #
101 | # ...
102 | ```
103 | 1. Perform an unweighted minimization including edge hit counts
104 | 
105 | ```bash
106 | docker run -v /tmp/freetype2:/tmp/freetype2 \
107 | seed-selection/optimin /tmp/freetype2
108 | 
109 | # Expected output:
110 | #
111 | # afl-showmap corpus minimization
112 | #
113 | # [############################################################] 100% Reading seed coverage
114 | # [############################################################] 100% Generating clauses
115 | # [*] Running Optimin on /tmp/freetype2
116 | # [*] Running EvalMaxSAT on WCNF
117 | # [+] EvalMaxSAT completed
118 | # [*] Parsing EvalMaxSAT output
119 | # [+] Solution found for /tmp/freetype2
120 | #
121 | # [+] Total time: 0.01 sec
122 | # [+] Num. seeds: 53
123 | #
124 | # ...
125 | ```
126 | 1. Download the file weights (i.e., sizes) from
127 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/ttf.csv).
128 | 
129 | ```bash
130 | wget https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/ttf.csv
131 | ```
132 | 1. Perform a weighted minimization based on file size and edges only
133 | 
134 | ```bash
135 | docker run -v /tmp/freetype2:/tmp/freetype2 -v $(pwd):/tmp \
136 | seed-selection/optimin -e -w /tmp/ttf.csv /tmp/freetype2
137 | 
138 | # Expected output:
139 | #
140 | # afl-showmap corpus minimization
141 | #
142 | # [*] Reading weights from `/tmp/ttf.csv`... 0s
143 | # [############################################################] 100% Calculating top
144 | # [############################################################] 100% Reading seed coverage
145 | # [############################################################] 100% Generating clauses
146 | # [*] Running Optimin on /tmp/freetype2
147 | # [*] Running EvalMaxSAT on WCNF
148 | # [+] EvalMaxSAT completed
149 | # [*] Parsing EvalMaxSAT output
150 | # [+] Solution found for /tmp/freetype2
151 | #
152 | # [+] Total time: 0.01 sec
153 | # [+] Num. seeds: 37
154 | #
155 | # ...
156 | ```
157 | 
158 | ## Detailed Description
159 | 
160 | ### Additional Files
161 | 
162 | The sizes of our collection corpora mean that we cannot store them in a Git
163 | repo. Instead, we store ancillary data at ANU's DataCommons repository,
164 | available [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/).
165 | 
166 | ### Tracing Code Coverage
167 | 
168 | Corpus minimization is typically based on some notion of "code coverage". To
169 | ensure a fair and uniform comparison across the three corpus minimization tools
170 | (`afl-cmin`, MinSet, and OptiMin), we use AFL's notion of _edge coverage_. This
171 | coverage information can be generated as follows:
172 | 
173 | 1. Compile your target with AFL instrumentation. See the AFL
174 | [documentation](https://lcamtuf.coredump.cx/afl/) for instructions on how to
175 | do this.
176 | 1. Run [`replay_seeds.py`](scripts/bin/replay_seeds.py) with your target program
177 | and your collection corpus. This will generate an HDF5 archive containing
178 | coverage information that can then be minimized.
179 | 
180 | ### Corpus Minimization
181 | 
182 | Our paper surveys a number of corpus minimization tools: OptiMin, `afl-cmin`,
183 | and MinSet. A more detailed explanation of how to use these tools and reproduce
184 | our results is given below.
185 | 
186 | #### OptiMin
187 | 
188 | Instructions for running OptiMin are given above. As described previously, a
189 | weighted minimization can be performed by supplying a weights CSV file to
190 | OptiMin's `-w` option. This weights file has the following format:
191 | 
192 | ```
193 | FILE_1,WEIGHT
194 | FILE_2,WEIGHT
195 | FILE_3,WEIGHT
196 | FILE_4,WEIGHT
197 | FILE_5,WEIGHT
198 | ```
199 | 
200 | Where `FILE_1`, `FILE_2`, ... correspond to the names of files within the
201 | corpus directory (only the filename needs to be provided: the corpus directory
202 | path should **not** be provided), and `WEIGHT` is an unsigned integer >= 1. We
203 | provide weights for our collection corpora
204 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/weights/).
205 | 
206 | #### `afl-cmin`
207 | 
208 | [`afl-cmin`](https://github.com/google/AFL/blob/master/afl-cmin) is AFL's
209 | inbuilt corpus minimization tool. [`afl_cmin.py`](scripts/bin/afl_cmin.py) wraps
210 | `afl-cmin` so that it outputs the names of the seeds in the minimized corpus
211 | (rather than copying the seeds and wasting storage).
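For reference, the underlying `afl-cmin` invocation driven by the wrapper has the general form below. This is a sketch only: the corpus paths and target binary are illustrative, and the memory limit/timeout should match the values used when fuzzing the target (see [`targets.toml`](fuzzing/config/targets.toml)).

```bash
# afl-cmin traces every seed in -i against the AFL-instrumented target and
# keeps a subset of seeds that preserves all observed coverage tuples;
# @@ is replaced with the path of each seed at run time
afl-cmin -i collection-corpus/ -o minimized-corpus/ \
    -m 800 -t 5000 -- ./target-afl @@
```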
212 | 213 | #### MinSet 214 | 215 | MinSet is the tool developed by Rebert et al. in their paper [Optimizing Seed 216 | Selection for Fuzzing](https://www.usenix.org/system/files/conference/usenixsecurity14/sec14-paper-rebert.pdf). 217 | While we were able to obtain the tool from the authors, it is not open source 218 | and thus we are unable to provide it here. Please contact the authors if you 219 | would like to obtain the source code. 220 | 221 | If you have access to the source code, you can perform a MinSet minimization by: 222 | 223 | 1. Generate code coverage as described [here](#tracing-code-coverage) 224 | 1. Expand the generated HDF5 archive using 225 | [`expand_hdf5_coverage.py`](scripts/bin/expand_hdf5_coverage.py) 226 | 1. Convert the expanded coverage to a set of bitvector traces using 227 | [MoonBeam](https://gitlab.anu.edu.au/lunar/moonbeam) 228 | 1. Run the [`qminset.py`](scripts/bin/qminset.py) wrapper on the bitvector 229 | traces 230 | 231 | ### Fuzzing Experiments 232 | 233 | In addition to the OptiMin tool, we also provide the necessary infrastructure 234 | to reproduce our fuzzing experiments. Detailed instructions are provided 235 | [here](fuzzing/README.md). 236 | -------------------------------------------------------------------------------- /fuzzing/README.md: -------------------------------------------------------------------------------- 1 | # Fuzzing Targets 2 | 3 | This directory contains build scripts for building the targets fuzzed in the 4 | paper. Like in the paper, we group the targets into three benchmarks: Magma, 5 | the Google Fuzzer Test Suite (FTS), and a set of real-world programs. 6 | 7 | Note: AFL typically requires that coredumps be disabled: 8 | 9 | ```bash 10 | sudo bash -c 'echo core >/proc/sys/kernel/core_pattern' 11 | sudo systemctl disable apport.service 12 | ``` 13 | 14 | ## Magma 15 | 16 | [Magma](https://hexhive.epfl.ch/magma) is a ground-truth fuzzing benchmark. To 17 | build: 18 | 19 | 1. Install dependencies, as described [here](https://hexhive.epfl.ch/magma/docs/getting-started.html) 20 | 1. Run `./magma/setup.sh /magma/benchmark/dir` 21 | 1. Clean out the default corpora `./magma/clean_corpora.sh /magma/benchmark/dir` 22 | 1. Copy the relevant `TARGET` corpus into 23 | `/magma/benchmark/dir/targets/TARGET/corpus/PROGRAM`. You can either distill 24 | your own corpus or use one that we have already prepared. For the former, 25 | see the [Run OptiMin](../README.md#run-optimin) instructions. For the latter, 26 | use the [`get_corpus.py`](../scripts/bin/get_corpus.py) script. E.g., to 27 | download the `afl-cmin`-minimized libpng corpus (this can take up to 15-20 28 | mins): 29 | 30 | ```bash 31 | get_corpus.py --benchmark magma --corpus cmin --log info --target libpng \ 32 | /magma/benchmark/dir/targets/libpng/corpus/libpng_read_fuzzer 33 | ``` 34 | 1. Set `WORKDIR` in `/magma/benchmark/dir/tools/captain/captainrc` to something 35 | appropriate. If you only want to fuzz a single target (e.g., libpng), edit 36 | the `afl_TARGETS`/`aflplusplus_TARGETS` entry in `captainrc` 37 | 1. Start fuzzing! 38 | 39 | ```bash 40 | cd /magma/benchmark/dir/tools/captain 41 | ./run.sh 42 | ``` 43 | 1. [This](https://github.com/HexHive/magma/blob/dev/tools/benchd/survival_analysis.py) 44 | Magma script can be used to perform the survival analysis on the results 45 | 46 | ## FTS 47 | 48 | The [Google Fuzzer Test Suite](https://github.com/google/fuzzer-test-suite) is 49 | a widely-used fuzzing benchmark. 50 | 51 | 1. 
Build the base image
52 | 
53 | ```bash
54 | docker build -t seed-selection/fts/base -f fts/base.Dockerfile fts
55 | ```
56 | 1. Build the FTS targets with the required `$INSTRUMENTATION` (one of `afl`,
57 | `aflpp`, or `coverage`)
58 | 
59 | ```bash
60 | docker build -t seed-selection/fts/$INSTRUMENTATION \
61 | -f fts/$INSTRUMENTATION.Dockerfile fts
62 | ```
63 | 1. Extract the relevant files for fuzzing, as instructed at the end of the
64 | previous step. E.g., for AFL++ (`$INSTRUMENTATION` set to `aflpp`)
65 | 
66 | ```bash
67 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /aflplusplus .
68 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /build-aflpp .
69 | ./extract-from-container.sh seed-selection/fts/$INSTRUMENTATION /build-cmplog .
70 | ```
71 | 1. Create a fuzzing corpus using the
72 | [`get_corpus.py`](../scripts/bin/get_corpus.py) script
73 | 1. Start fuzzing. The runtime fuzzer configurations (e.g., timeouts and memory
74 | limits) that we used are stored [here](config/targets.toml). The `fuzz.py`
75 | script (in `scripts/bin`) can be used to launch multiple campaigns in
76 | parallel. For example, to fuzz FreeType2 with AFL++ and the provided seeds:
77 | 
78 | ```bash
79 | LD_LIBRARY_PATH=$(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/lib \
80 | fuzz.py -i $(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/seeds \
81 | -o fuzz-out -n2 --num-trials 30 --trial-len $((18*60*60)) \
82 | --cmp-log $(pwd)/build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/freetype2-2017-aflpp_cmplog \
83 | $(pwd)/build-aflpp/RUNDIR-aflpp-freetype2-2017/freetype2-2017-aflpp
84 | ```
85 | 1. We use the regexes [here](config/fts-bug-regexs.toml) to determine each
86 | crash's root cause.
87 | 
88 | ## Real-world Targets
89 | 
90 | A set of real-world programs.
91 | 
92 | 1. Build the base image for a given `$TARGET` (e.g., sox, freetype)
93 | 
94 | ```bash
95 | docker build -t seed-selection/real-world/$TARGET/base \
96 | -f real-world/$TARGET/base.Dockerfile real-world/$TARGET
97 | ```
98 | 1. Build the target with the required `$INSTRUMENTATION`
99 | 
100 | ```bash
101 | docker build -t seed-selection/real-world/$TARGET/$INSTRUMENTATION \
102 | -f real-world/$TARGET/$INSTRUMENTATION.Dockerfile \
103 | real-world/$TARGET
104 | ```
105 | 1. Extract the relevant files for fuzzing, using the `extract-from-container.sh`
106 | script
107 | 1. Create a fuzzing corpus using the
108 | [`get_corpus.py`](../scripts/bin/get_corpus.py) script
109 | 1. Start fuzzing. Again, the `fuzz.py` script can be used.
110 | 
111 | ## `readelf`
112 | 
113 | To reproduce the `readelf` experiment (Section 3.1 of the paper):
114 | 
115 | 1. Build the Docker image
116 | 
117 | ```bash
118 | docker build -t seed-selection/readelf readelf
119 | ```
120 | 1. Start the container, run the fuzzers, and process the results
121 | 
122 | ```bash
123 | docker run -ti --rm seed-selection/readelf
124 | 
125 | # Execute the following commands inside the Docker container
126 | 
127 | ./fuzz.sh
128 | 
129 | ./get_afl_cov.sh
130 | ./get_hfuzz_cov.sh
131 | 
132 | ./merge_cov.py
133 | ./plot_cov.py
134 | ```
135 | 
136 | ## Generating LLVM Code Coverage
137 | 
138 | We use LLVM's [source-code-level
139 | coverage](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html) in our
140 | evaluation. To generate LLVM coverage after a fuzzing campaign:
141 | 
142 | 1. Build the target with LLVM's coverage instrumentation. For Magma, this
143 | requires building with the `llvm_cov` fuzzer. For the FTS and real-world
144 | targets, build with the `coverage` Dockerfile.
145 | 1. Replay the final fuzzing queue (in AFL, this is the `queue` output directory)
146 | using the [`llvm_cov_merge`](../scripts/bin/llvm_cov_merge.py) script
147 | 1. Summarize the results using
148 | [`llvm_cov_stats`](../scripts/bin/llvm_cov_stats.py)
149 | 
-------------------------------------------------------------------------------- /fuzzing/config/fts-bug-regexs.toml: -------------------------------------------------------------------------------- 
1 | # Regular expressions for triaging/deduplicating Google FTS bugs. The regexes
2 | # basically match on ASan reports.
3 | #
4 | # Author: Adrian Herrera
5 | 
6 | [guetzli]
7 | a = "output_image\.cc:398.*?Assertion \`coeff % quant == 0\' failed\."
8 | 
9 | [json]
10 | a = "fuzzer-parse_json\.cpp:50.*?Assertion \`s1 == s2\' failed\."
11 | 
12 | [libarchive]
13 | a = "heap-buffer-overflow"
14 | 
15 | [libxml2]
16 | a = "READ of size .+? xmlParseXMLDecl .+? xmlParseDocument .+? xmlDoRead"
17 | b = "READ of size .+? xmlDictComputeFastQKey .+? xmlDictQLookup .+? xmlSAX2StartElementNs .+? xmlParseStartTag2 .+? xmlParseElement"
18 | c = "READ of size .+? xmlDictComputeFastKey .+? xmlDictLookup .+? xmlParseNCNameComplex"
19 | 
20 | [pcre2]
21 | a = "READ of size .+? match .+?pcre2_match\.c:5968:11.+? pcre2_match_8 .+? regexec"
22 | b = "READ of size .+? match .+?pcre2_match\.c:1426:16"
23 | 
24 | [re2]
25 | b = "WRITE of size .+? re2::NFA::Search\(.+? re2::Prog::SearchNFA\(.+? re2::RE2::Match\(.+? re2::RE2::DoMatch\("
26 | 
27 | [vorbis]
28 | a = "READ of size .+? vorbis_book_decodevv_add .+? res2_inverse .+? mapping0_inverse"
29 | b = "READ of size .+? vorbis_book_decodev_add .+? _01inverse .+? res1_inverse .+? mapping0_inverse"
30 | c = "SEGV on unknown address .+? _01inverse .+? res1_inverse .+? mapping0_inverse"
31 | 
-------------------------------------------------------------------------------- /fuzzing/config/targets.toml: -------------------------------------------------------------------------------- 
1 | # This file describes the AFL arguments for the targets in two of our three
2 | # benchmarks (Google FTS, and a set of real-world programs). Magma is not
3 | # included because we just use the default run configuration.
4 | #
5 | # Author: Adrian Herrera
6 | 
7 | [fts]
8 | 
9 | [fts.freetype2]
10 | memory = 600
11 | args = "-1"
12 | 
13 | [fts.guetzli]
14 | memory = 1024
15 | timeout = 5000
16 | args = "-1"
17 | 
18 | [fts.json]
19 | memory = 800
20 | args = "-1"
21 | 
22 | [fts.libarchive]
23 | memory = 800
24 | args = "-1"
25 | 
26 | [fts.libjpeg-turbo]
27 | memory = 800
28 | args = "-1"
29 | 
30 | [fts.libpng]
31 | memory = 800
32 | args = "-1"
33 | 
34 | [fts.libxml2]
35 | memory = 800
36 | args = "-1"
37 | 
38 | [fts.pcre2]
39 | memory = 800
40 | args = "-1"
41 | 
42 | [fts.re2]
43 | memory = 800
44 | args = "-1"
45 | 
46 | [fts.vorbis]
47 | memory = 800
48 | args = "-1"
49 | 
50 | [real-world]
51 | 
52 | [real-world.freetype2]
53 | driver = "char2svg"
54 | memory = 600
55 | args = "@@ @"
56 | 
57 | [real-world.librsvg]
58 | driver = "rsvg-convert"
59 | memory = 800
60 | timeout = 3500
61 | args = "-o /dev/null @@"
62 | 
63 | [real-world.libtiff]
64 | driver = "tiff2pdf"
65 | memory = 800
66 | args = "-o /dev/null @@"
67 | 
68 | [real-world.libxml2]
69 | driver = "xmllint"
70 | memory = 600
71 | args = "-o /dev/null @@"
72 | 
73 | [real-world.poppler]
74 | driver = "pdftotext"
75 | memory = 850
76 | timeout = 3500
77 | args = "@@ /dev/null"
78 | 
79 | [real-world.sox.mp3]
80 | driver = "sox"
81 | memory = 800
82 | args = "--single-threaded @@ -b 16 -t aiff /dev/null channels 1 rate 16k fade 3 norm"
83 | 
84 | [real-world.sox.wav]
85 | driver = "sox"
86 | memory = 800
87 | args = "--single-threaded @@ -b 16 -t aiff /dev/null channels 1 rate 16k fade 3 norm"
88 | 
-------------------------------------------------------------------------------- /fuzzing/extract-from-container.sh: -------------------------------------------------------------------------------- 
1 | #!/bin/bash
2 | 
3 | set -e
4 | 
5 | if [ "$#" -ne 3 ]; then
6 | echo "Usage: $0 <DOCKER_IMAGE> <CONTAINER_PATH> <HOST_DIR>"
7 | echo ""
8 | echo "DOCKER_IMAGE: Name of the Docker image to extract a directory from"
9 | echo "CONTAINER_PATH: The directory in the Docker container to extract"
10 | echo "HOST_DIR: The location of the host directory to extract to"
11 | exit 1
12 | fi
13 | 
14 | DOCKER_IMAGE=$1
15 | CONTAINER_PATH=$2
16 | HOST_DIR=$(mkdir -p $3 && cd $3 && pwd)
17 | 
18 | docker_container=$(docker run -d ${DOCKER_IMAGE} sleep 1000)
19 | docker cp ${docker_container}:${CONTAINER_PATH} ${HOST_DIR}
20 | docker kill ${docker_container} > /dev/null
21 | docker rm ${docker_container} > /dev/null
22 | 
-------------------------------------------------------------------------------- /fuzzing/fts/afl.Dockerfile: -------------------------------------------------------------------------------- 
1 | FROM seed-selection/fts/base
2 | 
3 | # Get and build AFL
4 | ENV AFL_CC=clang-8
5 | ENV AFL_CXX=clang++-8
6 | 
7 | RUN git clone --no-checkout https://github.com/google/afl && \
8 | git -C afl checkout v2.56b
9 | RUN cd afl && \
10 | export LLVM_CONFIG=llvm-config-8 && \
11 | export CC=$AFL_CC && \
12 | export CXX=$AFL_CXX && \
13 | make -j && \
14 | make -j -C llvm_mode
15 | 
16 | # Build AFL FTS
17 | ENV AFL_SRC="/afl"
18 | ENV FUZZING_ENGINE="afl"
19 | RUN mkdir /build-afl
20 | 
21 | RUN cd /build-afl && /fuzzer-test-suite/build.sh freetype2-2017
22 | RUN get_libs.py -o /build-afl/RUNDIR-afl-freetype2-2017/lib \
23 | /build-afl/RUNDIR-afl-freetype2-2017/freetype2-2017-afl
24 | 
25 | RUN cd /build-afl && /fuzzer-test-suite/build.sh guetzli-2017-3-30
26 | RUN get_libs.py -o /build-afl/RUNDIR-afl-guetzli-2017-3-30/lib \
27 | /build-afl/RUNDIR-afl-guetzli-2017-3-30/guetzli-2017-3-30-afl
28 | 
29 | RUN cd /build-afl && 
/fuzzer-test-suite/build.sh json-2017-02-12 30 | RUN get_libs.py -o /build-afl/RUNDIR-afl-json-2017-02-12/lib \ 31 | /build-afl/RUNDIR-afl-json-2017-02-12/json-2017-02-12-afl 32 | 33 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libarchive-2017-01-04 34 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libarchive-2017-01-04/lib \ 35 | /build-afl/RUNDIR-afl-libarchive-2017-01-04/libarchive-2017-01-04-afl 36 | 37 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 38 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libjpeg-turbo-07-2017/lib \ 39 | /build-afl/RUNDIR-afl-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-afl 40 | 41 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libpng-1.2.56 42 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libpng-1.2.56/lib \ 43 | /build-afl/RUNDIR-afl-libpng-1.2.56/libpng-1.2.56-afl 44 | 45 | RUN cd /build-afl && /fuzzer-test-suite/build.sh libxml2-v2.9.2 46 | RUN get_libs.py -o /build-afl/RUNDIR-afl-libxml2-v2.9.2/lib \ 47 | /build-afl/RUNDIR-afl-libxml2-v2.9.2/libxml2-v2.9.2-afl 48 | 49 | RUN cd /build-afl && /fuzzer-test-suite/build.sh pcre2-10.00 50 | RUN get_libs.py -o /build-afl/RUNDIR-afl-pcre2-10.00/lib \ 51 | /build-afl/RUNDIR-afl-pcre2-10.00/pcre2-10.00-afl 52 | 53 | RUN cd /build-afl && /fuzzer-test-suite/build.sh re2-2014-12-09 54 | RUN get_libs.py -o /build-afl/RUNDIR-afl-re2-2014-12-09/lib \ 55 | /build-afl/RUNDIR-afl-re2-2014-12-09/re2-2014-12-09-afl 56 | 57 | RUN cd /build-afl && /fuzzer-test-suite/build.sh vorbis-2017-12-11 58 | RUN get_libs.py -o /build-afl/RUNDIR-afl-vorbis-2017-12-11/lib \ 59 | /build-afl/RUNDIR-afl-vorbis-2017-12-11/vorbis-2017-12-11-afl 60 | 61 | RUN echo "\033[0;33m * Extract the '/build-afl' and '/afl' directories\033[0m" 62 | -------------------------------------------------------------------------------- /fuzzing/fts/aflpp.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/fts/base 2 | 3 | # Get and build AFL++ 4 | ENV AFL_CC=clang-8 5 | ENV AFL_CXX=clang++-8 6 | 7 | RUN git clone --no-checkout https://github.com/aflplusplus/aflplusplus && \ 8 | git -C aflplusplus checkout 5ee63a6e6267e448342ccb28cc8d3c0d34ffc1cd 9 | ADD aflpp_driver_GNUmakefile /aflplusplus/examples/aflpp_driver/GNUmakefile 10 | RUN cd aflplusplus && \ 11 | export LLVM_CONFIG="llvm-config-8" && \ 12 | make -j && \ 13 | make -j -C llvm_mode 14 | RUN cd aflplusplus && \ 15 | export LLVM_CONFIG="llvm-config-8" && \ 16 | export CFLAGS="-m32" && \ 17 | make -j -C examples/aflpp_driver libAFLDriver.a 18 | 19 | # Build AFL++ FTS 20 | ENV AFL_SRC="/aflplusplus" 21 | ENV FUZZING_ENGINE="aflpp" 22 | RUN mkdir /build-aflpp 23 | 24 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh freetype2-2017 25 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-freetype2-2017/lib \ 26 | /build-aflpp/RUNDIR-aflpp-freetype2-2017/freetype2-2017-aflpp 27 | 28 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh guetzli-2017-3-30 29 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-guetzli-2017-3-30/lib \ 30 | /build-aflpp/RUNDIR-aflpp-guetzli-2017-3-30/guetzli-2017-3-30-aflpp 31 | 32 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh json-2017-02-12 33 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-json-2017-02-12/lib \ 34 | /build-aflpp/RUNDIR-aflpp-json-2017-02-12/json-2017-02-12-aflpp 35 | 36 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libarchive-2017-01-04 37 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libarchive-2017-01-04/lib \ 38 | 
/build-aflpp/RUNDIR-aflpp-libarchive-2017-01-04/libarchive-2017-01-04-aflpp 39 | 40 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 41 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libjpeg-turbo-07-2017/lib \ 42 | /build-aflpp/RUNDIR-aflpp-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-aflpp 43 | 44 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libpng-1.2.56 45 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libpng-1.2.56/lib \ 46 | /build-aflpp/RUNDIR-aflpp-libpng-1.2.56/libpng-1.2.56-aflpp 47 | 48 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh libxml2-v2.9.2 49 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-libxml2-v2.9.2/lib \ 50 | /build-aflpp/RUNDIR-aflpp-libxml2-v2.9.2/libxml2-v2.9.2-aflpp 51 | 52 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh pcre2-10.00 53 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-pcre2-10.00/lib \ 54 | /build-aflpp/RUNDIR-aflpp-pcre2-10.00/pcre2-10.00-aflpp 55 | 56 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh re2-2014-12-09 57 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-re2-2014-12-09/lib \ 58 | /build-aflpp/RUNDIR-aflpp-re2-2014-12-09/re2-2014-12-09-aflpp 59 | 60 | RUN cd /build-aflpp && /fuzzer-test-suite/build.sh vorbis-2017-12-11 61 | RUN get_libs.py -o /build-aflpp/RUNDIR-aflpp-vorbis-2017-12-11/lib \ 62 | /build-aflpp/RUNDIR-aflpp-vorbis-2017-12-11/vorbis-2017-12-11-aflpp 63 | 64 | # Build AFL++ FTS in CmpLog mode 65 | ENV FUZZING_ENGINE="aflpp_cmplog" 66 | RUN mkdir /build-cmplog 67 | 68 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh freetype2-2017 69 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/lib \ 70 | /build-cmplog/RUNDIR-aflpp_cmplog-freetype2-2017/freetype2-2017-aflpp_cmplog 71 | 72 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh guetzli-2017-3-30 73 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-guetzli-2017-3-30/lib \ 74 | /build-cmplog/RUNDIR-aflpp_cmplog-guetzli-2017-3-30/guetzli-2017-3-30-aflpp_cmplog 75 | 76 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh json-2017-02-12 77 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-json-2017-02-12/lib \ 78 | /build-cmplog/RUNDIR-aflpp_cmplog-json-2017-02-12/json-2017-02-12-aflpp_cmplog 79 | 80 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libarchive-2017-01-04 81 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libarchive-2017-01-04/lib \ 82 | /build-cmplog/RUNDIR-aflpp_cmplog-libarchive-2017-01-04/libarchive-2017-01-04-aflpp_cmplog 83 | 84 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 85 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libjpeg-turbo-07-2017/lib \ 86 | /build-cmplog/RUNDIR-aflpp_cmplog-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-aflpp_cmplog 87 | 88 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libpng-1.2.56 89 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libpng-1.2.56/lib \ 90 | /build-cmplog/RUNDIR-aflpp_cmplog-libpng-1.2.56/libpng-1.2.56-aflpp_cmplog 91 | 92 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh libxml2-v2.9.2 93 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-libxml2-v2.9.2/lib \ 94 | /build-cmplog/RUNDIR-aflpp_cmplog-libxml2-v2.9.2/libxml2-v2.9.2-aflpp_cmplog 95 | 96 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh pcre2-10.00 97 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-pcre2-10.00/lib \ 98 | /build-cmplog/RUNDIR-aflpp_cmplog-pcre2-10.00/pcre2-10.00-aflpp_cmplog 99 | 100 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh re2-2014-12-09 101 | RUN get_libs.py 
-o /build-cmplog/RUNDIR-aflpp_cmplog-re2-2014-12-09/lib \ 102 | /build-cmplog/RUNDIR-aflpp_cmplog-re2-2014-12-09/re2-2014-12-09-aflpp_cmplog 103 | 104 | RUN cd /build-cmplog && /fuzzer-test-suite/build.sh vorbis-2017-12-11 105 | RUN get_libs.py -o /build-cmplog/RUNDIR-aflpp_cmplog-vorbis-2017-12-11/lib \ 106 | /build-cmplog/RUNDIR-aflpp_cmplog-vorbis-2017-12-11/vorbis-2017-12-11-aflpp_cmplog 107 | 108 | # The `build` directory can be extracted to the host machine 109 | RUN echo "\033[0;33m * Extract the '/build-aflpp', '/build-cmplog', and '/aflplusplus' directories\033[0m" 110 | -------------------------------------------------------------------------------- /fuzzing/fts/aflpp_driver_GNUmakefile: -------------------------------------------------------------------------------- 1 | ifeq "" "$(LLVM_CONFIG)" 2 | LLVM_CONFIG=llvm-config 3 | endif 4 | 5 | LLVM_BINDIR = $(shell $(LLVM_CONFIG) --bindir 2>/dev/null) 6 | ifneq "" "$(LLVM_BINDIR)" 7 | LLVM_BINDIR := $(LLVM_BINDIR)/ 8 | endif 9 | 10 | CFLAGS += -O3 -funroll-loops -g 11 | 12 | all: libAFLDriver.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so 13 | 14 | aflpp_driver.o: aflpp_driver.c 15 | $(LLVM_BINDIR)clang -I. -I../../include $(CFLAGS) -c aflpp_driver.c 16 | 17 | libAFLDriver.a: aflpp_driver.o 18 | ar ru libAFLDriver.a aflpp_driver.o 19 | cp -vf libAFLDriver.a ../../ 20 | 21 | debug: 22 | $(LLVM_BINDIR)clang -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.o ../../src/afl-performance.c 23 | $(LLVM_BINDIR)clang -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c 24 | #$(LLVM_BINDIR)clang -S -emit-llvm -Wno-deprecated -I../../include $(CFLAGS) -D_DEBUG=\"1\" -c -o afl-performance.ll ../../src/afl-performance.c 25 | #$(LLVM_BINDIR)clang -S -emit-llvm -I../../include -D_DEBUG=\"1\" -g -funroll-loops -c aflpp_driver.c 26 | ar ru libAFLDriver.a afl-performance.o aflpp_driver.o 27 | 28 | aflpp_qemu_driver.o: aflpp_qemu_driver.c 29 | $(LLVM_BINDIR)clang $(CFLAGS) -O0 -funroll-loops -c aflpp_qemu_driver.c 30 | 31 | libAFLQemuDriver.a: aflpp_qemu_driver.o 32 | ar ru libAFLQemuDriver.a aflpp_qemu_driver.o 33 | cp -vf libAFLQemuDriver.a ../../ 34 | 35 | aflpp_qemu_driver_hook.so: aflpp_qemu_driver_hook.o 36 | $(LLVM_BINDIR)clang -shared aflpp_qemu_driver_hook.o -o aflpp_qemu_driver_hook.so 37 | 38 | aflpp_qemu_driver_hook.o: aflpp_qemu_driver_hook.c 39 | $(LLVM_BINDIR)clang -fPIC $(CFLAGS) -funroll-loops -c aflpp_qemu_driver_hook.c 40 | 41 | test: debug 42 | #clang -S -emit-llvm -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test.ll aflpp_driver_test.c 43 | afl-clang-fast -D_DEBUG=\"1\" -I../../include -Wl,--allow-multiple-definition -funroll-loops -o aflpp_driver_test aflpp_driver_test.c libAFLDriver.a afl-performance.o 44 | 45 | clean: 46 | rm -f *.o libAFLDriver*.a libAFLQemuDriver.a aflpp_qemu_driver_hook.so *~ core aflpp_driver_test 47 | -------------------------------------------------------------------------------- /fuzzing/fts/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git subversion build-essential autoconf libtool \ 8 | cmake gcc-multilib g++-multilib pkg-config libarchive-dev:i386 \ 9 | zlib1g-dev:i386 libbz2-dev:i386 libxml2-dev:i386 libssl-dev:i386 \ 10 | liblzma-dev:i386 libexpat-dev:i386 nasm python3-pip 
wget 11 | 12 | # Install LLVM 8 13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 14 | RUN apt-get install -y llvm-8 clang-8 15 | 16 | # Get helper scripts 17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 18 | pip3 install fuzzing-seed-selection/scripts 19 | 20 | # Get LLVM compiler-rt 21 | RUN wget -O - http://releases.llvm.org/8.0.0/compiler-rt-8.0.0.src.tar.xz | tar xJ 22 | 23 | # Get Google FTS 24 | RUN git clone https://github.com/google/fuzzer-test-suite 25 | ADD build.sh /fuzzer-test-suite 26 | ADD common.sh /fuzzer-test-suite 27 | ADD libarchive-2017-01-04-build.sh /fuzzer-test-suite/libarchive-2017-01-04/build.sh 28 | ADD libjpeg-turbo-07-2017-build.sh /fuzzer-test-suite/libjpeg-turbo-07-2017/build.sh 29 | ADD libxml2-v2.9.2-build.sh /fuzzer-test-suite/libxml2-v2.9.2/build.sh 30 | 31 | ENV LIBFUZZER_SRC="/compiler-rt-8.0.0.src/lib/fuzzer" 32 | -------------------------------------------------------------------------------- /fuzzing/fts/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $(realpath -s $0))/common.sh 5 | 6 | if [ $# -ne 1 ]; then 7 | echo "usage: $0 TARGET" 8 | exit 1 9 | fi 10 | 11 | BUILD=$SCRIPT_DIR/$1/build.sh 12 | 13 | [ ! -e $BUILD ] && echo "NO SUCH FILE: $BUILD" && exit 1 14 | 15 | RUNDIR="RUNDIR-${FUZZING_ENGINE}-$1" 16 | mkdir -p $RUNDIR 17 | cd $RUNDIR 18 | $BUILD 19 | 20 | -------------------------------------------------------------------------------- /fuzzing/fts/common.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | 5 | # Don't allow to call these scripts from their directories. 6 | [ -e $(basename $0) ] && echo "PLEASE USE THIS SCRIPT FROM ANOTHER DIR" && exit 1 7 | 8 | # Ensure that fuzzing engine, if defined, is valid 9 | FUZZING_ENGINE=${FUZZING_ENGINE:-"afl"} 10 | POSSIBLE_FUZZING_ENGINE="afl aflpp aflpp_cmplog coverage" 11 | !(echo "$POSSIBLE_FUZZING_ENGINE" | grep -w "$FUZZING_ENGINE" > /dev/null) && \ 12 | echo "USAGE: Error: If defined, FUZZING_ENGINE should be one of the following: 13 | $POSSIBLE_FUZZING_ENGINE. 
However, it was defined as $FUZZING_ENGINE" && exit 1 14 | 15 | SCRIPT_DIR=$(dirname $(realpath -s $0)) 16 | EXECUTABLE_NAME_BASE=$(basename $SCRIPT_DIR)-${FUZZING_ENGINE} 17 | LIBFUZZER_SRC=${LIBFUZZER_SRC:-$(dirname $(dirname $SCRIPT_DIR))/Fuzzer} 18 | STANDALONE_TARGET=0 19 | AFL_SRC=${AFL_SRC:-$(dirname $(dirname $SCRIPT_DIR))/AFL} 20 | CORPUS=CORPUS-$EXECUTABLE_NAME_BASE 21 | JOBS=${JOBS:-"8"} 22 | 23 | export LIB_FUZZING_ENGINE="libFuzzingEngine-${FUZZING_ENGINE}.a" 24 | 25 | if [[ $FUZZING_ENGINE == "afl" ]]; then 26 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 27 | 28 | export CC="${AFL_PATH}/afl-clang-fast" 29 | export CXX="${AFL_PATH}/afl-clang-fast++" 30 | 31 | export AFL_CC="clang-8" 32 | export AFL_CXX="clang++-8" 33 | 34 | export AFL_USE_ASAN="1" 35 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 36 | export CXXFLAGS="${CFLAGS}" 37 | export LDFLAGS="-m32" 38 | elif [[ $FUZZING_ENGINE == "aflpp" ]]; then 39 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 40 | 41 | export CC="${AFL_PATH}/afl-clang-fast" 42 | export CXX="${AFL_PATH}/afl-clang-fast++" 43 | export AS="llvm-as-8" 44 | 45 | export AFL_CC="clang-8" 46 | export AFL_CXX="clang++-8" 47 | 48 | export AFL_USE_ASAN="1" 49 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 50 | export CXXFLAGS="${CFLAGS}" 51 | export LDFLAGS="-m32" 52 | elif [[ $FUZZING_ENGINE == "aflpp_cmplog" ]]; then 53 | export AFL_PATH=$(realpath -s ${AFL_SRC}) 54 | 55 | export CC="${AFL_PATH}/afl-clang-fast" 56 | export CXX="${AFL_PATH}/afl-clang-fast++" 57 | export AS="llvm-as-8" 58 | 59 | export AFL_CC="clang-8" 60 | export AFL_CXX="clang++-8" 61 | 62 | export AFL_LLVM_CMPLOG=1 63 | export CFLAGS="-m32 -O2 -fno-omit-frame-pointer -gline-tables-only" 64 | export CXXFLAGS="${CFLAGS}" 65 | export LDFLAGS="-m32" 66 | elif [[ $FUZZING_ENGINE == "coverage" ]]; then 67 | export CC="clang-8" 68 | export CXX="clang++-8" 69 | 70 | export CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" 71 | export CXXFLAGS="${CFLAGS}" 72 | export LDFLAGS="-m32" 73 | fi 74 | 75 | export CPPFLAGS=${CPPFLAGS:-"-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION"} 76 | 77 | get_git_revision() { 78 | GIT_REPO="$1" 79 | GIT_REVISION="$2" 80 | TO_DIR="$3" 81 | [ ! -e $TO_DIR ] && git clone $GIT_REPO $TO_DIR && (cd $TO_DIR && git reset --hard $GIT_REVISION) 82 | } 83 | 84 | get_git_tag() { 85 | GIT_REPO="$1" 86 | GIT_TAG="$2" 87 | TO_DIR="$3" 88 | [ ! -e $TO_DIR ] && git clone $GIT_REPO $TO_DIR && (cd $TO_DIR && git checkout $GIT_TAG) 89 | } 90 | 91 | get_svn_revision() { 92 | SVN_REPO="$1" 93 | SVN_REVISION="$2" 94 | TO_DIR="$3" 95 | [ ! 
-e $TO_DIR ] && svn co -r$SVN_REVISION $SVN_REPO $TO_DIR 96 | } 97 | 98 | build_afl() { 99 | $CXX $CXXFLAGS -std=c++11 -m32 -c ${LIBFUZZER_SRC}/afl/afl_driver.cpp -I$LIBFUZZER_SRC 100 | ar r $LIB_FUZZING_ENGINE afl_driver.o 101 | rm *.o 102 | } 103 | 104 | build_aflpp() { 105 | cp ${AFL_SRC}/examples/aflpp_driver/libAFLDriver.a $LIB_FUZZING_ENGINE 106 | } 107 | 108 | build_aflpp_cmplog() { 109 | build_aflpp 110 | } 111 | 112 | # This provides a build with no fuzzing engine, just to measure coverage 113 | build_coverage () { 114 | STANDALONE_TARGET=1 115 | $CC -m32 -c $LIBFUZZER_SRC/standalone/StandaloneFuzzTargetMain.c 116 | ar rc $LIB_FUZZING_ENGINE StandaloneFuzzTargetMain.o 117 | rm *.o 118 | } 119 | 120 | build_fuzzer() { 121 | echo "Building with $FUZZING_ENGINE" 122 | build_${FUZZING_ENGINE} 123 | } 124 | 125 | -------------------------------------------------------------------------------- /fuzzing/fts/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/fts/base 2 | 3 | # Build coverage FTS 4 | ENV FUZZING_ENGINE="coverage" 5 | RUN mkdir /build-cov 6 | 7 | RUN cd /build-cov && /fuzzer-test-suite/build.sh freetype2-2017 8 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-freetype2-2017/lib \ 9 | /build-cov/RUNDIR-coverage-freetype2-2017/freetype2-2017-coverage 10 | 11 | RUN cd /build-cov && /fuzzer-test-suite/build.sh guetzli-2017-3-30 12 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-guetzli-2017-3-30/lib \ 13 | /build-cov/RUNDIR-coverage-guetzli-2017-3-30/guetzli-2017-3-30-coverage 14 | 15 | RUN cd /build-cov && /fuzzer-test-suite/build.sh json-2017-02-12 16 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-json-2017-02-12/lib \ 17 | /build-cov/RUNDIR-coverage-json-2017-02-12/json-2017-02-12-coverage 18 | 19 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libarchive-2017-01-04 20 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libarchive-2017-01-04/lib \ 21 | /build-cov/RUNDIR-coverage-libarchive-2017-01-04/libarchive-2017-01-04-coverage 22 | 23 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libjpeg-turbo-07-2017 24 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libjpeg-turbo-07-2017/lib \ 25 | /build-cov/RUNDIR-coverage-libjpeg-turbo-07-2017/libjpeg-turbo-07-2017-coverage 26 | 27 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libpng-1.2.56 28 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libpng-1.2.56/lib \ 29 | /build-cov/RUNDIR-coverage-libpng-1.2.56/libpng-1.2.56-coverage 30 | 31 | RUN cd /build-cov && /fuzzer-test-suite/build.sh libxml2-v2.9.2 32 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-libxml2-v2.9.2/lib \ 33 | /build-cov/RUNDIR-coverage-libxml2-v2.9.2/libxml2-v2.9.2-coverage 34 | 35 | RUN cd /build-cov && /fuzzer-test-suite/build.sh pcre2-10.00 36 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-pcre2-10.00/lib \ 37 | /build-cov/RUNDIR-coverage-pcre2-10.00/pcre2-10.00-coverage 38 | 39 | RUN cd /build-cov && /fuzzer-test-suite/build.sh re2-2014-12-09 40 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-re2-2014-12-09/lib \ 41 | /build-cov/RUNDIR-coverage-re2-2014-12-09/re2-2014-12-09-coverage 42 | 43 | RUN cd /build-cov && /fuzzer-test-suite/build.sh vorbis-2017-12-11 44 | RUN get_libs.py -o /build-cov/RUNDIR-coverage-vorbis-2017-12-11/lib \ 45 | /build-cov/RUNDIR-coverage-vorbis-2017-12-11/vorbis-2017-12-11-coverage 46 | 47 | RUN echo "\033[0;33m * Extract the 'build-cov' directory\033[0m" 48 | -------------------------------------------------------------------------------- 
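As with the AFL and AFL++ images above, the `/build-cov` directory can be copied out of the coverage container with the repository's extraction script. A minimal sketch, assuming the image was tagged `seed-selection/fts/coverage` per the build instructions in `fuzzing/README.md`:

```bash
# Copy the coverage-instrumented builds (and the shared libraries
# collected by get_libs.py) from the container to the current directory
./extract-from-container.sh seed-selection/fts/coverage /build-cov .
```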
/fuzzing/fts/libarchive-2017-01-04-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD/build && ./autogen.sh && cd .. && ./configure --disable-shared --without-nettle && make -j $JOBS) 11 | } 12 | 13 | get_git_revision https://github.com/libarchive/libarchive.git 51d7afd3644fdad725dd8faa7606b864fd125f88 SRC 14 | build_lib 15 | build_fuzzer 16 | 17 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 18 | # Link ASan runtime so we can hook memcmp et al. 19 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 20 | fi 21 | set -x 22 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/libarchive_fuzzer.cc -I BUILD/libarchive BUILD/.libs/libarchive.a $LIB_FUZZING_ENGINE -lz -lbz2 -lxml2 -lcrypto -lssl -llzma -lexpat -o $EXECUTABLE_NAME_BASE 23 | -------------------------------------------------------------------------------- /fuzzing/fts/libjpeg-turbo-07-2017-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2017 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD && autoreconf -fiv && ./configure --disable-shared --host=i386-linux && make -j $JOBS) 11 | } 12 | 13 | get_git_revision https://github.com/libjpeg-turbo/libjpeg-turbo.git b0971e47d76fdb81270e93bbf11ff5558073350d SRC 14 | build_lib 15 | build_fuzzer 16 | 17 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 18 | # Link ASan runtime so we can hook memcmp et al. 19 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 20 | fi 21 | set -x 22 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/libjpeg_turbo_fuzzer.cc -I BUILD BUILD/.libs/libturbojpeg.a $LIB_FUZZING_ENGINE -o $EXECUTABLE_NAME_BASE 23 | -------------------------------------------------------------------------------- /fuzzing/fts/libxml2-v2.9.2-build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Google Inc. All Rights Reserved. 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | . $(dirname $0)/../custom-build.sh $1 $2 5 | . $(dirname $0)/../common.sh 6 | 7 | build_lib() { 8 | rm -rf BUILD 9 | cp -rf SRC BUILD 10 | (cd BUILD && ./autogen.sh && CCLD="$CXX $CXXFLAGS" ./configure --disable-shared --without-python && make -j $JOBS) 11 | } 12 | 13 | get_git_tag https://gitlab.gnome.org/GNOME/libxml2.git v2.9.2 SRC 14 | get_git_revision https://github.com/google/afl e9be6bce2282e8db95221c9a17fd10aba9e901bc afl 15 | build_lib 16 | build_fuzzer 17 | 18 | cp afl/dictionaries/xml.dict . 19 | 20 | if [[ $FUZZING_ENGINE == "hooks" ]]; then 21 | # Link ASan runtime so we can hook memcmp et al. 
22 | LIB_FUZZING_ENGINE="$LIB_FUZZING_ENGINE -fsanitize=address" 23 | fi 24 | set -x 25 | $CXX $CXXFLAGS -std=c++11 $SCRIPT_DIR/target.cc -I BUILD/include BUILD/.libs/libxml2.a $LIB_FUZZING_ENGINE -lz -llzma -o $EXECUTABLE_NAME_BASE 26 | -------------------------------------------------------------------------------- /fuzzing/magma/clean_corpora.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "usage: $0 /path/to/magma" 5 | exit 1 6 | fi 7 | 8 | MAGMA_DIR=$1 9 | 10 | set -x 11 | rm -rf ${MAGMA_DIR}/targets/libpng/corpus/libpng_read_fuzzer/* 12 | rm -rf ${MAGMA_DIR}/targets/libtiff/corpus/tiff_read_rgba_fuzzer/* 13 | rm -rf ${MAGMA_DIR}/targets/libxml2/corpus/libxml2_xml_reader_for_file_fuzzer/* 14 | rm -rf ${MAGMA_DIR}/targets/php/corpus/{exif,json,parser}/* 15 | rm -rf ${MAGMA_DIR}/targets/poppler/corpus/pdf_fuzzer/* 16 | -------------------------------------------------------------------------------- /fuzzing/magma/log-execs.patch: -------------------------------------------------------------------------------- 1 | diff --git a/afl-fuzz.c b/afl-fuzz.c 2 | index 21918df..64e7b35 100644 3 | --- a/afl-fuzz.c 4 | +++ b/afl-fuzz.c 5 | @@ -3543,10 +3543,10 @@ static void maybe_update_plot_file(double bitmap_cvg, double eps) { 6 | execs_per_sec */ 7 | 8 | fprintf(plot_file, 9 | - "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %0.02f\n", 10 | + "%llu, %llu, %u, %u, %u, %u, %0.02f%%, %llu, %llu, %u, %llu, %0.02f\n", 11 | get_cur_time() / 1000, queue_cycle - 1, current_entry, queued_paths, 12 | pending_not_fuzzed, pending_favored, bitmap_cvg, unique_crashes, 13 | - unique_hangs, max_depth, eps); /* ignore errors */ 14 | + unique_hangs, max_depth, total_execs, eps); /* ignore errors */ 15 | 16 | fflush(plot_file); 17 | 18 | @@ -7266,7 +7266,7 @@ EXP_ST void setup_dirs_fds(void) { 19 | 20 | fprintf(plot_file, "# unix_time, cycles_done, cur_path, paths_total, " 21 | "pending_total, pending_favs, map_size, unique_crashes, " 22 | - "unique_hangs, max_depth, execs_per_sec\n"); 23 | + "unique_hangs, max_depth, execs, execs_per_sec\n"); 24 | /* ignore errors */ 25 | 26 | } 27 | -------------------------------------------------------------------------------- /fuzzing/magma/setup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ "$#" -ne 1 ]; then 4 | echo "usage: $0 /dest/path" 5 | exit 1 6 | fi 7 | 8 | THIS_DIR=$(dirname $(realpath -s $0)) 9 | DEST_DIR=$1 10 | 11 | rm -rf ${DEST_DIR} 12 | git clone --branch v1.1 --depth 1 https://github.com/HexHive/magma ${DEST_DIR} 13 | git -C ${DEST_DIR} apply "${THIS_DIR}/v1.1.patch" 14 | cp "${THIS_DIR}/log-execs.patch" ${DEST_DIR}/fuzzers/afl/src/ 15 | cp "${THIS_DIR}/../../scripts/bin/timestamp_afl.py" ${DEST_DIR}/fuzzers/afl/src/timestamp_findings.py 16 | 17 | # Create php corpus directories 18 | mkdir -p ${DEST_DIR}/targets/php/corpus/{exif,json,parser} 19 | -------------------------------------------------------------------------------- /fuzzing/magma/survival_analysis.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute a survival analysis for each bug reported in the given JSON file. This 5 | JSON file is generated by running `magma/tools/benchd/exp2json.py` on the Magma 6 | work directory. 
7 | 8 | Author: Adrian Herrera 9 | """ 10 | 11 | 12 | from argparse import ArgumentParser, Namespace 13 | from collections import defaultdict 14 | from math import sqrt 15 | from pathlib import Path 16 | from typing import Tuple 17 | import json 18 | import warnings 19 | 20 | from lifelines import KaplanMeierFitter 21 | from lifelines.utils import restricted_mean_survival_time as rmst 22 | import pandas as pd 23 | 24 | 25 | NUM_TRIALS = 30 26 | TRIAL_LEN = 18 * 60 * 60 27 | 28 | ddr = lambda: defaultdict(ddr) 29 | 30 | 31 | def parse_args() -> Namespace: 32 | """Parse command-line arguments.""" 33 | parser = ArgumentParser(description='Magma survival analysis') 34 | parser.add_argument('json', type=Path, nargs='+', 35 | help='Magma-generated JSON file (containing bug data)') 36 | return parser.parse_args() 37 | 38 | 39 | def get_time_to_bug(data: dict) -> dict: 40 | """Get time-to-bug data from Magma JSON dictionary.""" 41 | for fuzzer, f_data in data.items(): 42 | for target, t_data in f_data.items(): 43 | for program, p_data in t_data.items(): 44 | bugs = ddr() 45 | for run, r_data in p_data.items(): 46 | for metric, m_data in r_data.items(): 47 | for bug, time in m_data.items(): 48 | if metric not in bugs[bug]: 49 | bugs[bug][metric] = [None] * NUM_TRIALS 50 | bugs[bug][metric][int(run)] = time 51 | for bug, b_data in bugs.items(): 52 | yield dict( 53 | target=target, 54 | program=program, 55 | fuzzer=fuzzer, 56 | bug=bug, 57 | **b_data, 58 | ) 59 | 60 | 61 | def calc_survival(data: dict) -> Tuple[float, float]: 62 | """Do the survival analysis.""" 63 | df = pd.DataFrame(data) 64 | T = df.fillna(TRIAL_LEN) / 60 / 60 65 | E = df.notnull() 66 | 67 | kmf = KaplanMeierFitter() 68 | kmf.fit(T, E) 69 | 70 | # Compute the restricted mean survival time and 95% confidence interval 71 | surv_time_mean, surv_time_var = rmst(kmf, t=TRIAL_LEN / 60 / 60, 72 | return_variance=True) 73 | surv_time_var = abs(surv_time_var) 74 | surv_time_ci = 1.96 * (sqrt(surv_time_var) / 75 | sqrt(len(kmf.survival_function_))) 76 | 77 | return surv_time_mean, surv_time_ci 78 | 79 | 80 | def main(): 81 | """The main function.""" 82 | args = parse_args() 83 | 84 | # Ignore warnings 85 | warnings.simplefilter("ignore") 86 | 87 | survival_times = dict(target=[], 88 | program=[], 89 | bug=[], 90 | src=[], 91 | fuzzer=[], 92 | survival_time=[], 93 | survival_ci=[]) 94 | 95 | for magma_json in args.json: 96 | # Read Magma JSON data 97 | with magma_json.open() as inf: 98 | json_data = json.load(inf).get('results', {}) 99 | 100 | # Do survival analysis on 'triggered' results 101 | for ttb in get_time_to_bug(json_data): 102 | if 'triggered' not in ttb: 103 | surv_time_mean = None 104 | surv_time_ci = None 105 | else: 106 | triggered_data = ttb['triggered'] 107 | surv_time_mean, surv_time_ci = calc_survival(triggered_data) 108 | 109 | # Save table data 110 | survival_times['target'].append(ttb['target']) 111 | survival_times['program'].append(ttb['program']) 112 | survival_times['bug'].append(ttb['bug']) 113 | survival_times['src'].append(magma_json.stem) 114 | survival_times['fuzzer'].append('afl') 115 | survival_times['survival_time'].append(surv_time_mean) 116 | survival_times['survival_ci'].append(surv_time_ci) 117 | 118 | # Write to CSV 119 | print(pd.DataFrame.from_dict(survival_times).sort_values(by='bug').to_csv(index=False)) 120 | 121 | 122 | if __name__ == '__main__': 123 | main() 124 | -------------------------------------------------------------------------------- /fuzzing/magma/v1.1.patch: 
-------------------------------------------------------------------------------- 
1 | diff --git a/fuzzers/afl/fetch.sh b/fuzzers/afl/fetch.sh
2 | index c2ca3be3..ca1cb7a6 100755
3 | --- a/fuzzers/afl/fetch.sh
4 | +++ b/fuzzers/afl/fetch.sh
5 | @@ -11,3 +11,5 @@ git -C "$FUZZER/repo" checkout fab1ca5ed7e3552833a18fc2116d33a9241699bc
6 | #wget -O "$FUZZER/repo/afl_driver.cpp" \
7 | # "https://cs.chromium.org/codesearch/f/chromium/src/third_party/libFuzzer/src/afl/afl_driver.cpp"
8 | cp "$FUZZER/src/afl_driver.cpp" "$FUZZER/repo/afl_driver.cpp"
9 | +
10 | +git -C "$FUZZER/repo" apply "$FUZZER/src/log-execs.patch"
11 | diff --git a/fuzzers/afl/run.sh b/fuzzers/afl/run.sh
12 | index 077ba44b..f20c37aa 100755
13 | --- a/fuzzers/afl/run.sh
14 | +++ b/fuzzers/afl/run.sh
15 | @@ -15,5 +15,7 @@ mkdir -p "$SHARED/findings"
16 | 
17 | export AFL_SKIP_CPUFREQ=1
18 | export AFL_NO_AFFINITY=1
19 | -"$FUZZER/repo/afl-fuzz" -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
20 | - $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1
21 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
22 | + -M fuzzer-01 $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1 &
23 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -m 100M -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
24 | + -S fuzzer-02 $FUZZARGS -- "$OUT/$PROGRAM" $ARGS 2>&1 &
25 | diff --git a/fuzzers/aflplusplus/run.sh b/fuzzers/aflplusplus/run.sh
26 | index c2f1c100..2e814eb2 100755
27 | --- a/fuzzers/aflplusplus/run.sh
28 | +++ b/fuzzers/aflplusplus/run.sh
29 | @@ -21,6 +21,9 @@ export AFL_NO_UI=1
30 | export AFL_MAP_SIZE=256000
31 | export AFL_DRIVER_DONT_DEFER=1
32 | 
33 | -"$FUZZER/repo/afl-fuzz" -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
34 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
35 | "${flag_cmplog[@]}" -d \
36 | - $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1
37 | + -M fuzzer-01 $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1 &
38 | +"$FUZZER/repo/afl-fuzz" -t 1000+ -i "$TARGET/corpus/$PROGRAM" -o "$SHARED/findings" \
39 | + "${flag_cmplog[@]}" -d \
40 | + -S fuzzer-02 $FUZZARGS -- "$OUT/afl/$PROGRAM" $ARGS 2>&1 &
41 | diff --git a/magma/run.sh b/magma/run.sh
42 | index d6fcdd53..50f3cf52 100755
43 | --- a/magma/run.sh
44 | +++ b/magma/run.sh
45 | @@ -25,15 +25,17 @@ mkdir -p "$MONITOR"
46 | cd "$SHARED"
47 | 
48 | # prune the seed corpus for any fault-triggering test-cases
49 | -for seed in "$TARGET/corpus/$PROGRAM"/*; do
50 | - out="$("$MAGMA"/runonce.sh "$seed")"
51 | - code=$?
52 | +if [ -z $NO_PRUNE ]; then
53 | + for seed in "$TARGET/corpus/$PROGRAM"/*; do
54 | + out="$("$MAGMA"/runonce.sh "$seed")"
55 | + code=$?
56 | 57 | - if [ $code -ne 0 ]; then 58 | - echo "$seed: $out" 59 | - rm "$seed" 60 | - fi 61 | -done 62 | + if [ $code -ne 0 ]; then 63 | + echo "$seed: $out" 64 | + rm "$seed" 65 | + fi 66 | + done 67 | +fi 68 | 69 | shopt -s nullglob 70 | seeds=("$1"/*) 71 | @@ -68,8 +70,10 @@ done & 72 | 73 | echo "Campaign launched at $(date '+%F %R')" 74 | 75 | -timeout $TIMEOUT "$FUZZER/run.sh" | \ 76 | - multilog n2 s$LOGSIZE "$SHARED/log" 77 | +timeout $TIMEOUT bash -c ' 78 | + "$FUZZER/run.sh" | \ 79 | + multilog n2 s$LOGSIZE "$SHARED/log" 80 | +' 81 | 82 | if [ -f "$SHARED/log/current" ]; then 83 | cat "$SHARED/log/current" 84 | @@ -77,4 +81,10 @@ fi 85 | 86 | echo "Campaign terminated at $(date '+%F %R')" 87 | 88 | +# XXX This should be in a fuzzer-specific post-processing script 89 | +python3 "$FUZZER/src/timestamp_findings.py" -o "$SHARED/fuzzer-01-timestamps.csv" -- \ 90 | + "$SHARED/findings/fuzzer-01" 91 | +python3 "$FUZZER/src/timestamp_findings.py" -o "$SHARED/fuzzer-02-timestamps.csv" -- \ 92 | + "$SHARED/findings/fuzzer-02" 93 | + 94 | kill $(jobs -p) 95 | diff --git a/targets/php/build.sh b/targets/php/build.sh 96 | index 26f2e2d6..131d83fd 100755 97 | --- a/targets/php/build.sh 98 | +++ b/targets/php/build.sh 99 | @@ -47,16 +47,7 @@ popd 100 | 101 | make -j$(nproc) 102 | 103 | -# Generate seed corpora 104 | -sapi/cli/php sapi/fuzzer/generate_unserialize_dict.php 105 | -sapi/cli/php sapi/fuzzer/generate_parser_corpus.php 106 | - 107 | FUZZERS="php-fuzz-json php-fuzz-exif php-fuzz-mbstring php-fuzz-unserialize php-fuzz-parser" 108 | for fuzzerName in $FUZZERS; do 109 | cp sapi/fuzzer/$fuzzerName "$OUT/${fuzzerName/php-fuzz-/}" 110 | done 111 | - 112 | -for fuzzerName in `ls sapi/fuzzer/corpus`; do 113 | - mkdir -p "$TARGET/corpus/${fuzzerName}" 114 | - cp sapi/fuzzer/corpus/${fuzzerName}/* "$TARGET/corpus/${fuzzerName}/" 115 | -done 116 | diff --git a/tools/captain/captainrc b/tools/captain/captainrc 117 | index 466d2164..098931fe 100644 118 | --- a/tools/captain/captainrc 119 | +++ b/tools/captain/captainrc 120 | @@ -10,7 +10,7 @@ 121 | WORKDIR=./workdir 122 | 123 | # REPEAT: number of campaigns to run per program (per fuzzer) 124 | -REPEAT=3 125 | +REPEAT=30 126 | 127 | # [WORKER_MODE]: defines the type of CPU resources to allocate (default: 1) 128 | # - 1: logical cores (possibly SMT-enabled) 129 | @@ -25,22 +25,22 @@ REPEAT=3 130 | # WORKER_POOL="1 3 5 7 9" 131 | 132 | # [CAMPAIGN_WORKERS]: number of workers to allocate for a campaign (default: 1) 133 | -# CAMPAIGN_WORKERS=1 134 | +CAMPAIGN_WORKERS=2 135 | 136 | # [TIMEOUT]: time to run each campaign. This variable supports one-letter 137 | # suffixes to indicate duration (s: seconds, m: minutes, h: hours, d: days) 138 | # (default: 1m) 139 | -TIMEOUT=24h 140 | +TIMEOUT=18h 141 | 142 | # [POLL]: time (in seconds) between polls (default: 5) 143 | POLL=5 144 | 145 | # [CACHE_ON_DISK]: if set, the cache workdir is mounted on disk instead of 146 | # in-memory (default: unset) 147 | -# CACHE_ON_DISK=1 148 | +CACHE_ON_DISK=1 149 | 150 | # [NO_ARCHIVE]: if set, campaign workdirs will not be tarballed (default: unset) 151 | -# NO_ARCHIVE=1 152 | +NO_ARCHIVE=1 153 | 154 | # [TMPFS_SIZE]: the size of the tmpfs mounted volume. 
This only applies when
155 |  # CACHE_ON_DISK is not set (default: 50g)
156 | @@ -65,23 +65,40 @@ POLL=5
157 |  # (default: unset)
158 |  # POC_EXTRACT=1
159 | 
160 | +# [NO_PRUNE]: if set, do not prune the seed corpus for any fault-triggering
161 | +# test-cases (default: unset)
162 | +NO_PRUNE=1
163 | +
164 | 
165 |  ###
166 |  ## Campaigns to run
167 |  ###
168 | 
169 |  # FUZZERS: an array of fuzzer names (from magma/fuzzers/*) to evaluate
170 | -FUZZERS=(afl aflfast moptafl aflplusplus fairfuzz honggfuzz)
171 | +FUZZERS=(afl aflplusplus)
172 | 
173 |  # [fuzzer_TARGETS]: an array of target names (from magma/targets/*) to fuzz with
174 |  # `fuzzer`. The `fuzzer` prefix is a fuzzer listed in the FUZZERS array
175 |  # (default: all targets)
176 |  # afl_TARGETS=(libpng libtiff libxml2)
177 | +afl_TARGETS=(libpng libtiff libxml2 php poppler)
178 | +aflplusplus_TARGETS=(libpng libtiff libxml2 php poppler)
179 | 
180 |  # [fuzzer_target_PROGRAMS]: an array of program names (from
181 |  # magma/targets/target/configrc) to use as execution drivers when fuzzing the
182 |  # `target`
183 |  # afl_libtiff_PROGRAMS=(tiffcp)
184 | +afl_libpng_PROGRAMS=(libpng_read_fuzzer)
185 | +afl_libtiff_PROGRAMS=(tiff_read_rgba_fuzzer)
186 | +afl_libxml2_PROGRAMS=(libxml2_xml_reader_for_file_fuzzer)
187 | +afl_php_PROGRAMS=(exif json parser)
188 | +afl_poppler_PROGRAMS=(pdf_fuzzer)
189 | +
190 | +aflplusplus_libpng_PROGRAMS=(libpng_read_fuzzer)
191 | +aflplusplus_libtiff_PROGRAMS=(tiff_read_rgba_fuzzer)
192 | +aflplusplus_libxml2_PROGRAMS=(libxml2_xml_reader_for_file_fuzzer)
193 | +aflplusplus_php_PROGRAMS=(exif json parser)
194 | +aflplusplus_poppler_PROGRAMS=(pdf_fuzzer)
195 | 
196 |  # [fuzzer_target_FUZZARGS]: a string containing fuzzer/target-specific arguments
197 |  # when fuzzing `target` with `fuzzer`
198 | -------------------------------------------------------------------------------- /fuzzing/readelf/Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN export DEBIAN_FRONTEND=noninteractive && \
5 |     apt-get update && \
6 |     apt-get -y install git build-essential wget binutils-dev libunwind-dev \
7 |         parallel python3 python3-pip
8 | 
9 | # Install LLVM 8
10 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
11 | RUN apt-get install -y llvm-8 clang-8
12 | 
13 | # Get AFLFast
14 | RUN git clone --no-checkout https://github.com/mboehme/aflfast && \
15 |     git -C aflfast checkout 11ec1828448d27bdcc54fdeb91bf3215d4d8c583
16 | ADD afl_llvm_mode.Makefile /aflfast/llvm_mode/Makefile
17 | RUN make -C aflfast -j && \
18 |     CC=clang-8 CXX=clang++-8 LLVM_CONFIG=llvm-config-8 make -C aflfast/llvm_mode
19 | 
20 | # Get AFL++
21 | RUN git clone --no-checkout https://github.com/AFLplusplus/AFLplusplus aflplusplus && \
22 |     git -C aflplusplus checkout fa933036a7bdbf9a59a9b1b7669d6ec7db64a202 && \
23 |     CC=clang-8 CXX=clang++-8 LLVM_CONFIG=llvm-config-8 make -C aflplusplus
24 | 
25 | # Get honggfuzz
26 | RUN git clone --no-checkout https://github.com/google/honggfuzz && \
27 |     git -C honggfuzz checkout 5810856f5381f93c461e3a7ba6452945c0725574 && \
28 |     make -C honggfuzz
29 | 
30 | # Get binutils
31 | RUN wget ftp://sourceware.org/pub/binutils/snapshots/binutils-2.30.0.tar.xz
32 | 
33 | # AFLFast build
34 | RUN rm -rf binutils-2.30.0
35 | RUN tar xJf /binutils-2.30.0.tar.xz
36 | RUN mkdir -p /binutils-aflfast
37 | RUN cd binutils-2.30.0 && \
38 |     export AFL_CC=clang-8 && \
39 |     export AFL_CXX=clang++-8 && \
40 |     export
CC=/aflfast/afl-clang-fast && \ 41 | export CXX=/aflfast/afl-clang-fast++ && \ 42 | ./configure --prefix=/binutils-aflfast --disable-shared --disable-werror\ 43 | --disable-ld --disable-gdb && \ 44 | make -j && make install 45 | 46 | # AFL++ build 47 | RUN rm -rf binutils-2.30.0 48 | RUN tar xJf /binutils-2.30.0.tar.xz 49 | RUN mkdir -p /binutils-aflplusplus/afl 50 | RUN cd binutils-2.30.0 && \ 51 | export AFL_CC=clang-8 && \ 52 | export AFL_CXX=clang++-8 && \ 53 | export CC=/aflplusplus/afl-clang-fast && \ 54 | export CXX=/aflplusplus/afl-clang-fast++ && \ 55 | ./configure --prefix=/binutils-aflplusplus/afl --disable-shared \ 56 | --disable-werror --disable-ld --disable-gdb && \ 57 | make -j && make install 58 | 59 | RUN rm -rf binutils-2.30.0 60 | RUN tar xJf /binutils-2.30.0.tar.xz 61 | RUN mkdir -p /binutils-aflplusplus/cmplog 62 | RUN cd binutils-2.30.0 && \ 63 | export AFL_CC=clang-8 && \ 64 | export AFL_CXX=clang++-8 && \ 65 | export CC=/aflplusplus/afl-clang-fast && \ 66 | export CXX=/aflplusplus/afl-clang-fast++ && \ 67 | export AFL_LLVM_CMPLOG=1 && \ 68 | ./configure --prefix=/binutils-aflplusplus/cmplog --disable-shared \ 69 | --disable-werror --disable-ld --disable-gdb && \ 70 | make -j && make install 71 | 72 | # honggfuzz build 73 | RUN rm -rf binutils-2.30.0 74 | RUN tar xJf /binutils-2.30.0.tar.xz 75 | RUN mkdir -p /binutils-honggfuzz 76 | RUN cd binutils-2.30.0 && \ 77 | export CC=/honggfuzz/hfuzz_cc/hfuzz-clang && \ 78 | export CXX=/honggfuzz/hfuzz_cc/hfuzz-clang++ && \ 79 | ./configure --prefix=/binutils-honggfuzz --disable-shared \ 80 | --disable-werror --disable-ld --disable-gdb && \ 81 | make -j && make install 82 | 83 | # Coverage build 84 | RUN rm -rf binutils-2.30.0 85 | RUN tar xJf /binutils-2.30.0.tar.xz 86 | RUN mkdir -p /binutils-coverage 87 | RUN cd binutils-2.30.0 && \ 88 | export CC=clang-8 && \ 89 | export CXX=clang++-8 && \ 90 | export CFLAGS="-fprofile-instr-generate -fcoverage-mapping" && \ 91 | export CXXFLAGS="-fprofile-instr-generate -fcoverage-mapping" && \ 92 | ./configure --prefix=/binutils-coverage --disable-shared \ 93 | --disable-werror --disable-ld --disable-gdb && \ 94 | make -j && make install 95 | 96 | # Add fuzzer files 97 | RUN mkdir /uninformed-seed 98 | ADD seeds/uninformed-seed /uninformed-seed/seed 99 | ADD seeds/cmin-seeds.tar.xz / 100 | 101 | # Add scripts 102 | ADD scripts /scripts 103 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 104 | pip3 install fuzzing-seed-selection/scripts 105 | RUN pip3 install -r /scripts/requirements.txt 106 | -------------------------------------------------------------------------------- /fuzzing/readelf/afl_llvm_mode.Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # american fuzzy lop - LLVM instrumentation 3 | # ----------------------------------------- 4 | # 5 | # Written by Laszlo Szekeres and 6 | # Michal Zalewski 7 | # 8 | # LLVM integration design comes from Laszlo Szekeres. 9 | # 10 | # Copyright 2015, 2016 Google Inc. All rights reserved. 11 | # 12 | # Licensed under the Apache License, Version 2.0 (the "License"); 13 | # you may not use this file except in compliance with the License. 
14 | # You may obtain a copy of the License at: 15 | # 16 | # http://www.apache.org/licenses/LICENSE-2.0 17 | # 18 | 19 | PREFIX ?= /usr/local 20 | HELPER_PATH = $(PREFIX)/lib/afl 21 | BIN_PATH = $(PREFIX)/bin 22 | 23 | VERSION = $(shell grep '^\#define VERSION ' ../config.h | cut -d '"' -f2) 24 | 25 | LLVM_CONFIG ?= llvm-config 26 | 27 | CFLAGS ?= -O3 -funroll-loops 28 | CFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ 29 | -DAFL_PATH=\"$(HELPER_PATH)\" -DBIN_PATH=\"$(BIN_PATH)\" \ 30 | -DVERSION=\"$(VERSION)\" 31 | ifdef AFL_TRACE_PC 32 | CFLAGS += -DUSE_TRACE_PC=1 33 | endif 34 | 35 | CXXFLAGS ?= -O3 -funroll-loops 36 | CXXFLAGS += -Wall -D_FORTIFY_SOURCE=2 -g -Wno-pointer-sign \ 37 | -DVERSION=\"$(VERSION)\" -Wno-variadic-macros 38 | 39 | # Mark nodelete to work around unload bug in upstream LLVM 5.0+ 40 | CLANG_CFL = `$(LLVM_CONFIG) --cxxflags` -Wl,-znodelete -fno-rtti -fpic $(CXXFLAGS) 41 | CLANG_LFL = `$(LLVM_CONFIG) --ldflags` $(LDFLAGS) 42 | 43 | # User teor2345 reports that this is required to make things work on MacOS X. 44 | 45 | ifeq "$(shell uname)" "Darwin" 46 | CLANG_LFL += -Wl,-flat_namespace -Wl,-undefined,suppress 47 | endif 48 | 49 | # We were using llvm-config --bindir to get the location of clang, but 50 | # this seems to be busted on some distros, so using the one in $PATH is 51 | # probably better. 52 | 53 | ifeq "$(origin CC)" "default" 54 | CC = clang 55 | CXX = clang++ 56 | endif 57 | 58 | ifndef AFL_TRACE_PC 59 | PROGS = ../afl-clang-fast ../afl-llvm-pass.so ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o 60 | else 61 | PROGS = ../afl-clang-fast ../afl-llvm-rt.o ../afl-llvm-rt-32.o ../afl-llvm-rt-64.o 62 | endif 63 | 64 | all: test_deps $(PROGS) test_build all_done 65 | 66 | test_deps: 67 | ifndef AFL_TRACE_PC 68 | @echo "[*] Checking for working 'llvm-config'..." 69 | @which $(LLVM_CONFIG) >/dev/null 2>&1 || ( echo "[-] Oops, can't find 'llvm-config'. Install clang or set \$$LLVM_CONFIG or \$$PATH beforehand."; echo " (Sometimes, the binary will be named llvm-config-3.5 or something like that.)"; exit 1 ) 70 | else 71 | @echo "[!] Note: using -fsanitize=trace-pc mode (this will fail with older LLVM)." 72 | endif 73 | @echo "[*] Checking for working '$(CC)'..." 74 | @which $(CC) >/dev/null 2>&1 || ( echo "[-] Oops, can't find '$(CC)'. Make sure that it's in your \$$PATH (or set \$$CC and \$$CXX)."; exit 1 ) 75 | @echo "[*] Checking for '../afl-showmap'..." 76 | @test -f ../afl-showmap || ( echo "[-] Oops, can't find '../afl-showmap'. Be sure to compile AFL first."; exit 1 ) 77 | @echo "[+] All set and ready to build." 78 | 79 | ../afl-clang-fast: afl-clang-fast.c | test_deps 80 | $(CC) $(CFLAGS) $< -o $@ $(LDFLAGS) 81 | ln -sf afl-clang-fast ../afl-clang-fast++ 82 | 83 | ../afl-llvm-pass.so: afl-llvm-pass.so.cc | test_deps 84 | $(CXX) $(CLANG_CFL) -shared $< -o $@ $(CLANG_LFL) 85 | 86 | ../afl-llvm-rt.o: afl-llvm-rt.o.c | test_deps 87 | $(CC) $(CFLAGS) -fPIC -c $< -o $@ 88 | 89 | ../afl-llvm-rt-32.o: afl-llvm-rt.o.c | test_deps 90 | @printf "[*] Building 32-bit variant of the runtime (-m32)... " 91 | @$(CC) $(CFLAGS) -m32 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" = "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi 92 | 93 | ../afl-llvm-rt-64.o: afl-llvm-rt.o.c | test_deps 94 | @printf "[*] Building 64-bit variant of the runtime (-m64)... " 95 | @$(CC) $(CFLAGS) -m64 -fPIC -c $< -o $@ 2>/dev/null; if [ "$$?" 
= "0" ]; then echo "success!"; else echo "failed (that's fine)"; fi 96 | 97 | test_build: $(PROGS) 98 | @echo "[*] Testing the CC wrapper and instrumentation output..." 99 | unset AFL_USE_ASAN AFL_USE_MSAN AFL_INST_RATIO; AFL_QUIET=1 AFL_PATH=. AFL_CC=$(CC) ../afl-clang-fast $(CFLAGS) ../test-instr.c -o test-instr $(LDFLAGS) 100 | echo 0 | ../afl-showmap -m none -q -o .test-instr0 ./test-instr 101 | echo 1 | ../afl-showmap -m none -q -o .test-instr1 ./test-instr 102 | @rm -f test-instr 103 | @cmp -s .test-instr0 .test-instr1; DR="$$?"; rm -f .test-instr0 .test-instr1; if [ "$$DR" = "0" ]; then echo; echo "Oops, the instrumentation does not seem to be behaving correctly!"; echo; echo "Please ping to troubleshoot the issue."; echo; exit 1; fi 104 | @echo "[+] All right, the instrumentation seems to be working!" 105 | 106 | all_done: test_build 107 | @echo "[+] All done! You can now use '../afl-clang-fast' to compile programs." 108 | 109 | .NOTPARALLEL: clean 110 | 111 | clean: 112 | rm -f *.o *.so *~ a.out core core.[1-9][0-9]* test-instr .test-instr0 .test-instr1 113 | rm -f $(PROGS) ../afl-clang-fast++ 114 | -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-ascii-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflfast-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflfast-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-ascii-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/aflplusplus-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/aflplusplus-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-ascii-cov.csv.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-ascii-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-cmin-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-cmin-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/honggfuzz-singleton-cov.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/honggfuzz-singleton-cov.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/data/readelf-experiment.csv.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/data/readelf-experiment.csv.gz -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/fuzz.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | NUM_TRIALS=5 4 | TRIAL_LEN=$((10*60*60)) 5 | NUM_CORES=$(grep -c ^processor /proc/cpuinfo) 6 | SEM_ID="readelf-fuzz" 7 | 8 | export AFL_NO_UI=1 9 | 10 | # AFLFast 11 | mkdir /readelf-aflfast 12 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do 13 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 14 | --halt now,fail=1 \ 15 | /aflfast/afl-fuzz -p fast -i /uninformed-seed \ 16 | -o /readelf-aflfast/uninformed-trial-${TRIAL} -- \ 17 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 18 | sleep 2s 19 | 20 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 21 | --halt now,fail=1 \ 22 | /aflfast/afl-fuzz -p fast -i /aflfast/testcases/others/elf \ 23 | -o /readelf-aflfast/singleton-trial-${TRIAL} -- \ 24 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 25 | sleep 2s 26 | 27 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 28 | --halt now,fail=1 \ 29 | /aflfast/afl-fuzz -p fast -i /cmin-seeds \ 30 | -o /readelf-aflfast/cmin-trial-${TRIAL} -- \ 31 | /binutils-aflfast/bin/readelf -a @@ > /dev/null 32 | sleep 2s 33 | done 34 | 35 | # AFL++ 36 | mkdir /readelf-aflplusplus 37 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do 38 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 39 | --halt now,fail=1 \ 40 | /aflplusplus/afl-fuzz -i /uninformed-seed \ 41 | -o /readelf-aflplusplus/uninformed-trial-${TRIAL} \ 42 | -m none -c /binutils-aflplusplus/cmplog/bin/readelf -- \ 43 | /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null 44 | sleep 2s 45 | 46 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 47 | --halt now,fail=1 \ 48 | /aflplusplus/afl-fuzz -i /aflfast/testcases/others/elf \ 49 | -o /readelf-aflplusplus/singleton-trial-${TRIAL} \ 50 | -m none -c /binutils-aflplusplus/cmplog/bin/readelf -- \ 51 | /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null 52 | sleep 2s 53 | 54 | sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \ 55 | --halt now,fail=1 \ 56 | /aflplusplus/afl-fuzz -i /cmin-seeds \ 57 | -o /readelf-aflplusplus/cmin-trial-${TRIAL} \ 58 | -m 
none -c /binutils-aflplusplus/cmplog/bin/readelf -- \
59 |         /binutils-aflplusplus/afl/bin/readelf -a @@ > /dev/null
60 |     sleep 2s
61 | done
62 | 
63 | # honggfuzz
64 | mkdir /readelf-honggfuzz
65 | for TRIAL in $(seq 1 ${NUM_TRIALS}); do
66 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
67 |         --halt now,fail=1 \
68 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
69 |         -i /uninformed-seed -o /readelf-honggfuzz/uninformed-trial-${TRIAL} \
70 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
71 |     sleep 2s
72 | 
73 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
74 |         --halt now,fail=1 \
75 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
76 |         -i /aflfast/testcases/others/elf \
77 |         -o /readelf-honggfuzz/singleton-trial-${TRIAL} \
78 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
79 |     sleep 2s
80 | 
81 |     sem --timeout ${TRIAL_LEN} --jobs ${NUM_CORES} --id ${SEM_ID} -u \
82 |         --halt now,fail=1 \
83 |         /honggfuzz/honggfuzz --threads 1 --quiet -z -q -v \
84 |         -i /cmin-seeds \
85 |         -o /readelf-honggfuzz/cmin-trial-${TRIAL} \
86 |         -- /binutils-honggfuzz/bin/readelf -a ___FILE___ > /dev/null
87 |     sleep 2s
88 | done
89 | 
90 | # Wait for fuzzers to finish
91 | sem --wait --id ${SEM_ID}
92 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/get_afl_cov.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash -u
2 | 
3 | export THIS_DIR=$(dirname $(readlink -f $0))
4 | export TARGET="/binutils-coverage/bin/readelf"
5 | export TIMEOUT="1m"
6 | 
7 | function get_cov() {
8 |     local QUEUE=$(realpath $1)
9 |     local OUT_DIR=$(dirname ${QUEUE})
10 |     local LLVM_COV_DIR=$(realpath "${QUEUE}/../llvm_cov")
11 |     local SEEDS_LIST="${LLVM_COV_DIR}/seeds.txt"
12 | 
13 |     timestamp_afl.py -o ${OUT_DIR}/timestamps.csv ${OUT_DIR}
14 | 
15 |     rm -f ${SEEDS_LIST}
16 |     for SEED in $(ls -rt ${QUEUE}); do
17 |         if [[ $SEED != id:* ]]; then
18 |             continue
19 |         fi
20 | 
21 |         echo "[*] processing ${SEED}"
22 | 
23 |         local SEED_PATH="${QUEUE}/${SEED}"
24 |         export LLVM_PROFILE_FILE="${LLVM_COV_DIR}/${SEED}.profraw"
25 | 
26 |         timeout --preserve-status ${TIMEOUT} ${TARGET} -a ${SEED_PATH} > /dev/null 2>&1
27 | 
28 |         echo "1,${LLVM_PROFILE_FILE}" >> ${SEEDS_LIST}
29 |         llvm-profdata-8 merge --sparse \
30 |             --input-files "${LLVM_COV_DIR}/seeds.txt" \
31 |             --output "${LLVM_PROFILE_FILE%.profraw}.profdata" \
32 |             --num-threads=5
33 | 
34 |         llvm-cov-8 export --summary-only ${TARGET} \
35 |             --instr-profile "${LLVM_PROFILE_FILE%.profraw}.profdata" \
36 |             --format text --num-threads=5 > "${LLVM_PROFILE_FILE%.profraw}.json"
37 |     done
38 | }
39 | 
40 | export -f get_cov
41 | 
42 | find ${THIS_DIR} -maxdepth 4 -name 'queue' -type d -print0 | parallel -0 -u get_cov {}
43 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/get_hfuzz_cov.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash -u
2 | 
3 | export THIS_DIR=$(dirname $(readlink -f $0))
4 | export TARGET="/binutils-coverage/bin/readelf"
5 | export TIMEOUT="1m"
6 | 
7 | function get_cov() {
8 |     local QUEUE=$(realpath $1)
9 |     local OUT_DIR=${QUEUE}
10 |     local LLVM_COV_DIR=$(realpath "${QUEUE}/llvm_cov")
11 |     local SEEDS_LIST="${LLVM_COV_DIR}/seeds.txt"
12 | 
13 |     timestamp_honggfuzz.py -o ${OUT_DIR}/timestamps.csv ${OUT_DIR}
14 | 
15 |     rm -f ${SEEDS_LIST}
16 |     for SEED in $(ls -rt ${QUEUE}); do
17 |         if [[ ${SEED} != *.honggfuzz.cov ]];
then 18 | continue 19 | fi 20 | 21 | echo "[*] processing ${SEED}" 22 | 23 | local SEED_PATH="${QUEUE}/${SEED}" 24 | export LLVM_PROFILE_FILE="${LLVM_COV_DIR}/${SEED}.profraw" 25 | 26 | timeout --preserve-status ${TIMEOUT} ${TARGET} -a ${SEED_PATH} > /dev/null 2>&1 27 | 28 | echo "1,${LLVM_PROFILE_FILE}" >> ${SEEDS_LIST} 29 | llvm-profdata-8 merge --sparse \ 30 | --input-files "${LLVM_COV_DIR}/seeds.txt" \ 31 | --output "${LLVM_PROFILE_FILE%.profraw}.profdata" \ 32 | --num-threads=5 33 | 34 | llvm-cov-8 export --summary-only ${TARGET} \ 35 | --instr-profile "${LLVM_PROFILE_FILE%.profraw}.profdata"\ 36 | --format text --num-threads=5 > "${LLVM_PROFILE_FILE%.profraw}.json" 37 | done 38 | } 39 | 40 | export -f get_cov 41 | 42 | find . -maxdepth 2 -path '*-honggfuzz/*-trial-*' -type d -print0 | parallel -0 -u get_cov {} 43 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/merge_cov.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | 4 | from functools import partial, reduce 5 | from itertools import product 6 | from pathlib import Path 7 | import json 8 | import logging 9 | import multiprocessing.pool as mpp 10 | 11 | import numpy as np 12 | import pandas as pd 13 | 14 | 15 | THIS_DIR = Path(__file__).parent 16 | 17 | FUZZERS = ('aflfast', 'aflplusplus', 'honggfuzz') 18 | TRIAL_LEN = 10 # Hours 19 | NUM_TRIALS = 5 20 | SEEDS = ('ascii', 'singleton', 'cmin') 21 | FORMATTER = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s') 22 | NUM_JOBS = NUM_TRIALS 23 | 24 | logger = logging.getLogger() 25 | 26 | 27 | def get_cov(fuzzer: str, seed: str, trial: int) -> pd.DataFrame: 28 | cov_dir = THIS_DIR / ('readelf-%s' % fuzzer) / ('%s-trial-%d' % (seed, trial)) / 'llvm_cov' 29 | assert cov_dir.exists() 30 | 31 | count_col = 'region_count_%d' % trial 32 | percent_col = 'region_percent_%d' % trial 33 | 34 | df = pd.read_csv(cov_dir.parent / 'timestamps.csv') 35 | df['time'] = df.unix_time - df.unix_time.iloc[0] 36 | df['dir'] = df.seed.apply(lambda x: Path(x).parent.name) 37 | df['seed'] = df.seed.apply(lambda x: Path(x).name) 38 | df[count_col] = np.nan 39 | df[percent_col] = np.nan 40 | 41 | # Drop crashes 42 | df = df.drop(df[df.dir == 'crashes'].index) 43 | 44 | for cov_file in sorted(list(cov_dir.glob('*.json'))): 45 | with cov_file.open() as inf: 46 | try: 47 | region_data = json.load(inf)['data'][0]['totals']['regions'] 48 | except json.JSONDecodeError: 49 | print('unable to read %s. 
Skipping' % cov_file)
50 |                 continue
51 |             reg_covered = region_data['covered']
52 |             reg_count = region_data['count']
53 |             df.loc[df.seed == cov_file.stem, count_col] = reg_covered
54 |             df.loc[df.seed == cov_file.stem, percent_col] = reg_covered * 100.0 / reg_count
55 | 
56 |     return df.set_index('time')[[count_col, percent_col]]
57 | 
58 | 
59 | def main():
60 |     """The main function."""
61 |     # Configure logger
62 |     handler = logging.StreamHandler()
63 |     handler.setFormatter(FORMATTER)
64 |     logger.addHandler(handler)
65 | 
66 |     logger.setLevel(logging.INFO)
67 | 
68 |     with mpp.Pool(processes=NUM_JOBS) as pool:
69 |         for fuzzer, seed in product(FUZZERS, SEEDS):
70 |             # Get raw trial data
71 |             logger.info('Getting %s-%s coverage', fuzzer, seed)
72 |             cov_func = partial(get_cov, fuzzer, seed)
73 |             trials = range(1, NUM_TRIALS + 1)
74 |             cov_data = pool.map(cov_func, trials)
75 | 
76 |             # Merge trial data and extend to trial length
77 |             logger.info('Merging coverage')
78 |             df = reduce(lambda x, y: x.join(y, how='outer'), cov_data)
79 |             df.loc[TRIAL_LEN * 60 * 60] = np.nan
80 |             df = df.ffill().cummax()
81 | 
82 |             # Save merged data
83 |             out_path = Path('%s-%s-cov.csv' % (fuzzer, seed))
84 |             logger.info('Saving coverage data to %s', out_path)
85 |             df.to_csv(out_path)
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     main()
90 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/plot_cov.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | 
3 | 
4 | from itertools import product
5 | from pathlib import Path
6 | import gzip
7 | 
8 | from matplotlib import rc, rcParams
9 | import matplotlib.pyplot as plt
10 | import pandas as pd
11 | import seaborn as sns
12 | 
13 | 
14 | DATA_DIR = Path(__file__).parent.parent / 'data'
15 | 
16 | 
17 | TRIAL_LEN = 10  # Hours
18 | PLOT_STEP = 10  # Seconds
19 | NUM_TRIALS = 5
20 | NUM_BOOTS = 2000
21 | FUZZERS = ('aflfast', 'aflplusplus', 'honggfuzz')
22 | SEEDS = ('ascii', 'singleton', 'cmin')
23 | 
24 | FUZZER_LABELS = dict(aflfast='AFLFast',
25 |                      aflplusplus='AFL++',
26 |                      honggfuzz='honggfuzz')
27 | SEED_LABELS = dict(ascii='Uninformed',
28 |                    singleton='Valid',
29 |                    cmin='Corpus')
30 | 
31 | rc('pdf', fonttype=42)
32 | rc('ps', fonttype=42)
33 | 
34 | rc_fonts = {
35 |     'font.family': 'serif',
36 |     'text.usetex': True,
37 |     'text.latex.preamble':
38 |         r"""
39 |         \RequirePackage[T1]{fontenc}
40 |         \RequirePackage[tt=false, type1=true]{libertine}
41 |         \RequirePackage[varqu]{zi4}
42 |         \RequirePackage[libertine]{newtxmath}
43 |         """,
44 | }
45 | rcParams.update(rc_fonts)
46 | 
47 | 
48 | def gen_plot_data() -> pd.DataFrame:
49 |     """Generate the data to plot."""
50 |     dfs = []
51 |     trials = range(1, NUM_TRIALS + 1)
52 |     cols = ['region_percent_%d' % trial for trial in trials]
53 | 
54 |     for fuzzer, seed in product(FUZZERS, SEEDS):
55 |         csv_path = DATA_DIR / f'{fuzzer}-{seed}-cov.csv.gz'
56 | 
57 |         print(f'Parsing {csv_path}...')
58 |         with gzip.open(csv_path, 'rb') as inf:
59 |             df = pd.read_csv(inf).set_index('time')
60 | 
61 |         df = df.loc[~df.index.duplicated(keep='first')]
62 |         new_idx = pd.RangeIndex(start=0, stop=TRIAL_LEN * 60 * 60,
63 |                                 step=PLOT_STEP)
64 |         df = df.reindex(new_idx, method='ffill')
65 |         df.index = df.index / 60 / 60
66 |         df['time'] = df.index
67 |         df = df.melt(id_vars='time',
68 |                      value_name='region_percent',
69 |                      value_vars=cols)
70 |         df['Fuzzer'] = FUZZER_LABELS[fuzzer]
71 |         df['Seed'] = SEED_LABELS[seed]
72 | 
73 |         dfs.append(df)
74 | 
75 |     return pd.concat(dfs)
76 | 
77
| 78 | def main(): 79 | """The main function.""" 80 | 81 | print('Generating plot data...') 82 | plot_data = gen_plot_data() 83 | 84 | # Do the actual plotting 85 | print('plotting results...') 86 | sns.set_theme(style='ticks') 87 | fig = plt.figure() 88 | ax = fig.add_subplot(111) 89 | 90 | ax = sns.lineplot(ax=ax, data=plot_data, x='time', y='region_percent', 91 | hue='Seed', style='Fuzzer', ci=95, n_boot=NUM_BOOTS) 92 | 93 | # Tidy up plot 94 | xticks = [0, 1, 2, 5, 10] # Hours 95 | ax.set(xlabel='Time (h)', 96 | ylabel='Regions (\%)', 97 | xscale='symlog', 98 | xticks=xticks, 99 | xticklabels=[f'{x}' for x in xticks]) 100 | ax.set_ylim(bottom=0) 101 | ax.set_xlim(left=0, right=TRIAL_LEN) 102 | ax.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.5, 1.3)) 103 | sns.despine() 104 | 105 | # Save plot 106 | fig.savefig('readelf-experiment.pdf', bbox_inches='tight') 107 | 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /fuzzing/readelf/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | seaborn 4 | -------------------------------------------------------------------------------- /fuzzing/readelf/seeds/cmin-seeds.tar.xz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HexHive/fuzzing-seed-selection/f72d2bb5325c1d727ca982eb1d44ac8a65e179e8/fuzzing/readelf/seeds/cmin-seeds.tar.xz -------------------------------------------------------------------------------- /fuzzing/readelf/seeds/uninformed-seed: -------------------------------------------------------------------------------- 1 | ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/freetype2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build instrumented freetype for AFL 19 | RUN mkdir /build 20 | RUN cd freetype-2.5.3 && \ 21 | CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \ 22 | CFLAGS="-m32 -fsanitize=address" \ 23 | CXXFLAGS="-m32 -fsanitize=address" \ 24 | LDFLAGS="-m32 -fsanitize=address" \ 25 | ./configure --prefix=/build --host=i386-linux --without-png 26 | RUN cd freetype-2.5.3 && \ 27 | make clean && \ 28 | make -j && \ 29 | make install 30 | RUN cd ttf_bin && rm -f char2svg && \ 31 | /afl/afl-clang-fast++ -m32 -fsanitize=address \ 32 | -I/build/include/freetype2 -o char2svg char2svg.cpp \ 33 | -L/build/lib/ -lfreetype 34 | 35 | RUN cp /ttf_bin/char2svg /build/bin/ 36 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 37 | /build/bin/char2svg 38 | 39 | # The `build` directory can be extracted to the host machine 40 | -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # 
Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git build-essential gcc-multilib g++-multilib wget \ 8 | python3-pip 9 | 10 | # Install LLVM 8 11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 12 | RUN apt-get install -y llvm-8 clang-8 13 | 14 | # Get helper scripts 15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 16 | pip3 install fuzzing-seed-selection/scripts 17 | 18 | # Get the freetype source 19 | RUN wget -O - https://download.savannah.gnu.org/releases/freetype/freetype-2.5.3.tar.gz | tar xz 20 | 21 | # Create custom driver for freetype, taken from 22 | # https://www.freetype.org/freetype2/docs/tutorial/example5.cpp 23 | RUN mkdir ttf_bin 24 | ADD https://www.freetype.org/freetype2/docs/tutorial/example5.cpp /ttf_bin/char2svg.cpp 25 | -------------------------------------------------------------------------------- /fuzzing/real-world/freetype2/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/freetype2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage freetype 6 | RUN mkdir /build 7 | RUN cd freetype-2.5.3 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32" \ 12 | ./configure --prefix=/build --host=i386-linux --without-png 13 | RUN cd freetype-2.5.3 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN cd ttf_bin && rm -f char2svg && \ 18 | clang++-8 -m32 -fprofile-instr-generate -fcoverage-mapping \ 19 | -I/build/include/freetype2 -o char2svg \ 20 | char2svg.cpp -L/build/lib/ -lfreetype 21 | 22 | RUN cp /ttf_bin/char2svg /build/bin/ 23 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 24 | /build/bin/char2svg 25 | 26 | # The `build` directory can be extracted to the host machine 27 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/librsvg/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build AFL librsvg 19 | RUN mkdir /build 20 | RUN cd librsvg-2.40.20 && \ 21 | PKG_CONFIG_PATH=/usr/lib/i386-linux-gnu/pkgconfig \ 22 | CC=/afl/afl-clang-fast \ 23 | CXX=/afl/afl-clang-fast++ \ 24 | CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \ 25 | LDFLAGS="-L/usr/lib/i386-linux-gnu -m32 -fsanitize=address" \ 26 | ./autogen.sh --prefix=/build \ 27 | --host=i386-linux-gnu --enable-introspection=no 28 | RUN cd librsvg-2.40.20 && \ 29 | make clean && \ 30 | make -j && \ 31 | make install 32 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 33 | /build/bin/rsvg-convert 34 | 35 | # The `build` directory can be extracted to the host machine 36 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/base.Dockerfile: 
--------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Install dependencies
6 | RUN dpkg --add-architecture i386
7 | RUN export DEBIAN_FRONTEND=noninteractive && \
8 |     apt-get update && \
9 |     apt-get -y install git build-essential gcc-multilib g++-multilib \
10 |         libc6-dev:i386 autoconf pkg-config libtool libgirepository1.0-dev \
11 |         gtk-doc-tools libgdk-pixbuf2.0-dev:i386 libglib2.0-dev:i386 \
12 |         libgio2.0-cil-dev libxml2-dev:i386 libpango1.0-dev:i386 \
13 |         libpangocairo-1.0.0:i386 libpangoft2-1.0.0:i386 libcairo2-dev:i386 \
14 |         libcroco3-dev:i386 wget python3-pip
15 | 
16 | # Install LLVM 8
17 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
18 | RUN apt-get install -y llvm-8 clang-8
19 | 
20 | # Get helper scripts
21 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
22 |     pip3 install fuzzing-seed-selection/scripts
23 | 
24 | # Get the librsvg source
25 | RUN wget -O - https://gitlab.gnome.org/GNOME/librsvg/-/archive/2.40.20/librsvg-2.40.20.tar.gz | tar xz
26 | 
27 | # We have to use 'autoreconf -i' or 'autogen.sh' because the tarball does not
28 | # ship a configure script.
29 | 
30 | # For whatever reason, autoreconf (autogen.sh) couldn't find gtk-doc.make
31 | RUN ln -s /usr/share/gtk-doc/data/gtk-doc.make librsvg-2.40.20
32 | 
33 | # gobject-introspection ultimately has to be disabled because we can't install
34 | # the 32-bit version of it without breaking dependencies. There is also a weird
35 | # case where it tries to link against the 64-bit version of libfreetype.so.
36 | # Hence the extra -L flag in the LDFLAGS
37 | -------------------------------------------------------------------------------- /fuzzing/real-world/librsvg/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/librsvg/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Build coverage librsvg
6 | RUN mkdir /build
7 | RUN cd librsvg-2.40.20 && \
8 |     PKG_CONFIG_PATH=/usr/lib/i386-linux-gnu/pkgconfig \
9 |     CC=clang-8 CXX=clang++-8 \
10 |     CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
11 |     CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
12 |     LDFLAGS="-L/usr/lib/i386-linux-gnu -m32" \
13 |     ./autogen.sh --prefix=/build --enable-introspection=no \
14 |         --host=i386-linux
15 | RUN cd librsvg-2.40.20 && \
16 |     make clean && \
17 |     make -j && \
18 |     make install
19 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \
20 |     -o /build/lib /build/bin/rsvg-convert
21 | 
22 | # The `build` directory can be extracted to the host machine
23 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/afl.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/libtiff/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Get and build AFL
6 | ENV AFL_CC=clang-8
7 | ENV AFL_CXX=clang++-8
8 | 
9 | RUN git clone --no-checkout https://github.com/google/afl && \
10 |     git -C afl checkout v256b
11 | RUN cd afl && \
12 |     export LLVM_CONFIG=llvm-config-8 && \
13 |     export CC=$AFL_CC && \
14 |     export CXX=$AFL_CXX && \
15 |     make -j && \
16 |     make -j -C llvm_mode
17 | 
18 | # Build AFL libtiff
19 | RUN mkdir /build
20 | RUN cd tiff-4.0.9 && \
21 |     CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \
22 |     CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \
23 |     LDFLAGS="-m32
-fsanitize=address" \ 24 | ./configure --prefix=/build --host=i386-linux-gnu 25 | RUN cd tiff-4.0.9 && \ 26 | make clean && \ 27 | make -j && \ 28 | make install 29 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 30 | /build/bin/tiff2pdf 31 | 32 | # The `build` directory can be extracted to the host machine 33 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | # Install dependencies 4 | RUN dpkg --add-architecture i386 5 | RUN export DEBIAN_FRONTEND=noninteractive && \ 6 | apt-get update && \ 7 | apt-get -y install git wget build-essential gcc-multilib g++-multilib \ 8 | libc6-dev:i386 python3-pip 9 | 10 | # Install LLVM 8 11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 12 | RUN apt-get install -y llvm-8 clang-8 13 | 14 | # Get helper scripts 15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 16 | pip3 install fuzzing-seed-selection/scripts 17 | 18 | # Get the libtiff source 19 | RUN wget -O - http://download.osgeo.org/libtiff/tiff-4.0.9.tar.gz | tar xz 20 | -------------------------------------------------------------------------------- /fuzzing/real-world/libtiff/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libtiff/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage libtiff 6 | RUN mkdir /build 7 | RUN cd tiff-4.0.9 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32 -fno-stack-protector" \ 12 | ./configure --prefix=/build --host=i386-linux 13 | RUN cd tiff-4.0.9 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \ 18 | -o /build/lib /build/bin/tiff2pdf 19 | 20 | # The `build` directory can be extracted to the host machine 21 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libxml2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Build AFL libxml2 19 | RUN mkdir /build 20 | RUN cd libxml2-2.9.0 && \ 21 | CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ \ 22 | CFLAGS="-m32 -fsanitize=address" \ 23 | CXXFLAGS="-m32 -fsanitize=address" \ 24 | LDFLAGS="-m32 -fsanitize=address" \ 25 | ./configure --prefix=/build --host=i386-linux 26 | RUN cd libxml2-2.9.0 && \ 27 | make clean && \ 28 | make -j && \ 29 | make install 30 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \ 31 | /build/bin/xmllint 32 | 33 | # The `build` directory can be extracted to the host machine 34 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/base.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | MAINTAINER 
Adrian Herrera 4 | 5 | # Install dependencies 6 | RUN dpkg --add-architecture i386 7 | RUN export DEBIAN_FRONTEND=noninteractive && \ 8 | apt-get update && \ 9 | apt-get -y install git wget build-essential gcc-multilib g++-multilib \ 10 | libc6-dev:i386 python3-pip 11 | 12 | # Install LLVM 8 13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add - 14 | RUN apt-get install -y llvm-8 clang-8 15 | 16 | # Get helper scripts 17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \ 18 | pip3 install fuzzing-seed-selection/scripts 19 | 20 | # Get the libxml2 source 21 | RUN wget -O - ftp://xmlsoft.org/libxml2/libxml2-2.9.0.tar.gz | tar xz 22 | -------------------------------------------------------------------------------- /fuzzing/real-world/libxml2/coverage.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/libxml2/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Build coverage libxml2 6 | RUN mkdir /build 7 | RUN cd libxml2-2.9.0 && \ 8 | CC=clang-8 CXX=clang++-8 \ 9 | CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 10 | CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \ 11 | LDFLAGS="-m32" \ 12 | ./configure --prefix=/build --host=i386-linux 13 | RUN cd libxml2-2.9.0 && \ 14 | make clean && \ 15 | make -j && \ 16 | make install 17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \ 18 | -o /build/lib /build/bin/xmllint 19 | 20 | # The `build` directory can be extracted to the host machine 21 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/afl-toolchain-llvm.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER /afl/afl-clang-fast) 2 | set(CMAKE_CXX_COMPILER /afl/afl-clang-fast++) 3 | set(CMAKE_BUILD_TYPE debug) 4 | set(BUILD_GTK_TESTS off) 5 | set(BUILD_QT5_TESTS off) 6 | set(BUILD_CPP_TESTS off) 7 | set(ENABLE_GTK_DOC off) 8 | set(ENABLE_QT5 off) 9 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 -fsanitize=address") 10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 -fsanitize=address") 11 | set(CMAKE_SYSTEM_PROCESSOR "i386") 12 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/afl.Dockerfile: -------------------------------------------------------------------------------- 1 | FROM seed-selection/real-world/poppler/base 2 | 3 | MAINTAINER Adrian Herrera 4 | 5 | # Get and build AFL 6 | ENV AFL_CC=clang-8 7 | ENV AFL_CXX=clang++-8 8 | 9 | RUN git clone --no-checkout https://github.com/google/afl && \ 10 | git -C afl checkout v256b 11 | RUN cd afl && \ 12 | export LLVM_CONFIG=llvm-config-8 && \ 13 | export CC=$AFL_CC && \ 14 | export CXX=$AFL_CXX && \ 15 | make -j && \ 16 | make -j -C llvm_mode 17 | 18 | # Configure AFL poppler 19 | RUN mkdir /build 20 | RUN mkdir poppler-0.64.0-afl/build && \ 21 | cd poppler-0.64.0-afl/build && \ 22 | cmake .. 
\
23 |         -DCMAKE_TOOLCHAIN_FILE=/afl-toolchain-llvm.cmake \
24 |         -DCMAKE_INSTALL_PREFIX=/build \
25 |         -DCMAKE_C_FLAGS="-m32 -fsanitize=address" \
26 |         -DCMAKE_CXX_FLAGS="-m32 -fsanitize=address" \
27 |         -DCMAKE_SYSTEM_PROCESSOR="i386"
28 | 
29 | # For the 32-bit build it is necessary to reinstall libopenjp2, as the 32-bit
30 | # and 64-bit versions are mutually exclusive
31 | RUN apt-get -y install libopenjp2-7-dev:i386
32 | 
33 | # Actually make the AFL poppler
34 | RUN cd poppler-0.64.0-afl/build && \
35 |     make clean && make -j && make install
36 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
37 |     /build/bin/pdftotext
38 | 
39 | # The `build` directory can be extracted to the host machine
40 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/base.Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN dpkg --add-architecture i386
5 | RUN export DEBIAN_FRONTEND=noninteractive && \
6 |     apt-get update && \
7 |     apt-get -y install git build-essential wget gcc-multilib g++-multilib \
8 |         libc6-dev:i386 xz-utils pkg-config libfreetype6-dev libfontconfig-dev \
9 |         libjpeg-dev libopenjp2-7-dev cmake libfreetype6-dev:i386 \
10 |         libfontconfig-dev:i386 libjpeg-dev:i386 python3-pip
11 | 
12 | # Install LLVM 8
13 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
14 | RUN apt-get install -y llvm-8 clang-8
15 | 
16 | # Get helper scripts
17 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
18 |     pip3 install fuzzing-seed-selection/scripts
19 | 
20 | # Get the poppler source
21 | RUN wget -O - https://poppler.freedesktop.org/poppler-0.64.0.tar.xz | tar xJ
22 | 
23 | # Make a copy of the source so that we don't have to reinstall libopenjp2-7
24 | # again
25 | RUN cp -r poppler-0.64.0 poppler-0.64.0-afl
26 | 
27 | # Add the poppler build toolchain files
28 | ADD toolchain.cmake /
29 | ADD afl-toolchain-llvm.cmake /
30 | 
31 | # It seems that the compiler flags set in toolchain.cmake are not used by cmake, hence the explicit -DCMAKE_*_FLAGS passed in the afl and coverage builds
32 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/poppler/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Configure coverage poppler
6 | RUN mkdir /build
7 | RUN mkdir poppler-0.64.0/build && \
8 |     cd poppler-0.64.0/build && \
9 |     cmake .. -DCMAKE_TOOLCHAIN_FILE=/toolchain.cmake \
10 |         -DCMAKE_INSTALL_PREFIX=/build \
11 |         -DCMAKE_C_FLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
12 |         -DCMAKE_CXX_FLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
13 |         -DCMAKE_SYSTEM_PROCESSOR="i386"
14 | 
15 | # For the 32-bit build it is necessary to reinstall libopenjp2, as the 32-bit
16 | # and 64-bit versions are mutually exclusive
17 | RUN apt-get -y install libopenjp2-7-dev:i386
18 | 
19 | # Actually make the coverage poppler
20 | RUN cd poppler-0.64.0/build && \
21 |     make clean && make -j && make install
22 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
23 |     /build/bin/pdftotext
24 | 
25 | # The `build` directory can be extracted to the host machine
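# A minimal sketch of that extraction (the image/container names here are
# assumptions; fuzzing/extract-from-container.sh in this repo automates the
# same steps):
#
#   docker build -t seed-selection/real-world/poppler/coverage \
#       -f coverage.Dockerfile .
#   docker create --name poppler-cov seed-selection/real-world/poppler/coverage
#   docker cp poppler-cov:/build ./poppler-coverage-build
#   docker rm poppler-cov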
26 | -------------------------------------------------------------------------------- /fuzzing/real-world/poppler/toolchain.cmake: --------------------------------------------------------------------------------
1 | set(CMAKE_C_COMPILER clang-8)
2 | set(CMAKE_CXX_COMPILER clang++-8)
3 | set(CMAKE_BUILD_TYPE debug)
4 | set(BUILD_GTK_TESTS off)
5 | set(BUILD_QT5_TESTS off)
6 | set(BUILD_CPP_TESTS off)
7 | set(ENABLE_GTK_DOC off)
8 | set(ENABLE_QT5 off)
9 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m32 -fprofile-instr-generate -fcoverage-mapping")
10 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32 -fprofile-instr-generate -fcoverage-mapping")
11 | set(CMAKE_SYSTEM_PROCESSOR "i386")
12 | -------------------------------------------------------------------------------- /fuzzing/real-world/sox/afl.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/sox/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Get and build AFL
6 | ENV AFL_CC=clang-8
7 | ENV AFL_CXX=clang++-8
8 | 
9 | RUN git clone --no-checkout https://github.com/google/afl && \
10 |     git -C afl checkout v256b
11 | RUN cd afl && \
12 |     export LLVM_CONFIG=llvm-config-8 && \
13 |     export CC=$AFL_CC && \
14 |     export CXX=$AFL_CXX && \
15 |     make -j && \
16 |     make -j -C llvm_mode
17 | 
18 | # Build AFL SoX
19 | RUN mkdir /build
20 | RUN cd sox-14.4.2 && \
21 |     CC=/afl/afl-clang-fast CXX=/afl/afl-clang-fast++ LD=ldd \
22 |     CFLAGS="-m32 -fsanitize=address" CXXFLAGS="-m32 -fsanitize=address" \
23 |     LDFLAGS="-m32 -fsanitize=address" \
24 |     ./configure --prefix=/build --host=i386-linux
25 | RUN cd sox-14.4.2 && \
26 |     make clean && \
27 |     make -s && \
28 |     make install
29 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py -o /build/lib \
30 |     /build/bin/sox
31 | 
32 | # The `build` directory can be extracted to the host machine
33 | -------------------------------------------------------------------------------- /fuzzing/real-world/sox/base.Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:18.04
2 | 
3 | # Install dependencies
4 | RUN dpkg --add-architecture i386
5 | RUN export DEBIAN_FRONTEND=noninteractive && \
6 |     apt-get update && \
7 |     apt-get -y install git build-essential gcc-multilib g++-multilib \
8 |         libmad0-dev:i386 libc6-dev:i386 wget python3-pip
9 | 
10 | # Install LLVM 8
11 | RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
12 | RUN apt-get install -y llvm-8 clang-8
13 | 
14 | # Get helper scripts
15 | RUN git clone https://github.com/HexHive/fuzzing-seed-selection && \
16 |     pip3 install fuzzing-seed-selection/scripts
17 | 
18 | # Get the SoX source
19 | RUN wget -O - https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz | tar xz
20 |
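# The afl and coverage images in this directory build FROM this image under
# the tag seed-selection/real-world/sox/base, so build it first. For example
# (the afl tag below is an arbitrary choice):
#
#   docker build -t seed-selection/real-world/sox/base -f base.Dockerfile .
#   docker build -t seed-selection/real-world/sox/afl -f afl.Dockerfile .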
-------------------------------------------------------------------------------- /fuzzing/real-world/sox/coverage.Dockerfile: --------------------------------------------------------------------------------
1 | FROM seed-selection/real-world/sox/base
2 | 
3 | MAINTAINER Adrian Herrera
4 | 
5 | # Build coverage SoX
6 | RUN mkdir /build
7 | RUN cd sox-14.4.2 && \
8 |     CC=clang-8 CXX=clang++-8 \
9 |     CFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
10 |     CXXFLAGS="-m32 -fprofile-instr-generate -fcoverage-mapping" \
11 |     LDFLAGS="-m32" \
12 |     ./configure --prefix=/build --host=i386-linux
13 | RUN cd sox-14.4.2 && \
14 |     make clean && \
15 |     make -s && \
16 |     make install
17 | RUN LD_LIBRARY_PATH=/build/lib get_libs.py \
18 |     -o /build/lib /build/bin/sox
19 | 
20 | # The `build` directory can be extracted to the host machine
21 | -------------------------------------------------------------------------------- /optimin/.gitignore: --------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 | 
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | -------------------------------------------------------------------------------- /optimin/CMakeLists.txt: --------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.7)
2 | 
3 | project(optimin
4 |     LANGUAGES CXX
5 |     DESCRIPTION "SAT-based fuzzing corpus minimizer"
6 | )
7 | 
8 | set(CMAKE_CXX_STANDARD 17)
9 | set(CMAKE_CXX_STANDARD_REQUIRED ON)
10 | set(CMAKE_CXX_EXTENSIONS OFF)
11 | 
12 | if(USE_Z3)
13 |     find_package(Z3 REQUIRED config)
14 |     message(STATUS "Found Z3 ${Z3_VERSION_STRING}")
15 |     include_directories(${Z3_CXX_INCLUDE_DIRS})
16 |     link_libraries(${Z3_LIBRARIES})
17 | endif(USE_Z3)
18 | 
19 | find_package(Boost COMPONENTS container REQUIRED)
20 | message(STATUS "Found Boost ${Boost_VERSION_STRING}")
21 | 
22 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -O3")
23 | 
24 | include_directories(${Boost_INCLUDE_DIR})
25 | 
26 | add_subdirectory(src)
27 | -------------------------------------------------------------------------------- /optimin/Dockerfile: --------------------------------------------------------------------------------
1 | FROM ubuntu:latest
2 | 
3 | # Install prerequisites
4 | RUN export DEBIAN_FRONTEND=noninteractive && \
5 |     apt update && \
6 |     apt install -y git build-essential cmake \
7 |         libboost-container-dev libz-dev python3
8 | 
9 | # Add OptiMin source
10 | ADD CMakeLists.txt /optimin/
11 | ADD src /optimin/src
12 | 
13 | # Build OptiMin
14 | RUN mkdir -p /optimin/build
15 | RUN cd /optimin/build && \
16 |     cmake .. && \
17 |     make -j && \
18 |     make install
19 | 
20 | # Build EvalMaxSAT
21 | RUN git clone https://github.com/FlorentAvellaneda/EvalMaxSAT
22 | RUN mkdir -p EvalMaxSAT/build
23 | RUN cd EvalMaxSAT/build && \
24 |     cmake ..
&& \ 25 | make -j && \ 26 | make install 27 | 28 | # Add OptiMin wrapper 29 | ADD optimin.py /optimin/ 30 | ENTRYPOINT ["/optimin/optimin.py"] 31 | -------------------------------------------------------------------------------- /optimin/LICENSE.jsoncpp: -------------------------------------------------------------------------------- 1 | The JsonCpp library's source code, including accompanying documentation, 2 | tests and demonstration applications, are licensed under the following 3 | conditions... 4 | 5 | Baptiste Lepilleur and The JsonCpp Authors explicitly disclaim copyright in all 6 | jurisdictions which recognize such a disclaimer. In such jurisdictions, 7 | this software is released into the Public Domain. 8 | 9 | In jurisdictions which do not recognize Public Domain property (e.g. Germany as of 10 | 2010), this software is Copyright (c) 2007-2010 by Baptiste Lepilleur and 11 | The JsonCpp Authors, and is released under the terms of the MIT License (see below). 12 | 13 | In jurisdictions which recognize Public Domain property, the user of this 14 | software may choose to accept it either as 1) Public Domain, 2) under the 15 | conditions of the MIT License (see below), or 3) under the terms of dual 16 | Public Domain/MIT License conditions described here, as they choose. 17 | 18 | The MIT License is about as close to Public Domain as a license can get, and is 19 | described in clear, concise terms at: 20 | 21 | http://en.wikipedia.org/wiki/MIT_License 22 | 23 | The full text of the MIT License follows: 24 | 25 | ======================================================================== 26 | Copyright (c) 2007-2010 Baptiste Lepilleur and The JsonCpp Authors 27 | 28 | Permission is hereby granted, free of charge, to any person 29 | obtaining a copy of this software and associated documentation 30 | files (the "Software"), to deal in the Software without 31 | restriction, including without limitation the rights to use, copy, 32 | modify, merge, publish, distribute, sublicense, and/or sell copies 33 | of the Software, and to permit persons to whom the Software is 34 | furnished to do so, subject to the following conditions: 35 | 36 | The above copyright notice and this permission notice shall be 37 | included in all copies or substantial portions of the Software. 38 | 39 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 40 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 41 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 42 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 43 | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 44 | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 45 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 46 | SOFTWARE. 47 | ======================================================================== 48 | (END LICENSE TEXT) 49 | 50 | The MIT license is compatible with both the GPL and commercial 51 | software, affording one all of the rights of Public Domain with the 52 | minor nuisance of being required to keep the above copyright notice 53 | and license text in the source code. Note also that by accepting the 54 | Public Domain "license" you can re-license your copy using whatever 55 | license you like. 
56 | -------------------------------------------------------------------------------- /optimin/optimin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around OptiMin. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from shutil import which 13 | from tempfile import NamedTemporaryFile 14 | from typing import Dict, List, Optional, TextIO, Tuple 15 | import re 16 | import subprocess 17 | 18 | 19 | WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$') 20 | 21 | 22 | def parse_args() -> Namespace: 23 | """Parse command-line arguments.""" 24 | parser = ArgumentParser(description='Run OptiMin to produce a minimized corpus') 25 | parser.add_argument('-j', '--jobs', type=int, default=0, 26 | help='Number of minimization threads') 27 | parser.add_argument('-e', '--edge-only', action='store_true', 28 | help='Use edge coverage only, ignore hit counts') 29 | parser.add_argument('-w', '--weights', metavar='CSV', type=Path, 30 | help='Path to weights CSV') 31 | parser.add_argument('corpus', type=Path, help='Path to input corpus') 32 | return parser.parse_args() 33 | 34 | 35 | def get_seed_mapping(inf: TextIO) -> Dict[int, str]: 36 | """ 37 | Retrieve the mapping of literal identifiers (integers) to seed names 38 | (strings) from the WCNF file. 39 | """ 40 | mapping = {} 41 | for line in inf: 42 | # This starts the constraint listing 43 | if line.startswith('p wcnf '): 44 | break 45 | 46 | match = WCNF_SEED_MAP_RE.match(line.strip()) 47 | if not match: 48 | continue 49 | 50 | mapping[int(match.group(1))] = match.group(2) 51 | 52 | return mapping 53 | 54 | 55 | def parse_maxsat_out(out: List[str], mapping: Dict[int, str]) -> Tuple[Optional[List[str]], Optional[float]]: 56 | """ 57 | Parse the output from EvalMaxSat. 58 | 59 | Returns a tuple containing: 60 | 61 | 1. The list of seeds that make up the solution, or `None` if a solution 62 | could not be found. 63 | 2. The execution time. 64 | """ 65 | solution = None 66 | exec_time = None 67 | 68 | for line in out: 69 | # Solution status 70 | if line.startswith('s ') and 'OPTIMUM FOUND' not in line: 71 | # No optimum solution found 72 | break 73 | 74 | # Solution values 75 | if line.startswith('v '): 76 | vals = [int(v) for v in line[2:].split(' ')] 77 | solution = [mapping[v] for v in vals if v > 0] 78 | 79 | # Execution time 80 | if line.startswith('c Total time: '): 81 | toks = line.split(' ') 82 | exec_time = float(toks[3]) 83 | units = toks[4] 84 | 85 | # TODO other units to worry about? 86 | if units == 'ms': 87 | exec_time = exec_time / 1000 88 | 89 | return solution, exec_time 90 | 91 | 92 | def main(): 93 | """The main function.""" 94 | args = parse_args() 95 | 96 | # Check binaries are available 97 | optimin = which('afl-showmap-maxsat') 98 | if not optimin: 99 | raise Exception('Cannot find OptiMin. Check PATH') 100 | eval_max_sat = which('EvalMaxSAT_bin') 101 | if not eval_max_sat: 102 | raise Exception('Cannot find EvalMaxSAT. 
Check PATH') 103 | 104 | # Configure optimin 105 | optimin_args = [optimin, '-p'] 106 | if args.edge_only: 107 | optimin_args.append('-e') 108 | if args.weights: 109 | optimin_args.extend(['-w', str(args.weights)]) 110 | 111 | with NamedTemporaryFile() as wcnf: 112 | print(f'[*] Running OptiMin on {args.corpus}') 113 | optimin_args.extend(['-o', wcnf.name, '--', str(args.corpus)]) 114 | subprocess.run(optimin_args, check=True) 115 | 116 | with open(wcnf.name, 'r') as inf: 117 | seed_map = get_seed_mapping(inf) 118 | 119 | print('[*] Running EvalMaxSAT on WCNF') 120 | proc = subprocess.run([eval_max_sat, wcnf.name, '-p', f'{args.jobs}'], 121 | check=True, stdout=subprocess.PIPE, 122 | encoding='utf-8') 123 | print('[+] EvalMaxSAT completed') 124 | maxsat_out = [line.strip() for line in proc.stdout.split('\n')] 125 | 126 | print('[*] Parsing EvalMaxSAT output') 127 | solution, exec_time = parse_maxsat_out(maxsat_out, seed_map) 128 | if not solution: 129 | raise Exception(f'Unable to find optimum solution for {args.corpus}') 130 | 131 | print(f'[+] Solution found for {args.corpus}\n') 132 | print('[+] Total time: %.02f sec' % exec_time) 133 | print(f'[+] Num. seeds: {len(solution)}\n') 134 | 135 | print('\n'.join(solution)) 136 | 137 | 138 | if __name__ == '__main__': 139 | main() 140 |
-------------------------------------------------------------------------------- /optimin/src/AFLShowmapZ3.cpp: -------------------------------------------------------------------------------- 1 | /** 2 | * Perform an optimal (if possible) fuzzing corpus minimization based on 3 | * afl-showmap's edge coverage. 4 | * 5 | * Author: Adrian Herrera 6 | */ 7 | 8 | #include <algorithm> 9 | #include <chrono> 10 | #include <cstdlib> 11 | #include <fstream> 12 | #include <iostream> 13 | #include <iterator> 14 | #include <numeric> 15 | #include <string> 16 | #include <vector> 17 | #include <unistd.h> 18 | 19 | #include "Common.h" 20 | #include "ProgressBar.h" 21 | #include "Z3Common.h" 22 | 23 | #include <z3++.h> 24 | 25 | // This is based on the human class count in `count_class_human[256]` in 26 | // `afl-showmap.c` 27 | static constexpr uint32_t MAX_EDGE_FREQ = 8; 28 | 29 | static void Usage(const char *Argv0) { 30 | std::cerr << '\n' << Argv0 << " [ options ] -- /path/to/corpus_dir\n\n"; 31 | std::cerr << "Optional parameters:\n\n"; 32 | std::cerr << " -p - Show progress bar\n"; 33 | std::cerr << " -e - Use edge coverage only, ignore hit counts\n"; 34 | std::cerr << " -h - Print this message\n"; 35 | std::cerr << " -s smt2 - Save SMT2\n"; 36 | std::cerr << " -w weights - CSV containing seed weights (see README)\n\n"; 37 | std::cerr << std::endl; 38 | 39 | std::exit(1); 40 | } 41 | 42 | int main(int Argc, char *Argv[]) { 43 | bool ShowProg = false; 44 | bool EdgesOnly = false; 45 | std::string SMTOutFile = ""; 46 | std::string WeightsFile; 47 | WeightsMap Weights; 48 | int Opt; 49 | ProgressBar Prog; 50 | 51 | std::chrono::time_point<std::chrono::steady_clock> StartTime, EndTime; 52 | std::chrono::seconds Duration; 53 | 54 | std::cout << "afl-showmap corpus minimization\n\n"; 55 | 56 | // Parse command-line options 57 | while ((Opt = getopt(Argc, Argv, "+pehs:w:")) > 0) { 58 | switch (Opt) { 59 | case 'p': 60 | // Show progress bar 61 | ShowProg = true; 62 | break; 63 | case 'e': 64 | // Solve for edge coverage only (not frequency of edge coverage) 65 | EdgesOnly = true; 66 | break; 67 | case 'h': 68 | // Help 69 | Usage(Argv[0]); 70 | break; 71 | case 's': 72 | // SMT2 file 73 | SMTOutFile = optarg; 74 | break; 75 | case 'w': 76 | // Weights file 77 | WeightsFile = optarg; 78 | break; 79 | default: 80 | Usage(Argv[0]); 81 | } 82 | } 83 | 84 | if (optind >= Argc) { 85 | Usage(Argv[0]); 86 | } 87 | 88 | const char *CorpusDir = Argv[optind]; 89 | 90 | // Parse weights 91 | // 92 | // Weights are stored in CSV file mapping a seed file name to an integer 93 | // greater than zero. 94 | if (!WeightsFile.empty()) { 95 | std::cout << "[*] Reading weights from `" << WeightsFile << "`... " 96 | << std::flush; 97 | 98 | StartTime = std::chrono::steady_clock::now(); 99 | std::ifstream IFS(WeightsFile); 100 | GetZ3Weights(IFS, Weights); 101 | IFS.close(); 102 | EndTime = std::chrono::steady_clock::now(); 103 | Duration = 104 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 105 | 106 | std::cout << Duration.count() << 's' << std::endl; 107 | } 108 | 109 | // Get seed coverage 110 | // 111 | // Iterate over the corpus directory, which should contain afl-showmap-style 112 | // output files. Read each of these files and store them in the appropriate 113 | // data structures. 114 | struct dirent *DP; 115 | DIR *DirFD; 116 | AFLCoverageVector Cov; 117 | 118 | Z3ExprSet SeedExprs; 119 | Z3CoverageMap SeedCoverage; 120 | 121 | z3::context Ctx; 122 | z3::optimize Optimizer(Ctx); 123 | 124 | if (!ShowProg) { 125 | std::cout << "[*] Reading coverage in `" << CorpusDir << "`... " 126 | << std::flush; 127 | } 128 | StartTime = std::chrono::steady_clock::now(); 129 | 130 | if ((DirFD = opendir(CorpusDir)) == nullptr) { 131 | std::cerr << "[-] Unable to open corpus directory" << std::endl; 132 | return 1; 133 | } 134 | 135 | size_t SeedCount = 0; 136 | const size_t NumSeeds = GetNumSeeds(DirFD); 137 | 138 | while ((DP = readdir(DirFD)) != nullptr) { 139 | if (DP->d_type == DT_DIR) { 140 | continue; 141 | } 142 | 143 | // Get seed coverage 144 | std::ifstream IFS(std::string(CorpusDir) + '/' + DP->d_name); 145 | Cov.clear(); 146 | GetAFLCoverage(IFS, Cov); 147 | IFS.close(); 148 | 149 | // Create a variable (a boolean) to represent the seed 150 | z3::expr SeedExpr = Ctx.bool_const(MakeZ3ExprName(DP->d_name).c_str()); 151 | SeedExprs.insert(SeedExpr); 152 | 153 | // Record the set of seeds that cover a particular edge 154 | for (const auto &[Edge, Freq] : Cov) { 155 | if (EdgesOnly) { 156 | // Ignore edge frequency 157 | SeedCoverage[Edge].insert(SeedExpr); 158 | } else { 159 | // Executing edge `E` `N` times means that it was executed `N - 1` times 160 | for (unsigned I = 0; I < Freq; ++I) 161 | SeedCoverage[MAX_EDGE_FREQ * Edge + I].insert(SeedExpr); 162 | } 163 | } 164 | 165 | if ((++SeedCount % 10 == 0) && ShowProg) { 166 | Prog.Update(SeedCount * 100 / NumSeeds, "Reading seed coverage"); 167 | } 168 | } 169 | 170 | closedir(DirFD); 171 | 172 | EndTime = std::chrono::steady_clock::now(); 173 | Duration = 174 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 175 | if (ShowProg) { 176 | std::cout << std::endl; 177 | } else { 178 | std::cout << Duration.count() << 's' << std::endl; 179 | } 180 | 181 | // Ensure that at least one seed is selected that covers a particular edge 182 | if (!ShowProg) { 183 | std::cout << "[*] Generating constraints for " << SeedCoverage.size() 184 | << " seeds... " << std::flush; 185 | } 186 | StartTime = std::chrono::steady_clock::now(); 187 | 188 | SeedCount = 0; 189 | 190 | for (const auto &[_, Seeds] : SeedCoverage) { 191 | if (Seeds.empty()) { 192 | continue; 193 | } 194 | 195 | z3::expr EdgeDisjunc = std::accumulate( 196 | Seeds.begin(), Seeds.end(), Ctx.bool_val(false), 197 | [](const z3::expr &E1, const z3::expr &E2) { return E1 || E2; }); 198 | Optimizer.add(EdgeDisjunc); 199 | 200 | if ((++SeedCount % 10 == 0) && ShowProg) { 201 | Prog.Update(SeedCount * 100 / SeedCoverage.size(), 202 | "Generating seed constraints"); 203 | } 204 | } 205 | 206 | // Select the minimum number of seeds that cover a particular set of edges 207 | for (const auto &E : SeedExprs) { 208 | Optimizer.add(!E, Weights[E.to_string()]); 209 | } 210 | 211 | EndTime = std::chrono::steady_clock::now(); 212 | Duration = 213 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 214 | if (ShowProg) { 215 | std::cout << std::endl; 216 | } else { 217 | std::cout << Duration.count() << 's' << std::endl; 218 | } 219 | 220 | // Dump constraints to SMT2 221 | if (!SMTOutFile.empty()) { 222 | std::cout << "[*] Writing SMT2 to `" << SMTOutFile << "`... " << std::flush; 223 | StartTime = std::chrono::steady_clock::now(); 224 | 225 | std::ofstream OFS(SMTOutFile); 226 | OFS << Optimizer; 227 | OFS.close(); 228 | 229 | EndTime = std::chrono::steady_clock::now(); 230 | Duration = 231 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 232 | std::cout << Duration.count() << 's' << std::endl; 233 | } 234 | 235 | // Check if an optimal solution exists 236 | std::cout << "[*] Solving constraints... " << std::flush; 237 | StartTime = std::chrono::steady_clock::now(); 238 | 239 | z3::check_result Result = Optimizer.check(); 240 | 241 | EndTime = std::chrono::steady_clock::now(); 242 | Duration = 243 | std::chrono::duration_cast<std::chrono::seconds>(EndTime - StartTime); 244 | std::cout << Duration.count() << 's' << std::endl; 245 | 246 | // Get the resulting coverset 247 | if (Result == z3::sat) { 248 | std::cout << "[+] Optimal corpus found\n"; 249 | 250 | z3::model Model = Optimizer.get_model(); 251 | std::vector<std::string> SelectedSeeds; 252 | for (const auto &SeedExpr : SeedExprs) { 253 | if (Model.eval(SeedExpr).is_true()) { 254 | SelectedSeeds.push_back(GetSeed(SeedExpr)); 255 | } 256 | } 257 | 258 | // Compute some interesting statistics 259 | size_t NumSelectedSeeds = SelectedSeeds.size(); 260 | float PercentSelected = (float)NumSelectedSeeds / SeedExprs.size() * 100.0; 261 | 262 | std::cout << "\nNum. seeds: " << NumSelectedSeeds << " (" << PercentSelected 263 | << "%)\n\n"; 264 | std::copy(SelectedSeeds.begin(), SelectedSeeds.end(), 265 | std::ostream_iterator<std::string>(std::cout, "\n")); 266 | std::cout << std::endl; 267 | } else { 268 | std::cerr << "[-] Unable to find optimal minimized corpus" << std::endl; 269 | return 1; 270 | } 271 | 272 | return 0; 273 | } 274 |
-------------------------------------------------------------------------------- /optimin/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(afl-showmap-maxsat AFLShowmapMaxSat.cpp Common.cpp) 2 | 3 | install(TARGETS afl-showmap-maxsat RUNTIME DESTINATION bin) 4 | 5 | if(USE_Z3) 6 | add_executable(afl-showmap-z3 AFLShowmapZ3.cpp 7 | Common.cpp 8 | Z3Common.cpp) 9 | add_executable(llvm-cov-z3 LLVMCovZ3.cpp 10 | Common.cpp 11 | Z3Common.cpp 12 | jsoncpp/jsoncpp.cpp) 13 | target_include_directories(llvm-cov-z3 PRIVATE jsoncpp) 14 | 15 | install(TARGETS afl-showmap-z3 RUNTIME DESTINATION bin) 16 | install(TARGETS llvm-cov-z3 RUNTIME DESTINATION bin) 17 | endif(USE_Z3)
-------------------------------------------------------------------------------- /optimin/src/Common.cpp: -------------------------------------------------------------------------------- 1 | #include "Common.h" 2 | 3 | void GetAFLCoverage(std::istream &IS, AFLCoverageVector &Cov) { 4 | std::string Line; 5 | 6 | while (std::getline(IS, Line, '\n')) { 7 | const size_t DelimPos = Line.find(':'); 8 | const uint32_t E = std::stoul(Line.substr(0, DelimPos)); 9 | const unsigned Freq = std::stoul(Line.substr(DelimPos + 1)); 10 | 11 | Cov.push_back({E, Freq}); 12 | } 13 | } 14 | 15 | void GetWeights(std::istream &IS, WeightsMap &Weights) { 16 | std::string Line; 17 | 18 | while (std::getline(IS, Line, '\n')) { 19 | const size_t DelimPos = Line.find(','); 20 | const std::string Seed = Line.substr(0, DelimPos).c_str(); 21 | const unsigned Weight = std::stoul(Line.substr(DelimPos + 1)); 22 | 23 | Weights.emplace(Seed, Weight); 24 | } 25 | } 26 | 27 | size_t GetNumSeeds(DIR *FD) { 28 | struct dirent *DP; 29 | size_t SeedCount = 0; 30 | 31 | while ((DP = readdir(FD)) != nullptr) { 32 | if (DP->d_type == DT_REG) { 33 | ++SeedCount; 34 | } 35 | } 36 | 37 | rewinddir(FD); 38 | 39 | return SeedCount; 40 | }
-------------------------------------------------------------------------------- /optimin/src/Common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #include <cstdint> 5 | #include <istream> 6 | #include <map> 7 | #include <string> 8 | #include <vector> 9 | #include <dirent.h> 10 | 11 | /// Seed weights default to 1 12 | class WeightT { 13 | public: 14 | WeightT() : WeightT(1){}; 15 | WeightT(uint32_t V) : Value(V){}; 16 | 17 | operator unsigned() const { return Value; } 18 | 19 | private: 20 | const unsigned Value; 21 | }; 22 | 23 | /// Pair of tuple (edge) ID and hit count 24 | using AFLTuple = 25 | std::pair<std::uint32_t, unsigned>; 26 | 27 | /// Coverage for a given seed file 28 | using AFLCoverageVector = std::vector<AFLTuple>; 29 | 30 | /// Maps seed file paths to a weight 31 | using WeightsMap = 32 | std::map<std::string, WeightT>; 33 | 34 | /// Read AFL coverage as produced by `afl-showmap` 35 | void GetAFLCoverage(std::istream &, AFLCoverageVector &); 36 | 37 | /// Read a CSV file containing seed weights 38 | void GetWeights(std::istream &, WeightsMap &); 39 | 40 | /// Get the number of seeds in a directory 41 | size_t GetNumSeeds(DIR *); 42 | 43 | #endif // COMMON_H 44 |
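For readers skimming the C++ above: `GetAFLCoverage` (Common.cpp) parses afl-showmap-style output, one `edge:count` pair per line, and `AFLShowmapZ3.cpp` expands each hit count into up to `MAX_EDGE_FREQ` frequency buckets, so the MaxSAT constraints cover hit counts as well as raw edges. The following is a minimal Python sketch of that parsing and bucketing logic (the file path and seed name are hypothetical, not part of the artifact):

```python
# Sketch of Common.cpp's GetAFLCoverage and AFLShowmapZ3.cpp's frequency
# bucketing. Input format: afl-showmap's "<edge>:<count>" lines.
from collections import defaultdict

MAX_EDGE_FREQ = 8  # mirrors the human class count used by afl-showmap


def get_afl_coverage(path):
    """Parse an afl-showmap output file into (edge, freq) pairs."""
    cov = []
    with open(path) as inf:
        for line in inf:
            edge, _, freq = line.strip().partition(':')
            cov.append((int(edge), int(freq)))
    return cov


def bucket_coverage(seed, cov, seed_coverage, edges_only=False):
    """Record which seeds cover each (edge, frequency-class) bucket."""
    for edge, freq in cov:
        if edges_only:
            seed_coverage[edge].add(seed)
        else:
            # A seed hitting an edge with frequency class `freq` also covers
            # every lower frequency class, hence one bucket per class
            for i in range(freq):
                seed_coverage[MAX_EDGE_FREQ * edge + i].add(seed)


seed_coverage = defaultdict(set)
bucket_coverage('seed-0001', get_afl_coverage('showmap/seed-0001'), seed_coverage)
```

Each resulting bucket then becomes one hard "at least one of these seeds is selected" disjunction in the optimizer, exactly as in the `EdgeDisjunc` accumulation above.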
-------------------------------------------------------------------------------- /optimin/src/ProgressBar.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Progress bar. 3 | * 4 | * Adapted from https://www.bfilipek.com/2020/02/inidicators.html 5 | */ 6 | #ifndef PROGRESS_BAR_H 7 | #define PROGRESS_BAR_H 8 | 9 | #include <iostream> 10 | 11 | /// Display a progress bar in the terminal 12 | class ProgressBar { 13 | private: 14 | const size_t BarWidth; 15 | const std::string Fill; 16 | const std::string Remainder; 17 | 18 | public: 19 | ProgressBar() : ProgressBar(60, "#", " ") {} 20 | 21 | ProgressBar(size_t Width, const std::string &F, const std::string &R) 22 | : BarWidth(Width), Fill(F), Remainder(R) {} 23 | 24 | void Update(float Progress, const std::string Status = "", 25 | std::ostream &OS = std::cout) { 26 | // No need to write once progress is 100% 27 | if (Progress > 100.0f) { 28 | return; 29 | } 30 | 31 | // Move cursor to the first position on the same line and flush 32 | OS << '\r' << std::flush; 33 | 34 | // Start bar 35 | OS << '['; 36 | 37 | const auto Completed = 38 | static_cast<size_t>(Progress * static_cast<float>(BarWidth) / 100.0); 39 | for (size_t I = 0; I < BarWidth; ++I) { 40 | if (I <= Completed) { 41 | OS << Fill; 42 | } else { 43 | OS << Remainder; 44 | } 45 | } 46 | 47 | // End bar 48 | OS << ']'; 49 | 50 | // Write progress percentage 51 | OS << ' ' << std::min(static_cast<size_t>(Progress), size_t(100)) << '%'; 52 | 53 | // Write status text 54 | OS << " " << Status; 55 | } 56 | }; 57 | 58 | #endif 59 |
-------------------------------------------------------------------------------- /optimin/src/Z3Common.cpp: -------------------------------------------------------------------------------- 1 | #include "Z3Common.h" 2 | 3 | std::string GetSeed(const z3::expr &E) { 4 | std::string Name = E.to_string(); 5 | Name.pop_back(); 6 | 7 | return Name.erase(0, 1); 8 | } 9 | 10 | std::string MakeZ3ExprName(const std::string &S) { 11 | // Z3 "quotes" strings with a pipe if they begin with a numeric value. So we 12 | // just quote everything 13 | std::string Name("|"); 14 | Name.append(S); 15 | Name.append("|"); 16 | 17 | return Name; 18 | } 19 | 20 | void GetZ3Weights(std::istream &IS, WeightsMap &Weights) { 21 | std::string Line; 22 | 23 | while (std::getline(IS, Line, '\n')) { 24 | const size_t DelimPos = Line.find(','); 25 | const std::string Seed = Line.substr(0, DelimPos).c_str(); 26 | const unsigned Weight = std::stoul(Line.substr(DelimPos + 1)); 27 | 28 | Weights.emplace(MakeZ3ExprName(Seed), Weight); 29 | } 30 | }
-------------------------------------------------------------------------------- /optimin/src/Z3Common.h: -------------------------------------------------------------------------------- 1 | #ifndef Z3_COMMON_H 2 | #define Z3_COMMON_H 3 | 4 | #include <istream> 5 | #include <string> 6 | #include <unordered_set> 7 | 8 | #include <boost/container/flat_map.hpp> 9 | #include <z3++.h> 10 | 11 | #include "Common.h" 12 | 13 | /// Hash structure so a `z3::expr` can be stored in an `std::unordered_set` 14 | struct Z3ExprHash { 15 | size_t operator()(const z3::expr &E) const noexcept { 16 | return std::hash<unsigned>()(E.hash()); 17 | } 18 | }; 19 | 20 | /// Set of Z3 expressions 21 | using Z3ExprSet = std::unordered_set<z3::expr, Z3ExprHash>; 22 | 23 | /// Maps tuple IDs to Z3 expressions that "cover" that tuple 24 | using Z3CoverageMap = 25 | boost::container::flat_map<std::uint32_t, Z3ExprSet>; 26 | 27 | /// Get the seed name from a Z3 expression 28 | std::string GetSeed(const z3::expr &); 29 | 30 | /// Read a CSV file containing seed weights 31 | void GetZ3Weights(std::istream &, WeightsMap &); 32 | 33 | /// Create a name for a Z3 expression 34 | std::string MakeZ3ExprName(const std::string &); 35 | 36 | #endif // Z3_COMMON_H 37 |
-------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # Seed Selection Tools 2 | 3 | A collection of scripts to help with analyzing fuzzing seed selection practices. 4 | 5 | ## afl_cmin.py 6 | 7 | A wrapper around [`afl-cmin`](https://github.com/google/AFL/blob/master/afl-cmin) 8 | that prints the seeds selected (but does not copy them). 9 | 10 | ## afl_coverage_merge.py 11 | 12 | Merge the [final coverage bitmaps](https://github.com/google/AFL/blob/master/afl-fuzz.c#L863) 13 | from multiple AFL parallel nodes. 14 | 15 | ## afl_coverage_pca.py 16 | 17 | Generate a [PCA](https://en.wikipedia.org/wiki/Principal_component_analysis) 18 | plot for a given seed set (stored in an HDF5 file, such as those stored 19 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 20 | 21 | ## coverage_auc.py 22 | 23 | Compute the area under curve (AUC) of AFL coverage data (stored in `plot_data` 24 | files). 25 | 26 | ## eval_maxsat.py 27 | 28 | Run [EvalMaxSAT](https://github.com/FlorentAvellaneda/EvalMaxSAT) over a WCNF 29 | produced by `afl-showmap-maxsat` to compute an optimum corpus (a sample WCNF 30 | fragment is sketched below). 31 | 32 | ## expand_hdf5_coverage.py 33 | 34 | Extract [`afl-showmap`](https://github.com/google/AFL/blob/master/afl-showmap.c) 35 | style bitmaps from an HDF5 file containing AFL coverage (as stored 36 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 37 | 38 | ## fuzz.py 39 | 40 | Run multiple AFL campaigns in parallel. Ensures that CPU usage is properly 41 | managed and optionally provides a watchdog that timestamps artifacts created by 42 | AFL (e.g., crashes, queue entries).
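The WCNF files exchanged between `afl-showmap-maxsat` and `eval_maxsat.py` prefix the clause listing with comment lines that map MaxSAT literals back to seed names; `get_seed_mapping` recovers this map. The sketch below uses a hypothetical two-seed fragment (the `p wcnf <vars> <clauses> <top>` header and weighted clauses follow the standard DIMACS WCNF layout, assumed here rather than taken from the artifact):

```python
# Hypothetical WCNF fragment: 'c <literal> : <seed>' comment lines carry the
# seed mapping; the 'p wcnf' header starts the hard/soft clause listing.
import re

SAMPLE_WCNF = """\
c 1 : id:000000,orig:a.pdf
c 2 : id:000001,orig:b.pdf
p wcnf 2 3 100
100 1 2 0
100 2 0
1 -1 0
"""

WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$')

mapping = {}
for line in SAMPLE_WCNF.splitlines():
    if line.startswith('p wcnf '):
        break  # constraints follow; the seed map is complete
    match = WCNF_SEED_MAP_RE.match(line.strip())
    if match:
        mapping[int(match.group(1))] = match.group(2)

assert mapping == {1: 'id:000000,orig:a.pdf', 2: 'id:000001,orig:b.pdf'}
```

The weight-100 ("top") clauses are the hard edge-coverage constraints; the weight-1 clause softly penalizes selecting a seed, which is what drives the solver toward a minimal corpus.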
42 | 43 | ## get_corpus.py 44 | 45 | Download a corpus of seeds from our [datastore](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/) 46 | based on a given minimization technique (e.g., optimal, afl-cmin). 47 | 48 | ## get_libs.py 49 | 50 | Extract all shared libraries that a given program depends on and copy these 51 | libraries to a particular directory. 52 | 53 | ## llvm_cov_merge.py 54 | 55 | Merge LLVM [SanitizerCoverage](https://clang.llvm.org/docs/SanitizerCoverage.html) coverage data. 56 | 57 | ## qminset.py 58 | 59 | Wraps the MinSet tool as proposed in the [Optimizing Seed Selection for 60 | Fuzzing](https://www.usenix.org/conference/usenixsecurity14/technical-sessions/presentation/rebert) 61 | paper. Prints the selected seeds. 62 | 63 | ## replay_seeds.py 64 | 65 | Replay a directory of input seeds and generate coverage information. This 66 | coverage information is stored in an HDF5 file (as stored 67 | [here](https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/)). 68 | 69 | ## triage_crashes.py 70 | 71 | Replay AFL's `crashes` directory and match crash outputs to a regex (such as 72 | those provided [here](../fuzzing/config/fts-bug-regexs.toml)). 73 | 74 | ## visualize_corpora.py 75 | 76 | Plot a "Venn diagram" (it's not really a Venn diagram) of different minimized 77 | corpora. 78 |
-------------------------------------------------------------------------------- /scripts/bin/afl_cmin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around `afl-cmin`. Only keeps the names of the files in the minimized 5 | corpus. 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from getopt import getopt 12 | from pathlib import Path 13 | from shutil import which 14 | from subprocess import run 15 | from tempfile import TemporaryDirectory 16 | import os 17 | import sys 18 | 19 | 20 | def main(): 21 | """The main function.""" 22 | opts, args = getopt(sys.argv[1:], '+i:f:m:t:eQC') 23 | 24 | cmin = which('afl-cmin') 25 | if not cmin: 26 | raise Exception('afl-cmin not found. Check PATH') 27 | 28 | env = os.environ.copy() 29 | env['AFL_ALLOW_TMP'] = '1' 30 | 31 | ret = 1 32 | 33 | with TemporaryDirectory() as temp_dir: 34 | cmin_args = [cmin, *[val for vals in opts for val in vals], 35 | '-o', temp_dir, '--', *args] 36 | proc = run(cmin_args, check=False, env=env) 37 | 38 | seeds = list(Path(temp_dir).iterdir()) 39 | 40 | print('\nSeeds (%d):' % len(seeds)) 41 | for seed in seeds: 42 | print(seed.name) 43 | 44 | ret = proc.returncode 45 | 46 | sys.exit(ret) 47 | 48 | 49 | if __name__ == '__main__': 50 | main() 51 |
-------------------------------------------------------------------------------- /scripts/bin/afl_coverage_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Merge AFL coverage across multiple parallel nodes.
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | 12 | from seed_selection.argparse import path_exists 13 | 14 | 15 | # Taken from AFL's config.h 16 | MAP_SIZE_POW2 = 16 17 | MAP_SIZE = 1 << MAP_SIZE_POW2 18 | 19 | 20 | def parse_args() -> Namespace: 21 | """Parse command-line arguments.""" 22 | parser = ArgumentParser(description='Merge AFL coverage') 23 | parser.add_argument('output', metavar='AFL_OUT', nargs='+', type=path_exists, 24 | help='AFL output directory') 25 | return parser.parse_args() 26 | 27 | 28 | def main(): 29 | """The main function.""" 30 | args = parse_args() 31 | 32 | # Read and merge bitmaps. The merged bitmap only indicates (via a boolean) 33 | # whether an edge tuple was hit or not (i.e., edge counts are discarded) 34 | merged_bitmap = [False] * MAP_SIZE 35 | for out in args.output: 36 | with open(out / 'fuzz_bitmap', 'rb') as inf: 37 | bitmap = inf.read() 38 | for i, byte in enumerate(bitmap): 39 | if byte != 255: 40 | merged_bitmap[i] = True 41 | 42 | # Calculate merged coverage 43 | bitmap_cvg = (sum(merged_bitmap) * 100.0) / MAP_SIZE 44 | print('bitmap_cvg: %.02f%%' % bitmap_cvg) 45 | 46 | 47 | if __name__ == '__main__': 48 | main() 49 | -------------------------------------------------------------------------------- /scripts/bin/afl_coverage_pca.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate PCA plot for the given coverage HDF5 file. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | from argparse import ArgumentParser, Namespace 10 | from pathlib import Path 11 | import sys 12 | 13 | from h5py import File as H5File 14 | from matplotlib import rc 15 | from sklearn.decomposition import PCA 16 | from sklearn.preprocessing import StandardScaler 17 | import matplotlib.pyplot as plt 18 | import numpy as np 19 | import pandas as pd 20 | 21 | from seed_selection.argparse import path_exists 22 | 23 | 24 | # From afl/config.h 25 | MAP_SIZE_POW2 = 16 26 | MAP_SIZE = 1 << MAP_SIZE_POW2 27 | 28 | 29 | def parse_args() -> Namespace: 30 | """Parse command-line arguments.""" 31 | parser = ArgumentParser(description='PCA plots of coverage data') 32 | parser.add_argument('-i', '--input', metavar='HDF5', type=path_exists, 33 | required=True, help='Input HDF5 file') 34 | parser.add_argument('-o', '--output', metavar='PDF', type=Path, 35 | required=True, help='Path to output PDF') 36 | return parser.parse_args() 37 | 38 | 39 | def main(): 40 | """The main function.""" 41 | args = parse_args() 42 | 43 | in_hdf5 = args.input 44 | out_pdf = args.output 45 | 46 | print('Reading %s...' 
% in_hdf5) 47 | cov_data = {} 48 | with H5File(in_hdf5, 'r') as h5_file: 49 | for cov_file, cov in h5_file.items(): 50 | df_cov = np.zeros(MAP_SIZE, dtype=np.uint8) 51 | if len(cov.shape) == 0: 52 | edge, count = cov[()] 53 | df_cov[edge] = count 54 | else: 55 | for edge, count in cov: 56 | df_cov[edge] = count 57 | cov_data[cov_file] = list(df_cov) 58 | 59 | df = pd.DataFrame.from_dict(cov_data, orient='index') 60 | x = StandardScaler().fit_transform(df) 61 | if len(df) <= 1: 62 | sys.stderr.write('Not enough seeds to perform PCA\n') 63 | sys.exit(1) 64 | 65 | # Compute PCA 66 | # TODO determine the number of components 67 | print('Computing PCA...') 68 | pca = PCA(n_components=2) 69 | pca_scores = pd.DataFrame(pca.fit_transform(x), 70 | columns=['PCA 1', 'PCA 2']).set_index(df.index) 71 | 72 | # Configure plot 73 | rc('pdf', fonttype=42) 74 | rc('ps', fonttype=42) 75 | plt.style.use('ggplot') 76 | 77 | # Plot PCA 78 | print('Plotting...') 79 | fig = plt.figure() 80 | ax = fig.add_subplot(1, 1, 1) 81 | ax.scatter(pca_scores['PCA 1'], pca_scores['PCA 2'], marker='x', alpha=0.5) 82 | ax.set_xlabel('Component 1') 83 | ax.set_ylabel('Component 2') 84 | 85 | fig.savefig(out_pdf, bbox_inches='tight') 86 | print('%s coverage plotted at %s' % (in_hdf5, out_pdf)) 87 | 88 | 89 | if __name__ == '__main__': 90 | main() 91 |
-------------------------------------------------------------------------------- /scripts/bin/coverage_auc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Compute AUC for AFL coverage files. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from typing import TextIO 13 | 14 | from sklearn import metrics 15 | import bootstrapped.bootstrap as bs 16 | import bootstrapped.stats_functions as bs_stats 17 | import numpy as np 18 | import pandas as pd 19 | 20 | 21 | def parse_args() -> Namespace: 22 | """Parse command-line arguments.""" 23 | parser = ArgumentParser(description='Calculate AUC of AFL coverage') 24 | parser.add_argument('-p', '--percentile', type=float, default=1.0, 25 | help='Coverage percentile (as fraction 0 < p <= 1)') 26 | parser.add_argument('plot_data', nargs='+', type=Path, 27 | help='Path to AFL plot_data file(s)') 28 | return parser.parse_args() 29 | 30 | 31 | def read_plot_data(in_file: TextIO) -> pd.DataFrame: 32 | """Read an AFL `plot_data` file.""" 33 | def fix_map_size(x): 34 | if isinstance(x, str): 35 | return float(x.split('%')[0]) 36 | return x 37 | 38 | # Skip the opening '# ' (if it exists) 39 | pos = in_file.tell() 40 | first_chars = in_file.read(2) 41 | if first_chars != '# ': 42 | in_file.seek(pos) 43 | 44 | # Read the data 45 | df = pd.read_csv(in_file, index_col=False, skipinitialspace=True) 46 | df.map_size = df.map_size.apply(fix_map_size) 47 | 48 | return df 49 | 50 | 51 | def main(): 52 | """The main function.""" 53 | args = parse_args() 54 | plot_data_paths = args.plot_data 55 | num_plot_datas = len(plot_data_paths) 56 | 57 | aucs = [] 58 | 59 | for plot_data_path in plot_data_paths: 60 | if plot_data_path.stat().st_size == 0: 61 | continue 62 | 63 | with plot_data_path.open() as inf: 64 | df = read_plot_data(inf) 65 | if df.empty: 66 | continue 67 | 68 | df['unix_time'] = df.unix_time - df.unix_time.iloc[0] 69 | 70 | total_cov = df.map_size.iloc[-1] 71 | percentile_cov = total_cov * args.percentile 72 | df_percentile = df[df.map_size <= percentile_cov] 73 | if len(df_percentile) < 2:
74 | df_percentile = df[0:2] 75 | 76 | auc = metrics.auc(df_percentile.unix_time, df_percentile.map_size) 77 | aucs.append(auc) 78 | 79 | # Compute the mean AUC and confidence intervals 80 | auc_ci = bs.bootstrap(np.array(aucs), stat_func=bs_stats.mean) 81 | print(f'mean AUC ({num_plot_datas} plot_data files)') 82 | print(f' {auc_ci.value:.02f} +/- {auc_ci.error_width() / 2:.02f}') 83 | 84 | 85 | if __name__ == '__main__': 86 | main() 87 | -------------------------------------------------------------------------------- /scripts/bin/eval_maxsat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around EvalMaxSAT. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from shutil import which 12 | from typing import Dict, Optional, List, TextIO, Tuple 13 | import logging 14 | import re 15 | import subprocess 16 | import sys 17 | 18 | from seed_selection.argparse import log_level, path_exists, positive_int 19 | from seed_selection.log import get_logger 20 | 21 | 22 | WCNF_SEED_MAP_RE = re.compile(r'^c (\d+) : (.+)$') 23 | 24 | logger = get_logger('run_maxsat') 25 | 26 | 27 | def parse_args() -> Namespace: 28 | """Parse command-line arguments.""" 29 | parser = ArgumentParser(description='Run EvalMaxSAT on a corpus WCNF') 30 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 31 | help='Logging level') 32 | parser.add_argument('-j', '--jobs', type=positive_int, default=0, 33 | help='Number of minimization threads') 34 | parser.add_argument('input', metavar='WCNF', type=path_exists, 35 | help='Path to input WCNF') 36 | return parser.parse_args() 37 | 38 | 39 | def get_seed_mapping(inf: TextIO) -> Dict[int, str]: 40 | """ 41 | Retrieve the mapping of literal identifiers (integers) to seed names 42 | (strings) from the WCNF file. 43 | """ 44 | mapping = {} 45 | for line in inf: 46 | # This starts the constraint listing 47 | if line.startswith('p wcnf '): 48 | break 49 | 50 | match = WCNF_SEED_MAP_RE.match(line.strip()) 51 | if not match: 52 | continue 53 | 54 | mapping[int(match.group(1))] = match.group(2) 55 | 56 | return mapping 57 | 58 | 59 | def parse_maxsat_out(out: List[str], mapping: Dict[int, str]) -> Tuple[Optional[List[str]], Optional[float]]: 60 | """ 61 | Parse the output from EvalMaxSat. 62 | 63 | Returns a tuple containing: 64 | 65 | 1. The list of seeds that make up the solution, or `None` if a solution 66 | could not be found. 67 | 2. The execution time. 68 | """ 69 | solution = None 70 | exec_time = None 71 | 72 | for line in out: 73 | # Solution status 74 | if line.startswith('s ') and 'OPTIMUM FOUND' not in line: 75 | # No optimum solution found 76 | break 77 | 78 | # Solution values 79 | if line.startswith('v '): 80 | vals = [int(v) for v in line[2:].split(' ')] 81 | solution = [mapping[v] for v in vals if v > 0] 82 | 83 | # Execution time 84 | if line.startswith('c Total time: '): 85 | toks = line.split(' ') 86 | exec_time = float(toks[3]) 87 | units = toks[4] 88 | 89 | # TODO other units to worry about? 90 | if units == 'ms': 91 | exec_time = exec_time / 1000 92 | 93 | return solution, exec_time 94 | 95 | 96 | def main(): 97 | """The main function.""" 98 | args = parse_args() 99 | in_file = args.input 100 | 101 | eval_max_sat = which('EvalMaxSAT_bin') 102 | if not eval_max_sat: 103 | raise Exception('Cannot find EvalMaxSAT_bin. 
Check PATH') 104 | 105 | # Initialize logging 106 | logger.setLevel(args.log) 107 | 108 | logger.debug('Retrieving literal/seed mapping from %s', in_file) 109 | with open(in_file, 'r') as inf: 110 | seed_map = get_seed_mapping(inf) 111 | 112 | logger.debug('Running EvalMaxSAT on %s', in_file) 113 | proc = subprocess.run([eval_max_sat, in_file, '-p', '%d' % args.jobs], 114 | check=True, stdout=subprocess.PIPE, encoding='utf-8') 115 | logger.debug('EvalMaxSAT completed') 116 | maxsat_out = [line.strip() for line in proc.stdout.split('\n')] 117 | 118 | logger.debug('Parsing EvalMaxSAT output') 119 | solution, exec_time = parse_maxsat_out(maxsat_out, seed_map) 120 | if not solution: 121 | raise Exception('Unable to find optimum solution for %s' % in_file) 122 | 123 | print('[+] Solution found for %s' % in_file, file=sys.stderr) 124 | print('[+] Total time: %.02f sec' % exec_time, file=sys.stderr) 125 | print('[+] Num. seeds: %d\n' % len(solution), file=sys.stderr) 126 | 127 | print('\n'.join(solution)) 128 | 129 | 130 | if __name__ == '__main__': 131 | main() 132 |
-------------------------------------------------------------------------------- /scripts/bin/expand_hdf5_coverage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Extract AFL coverage from an HDF5 file. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from typing import Set, TextIO 13 | import logging 14 | 15 | from h5py import File 16 | 17 | from seed_selection.argparse import log_level, path_exists, positive_int 18 | from seed_selection.coverage import expand_hdf5 19 | from seed_selection.log import get_logger 20 | 21 | 22 | logger = get_logger('extract_coverage') 23 | 24 | 25 | def parse_args() -> Namespace: 26 | """Parse command-line arguments.""" 27 | parser = ArgumentParser(description='Create a directory of AFL coverage ' 28 | 'data from an HDF5 file') 29 | parser.add_argument('-j', '--jobs', type=positive_int, default=1, 30 | help='Number of parallel jobs') 31 | parser.add_argument('-i', '--input', metavar='HDF5', type=path_exists, 32 | required=True, help='Input HDF5 file') 33 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 34 | help='Logging level') 35 | parser.add_argument('-o', '--output', metavar='DIR', required=True, 36 | type=Path, help='Output directory') 37 | parser.add_argument('-s', '--seeds', type=path_exists, 38 | help='Optional text file containing a list of seeds to ' 39 | 'extract') 40 | return parser.parse_args() 41 | 42 | 43 | def get_seeds(inf: TextIO) -> Set[str]: 44 | """Get a list of seeds (one per line).""" 45 | return {line.strip() for line in inf} 46 | 47 | 48 | def main(): 49 | """The main function.""" 50 | args = parse_args() 51 | in_file = args.input 52 | out_dir = args.output 53 | 54 | # Initialize logging 55 | logger.setLevel(args.log) 56 | 57 | # Determine the specific seeds to extract 58 | seeds = None 59 | if args.seeds: 60 | with open(args.seeds, 'r') as inf: 61 | seeds = get_seeds(inf) 62 | 63 | # Extract the seed coverage from the HDF5 file 64 | out_dir.mkdir(exist_ok=True) 65 | extracted_seeds = set() 66 | 67 | logger.info('Getting seed coverage (%d jobs)', args.jobs) 68 | with File(in_file, 'r') as h5f: 69 | for seed in expand_hdf5(h5f, out_dir, seeds, jobs=args.jobs, 70 | progress=True): 71 | extracted_seeds.add(seed) 72 | logger.info('Extracted coverage for %d seeds', len(extracted_seeds)) 73 |
74 | 75 | if __name__ == '__main__': 76 | main() 77 | -------------------------------------------------------------------------------- /scripts/bin/fuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Run multiple fuzzing campaigns in parallel. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from concurrent.futures import ProcessPoolExecutor as Executor 12 | from csv import DictWriter 13 | from datetime import datetime 14 | from pathlib import Path 15 | from shutil import which 16 | from subprocess import PIPE, run 17 | from time import sleep 18 | from typing import TextIO 19 | import gzip 20 | import logging 21 | import os 22 | import re 23 | 24 | from watchdog.observers import Observer 25 | from watchdog.events import PatternMatchingEventHandler 26 | 27 | from seed_selection.log import FORMATTER as LOG_FORMATTER 28 | from seed_selection.argparse import mem_limit, path_exists, positive_int 29 | 30 | 31 | AFL_SEED_RE = re.compile(r'''^id[:_]''') 32 | TIMESTAMP_FIELDNAMES = ('seed', 'size', 'unix_time', 'time_offset') 33 | 34 | 35 | class FuzzEventHandler(PatternMatchingEventHandler): 36 | """Only capture testcase creation events.""" 37 | 38 | def __init__(self, logger: logging.Logger) -> None: 39 | patterns = [ 40 | str(Path('*') / 'crashes' / 'id:*'), 41 | str(Path('*') / 'hangs' / 'id:*'), 42 | str(Path('*') / 'queue' / 'id:*'), 43 | ] 44 | super().__init__(patterns=patterns) 45 | self._logger = logger 46 | 47 | def on_created(self, event) -> None: 48 | super().on_created(event) 49 | self._logger.info(event.src_path) 50 | 51 | 52 | def parse_args() -> Namespace: 53 | """Parse command-line arguments.""" 54 | parser = ArgumentParser(description='Run a fuzzer experiment') 55 | parser.add_argument('-j', '--jobs', type=positive_int, 56 | default=os.cpu_count() // 3, 57 | help='Number of concurrent fuzz campaigns') 58 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 59 | required=True, 60 | help='Path to the input corpus directory') 61 | parser.add_argument('-o', '--output', metavar='DIR', type=Path, 62 | required=True, 63 | help='Path to the output results directory') 64 | parser.add_argument('-t', '--timeout', default=None, type=positive_int, 65 | help='Timeout for each run') 66 | parser.add_argument('-m', '--memory', type=mem_limit, default=None, 67 | help='Memory limit for child process') 68 | parser.add_argument('-n', '--nodes', type=positive_int, default=2, 69 | help='Number of fuzzer nodes') 70 | parser.add_argument('-w', '--watch', action='store_true', 71 | help='Watch the output directory and generate timestamps') 72 | parser.add_argument('--num-trials', type=positive_int, default=30, 73 | help='The number of repeated trials to perform') 74 | parser.add_argument('--trial-len', type=positive_int, default=18 * 60 * 60, 75 | help='The length of an individual trial (in seconds)') 76 | parser.add_argument('--cmp-log', metavar='BIN', type=Path, 77 | help='Path to cmp-log instrumented binary (if fuzzing ' 78 | 'with AFL++)') 79 | parser.add_argument('target', metavar='TARGET', type=path_exists, 80 | help='Target program') 81 | parser.add_argument('target_args', metavar='ARG', nargs='*', 82 | help='Target program arguments') 83 | return parser.parse_args() 84 | 85 | 86 | def get_logger(log_file: TextIO) -> logging.Logger: 87 | """Create a logger for recording file creation events.""" 88 | handler = logging.StreamHandler(log_file) 89 | 
handler.setFormatter(LOG_FORMATTER) 90 | 91 | name = '.'.join(log_file.name.split(os.sep)[-3:]) 92 | logger = logging.getLogger(name) 93 | logger.setLevel(logging.INFO) 94 | logger.addHandler(handler) 95 | 96 | return logger 97 | 98 | 99 | def create_watchdog(logger: logging.Logger, fuzz_dir: Path) -> Observer: 100 | """Create a watchdog observer for recording file creation events.""" 101 | handler = FuzzEventHandler(logger) 102 | observer = Observer() 103 | observer.schedule(handler, fuzz_dir, recursive=True) 104 | 105 | return observer 106 | 107 | 108 | def timestamp_results(out_dir: Path, start_time: datetime) -> list: 109 | """Timestamp the results of AFL.""" 110 | stats = [] 111 | 112 | for root, dirs, files in os.walk(out_dir): 113 | # Ignore hidden directories 114 | dirs[:] = [d for d in dirs if not d[0] == '.'] 115 | 116 | for name in files: 117 | if not AFL_SEED_RE.match(name): 118 | continue 119 | 120 | seed = Path(root) / name 121 | ctime = seed.stat().st_ctime 122 | 123 | stat_dict = dict(seed=str(seed), unix_time=ctime, 124 | time_offset=ctime - start_time.timestamp(), 125 | size=seed.stat().st_size) 126 | stats.append(stat_dict) 127 | 128 | return stats 129 | 130 | 131 | def run_fuzzer(afl: Path, out_dir: Path, node: int, **kwargs) -> int: 132 | """Run a fuzzer, and log testcases as they are created.""" 133 | # Create AFL command-line. We use `timeout` because a timeout in 134 | # subprocess.run causes us to lose our `CompletedProcess` object 135 | args = ['timeout', '%ds' % kwargs['trial_len'], 136 | afl, '-i', str(kwargs['input']), '-o', str(out_dir.parent)] 137 | 138 | if kwargs['timeout']: 139 | args.extend(['-t', str(kwargs['timeout'])]) 140 | if kwargs['memory'] and not kwargs['cmp_log']: 141 | args.extend(['-m', kwargs['memory']]) 142 | 143 | if node == 1: 144 | args.extend(['-M', out_dir.name]) 145 | else: 146 | args.extend(['-S', out_dir.name]) 147 | 148 | if kwargs['cmp_log']: 149 | args.extend(['-m', 'none', '-c', str(kwargs['cmp_log'])]) 150 | if 'fuzzer_args' in kwargs: 151 | args.extend(kwargs['fuzzer_args']) 152 | 153 | args.extend(['--', str(kwargs['target']), *kwargs['target_args']]) 154 | 155 | # Create AFL environment 156 | env = os.environ.copy() 157 | env['AFL_NO_UI'] = '1' 158 | 159 | # Create watchdog.
Testcase creation times are logged to a compressed file 160 | if kwargs['watch']: 161 | log_file = gzip.open(out_dir / 'watchdog.log.gz', 'wt') 162 | logger = get_logger(log_file) 163 | watchdog = create_watchdog(logger, out_dir) 164 | watchdog.start() 165 | 166 | # Start the fuzzer 167 | try: 168 | start_time = datetime.now() 169 | print('[%s] %s' % (start_time, ' '.join(args))) 170 | proc = run(args, stdout=PIPE, stderr=PIPE, env=env, check=False) 171 | 172 | # Save fuzzer output 173 | with open(out_dir / 'stdout.log', 'wb') as outf: 174 | outf.write(proc.stdout) 175 | with open(out_dir / 'stderr.log', 'wb') as outf: 176 | outf.write(proc.stderr) 177 | 178 | # Timestamp everything produced by the fuzzer 179 | stats = [] 180 | for name in ('queue', 'crashes', 'hangs'): 181 | stats.extend(timestamp_results(out_dir / name, start_time)) 182 | stats.sort(key=lambda d: d['unix_time']) 183 | with open(out_dir / 'timestamps.csv', 'w') as outf: 184 | writer = DictWriter(outf, fieldnames=TIMESTAMP_FIELDNAMES) 185 | writer.writeheader() 186 | writer.writerows(stats) 187 | 188 | return proc.returncode 189 | finally: 190 | # Cleanup (the watchdog and log file only exist in `--watch` mode) 191 | if kwargs['watch']: 192 | watchdog.stop() 193 | watchdog.join() 194 | log_file.close() 195 | 196 | 197 | 198 | def main(): 199 | """The main function.""" 200 | args = parse_args() 201 | 202 | afl = which('afl-fuzz') 203 | if not afl: 204 | raise Exception('Cannot find `afl-fuzz`. Check PATH') 205 | 206 | num_jobs = args.jobs 207 | num_nodes = args.nodes 208 | 209 | if num_jobs % num_nodes != 0: 210 | raise Exception('The number of jobs (%d) must be divisible by the ' 211 | 'number of nodes (%d)' % (num_jobs, num_nodes)) 212 | 213 | out_dir = args.output 214 | out_dir.mkdir(exist_ok=True) 215 | 216 | with Executor(max_workers=num_jobs) as executor: 217 | for trial in range(1, args.num_trials + 1): 218 | # Create output directory 219 | trial_dir = args.output / f'trial-{trial:02d}' 220 | trial_dir.mkdir(exist_ok=True) 221 | 222 | # Run the fuzzer node 223 | for node in range(1, 1 + args.nodes): 224 | node_dir = trial_dir / f'fuzzer-{node:02d}' 225 | node_dir.mkdir(exist_ok=True) 226 | 227 | # Sleep to avoid races when AFL attempts to bind to a core 228 | executor.submit(run_fuzzer, afl, node_dir, node, **vars(args)) 229 | sleep(1.5) 230 | 231 | 232 | if __name__ == '__main__': 233 | main() 234 |
-------------------------------------------------------------------------------- /scripts/bin/get_corpus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Download a specific seed corpus.
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from io import BytesIO, StringIO 12 | from pathlib import Path 13 | from tarfile import TarFile 14 | from tempfile import TemporaryDirectory 15 | from typing import Set 16 | import logging 17 | import shutil 18 | 19 | from tqdm import tqdm 20 | 21 | from seed_selection import BENCHMARKS, CORPORA, TARGET_FILE_TYPES 22 | from seed_selection.argparse import log_level, path_exists 23 | from seed_selection.log import get_logger 24 | from seed_selection import datastore 25 | 26 | 27 | logger = get_logger('get_corpus') 28 | 29 | 30 | def parse_args() -> Namespace: 31 | """Parse command-line arguments.""" 32 | parser = ArgumentParser(description='Create a corpus for a given ' 33 | 'benchmark\'s target') 34 | parser.add_argument('-b', '--benchmark', choices=BENCHMARKS, required=True, 35 | help='The benchmark') 36 | parser.add_argument('-c', '--corpus', choices=CORPORA, default='full', 37 | help='The corpus type to download') 38 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 39 | help='Logging level') 40 | parser.add_argument('-t', '--target', type=str, required=True, 41 | help='The benchmark target') 42 | parser.add_argument('output', metavar='DIR', type=path_exists, 43 | help='Path to output directory') 44 | return parser.parse_args() 45 | 46 | 47 | def get_seeds(benchmark: str, target: str, corpus: str) -> Set[Path]: 48 | """Get the list of seeds for the given corpus.""" 49 | logger.info('Getting seed list from datastore') 50 | 51 | # Get the corpus file 52 | seed_path = Path('corpora') / benchmark / target / f'{corpus}.txt' 53 | seed_data = datastore.get_file(seed_path).decode('utf-8') 54 | logger.debug('Downloaded corpus seed file') 55 | 56 | # Now create the paths to the seeds 57 | filetype = TARGET_FILE_TYPES[benchmark][target] 58 | seeds = set() 59 | with StringIO(seed_data) as inf: 60 | for line in inf: 61 | seed = line.strip() 62 | seeds.add(Path(filetype) / seed) 63 | logger.info('Read %d seeds from seed list', len(seeds)) 64 | 65 | return seeds 66 | 67 | 68 | def main(): 69 | """The main function.""" 70 | args = parse_args() 71 | benchmark = args.benchmark 72 | corpus = args.corpus 73 | target = args.target 74 | out_dir = args.output 75 | 76 | # Validate that the target is in the benchmark 77 | if target not in TARGET_FILE_TYPES[benchmark]: 78 | raise Exception('Target `%s` is not valid for the `%s` benchmark' % 79 | (target, benchmark)) 80 | filetype = TARGET_FILE_TYPES[benchmark][target] 81 | 82 | # Initialize logging 83 | logger.setLevel(args.log) 84 | 85 | # Get the list of seeds for the given benchmark target. 
Need special 86 | # handling for the empty corpus :( 87 | if corpus == 'empty': 88 | seeds = [Path('empty') / f'empty.{filetype}'] 89 | archive_name = 'empty.tar.xz' 90 | else: 91 | seeds = get_seeds(benchmark, target, corpus) 92 | archive_name = '%s.tar.xz' % filetype 93 | 94 | # Download seeds 95 | logger.info('Downloading %s', archive_name) 96 | data = datastore.get_file(Path('seeds') / archive_name, progbar=True) 97 | with BytesIO(data) as bio: 98 | logger.info('Extracting seeds from %s', archive_name) 99 | with TarFile.open(fileobj=bio, mode='r:xz') as tf, \ 100 | TemporaryDirectory() as td: 101 | logger.info('Extract all seeds to temp dir %s', td) 102 | tf.extractall(td) 103 | 104 | for seed in tqdm(seeds, desc=f'Copying seeds to {out_dir}', 105 | unit='seeds'): 106 | shutil.copy(Path(td) / seed, out_dir) 107 | 108 | logger.info('Successfully created %s corpus for %s - %s at %s', corpus, 109 | benchmark, target, out_dir) 110 | 111 | 112 | if __name__ == '__main__': 113 | main() 114 | -------------------------------------------------------------------------------- /scripts/bin/get_libs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Extract the shared library dependencies for a given binary. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | from shutil import copyfile 13 | from typing import Set 14 | import re 15 | import subprocess 16 | 17 | from elftools.elf.elffile import ELFFile 18 | from elftools.elf.segments import InterpSegment 19 | 20 | LDD_RE = re.compile(r'.*.so.* => (/.*\.so[^ ]*)') 21 | LDD_NOT_FOUND_RE = re.compile(r'(.*.so.*) => not found') 22 | 23 | 24 | def parse_args() -> Namespace: 25 | """Parse command-line arguments.""" 26 | parser = ArgumentParser(description='Extract a binary\'s library ' 27 | 'dependencies') 28 | parser.add_argument('-o', '--output', metavar='DIR', type=Path, 29 | required=True, help='Output directory') 30 | parser.add_argument('binary', metavar='FILE', nargs='+', type=Path, 31 | help='Path to binary') 32 | return parser.parse_args() 33 | 34 | 35 | def get_interpreter(prog: Path) -> Path: 36 | """Extract the binary's interpreter.""" 37 | with open(prog, 'rb') as inf: 38 | elf = ELFFile(inf) 39 | for seg in elf.iter_segments(): 40 | if isinstance(seg, InterpSegment): 41 | return Path(seg.get_interp_name()) 42 | 43 | raise Exception('Could not find binary interpreter in %s' % prog) 44 | 45 | 46 | def get_library_deps(prog: Path) -> Set[Path]: 47 | """Use `ldd` to determine the given program's library dependencies.""" 48 | deps = set() 49 | 50 | ldd = subprocess.run(['ldd', str(prog)], check=True, encoding='utf-8', 51 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 52 | for line in ldd.stdout.split('\n'): 53 | match = LDD_NOT_FOUND_RE.search(line) 54 | if match: 55 | missing_lib = match.group(1).strip() 56 | raise Exception('Could not find %s - check LD_LIBRARY_PATH' % 57 | missing_lib) 58 | match = LDD_RE.search(line) 59 | if not match: 60 | continue 61 | deps.add(Path(match.group(1))) 62 | 63 | return deps 64 | 65 | 66 | def main(): 67 | """The main function.""" 68 | args = parse_args() 69 | 70 | progs = args.binary 71 | out_dir = args.output 72 | 73 | libs = set() 74 | 75 | for prog in progs: 76 | if not prog.exists(): 77 | print('WARN: %s does not exist. Skipping...' 
% prog) 78 | continue 79 | 80 | # Get loader 81 | libs.add(get_interpreter(prog)) 82 | 83 | # Determine all library dependencies 84 | libs.update(get_library_deps(prog)) 85 | 86 | if not out_dir.exists(): 87 | out_dir.mkdir(exist_ok=True) 88 | 89 | for lib in libs: 90 | # Skip if they are the same file 91 | if lib.parent.samefile(out_dir): 92 | print('WARN: %s already exists in %s. Skipping...' % (lib.name, out_dir)) 93 | continue 94 | copyfile(lib, out_dir / lib.name) 95 | 96 | 97 | if __name__ == '__main__': 98 | main() 99 |
-------------------------------------------------------------------------------- /scripts/bin/llvm_cov_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate and merge llvm-cov coverage from an AFL trial. 5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from functools import partial 12 | from pathlib import Path 13 | from random import randint 14 | from tempfile import TemporaryDirectory, gettempdir 15 | from typing import List, Optional 16 | import json 17 | import logging 18 | import multiprocessing.pool as mpp 19 | import os 20 | import subprocess 21 | 22 | from seed_selection.afl import replace_atat 23 | from seed_selection.argparse import log_level, path_exists, positive_int 24 | from seed_selection.log import get_logger 25 | 26 | 27 | logger = get_logger('llvm_cov_merge') 28 | 29 | 30 | def parse_args() -> Namespace: 31 | """Parse command-line arguments.""" 32 | parser = ArgumentParser(description='Generate and merge llvm-cov coverage') 33 | parser.add_argument('-j', '--jobs', type=positive_int, default=1, 34 | help='Number of parallel jobs') 35 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 36 | required=True, help='AFL output directory') 37 | parser.add_argument('-o', '--output', metavar='JSON', type=Path, 38 | help='Output JSON') 39 | parser.add_argument('-l', '--log', type=log_level, default=logging.WARN, 40 | help='Logging level') 41 | parser.add_argument('-t', '--timeout', type=positive_int, default=None, 42 | help='Timeout (seconds)') 43 | parser.add_argument('--summary-only', action='store_true', 44 | help='Export only summary information for each source file') 45 | parser.add_argument('target', metavar='TARGET', type=path_exists, 46 | help='LLVM SanitizerCoverage-instrumented target program') 47 | parser.add_argument('target_args', metavar='ARG', nargs='+', 48 | help='Target program arguments') 49 | return parser.parse_args() 50 | 51 | 52 | def get_seed_profraw(seed: Path, outdir: Path, target: Path, 53 | target_args: List[str], timeout: Optional[int] = None) -> Path: 54 | """ 55 | Generate the raw coverage profile by replaying the seed through a 56 | SanitizerCoverage-instrumented target.
57 | """ 58 | if seed.stat().st_size == 0: 59 | logger.warning('%s is empty', seed) 60 | 61 | rand_id = randint(0, 99999) 62 | profraw = outdir / f'{rand_id}-{seed.stem}.profraw' 63 | 64 | env = os.environ.copy() 65 | env['LLVM_PROFILE_FILE'] = profraw 66 | 67 | target_args_w_seed, found_atat = replace_atat(target_args, seed) 68 | if not found_atat: 69 | raise Exception('No seed placeholder `@@` found in target arguments') 70 | 71 | stderr = '' 72 | try: 73 | proc = subprocess.run([str(target), *target_args_w_seed], check=False, 74 | env=env, timeout=timeout, 75 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 76 | stderr = proc.stderr 77 | if proc.returncode: 78 | logger.debug('%s error: %s', seed, stderr.strip()) 79 | except subprocess.TimeoutExpired: 80 | logger.warning('%s timed out', seed) 81 | if not profraw.exists(): 82 | raise Exception('Failed to create raw coverage profile for `%s`: %s' % 83 | (seed, stderr.strip())) 84 | 85 | return profraw 86 | 87 | 88 | def merge_profraw(seed_list: Path, profdata: Path, jobs : int = 1) -> None: 89 | """ 90 | Run llvm-profdata to merge raw coverage profiles (listed in `seed_list`). 91 | """ 92 | # Find appropriate llvm-profdata 93 | llvm_profdata = 'llvm-profdata' 94 | if 'LLVM_PROFDATA' in os.environ: 95 | llvm_profdata = os.environ['LLVM_PROFDATA'] 96 | 97 | llvm_profdata_args = [llvm_profdata, 'merge', '-sparse', 98 | '-num-threads', '%d' % jobs, 99 | '-f', str(seed_list), '-o', str(profdata)] 100 | proc = subprocess.run(llvm_profdata_args, check=False, encoding='utf-8', 101 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) 102 | if proc.returncode: 103 | raise Exception('Failed to merge profile data: %s' % 104 | proc.stderr.strip()) 105 | 106 | 107 | def export_json(target: Path, profdata: Path, 108 | summary_only: bool = False) -> dict: 109 | """Run llvm-cov to export coverage as JSON.""" 110 | # Find appropriate llvm-cov 111 | llvm_cov = 'llvm-cov' 112 | if 'LLVM_COV' in os.environ: 113 | llvm_cov = os.environ['LLVM_COV'] 114 | 115 | llvm_cov_args = [llvm_cov, 'export'] 116 | if summary_only: 117 | llvm_cov_args.append('-summary-only') 118 | llvm_cov_args.extend([str(target), '-instr-profile', str(profdata), 119 | '-format', 'text']) 120 | proc = subprocess.run(llvm_cov_args, check=True, stdout=subprocess.PIPE, 121 | stderr=subprocess.PIPE) 122 | return json.loads(proc.stdout) 123 | 124 | 125 | def get_temp_dir() -> Path: 126 | """Determine temporary directory location. 
Prefer tmpfs if available.""" 127 | root = Path('/') 128 | preferred_dirs = (root / 'dev' / 'shm', root / 'run' / 'shm') 129 | for dir_ in preferred_dirs: 130 | if dir_.exists(): 131 | return dir_ 132 | 133 | return Path(gettempdir()) 134 | 135 | 136 | def main(): 137 | """The main function.""" 138 | args = parse_args() 139 | in_dir = args.input 140 | output = args.output 141 | target = args.target 142 | 143 | # Initialize logging 144 | logger.setLevel(args.log) 145 | 146 | seeds = (seed for queue in in_dir.glob('**/queue') 147 | for seed in queue.iterdir() if seed.is_file()) 148 | 149 | with TemporaryDirectory(dir=get_temp_dir()) as temp_dir: 150 | # Generate raw coverage files 151 | with mpp.Pool(processes=args.jobs) as pool: 152 | logger.info('Generating raw coverage profiles from %s...', in_dir) 153 | get_profraw = partial(get_seed_profraw, outdir=Path(temp_dir), 154 | target=target, target_args=args.target_args, 155 | timeout=args.timeout) 156 | profraws = pool.map(get_profraw, seeds) 157 | logger.info('Generated %d coverage profiles', len(profraws)) 158 | 159 | if not profraws: 160 | logger.warning('No coverage profiles generated') 161 | return 162 | 163 | # Create list of seeds for merging 164 | logger.info('Generating seeds.txt...') 165 | seed_list = Path(temp_dir) / 'seeds.txt' 166 | with open(seed_list, 'w') as outf: 167 | for profraw in profraws: 168 | outf.write('1,%s\n' % profraw)  # `llvm-profdata -f` format: <weight>,<filename> 169 | 170 | # Merge raw coverage 171 | logger.info('Merging raw coverage profiles...') 172 | profdata_file = Path(temp_dir) / 'merged.profdata' 173 | merge_profraw(seed_list, profdata_file, jobs=args.jobs) 174 | 175 | # Generate JSON 176 | logger.info('Generating JSON coverage report...') 177 | summary_only = output is None or args.summary_only  # a full export is only needed when saving the report 178 | prof_data = export_json(target, profdata_file, summary_only) 179 | 180 | # Save/print JSON 181 | if output: 182 | logger.info('Saving JSON report to %s', output) 183 | with open(output, 'w') as outf: 184 | json.dump(prof_data, outf) 185 | 186 | region_data = prof_data['data'][0]['totals']['regions'] 187 | region_cvg = region_data['covered'] / region_data['count'] * 100.0 188 | print('region coverage: %.02f%%' % region_cvg) 189 | 190 | 191 | if __name__ == '__main__': 192 | main() 193 | -------------------------------------------------------------------------------- /scripts/bin/llvm_cov_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Generate llvm-cov coverage statistics.
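Each input JSON should be an `llvm-cov export` report, such as one produced by llvm_cov_merge.py. Example invocation (file names illustrative): `llvm_cov_stats.py trial-1.json trial-2.json trial-3.json`.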
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from pathlib import Path 12 | import json 13 | 14 | from bootstrapped import bootstrap as bs 15 | import bootstrapped.stats_functions as bs_stats 16 | import numpy as np 17 | 18 | from seed_selection.argparse import path_exists 19 | 20 | 21 | def parse_args() -> Namespace: 22 | """Parse command-line arguments.""" 23 | parser = ArgumentParser(description='Generate llvm-cov statistics') 24 | parser.add_argument('jsons', metavar='JSON', nargs='+', type=path_exists, 25 | help='llvm-cov-generated JSON coverage file(s)') 26 | return parser.parse_args() 27 | 28 | 29 | def get_region_cov(llvm_cov_json: Path) -> float: 30 | """Get region coverage from the llvm-cov-generated JSON file.""" 31 | with llvm_cov_json.open() as inf: 32 | root = json.load(inf) 33 | data = root['data'][0]['totals']['regions'] 34 | return data['covered'] / data['count'] * 100.0 35 | 36 | 37 | def main(): 38 | """The main function.""" 39 | args = parse_args() 40 | 41 | # Get region coverage 42 | regions = np.array([get_region_cov(p) for p in args.jsons]) 43 | 44 | # Calculate mean and confidence intervals 45 | cov_ci = bs.bootstrap(regions, stat_func=bs_stats.mean) 46 | 47 | # Output 48 | print(f'mean coverage ({len(regions)} trials)') 49 | print(f' {cov_ci.value:.02f} +/- {cov_ci.error_width() / 2:.02f}') 50 | 51 | 52 | if __name__ == '__main__': 53 | main() 54 | -------------------------------------------------------------------------------- /scripts/bin/qminset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Wrapper around `qminset`. Only keeps the names of the files in the minimized 5 | corpus. 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from io import BytesIO 13 | from pathlib import Path 14 | from shutil import copytree, which 15 | from subprocess import PIPE, run 16 | from tarfile import TarInfo 17 | from tempfile import TemporaryDirectory 18 | import re 19 | import tarfile 20 | 21 | from seed_selection.argparse import path_exists 22 | 23 | 24 | SEED_RE = re.compile(r'Adding \d+ instructions \((?P<seed>.+?)\)') 25 | 26 | 27 | def parse_args() -> Namespace: 28 | """Parse command-line arguments.""" 29 | parser = ArgumentParser(description='Wrapper around minset') 30 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 31 | required=True, help='Path to input directory of ' 32 | '`afl-showmap` coverage files') 33 | parser.add_argument('-s', '--bitvectors', metavar='DIR', type=Path, 34 | help='Save the `moonbeam-afl` bitvector traces in the ' 35 | 'given directory') 36 | return parser.parse_args() 37 | 38 | 39 | def main(): 40 | """The main function.""" 41 | args = parse_args() 42 | in_dir = args.input 43 | 44 | moonbeam = which('moonbeam-afl') 45 | if not moonbeam: 46 | raise Exception('`moonbeam-afl` not found. Check PATH') 47 | qminset = which('qminset') 48 | if not qminset: 49 | raise Exception('`qminset` not found.
Check PATH') 50 | 51 | with TemporaryDirectory() as bv_dir, TemporaryDirectory() as mset_dir: 52 | # Generate bitvectors 53 | run([moonbeam, '-i', in_dir, '-o', bv_dir], check=True) 54 | # (a non-zero exit raises CalledProcessError, so no manual check is needed) 55 | 56 | print('') 57 | 58 | output_path = Path('output') 59 | bitvectors = list(Path(bv_dir).glob('*.bv')) 60 | num_bitvectors = len(bitvectors) 61 | 62 | # Save the bitvectors if requested 63 | if args.bitvectors: 64 | copytree(bv_dir, args.bitvectors) 65 | 66 | # Prepare the minset data 67 | print('Preparing minset data...') 68 | for bitvector in bitvectors: 69 | bitvector_size = bitvector.stat().st_size 70 | mset_bv_dir = Path(mset_dir) / bitvector.stem 71 | mset_bv_dir.mkdir() 72 | 73 | # Write size 74 | with open(mset_bv_dir / 'size', 'w') as outf: 75 | outf.write('%d\n' % bitvector_size) 76 | 77 | # Write output.tgz 78 | with tarfile.open(mset_bv_dir / 'output.tgz', 'w:gz') as tar: 79 | # Write imagefilemap.txt 80 | imagefilemap = BytesIO(b'%s,%s\n' % (in_dir.name.encode(), 81 | bitvector.name.encode())) 82 | tarinfo = TarInfo(name=str(output_path / 'imagefilemap.txt')) 83 | tarinfo.size = len(imagefilemap.getvalue()) 84 | tar.addfile(tarinfo=tarinfo, fileobj=imagefilemap) 85 | 86 | # Write info.txt 87 | info = BytesIO(b'0_0_0_0_0_0_0\n0_0_0_0_0_0_WEIGHT}_0\n') 88 | tarinfo = TarInfo(name=str(output_path / 'info.txt')) 89 | tarinfo.size = len(info.getvalue()) 90 | tar.addfile(tarinfo=tarinfo, fileobj=info) 91 | 92 | # Write the bitvector 93 | with open(bitvector, 'rb') as inf: 94 | tarinfo = TarInfo(name=str(output_path / bitvector.name)) 95 | tarinfo.size = bitvector_size 96 | tar.addfile(tarinfo=tarinfo, fileobj=inf) 97 | 98 | # Run qminset 99 | print('Running minset...') 100 | proc = run([qminset, 'q', '%d' % num_bitvectors, str(mset_dir)], 101 | stdout=PIPE, stderr=PIPE, check=True, encoding='utf8') 102 | 103 | # Get the seeds 104 | seeds = [] 105 | for line in proc.stdout.split('\n'): 106 | line = line.strip() 107 | if line == 'DONE': 108 | break 109 | 110 | match = SEED_RE.match(line) 111 | if match: 112 | seeds.append(match.group('seed')) 113 | 114 | print('\nSeeds (%d):' % len(seeds)) 115 | for seed in seeds: 116 | print(seed) 117 | 118 | 119 | if __name__ == '__main__': 120 | main() 121 | -------------------------------------------------------------------------------- /scripts/bin/replay_seeds.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Replay a directory of fuzzer inputs and generate coverage information. Store 5 | this coverage (along with size and execution time metadata) in an HDF5 file.
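Coverage is collected by replaying every input through `afl-showmap`, so TARGET must be an AFL-instrumented binary and the target arguments must include the `@@` placeholder that is substituted with each seed. Example invocation (paths illustrative): `replay_seeds.py -i ./seeds -o cov.h5 ./readelf -a @@`.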
6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from pathlib import Path 13 | from shutil import copy, which 14 | from subprocess import run 15 | from tempfile import NamedTemporaryFile 16 | from time import time 17 | from typing import Tuple 18 | 19 | from h5py import File as H5File 20 | from tqdm import tqdm 21 | import numpy as np 22 | 23 | from seed_selection.afl import replace_atat 24 | from seed_selection.argparse import mem_limit, path_exists, positive_int 25 | 26 | 27 | # Coverage records are (edge ID, hit count) pairs, as emitted by afl-showmap 28 | COV_TYPE = np.dtype([('edge', np.uint32), ('count', np.uint8)]) 29 | 30 | 31 | def parse_args() -> Namespace: 32 | """Parse command-line arguments.""" 33 | parser = ArgumentParser(description='Generate AFL coverage from a seed ' 34 | 'directory') 35 | parser.add_argument('-i', '--input', metavar='DIR', type=path_exists, 36 | required=True, help='Path to input directory') 37 | parser.add_argument('-o', '--output', metavar='HDF5', type=Path, 38 | required=True, help='Path to output HDF5') 39 | parser.add_argument('-s', '--traces', metavar='DIR', type=path_exists, 40 | help='Save the traces to the given directory') 41 | parser.add_argument('-t', '--timeout', type=positive_int, default=None, 42 | help='Timeout for each run') 43 | parser.add_argument('-m', '--memory', type=mem_limit, default=None, 44 | help='Memory limit for child process') 45 | parser.add_argument('-q', '--quiet', action='store_true', 46 | help='Sink program output') 47 | parser.add_argument('target', metavar='TARGET', type=path_exists, 48 | help='Target program') 49 | parser.add_argument('target_args', metavar='ARG', nargs='+', 50 | help='Target program arguments') 51 | return parser.parse_args() 52 | 53 | 54 | def run_showmap(afl_showmap: Path, seed: Path, **kwargs) -> Tuple[np.ndarray, float]: 55 | """Run afl-showmap on a given file.""" 56 | cov = np.empty(0, dtype=COV_TYPE) 57 | args = [afl_showmap] 58 | 59 | timeout = kwargs.get('timeout') 60 | memory = kwargs.get('memory') 61 | 62 | if timeout: 63 | args.extend(['-t', str(timeout)]) 64 | if memory: 65 | args.extend(['-m', str(memory)]) 66 | if kwargs['quiet']: 67 | args.append('-q') 68 | 69 | target_args_w_seed, found_atat = replace_atat(kwargs['target_args'], seed) 70 | if not found_atat: 71 | raise Exception('No seed placeholder `@@` found in target arguments') 72 | 73 | with NamedTemporaryFile() as temp: 74 | args.extend(['-o', temp.name]) 75 | args.extend(['--', kwargs['target'], *target_args_w_seed]) 76 | 77 | start_time = time() 78 | run(args, check=False) 79 | end_time = time() 80 | 81 | exec_time_ms = (end_time - start_time) * 1000 82 | 83 | # Successfully generated coverage 84 | if Path(temp.name).stat().st_size != 0: 85 | with open(temp.name, 'r') as trace_data: 86 | cov = np.genfromtxt(trace_data, delimiter=':', dtype=COV_TYPE) 87 | 88 | # Save the seed trace if requested 89 | trace_dir = kwargs['traces'] 90 | if trace_dir: 91 | copy(temp.name, trace_dir / seed.name) 92 | 93 | return cov, exec_time_ms 94 | 95 | 96 | def main(): 97 | """The main function.""" 98 | args = parse_args() 99 | 100 | in_dir = args.input 101 | out_path = args.output 102 | num_seeds = len(list(in_dir.glob('*'))) 103 | 104 | afl_showmap = which('afl-showmap') 105 | if not afl_showmap: 106 | raise Exception('Cannot find `afl-showmap`.
Check PATH') 107 | 108 | with H5File(out_path, 'w') as h5f: 109 | for seed in tqdm(in_dir.iterdir(), 110 | desc='Generating `afl-showmap` coverage', 111 | total=num_seeds, unit='seeds'): 112 | cov, exec_time = run_showmap(afl_showmap, seed, **vars(args)) 113 | if cov.size == 0: 114 | continue 115 | 116 | compression = 'gzip' if cov.size > 1 else None 117 | dset = h5f.create_dataset(str(seed.relative_to(in_dir)), 118 | data=cov, compression=compression) 119 | dset.attrs['time'] = exec_time 120 | dset.attrs['size'] = seed.stat().st_size 121 | 122 | 123 | if __name__ == '__main__': 124 | main() 125 | -------------------------------------------------------------------------------- /scripts/bin/timestamp_afl.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Get the timestamps and sizes for all fuzzing testcases in the queue, crashes, 5 | and hangs directories (works for both AFL and Angora). 6 | 7 | Author: Adrian Herrera 8 | """ 9 | 10 | 11 | from argparse import ArgumentParser, Namespace 12 | from csv import DictWriter 13 | from pathlib import Path 14 | import os 15 | import re 16 | from typing import Any, Dict, List 17 | 18 | 19 | AFL_SEED_RE = re.compile(r'''^id[:_]''') 20 | FIELDNAMES = ('seed', 'size', 'unix_time') 21 | 22 | 23 | def parse_args() -> Namespace: 24 | """Parse command-line arguments.""" 25 | parser = ArgumentParser(description='Get timestamps and file sizes for ' 26 | 'fuzzer results') 27 | parser.add_argument('-o', '--output', type=Path, required=True, 28 | help='Path to output CSV file') 29 | parser.add_argument('out_dir', type=Path, metavar='OUT_DIR', 30 | help='AFL output directory') 31 | return parser.parse_args() 32 | 33 | 34 | def timestamp_results(out_dir: Path) -> List[Dict[str, Any]]: 35 | """Timestamp the results of AFL.""" 36 | stats = [] 37 | 38 | for root, dirs, files in os.walk(out_dir): 39 | # Ignore hidden directories 40 | dirs[:] = [d for d in dirs if not d[0] == '.'] 41 | 42 | for name in files: 43 | if not AFL_SEED_RE.match(name): 44 | continue 45 | 46 | seed = Path(root) / name 47 | ctime = seed.stat().st_ctime 48 | 49 | stat_dict = dict(seed=str(seed), unix_time=ctime, 50 | size=seed.stat().st_size) 51 | stats.append(stat_dict) 52 | 53 | return stats 54 | 55 | 56 | def main(): 57 | """The main function.""" 58 | args = parse_args() 59 | 60 | stats = [] 61 | for name in ('queue', 'crashes', 'hangs'): 62 | stats.extend(timestamp_results(args.out_dir / name)) 63 | stats.sort(key=lambda d: d['unix_time']) 64 | 65 | # Write the results to the output CSV file 66 | with open(args.output, 'w') as outf: 67 | writer = DictWriter(outf, fieldnames=FIELDNAMES) 68 | writer.writeheader() 69 | writer.writerows(stats) 70 | 71 | 72 | if __name__ == '__main__': 73 | main() 74 | -------------------------------------------------------------------------------- /scripts/bin/timestamp_honggfuzz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Get the timestamps and sizes for all honggfuzz-generated files.
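Only files with a `.fuzz` or `.honggfuzz.cov` suffix are recorded. Example invocation (path illustrative): `timestamp_honggfuzz.py -o times.csv ./hfuzz-out`.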
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from csv import DictWriter 12 | from pathlib import Path 13 | import os 14 | from typing import Any, Dict, List 15 | 16 | 17 | FIELDNAMES = ('seed', 'size', 'unix_time') 18 | 19 | 20 | def parse_args() -> Namespace: 21 | """Parse command-line arguments.""" 22 | parser = ArgumentParser(description='Get timestamps and file sizes for ' 23 | 'fuzzer results') 24 | parser.add_argument('-o', '--output', type=Path, required=True, 25 | help='Path to output CSV file') 26 | parser.add_argument('out_dir', type=Path, metavar='OUT_DIR', 27 | help='honggfuzz output directory') 28 | return parser.parse_args() 29 | 30 | def check_suffix(seed: Path) -> bool: 31 | """Check if the file suffix is one that we are interested in.""" 32 | suffixes = seed.suffixes 33 | 34 | if len(suffixes) > 2 and ''.join(suffixes[-2:]) == '.honggfuzz.cov': 35 | return True 36 | if len(suffixes) > 1 and suffixes[-1] == '.fuzz': 37 | return True 38 | 39 | return False 40 | 41 | 42 | def timestamp_results(out_dir: Path) -> List[Dict[str, Any]]: 43 | """Timestamp the results of honggfuzz.""" 44 | stats = [] 45 | 46 | for root, dirs, files in os.walk(out_dir): 47 | # Ignore hidden directories 48 | dirs[:] = [d for d in dirs if not d[0] == '.'] 49 | 50 | for name in files: 51 | seed = Path(root) / name 52 | if not check_suffix(seed): 53 | continue 54 | 55 | ctime = seed.stat().st_ctime 56 | 57 | stat_dict = dict(seed=str(seed), unix_time=ctime, 58 | size=seed.stat().st_size) 59 | stats.append(stat_dict) 60 | 61 | return stats 62 | 63 | 64 | def main(): 65 | """The main function.""" 66 | args = parse_args() 67 | 68 | stats = timestamp_results(args.out_dir) 69 | stats.sort(key=lambda d: d['unix_time']) 70 | 71 | # Write the results to the output CSV file 72 | with open(args.output, 'w') as outf: 73 | writer = DictWriter(outf, fieldnames=FIELDNAMES) 74 | writer.writeheader() 75 | writer.writerows(stats) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /scripts/bin/visualize_corpora.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | Visualize the overlap between corpora.
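Each corpus option names a text file listing one seed per line; the FULL corpus is required because it defines the seed-to-integer mapping that the other corpora are matched against. Example invocation (file names illustrative): `visualize_corpora.py -o overlap.pdf --full full.txt --cmin cmin.txt --minset minset.txt`.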
5 | 6 | Author: Adrian Herrera 7 | """ 8 | 9 | 10 | from argparse import ArgumentParser, Namespace 11 | from collections import defaultdict, OrderedDict 12 | from pathlib import Path 13 | from typing import Set, TextIO 14 | 15 | from matplotlib import rc, rcParams 16 | from supervenn import supervenn 17 | import matplotlib.pyplot as plt 18 | 19 | from seed_selection.argparse import path_exists 20 | 21 | 22 | def parse_args() -> Namespace: 23 | """Parse command-line arguments.""" 24 | parser = ArgumentParser(description='Visualize corpora overlap') 25 | parser.add_argument('-o', '--output', metavar='PDF', type=Path, 26 | required=True) 27 | parser.add_argument('--cmin', metavar='CORPUS', type=path_exists, 28 | default=None) 29 | parser.add_argument('--full', metavar='CORPUS', type=path_exists, 30 | required=True) 31 | parser.add_argument('--minset', metavar='CORPUS', type=path_exists, 32 | default=None) 33 | parser.add_argument('--unweighted-optimal', metavar='CORPUS', 34 | type=path_exists, default=None) 35 | parser.add_argument('--weighted-optimal', metavar='CORPUS', 36 | type=path_exists, default=None) 37 | parser.add_argument('--weighted-max-freq-optimal', metavar='CORPUS', 38 | type=path_exists, default=None) 39 | return parser.parse_args() 40 | 41 | 42 | def read_corpus(inf: TextIO) -> Set[str]: 43 | """Read the given corpus listing as a set of seed names.""" 44 | return {line.strip() for line in inf} 45 | 46 | 47 | def main(): 48 | """The main function.""" 49 | args = parse_args() 50 | 51 | # Read corpora 52 | corpora = dict() 53 | if args.cmin: 54 | with open(args.cmin, 'r') as inf: 55 | corpora['CMIN'] = read_corpus(inf) 56 | if args.full: 57 | with open(args.full, 'r') as inf: 58 | corpora['FULL'] = read_corpus(inf) 59 | if args.minset: 60 | with open(args.minset, 'r') as inf: 61 | corpora['MSET'] = read_corpus(inf) 62 | if args.unweighted_optimal: 63 | with open(args.unweighted_optimal, 'r') as inf: 64 | corpora['UOPT'] = read_corpus(inf) 65 | if args.weighted_optimal: 66 | with open(args.weighted_optimal, 'r') as inf: 67 | corpora['WOPT'] = read_corpus(inf) 68 | if args.weighted_max_freq_optimal: 69 | with open(args.weighted_max_freq_optimal, 'r') as inf: 70 | corpora['WMOPT'] = read_corpus(inf) 71 | 72 | # Represent the corpora as a set of integers, where each integer uniquely 73 | # maps to a seed file 74 | seed_map = {seed: i for i, seed in enumerate(corpora['FULL'])} 75 | 76 | plot_data = defaultdict(set) 77 | plot_data['FULL'] = set(seed_map.values()) 78 | 79 | for corpus in set(corpora.keys()) - set(['FULL']): 80 | for seed in corpora[corpus]: 81 | if seed not in seed_map: 82 | print('WARN: seed `%s` is not in the FULL corpus' % seed) 83 | continue 84 | plot_data[corpus].add(seed_map[seed]) 85 | 86 | # Configure plot 87 | plt.style.use('seaborn-dark') 88 | 89 | x_size, y_size = rcParams['figure.figsize'] 90 | 91 | rc('pdf', fonttype=42) 92 | rc('ps', fonttype=42) 93 | 94 | # Visualize the corpora 95 | fig = plt.figure(figsize=(x_size, y_size * 0.666)) 96 | ax = fig.add_subplot(1, 1, 1) 97 | 98 | supervenn_data = OrderedDict() 99 | for corpus in ('FULL', 'MSET', 'CMIN', 'UOPT', 'WOPT', 'WMOPT'): 100 | if corpus not in plot_data: 101 | continue 102 | supervenn_data[corpus] = plot_data[corpus] 103 | 104 | supervenn(list(supervenn_data.values()), list(supervenn_data.keys()), 105 | ax=ax, side_plots=False, widths_minmax_ratio=0.5) 106 | 107 | ax.set_xlabel('Seeds (#)') 108 | ax.set_ylabel('Corpora') 109 | 110 | fig.savefig(args.output, bbox_inches='tight') 111 | 112 | 113 | if
__name__ == '__main__': 114 | main() 115 | -------------------------------------------------------------------------------- /scripts/seed_selection/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Useful constants. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | TARGET_FILE_TYPES = { 9 | 'fts': { 10 | 'freetype2': 'ttf', 11 | 'guetzli': 'jpeg', 12 | 'json': 'json', 13 | 'libarchive': 'gzip', 14 | 'libjpeg-turbo': 'jpeg', 15 | 'libpng': 'png', 16 | 'libxml2': 'xml', 17 | 'pcre2': 'regex', 18 | 're2': 'regex', 19 | 'vorbis': 'ogg', 20 | }, 21 | 'magma': { 22 | 'libpng': 'png', 23 | 'libtiff': 'tiff', 24 | 'libxml2': 'xml', 25 | 'php-exif': 'jpeg', 26 | 'php-json': 'json', 27 | 'php-parser': 'php', 28 | 'poppler': 'pdf', 29 | }, 30 | 'real-world': { 31 | 'freetype2': 'ttf', 32 | 'librsvg': 'svg', 33 | 'libtiff': 'tiff', 34 | 'libxml2': 'xml', 35 | 'poppler': 'pdf', 36 | 'sox-mp3': 'mp3', 37 | 'sox-wav': 'wav', 38 | }, 39 | } 40 | 41 | BENCHMARKS = list(TARGET_FILE_TYPES.keys()) 42 | 43 | MINIMIZE_TECHNIQUES = ( 44 | 'cmin', 45 | 'minset', 46 | 'unweighted-optimal', 47 | 'weighted-optimal', 48 | 'weighted-max-freq-optimal') 49 | CORPORA = ('empty', 'full', *MINIMIZE_TECHNIQUES) 50 | -------------------------------------------------------------------------------- /scripts/seed_selection/afl.py: -------------------------------------------------------------------------------- 1 | """ 2 | AFL helper functions. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from getopt import getopt, GetoptError 9 | from pathlib import Path 10 | from typing import List, TextIO, Tuple 11 | import re 12 | 13 | import pandas as pd 14 | 15 | 16 | START_TIME_RE = re.compile(r'^start_time\s*: (?P<start_time>\d+)') 17 | LAST_UPDATE_RE = re.compile(r'^last_update\s*: (?P<last_update>\d+)') 18 | FUZZER_PID_RE = re.compile(r'^fuzzer_pid\s*: (?P<fuzzer_pid>\d+)') 19 | CYCLES_DONE_RE = re.compile(r'^cycles_done\s*: (?P<cycles_done>\d+)') 20 | EXECS_DONE_RE = re.compile(r'^execs_done\s*: (?P<execs_done>\d+)') 21 | EXECS_PER_SEC_RE = re.compile(r'^execs_per_sec\s*: (?P<execs_per_sec>[\d.]+)') 22 | PATHS_TOTAL_RE = re.compile(r'^paths_total\s*: (?P<paths_total>\d+)') 23 | PATHS_FAVORED_RE = re.compile(r'^paths_favored\s*: (?P<paths_favored>\d+)') 24 | PATHS_FOUND_RE = re.compile(r'^paths_found\s*: (?P<paths_found>\d+)') 25 | PATHS_IMPORTED_RE = re.compile(r'^paths_imported\s*: (?P<paths_imported>\d+)') 26 | MAX_DEPTH_RE = re.compile(r'^max_depth\s*: (?P<max_depth>\d+)') 27 | CUR_PATH_RE = re.compile(r'^cur_path\s*: (?P<cur_path>\d+)') 28 | PENDING_FAVS_RE = re.compile(r'^pending_favs\s*: (?P<pending_favs>\d+)') 29 | PENDING_TOTAL_RE = re.compile(r'^pending_total\s*: (?P<pending_total>\d+)') 30 | VARIABLE_PATHS_RE = re.compile(r'^variable_paths\s*: (?P<variable_paths>\d+)') 31 | STABILITY_RE = re.compile(r'^stability\s*: (?P<stability>[\d.]+)%') 32 | BITMAP_CVG_RE = re.compile(r'^bitmap_cvg\s*: (?P<bitmap_cvg>[\d.]+)%') 33 | UNIQUE_CRASHES_RE = re.compile(r'^unique_crashes\s*: (?P<unique_crashes>\d+)') 34 | UNIQUE_HANGS_RE = re.compile(r'^unique_hangs\s*: (?P<unique_hangs>\d+)') 35 | LAST_PATH_RE = re.compile(r'^last_path\s*: (?P<last_path>\d+)') 36 | LAST_CRASH_RE = re.compile(r'^last_crash\s*: (?P<last_crash>\d+)') 37 | LAST_HANG_RE = re.compile(r'^last_hang\s*: (?P<last_hang>\d+)') 38 | EXECS_SINCE_CRASH_RE = re.compile(r'^execs_since_crash\s*: (?P<execs_since_crash>\d+)') 39 | EXECS_TIMEOUT_RE = re.compile(r'^execs_timeout\s*: (?P<execs_timeout>\d+)') 40 | AFL_BANNER_RE = re.compile(r'^afl_banner\s*: (?P<afl_banner>.+)') 41 | AFL_VERSION_RE = re.compile(r'^afl_version\s*: (?P<afl_version>.+)') 42 | TARGET_MODE_RE = re.compile(r'^target_mode\s*: (?P<target_mode>.+)') 43 | COMMAND_LINE_RE = re.compile(r'^command_line\s*: (?P<afl>.*?afl-.+?)\s+(?P<command_line>.+)') 44 | SLOWEST_EXEC_MS_RE = re.compile(r'^slowest_exec_ms\s*: (?P<slowest_exec_ms>\d+)') 45
| PEAK_RSS_MB_RE = re.compile(r'^peak_rss_mb\s*: (?P<peak_rss_mb>\d+)') 46 | 47 | FUZZER_STATS_RES = ( 48 | START_TIME_RE, 49 | LAST_UPDATE_RE, 50 | FUZZER_PID_RE, 51 | CYCLES_DONE_RE, 52 | EXECS_DONE_RE, 53 | EXECS_PER_SEC_RE, 54 | PATHS_TOTAL_RE, 55 | PATHS_FAVORED_RE, 56 | PATHS_FOUND_RE, 57 | PATHS_IMPORTED_RE, 58 | MAX_DEPTH_RE, 59 | CUR_PATH_RE, 60 | PENDING_FAVS_RE, 61 | PENDING_TOTAL_RE, 62 | VARIABLE_PATHS_RE, 63 | STABILITY_RE, 64 | BITMAP_CVG_RE, 65 | UNIQUE_CRASHES_RE, 66 | UNIQUE_HANGS_RE, 67 | LAST_PATH_RE, 68 | LAST_CRASH_RE, 69 | LAST_HANG_RE, 70 | EXECS_SINCE_CRASH_RE, 71 | EXECS_TIMEOUT_RE, 72 | AFL_BANNER_RE, 73 | AFL_VERSION_RE, 74 | TARGET_MODE_RE, 75 | COMMAND_LINE_RE, 76 | SLOWEST_EXEC_MS_RE, 77 | PEAK_RSS_MB_RE, 78 | ) 79 | 80 | AFL_GETOPT = '+i:o:f:m:t:T:dnCB:S:M:x:Q' 81 | AFLPP_GETOPT = '+c:i:I:o:f:m:t:T:dnCB:S:M:x:QNUWe:p:s:V:E:L:hRP:' 82 | 83 | AFL_GETOPTS = (AFL_GETOPT, AFLPP_GETOPT) 84 | 85 | 86 | def replace_atat(args: List[str], seed: Path) -> Tuple[List[str], bool]: 87 | """Replace the seed placeholder `@@`.""" 88 | new_args = [] 89 | found_atat = False 90 | 91 | for arg in args: 92 | if arg == '@@': 93 | new_args.append(str(seed)) 94 | found_atat = True 95 | else: 96 | new_args.append(arg) 97 | 98 | return new_args, found_atat 99 | 100 | 101 | def read_plot_data(in_file: TextIO) -> pd.DataFrame: 102 | """Read an AFL plot_data file.""" 103 | def fix_map_size(x): 104 | if isinstance(x, str): 105 | return float(x.split('%')[0]) 106 | return x 107 | 108 | # Skip the opening '# ' (if it exists) 109 | pos = in_file.tell() 110 | first_chars = in_file.read(2) 111 | if first_chars != '# ': 112 | in_file.seek(pos) 113 | 114 | # Decide on a delimiter and then read the first line of column headers 115 | header = in_file.readline().strip() 116 | names = [] 117 | for delim in (', ', ','): 118 | if delim in header: 119 | names = header.split(delim) 120 | break 121 | 122 | if not names: 123 | raise Exception('Invalid plot_data header') 124 | 125 | df = pd.read_csv(in_file, names=names, header=None, index_col=False)  # header line already consumed above 126 | df.map_size = df.map_size.apply(fix_map_size) 127 | 128 | return df 129 | 130 | 131 | class FuzzerStats: 132 | """Container for AFL fuzzer_stats file.""" 133 | 134 | def __init__(self, stats_file): 135 | """ 136 | Create a fuzzer stats object from a file object (i.e., one created by 137 | `open`ing a fuzzer_stats file).
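For example (path illustrative): `with open('sync/fuzzer01/fuzzer_stats') as inf: stats = FuzzerStats(inf)`.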
138 | """ 139 | stats = dict() 140 | self._stats = dict() 141 | 142 | for line in stats_file: 143 | stat = next((regex.match(line).groupdict() 144 | for regex in FUZZER_STATS_RES if regex.match(line)), 145 | dict()) 146 | stats.update(stat) 147 | 148 | if not stats: 149 | raise Exception('Empty fuzzer_stats file `%s`' % stats_file.name) 150 | 151 | # Automatically create class attributes based on the fuzzer_stats fields 152 | for k, v in stats.items(): 153 | if k == 'command_line': 154 | afl_opts = None 155 | target_args = None 156 | getopt_error = None 157 | 158 | for afl_getopt in AFL_GETOPTS: 159 | try: 160 | afl_opts, target_args = getopt(v.split(), afl_getopt) 161 | break 162 | except GetoptError as e: 163 | getopt_error = e 164 | 165 | if not afl_opts or not target_args: 166 | raise getopt_error 167 | 168 | setattr(self, 'afl_cmdline', afl_opts) 169 | setattr(self, 'target_cmdline', target_args) 170 | else: 171 | # If convertable to a number, treat as a number 172 | try: 173 | v = float(v) 174 | except ValueError: 175 | pass 176 | 177 | setattr(self, k, v) 178 | self._stats[k] = v 179 | 180 | def gen_command_line(self, testcase: Path) -> Tuple[List[str], str]: 181 | """ 182 | Generate the AFL target command-line for the given testcase. 183 | 184 | Replaces '@@' with the given testcase. This can be either a 185 | command-line argument or stdin (depending on whether '@@' was found on 186 | the AFL command-line). A tuple of both command-line and stdin input is 187 | returned. 188 | """ 189 | new_args, found_atat = replace_atat(self.target_cmdline, testcase) 190 | 191 | if found_atat: 192 | stdin = None 193 | else: 194 | with open(testcase, 'rb') as inf: 195 | stdin = inf.read() 196 | 197 | return new_args, stdin 198 | 199 | def __iter__(self): 200 | for k, v in self._stats.items(): 201 | yield k, v 202 | 203 | def __str__(self): 204 | return '%s' % self._stats 205 | -------------------------------------------------------------------------------- /scripts/seed_selection/argparse.py: -------------------------------------------------------------------------------- 1 | """ 2 | argparse type-checking functions. 
3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from argparse import ArgumentTypeError 9 | from pathlib import Path 10 | import logging 11 | import re 12 | 13 | 14 | MEM_LIMIT_RE = re.compile(r'''(\d+)([TGkM]?)''') 15 | 16 | 17 | def log_level(val: str) -> int: 18 | """Ensure that an argument value is a valid log level.""" 19 | numeric_level = getattr(logging, val.upper(), None) 20 | if not isinstance(numeric_level, int): 21 | raise ArgumentTypeError('%r is not a valid log level' % val) 22 | return numeric_level 23 | 24 | 25 | def mem_limit(val: str) -> int: 26 | """Parse the memory limit (based on AFL's format, where sizes default to MB).""" 27 | limit = 0 28 | if val: 29 | match = MEM_LIMIT_RE.match(val) 30 | if not match: 31 | raise ArgumentTypeError('%r is not a valid memory limit' % val) 32 | limit = int(match.group(1))  # the numeric part must be parsed as an integer 33 | suffix = match.group(2) 34 | if suffix == 'T': 35 | limit *= 1024 * 1024 36 | elif suffix == 'G': 37 | limit *= 1024 38 | elif suffix == 'k': 39 | limit /= 1024 40 | return limit 41 | 42 | 43 | def path_exists(val: str) -> Path: 44 | """Ensure that the path argument exists.""" 45 | try: 46 | p = Path(val) 47 | except Exception as e: 48 | raise ArgumentTypeError('%r is not a valid path' % val) from e 49 | if not p.exists(): 50 | raise ArgumentTypeError('%s does not exist' % p) 51 | return p.resolve() 52 | 53 | 54 | def positive_int(val: str) -> int: 55 | """Ensure that an argument value is a positive integer.""" 56 | try: 57 | ival = int(val) 58 | if ival <= 0: 59 | raise Exception 60 | except Exception as e: 61 | raise ArgumentTypeError('%r is not a positive integer' % val) from e 62 | return ival 63 | -------------------------------------------------------------------------------- /scripts/seed_selection/coverage.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extract coverage from HDF5 files. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from functools import partial 9 | from itertools import repeat 10 | from pathlib import Path 11 | from typing import Optional, Set 12 | import multiprocessing.pool as mpp 13 | 14 | from h5py import File 15 | from tqdm import tqdm 16 | 17 | # pylint: disable=unused-import 18 | from . import istarmap 19 | 20 | 21 | def _get_seed_cov(h5_path: Path, seed: str, out_dir: Path, 22 | seeds: Optional[Set[str]] = None) -> Optional[str]: 23 | """Extract the given seed from the HDF5 file specified at `h5_path`.""" 24 | if seeds and seed not in seeds: 25 | return None 26 | 27 | with File(h5_path, 'r') as h5f, open(out_dir / seed, 'w') as outf: 28 | for edge, count in h5f[seed]: 29 | outf.write('%d:%d\n' % (edge, count)) 30 | 31 | return seed 32 | 33 | 34 | def expand_hdf5(h5f: File, out_dir: Path, seeds: Optional[Set[str]] = None, 35 | jobs: int = 1, progress: bool = False): 36 | """ 37 | Expand an HDF5 containing code coverage. 38 | 39 | Args: 40 | h5f: h5py file object. 41 | out_dir: Directory to extract seed coverage to. 42 | seeds: An optional seed set. If provided, only these seeds will be 43 | extracted. 44 | jobs: Number of parallel jobs to run. 45 | progress: Set to `True` for progress bar. 46 | 47 | Returns: 48 | Yields each extracted seed.
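Example (illustrative): `for seed in expand_hdf5(h5f, Path('cov'), jobs=4, progress=True): print(seed)`.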
49 | """ 50 | h5_filename = h5f.filename 51 | 52 | with mpp.Pool(processes=jobs) as pool: 53 | get_cov = partial(_get_seed_cov, out_dir=out_dir, seeds=seeds) 54 | h5_iter = zip(repeat(h5_filename), h5f.keys()) 55 | num_seeds = len(seeds) if seeds else len(list(h5f.keys())) 56 | print('%d seeds to extract' % num_seeds) 57 | iter_func = partial(tqdm, desc='Expanding %s' % h5_filename, 58 | total=num_seeds, unit='seeds') if progress else id 59 | for seed in iter_func(pool.istarmap(get_cov, h5_iter)): 60 | if seed: 61 | yield seed 62 | -------------------------------------------------------------------------------- /scripts/seed_selection/datastore.py: -------------------------------------------------------------------------------- 1 | """ 2 | Data store helper functions. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from pathlib import Path 9 | 10 | from tqdm import tqdm 11 | import requests 12 | 13 | 14 | CHUNK_SIZE = 1024 15 | _URL = 'https://datacommons.anu.edu.au/DataCommons/rest/records/anudc:6106/data/' 16 | 17 | 18 | def get_file(path: Path, progbar: bool = False) -> bytes: 19 | """Download a file from the data store.""" 20 | content = bytearray() 21 | 22 | with requests.get(f'{_URL}/{path}', stream=True) as r: 23 | if r.status_code != 200: 24 | raise Exception('Failed to download %s from datastore' % path) 25 | 26 | total_kb = int(r.headers.get('content-length', 0)) // CHUNK_SIZE 27 | for data in tqdm(r.iter_content(CHUNK_SIZE), total=total_kb, unit='kB', 28 | disable=not progbar): 29 | content.extend(data) 30 | 31 | return bytes(content) 32 | -------------------------------------------------------------------------------- /scripts/seed_selection/istarmap.py: -------------------------------------------------------------------------------- 1 | """ 2 | Monkey-patch `multiprocessing.Pool` to support `istarmap`. 3 | 4 | Adapted from https://stackoverflow.com/a/57364423 5 | """ 6 | 7 | 8 | import multiprocessing.pool as mpp 9 | import sys 10 | 11 | 12 | def istarmap(self, func, iterable, chunksize=1): 13 | """starmap-version of imap.""" 14 | if self._state != mpp.RUN: 15 | raise ValueError("Pool not running") 16 | 17 | if chunksize < 1: 18 | raise ValueError("Chunksize must be 1+, not {0:n}".format(chunksize)) 19 | 20 | task_batches = mpp.Pool._get_tasks(func, iterable, chunksize) 21 | if sys.version_info < (3, 8): 22 | result = mpp.IMapIterator(self._cache) 23 | else: 24 | result = mpp.IMapIterator(self) 25 | self._taskqueue.put((self._guarded_task_generation(result._job, 26 | mpp.starmapstar, 27 | task_batches), 28 | result._set_length)) 29 | return (item for chunk in result for item in chunk) 30 | 31 | 32 | mpp.Pool.istarmap = istarmap 33 | -------------------------------------------------------------------------------- /scripts/seed_selection/log.py: -------------------------------------------------------------------------------- 1 | """ 2 | Logging utilities. 
3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | import logging 9 | 10 | 11 | FORMATTER = logging.Formatter('[%(asctime)s] %(levelname)s: %(message)s') 12 | 13 | 14 | def get_logger(name: str): 15 | """Get a formatted logger.""" 16 | handler = logging.StreamHandler() 17 | handler.setFormatter(FORMATTER) 18 | 19 | logger = logging.getLogger(name) 20 | logger.addHandler(handler) 21 | 22 | return logger 23 | -------------------------------------------------------------------------------- /scripts/seed_selection/seeds.py: -------------------------------------------------------------------------------- 1 | """ 2 | Seed size utilities. 3 | 4 | Author: Adrian Herrera 5 | """ 6 | 7 | 8 | from io import BytesIO, TextIOWrapper 9 | from pathlib import Path 10 | from typing import Dict, Optional, Set, TextIO 11 | import csv 12 | 13 | from . import datastore 14 | 15 | 16 | def _download_seed_size_csv(): 17 | content = datastore.get_file(Path('seeds') / 'filesizes.csv') 18 | return TextIOWrapper(BytesIO(content), encoding='utf-8')  # wrap the raw bytes in a buffer for text decoding 19 | 20 | 21 | def get_seed_sizes(seeds: Set[str], 22 | csv_file: Optional[TextIO] = None) -> Dict[str, int]: 23 | """ 24 | Get the file sizes for the given seed set. 25 | 26 | If the seed size CSV is provided, use it. Otherwise, download it from the 27 | datastore. 28 | """ 29 | # Download the seed sizes CSV if it was not provided 30 | if not csv_file: 31 | csv_file = _download_seed_size_csv() 32 | 33 | num_seeds = len(seeds) 34 | num_sizes = 0 35 | sizes = dict() 36 | 37 | reader = csv.DictReader(csv_file, fieldnames=('filetype', 'file', 'size')) 38 | for row in reader: 39 | if num_sizes == num_seeds: 40 | break 41 | if row['file'] in seeds: 42 | sizes[row['file']] = int(row['size']) 43 | num_sizes += 1 44 | return sizes 45 | -------------------------------------------------------------------------------- /scripts/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | 4 | setup( 5 | name='fuzzing-seed-selection', 6 | description='Scripts supporting "Seed Selection for Successful Fuzzing"', 7 | author='Adrian Herrera', 8 | url='https://github.com/HexHive/fuzzing-seed-selection', 9 | platforms=['linux'], 10 | packages=find_packages(), 11 | include_package_data=True, 12 | scripts=[ 13 | 'bin/afl_cmin.py', 14 | 'bin/afl_coverage_merge.py', 15 | 'bin/afl_coverage_pca.py', 16 | 'bin/coverage_auc.py', 17 | 'bin/expand_hdf5_coverage.py', 18 | 'bin/fuzz.py', 19 | 'bin/get_corpus.py', 20 | 'bin/get_libs.py', 21 | 'bin/llvm_cov_merge.py', 22 | 'bin/llvm_cov_stats.py', 23 | 'bin/qminset.py', 24 | 'bin/replay_seeds.py', 25 | 'bin/eval_maxsat.py', 26 | 'bin/timestamp_afl.py', 27 | 'bin/timestamp_honggfuzz.py', 28 | 'bin/triage_crashes.py', 29 | 'bin/visualize_corpora.py', 30 | ], 31 | python_requires='>=3.6', 32 | install_requires=[ 33 | 'h5py', 34 | 'Jinja2', 35 | 'lifelines', 36 | 'matplotlib', 37 | 'numpy', 38 | 'pandas', 39 | 'pyelftools', 40 | 'bootstrapped', 41 | 'scikit-learn', 42 | 'supervenn', 43 | 'tabulate', 44 | 'toml', 45 | 'tqdm', 46 | 'watchdog', 47 | 'requests', 48 | 'moonbeam @ git+https://gitlab.anu.edu.au/lunar/moonbeam.git#egg=moonbeam', 49 | ], 50 | ) 51 | --------------------------------------------------------------------------------