├── .dockerignore
├── .gitignore
├── .logo.png
├── .ogharn.png
├── Dockerfile
├── LICENSE
├── README.md
├── demos
├── README.md
├── c-ares
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── char
│ │ ├── is1
│ │ ├── is2
│ │ └── malformed.raw
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ ├── s4
│ │ └── s5
├── cgltf
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── BadBasisU.gltf
│ │ ├── ToyCar.gltf
│ │ └── small.gltf
│ └── seeds_valid
│ │ ├── s1.gltf
│ │ ├── s2.gltf
│ │ ├── s3.gltf
│ │ └── s4.gltf
├── cjson
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── invalid.json
│ │ ├── is2
│ │ ├── is3
│ │ ├── is4
│ │ ├── malformed.json
│ │ └── single
│ └── seeds_valid
│ │ ├── s1.json
│ │ ├── s2.json
│ │ ├── s3.json
│ │ ├── s4.json
│ │ └── s5.json
├── faup
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── s3
│ │ ├── s4
│ │ └── small
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ └── s4
├── fyaml
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is3.yaml
│ │ ├── is4.yaml
│ │ └── small.yaml
│ └── seeds_valid
│ │ ├── s1.yaml
│ │ ├── s2.yaml
│ │ ├── s3.yaml
│ │ └── s4.yaml
├── geos
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ └── seed.txt
│ └── seeds_valid
│ │ ├── seed1.txt
│ │ └── seed5.txt
├── gpac
│ ├── Makefile
│ ├── patch.diff
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── s5.mp4
│ │ ├── s6
│ │ └── s7
│ └── seeds_valid
│ │ ├── s1.mp4
│ │ ├── s2.mp4
│ │ ├── s3.mp4
│ │ └── s4.mp4
├── hdf5
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is1
│ │ ├── is2
│ │ ├── is3
│ │ └── is4
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ └── s3
├── ical
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── char
│ │ ├── empty
│ │ └── s4.ics
│ └── seeds_valid
│ │ ├── s1.ics
│ │ ├── s2.ics
│ │ ├── s3.ics
│ │ └── s4.ics
├── lcms
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is1
│ │ ├── is2
│ │ └── is3
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ ├── s4
│ │ ├── s5
│ │ └── s7
├── lexbor
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── empty
│ │ ├── s6.html
│ │ ├── s7.html
│ │ └── single
│ └── seeds_valid
│ │ ├── s1.html
│ │ ├── s2.html
│ │ ├── s3.html
│ │ └── s4.html
├── magic
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is1
│ │ ├── is2
│ │ └── is3
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ ├── s4
│ │ └── s5
├── openexr
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── empty
│ │ ├── s1.exr
│ │ ├── s4.exr
│ │ └── single
│ └── seeds_valid
│ │ ├── s1.exr
│ │ ├── s2.exr
│ │ ├── s3.exr
│ │ └── s4.exr
├── pcap
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is1
│ │ ├── is2
│ │ ├── is3
│ │ └── is4
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s4
│ │ ├── s5
│ │ └── s6
├── pcre2
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── is1
│ │ ├── is2
│ │ ├── is3
│ │ └── is4
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ └── s4
├── raylib
│ ├── Makefile
│ ├── README.md
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── empty
│ │ └── s5.png
│ └── seeds_valid
│ │ ├── s1.png
│ │ ├── s2.png
│ │ ├── s3.png
│ │ ├── s4.png
│ │ └── s5.png
├── sqlite
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ └── s4
│ └── seeds_valid
│ │ ├── s1
│ │ ├── s2
│ │ ├── s3
│ │ ├── s4
│ │ ├── s5
│ │ ├── s6
│ │ └── s7
├── stormlib
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── invalid.SC2Replay_
│ │ ├── s3.SC2Replay
│ │ └── small
│ └── seeds_valid
│ │ ├── s1.SC2Replay
│ │ ├── s2.SC2Replay
│ │ ├── s3.SC2Replay
│ │ └── s4.SC2Replay
├── ucl
│ ├── Makefile
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ │ ├── s5
│ │ ├── s6.in
│ │ └── s7.in
│ └── seeds_valid
│ │ ├── s1.in
│ │ ├── s2.res
│ │ ├── s3.in
│ │ └── s4.in
└── zlib
│ ├── Makefile
│ ├── config.yaml
│ ├── run_ogharn.sh
│ ├── seeds_invalid
│ ├── is1
│ ├── is2
│ └── is3
│ └── seeds_valid
│ ├── s1
│ ├── s2
│ ├── s3
│ └── s4
├── extras
├── install_dependencies.sh
├── mult-to-c-types.txt
├── set_env.sh
└── type-to-val.txt
└── src
├── engine.py
├── harness_builder.py
├── ogharn.py
└── process_mx.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Keep host-side build outputs and locally installed tools out of the build context.
2 | /output
3 | /demos/*/lib*
4 | extras/AFLplusplus
5 | extras/Multiplier
6 | # Lowercase spelling as used by the Dockerfile and .gitignore (pattern matching is case-sensitive).
7 | extras/multiplier
8 | extras/llvm.sh
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /output
2 | /demos/*/lib*
3 | **/.DS_Store
4 | /demos/*/bin
5 | /demos/*/mx*
6 | /demos/*/out
7 | /extras/AFLplusplus
8 | /extras/llvm.sh
9 | /extras/multiplier
10 | src/__pycache__
11 |
--------------------------------------------------------------------------------
/.logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FuturesLab/OGHarn/ed713caab5edfef123eab53bfd2c948e515cae1f/.logo.png
--------------------------------------------------------------------------------
/.ogharn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/FuturesLab/OGHarn/ed713caab5edfef123eab53bfd2c948e515cae1f/.ogharn.png
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Single-stage build on the AFL++ project's image, which supplies the AFL++ toolchain OGHarn relies on.
2 | FROM aflplusplus/aflplusplus:latest
3 | WORKDIR /
4 | ENV DEBIAN_FRONTEND=noninteractive
5 |
6 | # Install dependencies for multiplier: build tools, Python 3.12 (deadsnakes PPA), LLVM 18 (llvm.sh), the Kitware apt repo, clang-18/lld-18, and Python 3.11 + pip
7 | RUN apt-get update \
8 | && apt-get install -y sudo \
9 | && apt-get install -y git \
10 | && apt-get install --no-install-recommends -y curl gnupg software-properties-common lsb-release build-essential libgoogle-glog-dev \
11 | && apt-get install -y tzdata \
12 | && ln -fs /usr/share/zoneinfo/Etc/UTC /etc/localtime \
13 | && echo "Etc/UTC" > /etc/timezone \
14 | && dpkg-reconfigure -f noninteractive tzdata \
15 | && sudo add-apt-repository ppa:deadsnakes/ppa \
16 | && sudo apt install python3.12-dev python3.12-venv -y \
17 | && sudo apt update \
18 | && sudo apt clean all \
19 | && wget https://apt.llvm.org/llvm.sh \
20 | && chmod u+x llvm.sh \
21 | && sudo ./llvm.sh 18 \
22 | && sudo apt install lld-18 lld -y \
23 | && wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null \
24 | && sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" \
25 | && sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 6AF7F09730B3F0A4 \
26 | && sudo apt update \
27 | && apt-get install --no-install-recommends -y \
28 | gpg zip unzip tar git \
29 | pkg-config ninja-build ccache build-essential \
30 | doctest-dev \
31 | clang-18 lld-18 \
32 | python3.11 python3.11-dev \
33 | && curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11 \
34 | && apt-get clean \
35 | && rm -rf /var/lib/apt/lists/*
36 | # Replace any distro-packaged CMake with the upstream 3.31.6 release unpacked under /opt
37 | RUN wget https://github.com/Kitware/CMake/releases/download/v3.31.6/cmake-3.31.6-linux-x86_64.tar.gz \
38 | && apt remove --purge --auto-remove cmake -y \
39 | && tar xz -f cmake-3.31.6-linux-x86_64.tar.gz -C /opt \
40 | && ln -s /opt/cmake-3.31.6-linux-x86_64/bin/cmake /usr/local/bin/cmake \
41 | && ln -s /opt/cmake-3.31.6-linux-x86_64/bin/cmake /usr/bin/cmake
42 |
43 | # download and set up multiplier
44 | RUN mkdir -p /OGHarn
45 | COPY . /OGHarn
46 | RUN mkdir /OGHarn/extras/multiplier
47 | WORKDIR /OGHarn/extras/multiplier
48 | RUN mkdir src build install
49 | # Create the Python 3.12 venv inside the install prefix. NOTE: the `activate` below only affects this one RUN shell; it does not carry over to later layers.
50 | RUN bash -c 'if [[ ! -f "/OGHarn/extras/multiplier/install/bin/activate" ]]; then \
51 | python3.12 -m venv "/OGHarn/extras/multiplier/install"; \
52 | fi && \
53 | . "/OGHarn/extras/multiplier/install/bin/activate"'
54 |
55 | # Fetch Multiplier's sources
56 | RUN git clone https://github.com/trailofbits/multiplier.git src/multiplier
57 | # Configure a Release build with clang-18/lld-18 and Python bindings; the build tree is the current directory and the install prefix is ./install
58 | RUN cmake \
59 | -DCMAKE_BUILD_TYPE=Release \
60 | -DCMAKE_INSTALL_PREFIX="./install" \
61 | -DCMAKE_LINKER_TYPE=LLD \
62 | -DCMAKE_C_COMPILER="$(which clang-18)" \
63 | -DCMAKE_CXX_COMPILER="$(which clang++-18)" \
64 | -DMX_ENABLE_INSTALL=ON \
65 | -DMX_ENABLE_PYTHON_BINDINGS=ON \
66 | -DLLVM_CONFIG=/usr/bin/llvm-config-18 \
67 | -DLLVM_DIR=/usr/lib/llvm-18/lib/cmake/llvm/ \
68 | -DCMAKE_LINKER=$(which lld-18) \
69 | -GNinja \
70 | "./src/multiplier"
71 | # Build Multiplier and install it into the prefix configured above
72 | RUN ninja install
73 |
74 | # install bear for indexing
75 | RUN sudo apt-key adv --fetch-keys https://apt.kitware.com/keys/kitware-archive-latest.asc \
76 | && sudo apt update \
77 | && sudo apt install -y bear
78 |
79 | # Start containers in the directory that holds set_env.sh
80 | WORKDIR /OGHarn/extras
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2025 Gabriel Sherman and Stefan Nagy
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # OGHarn: Oracle-guided Fuzzing Harness Generation
3 |
4 |
5 |
6 | This repository provides the source code for **OGHarn**: a prototype framework for automated generation of fuzzing harnesses for C library APIs.
7 |
8 | This work is presented in our paper **[No Harness, No Problem: Oracle-guided Harnessing for Auto-generating C API Fuzzing Harnesses](https://futures.cs.utah.edu/papers/25ICSE-b.pdf)**, appearing in the 2025 International Conference on Software Engineering (ICSE'25).
9 |
10 | * [Installing OGHarn](#installation)
11 | * [Target Library Setup](#target-library-setup)
12 | * [Generating Harnesses](#generating-harnesses)
13 | * [Additional Notes](#additional-notes)
14 | * [Bug Trophy Case](#bug-trophy-case)
15 |
16 |
17 |
18 |
19 |
20 | Citing this repository: |
21 |
22 | @inproceedings{sherman:ogharn, title = {No Harness, No Problem: Oracle-guided Harnessing for Auto-generating C API Fuzzing Harnesses}, author = {Gabriel Sherman and Stefan Nagy}, booktitle = {{IEEE/ACM} {International} {Conference} on {Software Engineering} ({ICSE})}, year = {2025},}
23 | |
24 |
25 |
26 | Developers: |
27 | Gabriel Sherman (gabe.sherman@utah.edu) and Stefan Nagy (snagy@cs.utah.edu) |
28 |
29 |
30 | License: |
31 | MIT License |
32 |
33 |
34 | Disclaimer: |
35 | This software is provided as-is with no warranty. |
36 |
37 |
38 |
39 | Our original paper-version artifact is located at: https://zenodo.org/records/14727592.
40 |
41 |
42 |
43 | # Installation
44 | OGHarn relies on the following tools, which must both be installed to the `/extras` directory:
45 | * [Multiplier](https://github.com/trailofbits/multiplier): a code indexer used in OGHarn's static analysis of the library under test.
46 | * [AFL++](https://github.com/AFLplusplus/AFLplusplus): a grey-box fuzzer used in OGHarn's harness testing and code coverage analysis.
47 |
48 | To install them, either run `/extras/install_dependencies.sh` or use our available [Dockerfile](Dockerfile).
49 |
50 | Once Multiplier and AFL++ are installed to `/extras`, run `/extras/set_env.sh` to:
51 | - Activate Multiplier's Python virtual environment and allow access to its API.
52 | - Add AFL++, Multiplier, and OGHarn to your system's `PATH`.
53 | - Install OGHarn's other Python dependencies (`PyYAML`, `cfile v0.2.0`).
54 |
55 |
56 |
57 | # Target Library Setup
58 | The following details the necessary steps for configuring harness generation for new libraries. We recommend reviewing the [demos](demos) folder for more detailed examples.
59 | ### Step 1: Initial Setup
60 | - `Library`: Harnesses generated by OGHarn are expected to be compiled with a dynamically-linked library instrumented with AFL++ and off-the-shelf sanitizers. Any of the Makefile targets titled `lib` in the demos folder provide examples for building a library for use by OGHarn.
61 | - `Index`: OGHarn requires a `.db` file generated by Multiplier containing the index of the library. Any of the Makefile targets titled `run_mx` provide examples for indexing a library using Multiplier. Refer to the [Multiplier Docs](https://github.com/trailofbits/multiplier/blob/main/docs/INDEXING.md) for additional guidance.
62 |
63 | ### Step 2: Create Makefile and Define Key Flags
64 | OGHarn relies on a user-supplied `Makefile` to define various flags and commands for compiling the harnesses targeting the library under test.
65 | It must begin with the following:
66 | - `CC_FUZZ` and `CXX_FUZZ`: AFL's compilers (`afl-clang-fast` and `afl-clang-fast++`, respectively).
67 | - `CFLAGS_ASAN` and `CXXFLAGS_ASAN`: Compiler flags for ASAN and UBSan instrumentation.
68 | - `DEPS`: Compile-time inclusion and linking commands for the eventual harnesses (e.g., `-I ... -L ...`).
69 | - `DEPS_STC`: Compile-time library flags for _statically_-linked dependencies.
70 | - `DEPS_DYN`: Compile-time library flags for _dynamically_-linked dependencies.
71 | - `DEPS_LDD`: Path to the directory containing the target's compiled shared library.
72 |
73 | As an example, below are associated `Makefile` definitions for [libMagic](https://github.com/file/file.git):
74 | ```
75 | # Compilers and instrumentation (leave as-is).
76 | CC_FUZZ = afl-clang-fast
77 | CXX_FUZZ = afl-clang-fast++
78 | CFLAGS_ASAN = -fsanitize=address,undefined
79 | CXXFLAGS_ASAN = -fsanitize=address,undefined
80 |
81 | # Library-specific settings (update accordingly).
82 | DEPS = -I library/src/ \
83 | -L library/src/.libs
84 | DEPS_STC = -l:libmagic.a -llzma -lbz2 -lz -lzstd
85 | DEPS_DYN = -l magic
86 | DEPS_LDD = library/src/.libs/
87 | ```
88 |
89 | ### Step 3: Define Makefile Commands for Harness Analysis
90 | - `harness`: Command to compile the harness using `CC_FUZZ` and `DEPS_DYN` (dynamic linking).
91 | - `showmap`: Command to execute the harness using `afl-showmap`. Set `DEPS_LDD` accordingly.
92 |
93 | If harnesses must be linked statically, the following commands also need to be supplied. They must be used in conjunction with OGHarn's `--execute_both` argument (see [Optional Arguments and Experimental Modes](#optional-arguments-and-experimental-modes)).
94 | - `harness_stc`: Same as above, but with `DEPS_STC` (static linking).
95 | - `showmap_stc`: Same as above, but without setting `LD_LIBRARY_PATH`.
96 |
97 | See the following example. Make sure that `$(OUT)` and `$(SEED)` are present as shown below, as these will be populated and used during OGHarn's harness generation.
98 | ```
99 | harness: # Command for compiling individual harnesses.
100 | $(CC_FUZZ) -o $(OUT)/harness.out $(OUT)/harness.c $(DEPS) $(DEPS_DYN) $(CFLAGS_ASAN)
101 |
102 | showmap: # Command for collecting harness code coverage.
103 | LD_LIBRARY_PATH=$(DEPS_LDD) afl-showmap -o $(OUT)/tempfile -- $(OUT)/harness.out $(SEED)
104 | ```
105 | These targets will generally be the same across libraries unless the library under test requires extra environment configuration (e.g., setting the `MAGIC` environment variable during execution of harnesses for [magic](demos/magic/Makefile)).
106 | ### Step 4: Select Library Seed Inputs
107 | OGHarn relies on two user-supplied seed file directories:
108 | - `seeds_valid`: Files that will be accepted by the target library (e.g., valid JSONs for [cJSON](https://github.com/FuturesLab/OGharn-Artifact-Final/tree/master/demos/cjson/seeds_valid)).
109 | - `seeds_invalid`: Random, perturbed seeds that will be _rejected_ by the library.
110 |
111 | OGHarn's seeds are no different than those used by fuzzers. For best results, we recommend using a varied corpus of valid and invalid seeds spanning a wide range of file sizes. Example seeds for various formats are available here: https://github.com/FuturesLab/fuzzing-seeds.
112 |
113 | ### Step 5: Optional Configuration
114 | OGHarn allows for extra control over harnessing via a user-supplied `config.yaml`. Available settings include:
115 | - `blacklist`: List of functions to _avoid_ during harnessing.
116 | - `add_preamble`: Library function to be called _before fuzz data injection_ in any harness.
117 | - `add_argument`: Hardcoded argument value for a particular function.
118 | - `add_defines`: Any `#define` statements to include in each harness.
119 |
120 | See [demos/sqlite](demos/sqlite/config.yaml), [demos/magic](demos/magic/config.yaml), and [demos/pcre2](demos/pcre2/config.yaml) for relevant examples.
121 |
122 |
123 | ### Step 6: Final Preparation
124 | Before continuing, ensure that your `Makefile` as well as the `seeds_valid` and `seeds_invalid` directories are contained in the same folder, e.g.:
125 | ```
126 | in_dir/
127 | /seeds_valid/
128 | /seeds_invalid/
129 | /Makefile
130 | /config.yaml (optional)
131 | ```
132 | This directory (e.g., `in_dir` above) will serve as your input directory to OGHarn (passed via `--input`).
133 |
134 |
135 |
136 | # Generating Harnesses
137 | To generate harnesses, run `ogharn.py` and update the following arguments accordingly. As an example, OGHarn's command for harnessing [cJSON](https://github.com/DaveGamble/cJSON.git) is:
138 | ```
139 | ogharn.py -i in -o out -n 3 -m in/lib.db -h cJSON.h -r b
140 | ```
141 | See the `run_ogharn.sh` scripts in each [demos](demos) sub-directory for more examples.
142 |
143 | ### Required Arguments
144 | - `--input` (`-i`): Path to directory housing the user-provided `Makefile` and both `seeds_` dirs.
145 | - `--output` (`-o`): Path to the output directory where OGHarn's artifacts will be stored.
146 | - `--numfuncs` (`-n`): Maximum functions to call per harness following "data entrypoint" routines.
147 | - `--mxdb` (`-m`): Path to Multiplier's generated `.db` database file.
148 | - `--headers` (`-h`): Library headers to target, to be injected via `#include` in each harness.
149 | - `--readhow` (`-r`): Controls how the harnesses will read fuzzer-generated data:
150 | - `buf` (`b`): Via buffer (e.g., `foo(char* buffer)`).
151 | - `file` (`p`): Via file name/path (e.g., `bar(char* filename)`).
152 |
153 | ### Optional Arguments and Experimental Modes
154 | - `--config` (`-c`): Path to optional `config.yaml` (see [Optional Configuration](#step-5-optional-configuration)).
155 | - `--debug` (`-d`): Report the following information from the harnessing campaign:
156 | - Failed harnesses and why they failed.
157 | - Successfully-generated harnesses.
158 | - Inferred function-to-function dependencies.
159 | - Multiplier-found declarations, typedef aliases, function pointers, enums, and macros.
160 | - Functions that were successfully harnessed.
161 | - Statistics about the harness generation campaign.
162 | - If applicable, values extracted from function call site parameter tracking.
163 | - `--execute_both` (`-e`): Run harnesses both dynamically/statically linked. Useful for linker-related crashes.
164 | - `--recurse_headers` (`-x`): Recursively parse all headers. Useful if definitions are spread across multiple files.
165 | - `--fast_mode` (`-f`): Work faster by disabling exhaustive arg search, keeping only the first-successful one.
166 | - `--target_func` (`-t`): Attempt harnessing to reach only the specified function. Useful for targeted fuzzing.
167 | - `--allow_stderr` (`-as`): Keeps harnesses where `stderr` output is seen. Useful if `stderr` is valid API behavior.
168 | - `--allow_lincov` (`-al`): Keeps harnesses with linear codecov deltas. Useful for low input-dependent logic.
169 | - `--allow_consts` (`-ac`): Considers `const` args from one function as potential non-`const` args for others.
170 | - `--allow_deepaux` (`-ad`): Arg resolution via deeper auxiliary sequences. Adds significant cost to harnessing.
171 | - `--allow_pvalret` (`-ap`): Try to retrieve _concrete_ parameter values via Multiplier's callsite analysis.
172 |
173 | # Additional Notes
174 | Below are several enhancements to, and limitations of, OGHarn. We refer readers to [our paper](https://futures.cs.utah.edu/papers/25ICSE-b.pdf) for full details.
175 |
176 | ### Newer Enhancements
177 | The following enhancements were not present in our paper's version of OGHarn.
178 | - **Targeted Harnessing**: OGHarn previously only supported library-wide harnessing, but now supports function-specific harnessing (via `--target_func`). This helped us find bugs like https://github.com/pganalyze/libpg_query/issues/254, https://github.com/htacg/tidy-html5/issues/1120, and https://github.com/OpenPrinting/cups/issues/1026. This is still considered an experimental feature.
179 | - **Struct Member Population**: OGHarn previously avoided structs entirely, but now attempts limited population of their members. This is still considered an experimental feature. We anticipate that OGHarn's current implementation is not well-suited to many struct-based libraries (e.g., libPNG, libVPX, etc.).
180 | - **Deeper Argument Resolution Sequences**: OGHarn previously only resolved API-specific types (e.g., `cJSON *`) by injecting calls to single "auxiliary functions". By toggling-on `--allow_deepaux`, OGHarn will attempt multi-function _sequences_ for resolving such arguments. This is still considered an experimental feature.
181 | - **Concrete Parameter Value Retrieval**: OGHarn previously operated only on targeted header files, but now optionally invokes Multiplier's aggressive _library-wide_ callsite analysis to learn and consider possible concrete values in its mutation of function parameters. This helped us find bugs like https://github.com/pantoniou/libfyaml/issues/121 and https://github.com/pantoniou/libfyaml/issues/122. Enable this mode via `--allow_pvalret`.
182 |
183 | ### Debugging
184 | If harnessing appears to be failing, we recommend using the `--debug` flag and inspecting OGHarn's reported information in the `output/debug` directory. Two important debugging artifacts are:
185 | - `log_failed.txt`: Each failing harness and why it was discarded by OGHarn.
186 | - `log_multiplier.txt`: Any available information about the library from Multiplier.
187 |
188 | ### Limitations
189 | - **C Libraries**: OGHarn currently supports only C-based libraries.
190 | - **Multiplier**: In cases where Multiplier fails, OGHarn will not work. We aren't sure of the extent of this.
191 | - **Struct Population**: As described above, we are only at the beginning of testing out struct population. We leave refining this to future work.
192 | - **Server-Client APIs**: OGHarn does not support functions requiring complex _server-client_-style setup.
193 |
194 |
195 | # Bug Trophy Case
196 | We are pleased that OGHarn helped uncover the following software bugs and security vulnerabilities:
197 |
198 | | API | Reported Bugs |
199 | | ---- | ---- |
200 | | HDF5 | https://github.com/HDFGroup/hdf5/issues/3790, https://github.com/HDFGroup/hdf5/issues/4431, https://github.com/HDFGroup/hdf5/issues/4432, https://github.com/HDFGroup/hdf5/issues/4433, https://github.com/HDFGroup/hdf5/issues/4434, https://github.com/HDFGroup/hdf5/issues/4435 |
201 | | Lexbor | https://github.com/lexbor/lexbor/issues/220, https://github.com/lexbor/lexbor/issues/221, https://github.com/lexbor/lexbor/issues/222 |
202 | | libFYAML | https://github.com/pantoniou/libfyaml/issues/107, https://github.com/pantoniou/libfyaml/issues/108, https://github.com/pantoniou/libfyaml/issues/118, https://github.com/pantoniou/libfyaml/issues/119, https://github.com/pantoniou/libfyaml/issues/120, https://github.com/pantoniou/libfyaml/issues/121, https://github.com/pantoniou/libfyaml/issues/122, https://github.com/pantoniou/libfyaml/issues/123 |
203 | | libGEOS | https://github.com/libgeos/geos/issues/1021, https://github.com/libgeos/geos/issues/1070, https://github.com/libgeos/geos/issues/1071, https://github.com/libgeos/geos/issues/1072, https://github.com/libgeos/geos/issues/1073, https://github.com/libgeos/geos/issues/1074, https://github.com/libgeos/geos/issues/1084 |
204 | | libICAL | https://github.com/libical/libical/issues/677, https://github.com/libical/libical/issues/678 |
205 | | libUCL | https://github.com/vstakhov/libucl/issues/288, https://github.com/vstakhov/libucl/issues/289, https://github.com/vstakhov/libucl/issues/290, https://github.com/vstakhov/libucl/issues/291, https://github.com/vstakhov/libucl/issues/292, https://github.com/vstakhov/libucl/issues/293 |
206 | | StormLib | https://github.com/ladislav-zezula/StormLib/issues/327, https://github.com/ladislav-zezula/StormLib/issues/328, https://github.com/ladislav-zezula/StormLib/issues/329, https://github.com/ladislav-zezula/StormLib/issues/330, https://github.com/ladislav-zezula/StormLib/issues/331, https://github.com/ladislav-zezula/StormLib/issues/332, https://github.com/ladislav-zezula/StormLib/issues/333, https://github.com/ladislav-zezula/StormLib/issues/334, https://github.com/ladislav-zezula/StormLib/issues/335, https://github.com/ladislav-zezula/StormLib/issues/336, https://github.com/ladislav-zezula/StormLib/issues/337, https://github.com/ladislav-zezula/StormLib/issues/338 |
207 | | RayLib | https://github.com/raysan5/raylib/issues/3924 |
208 | | libPG_Query | https://github.com/pganalyze/libpg_query/issues/254 |
209 | | Tidy-HTML5 | https://github.com/htacg/tidy-html5/issues/1120 |
210 | | PCRE2 | https://github.com/PCRE2Project/pcre2/issues/561 |
211 | | CUPS | https://github.com/OpenPrinting/cups/issues/1026 |
212 |
213 | If you find any other bugs using OGHarn, please let us know!
214 |
215 |
216 | # Acknowledgement
217 |
218 | This material is based upon work supported by the National Science Foundation under Grant No. 2419798: [CICI: TCR: Practical, Systematic Fuzz Testing for Securing Scientific Software](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2419798).
219 |
--------------------------------------------------------------------------------
/demos/README.md:
--------------------------------------------------------------------------------
1 | # Running OGHarn Demos
2 | ### Step 1: Installation
3 | - Ensure all dependencies listed in the root [README](../README.md) are installed.
4 | - Set the environment using `/extras/set_env.sh`. The binaries for Multiplier and AFL++ should be in the system path.
5 |
6 | ### Step 2: Build the necessary resources
7 | Navigate to any of the libraries listed in the `/demos` directory and run `make all`. This builds:
8 |
9 | - Dynamically linked library instrumented with AFL++ and ASAN/UBSAN for harness generation.
10 | - Statically linked library instrumented with AFL++ for fuzzing.
11 | - If applicable, a build of the library to be used for indexing with Multiplier.
12 | - Multiplier-produced index of the library for static analysis during harness generation.
13 |
14 |
15 | ### Step 3: Begin harness generation
16 | Run `run_ogharn.sh`. This will begin harness generation for the corresponding demo. In some cases OGHarn will quickly discover valid harnesses for libraries, while other libraries will take more time. This is dependent on the size and complexity of the library. Some libraries that demonstrate OGHarn's ability to quickly discover interesting fuzzing harnesses are: [cjson](./cjson), [faup](./faup), [lexbor](./lexbor), [cgltf](./cgltf), [pcre2](./pcre2/), and [ucl](./ucl).
17 |
18 | ### Step 4: Post processing
19 | In order to get a set of harnesses that exercise deep, unique coverage, allowing OGHarn to run until it has exhausted all potential harnessing routines is recommended. This typically takes less than 24 hours. For the purpose of testing, terminating harness generation after OGHarn begins to report successful harnesses is also possible.
20 |
21 | Debugging information and final harnesses will be stored in the output directory provided to OGHarn with the `-o` argument. Final harnesses will be ranked according to the number of unique edges they reach compared to all other harnesses in the corpus.
22 |
23 | Run `make harness_fuzz HARNESS_NUMBER= OUT=