├── .gitignore
├── assets
│   ├── sherpa.jpg
│   ├── logo_black.png
│   ├── logo_white.png
│   └── validated_bugs.png
├── harness_generator
│   ├── yamls
│   │   ├── leveldb.yaml
│   │   └── c-projects.yaml
│   ├── requirements.txt
│   ├── src
│   │   ├── __init__.py
│   │   ├── codex_helper.py
│   │   └── harness_generator.py
│   ├── setup-env.sh
│   ├── scripts
│   │   ├── sort_jobs.py
│   │   ├── summarize.py
│   │   ├── gather_reports.py
│   │   └── generate_reports.py
│   ├── README.md
│   └── batch_generate.py
├── LICENSE
├── Makefile
├── leveldb_writeup
│   ├── artifacts
│   │   ├── crash_analysis.md
│   │   └── crash_info.md
│   └── workflow.md
├── setup-env.sh
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .venv
3 |
4 | jobs/
--------------------------------------------------------------------------------
/assets/sherpa.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/sherpa.jpg
--------------------------------------------------------------------------------
/assets/logo_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/logo_black.png
--------------------------------------------------------------------------------
/assets/logo_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/logo_white.png
--------------------------------------------------------------------------------
/assets/validated_bugs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/validated_bugs.png
--------------------------------------------------------------------------------
/harness_generator/yamls/leveldb.yaml:
--------------------------------------------------------------------------------
1 | projects:
2 |   - project_name: leveldb
3 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
4 |     fuzz_tooling_ref: master
--------------------------------------------------------------------------------
/harness_generator/requirements.txt:
--------------------------------------------------------------------------------
1 | # Python dependencies for the harness-generation toolkit.
2 | # -----------------------------------------------
3 | # Versions are intentionally left open but pegged to reasonably
4 | # recent releases that are available on PyPI.
5 | # 6 | # • GitPython – git wrapper used by CodexHelper & batch scripts 7 | # • PyYAML – parsing of .yaml files 8 | # • python-dotenv – loading OPENAI_API_KEY from .env files 9 | 10 | PyYAML>=5.4 11 | GitPython>=3.1 12 | python-dotenv>=1.0 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Artificial Intelligence Cyber Challenge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for sherpa project 2 | # Usage: 3 | # make setup # Set up Python venv and install dependencies 4 | # make clean # Remove venv and __pycache__ 5 | # make run-script SCRIPT=script_name.py # Run a script from harness_generator/scripts 6 | 7 | VENV_DIR := .venv 8 | JOBS_DIR := ./jobs 9 | PYTHON := python3 10 | REQ_FILE := harness_generator/requirements.txt 11 | 12 | setup: 13 | $(PYTHON) -m venv $(VENV_DIR) 14 | . $(VENV_DIR)/bin/activate && pip install --upgrade pip && pip install -r $(REQ_FILE) 15 | 16 | clean: 17 | rm -rf $(VENV_DIR) 18 | rm -rf $(JOBS_DIR) 19 | find . -type d -name "__pycache__" -exec rm -rf {} + 20 | 21 | run-script: 22 | . $(VENV_DIR)/bin/activate && python harness_generator/scripts/$(SCRIPT) 23 | 24 | 25 | leveldb: 26 | @if [ -z "$$OPENAI_API_KEY" ]; then \ 27 | echo "Error: OPENAI_API_KEY is not set. Please export your OpenAI API key before running make leveldb."; \ 28 | exit 1; \ 29 | fi 30 | @docker info > /dev/null 2>&1 || (echo "Error: Docker is not running or not accessible. Please start Docker and try again." && exit 1) 31 | . $(VENV_DIR)/bin/activate && python harness_generator/batch_generate.py --targets harness_generator/yamls/leveldb.yaml 32 | 33 | .PHONY: setup clean run-script leveldb 34 | -------------------------------------------------------------------------------- /leveldb_writeup/artifacts/crash_analysis.md: -------------------------------------------------------------------------------- 1 | # Crash Analysis for fuzz_table_open Crash 2 | 3 | ## 1. Bug Type 4 | - Denial-of-Service (DoS) via unbounded memory allocation 5 | 6 | ## 2. Bug Summary 7 | Feeding arbitrary data as an SSTable file to `leveldb::Table::Open` can trigger an out-of-memory crash. 
The fuzzer input coincidentally contains the valid LevelDB table magic value, causing the parser to proceed. A malformed block handle is decoded with an extremely large `size` field, leading to a huge allocation request in `ReadBlock` and an AddressSanitizer OOM abort. 8 | 9 | ## 3. Bug Impact (real world reachability/exploitability/constraints) 10 | - An attacker controlling SSTable input can cause the library to abort or consume excessive memory (denial-of-service). 11 | - Requires supplying a crafted `.sst` file; not exploitable via normal database operations unless untrusted SST files are loaded. 12 | - **severity:** Medium 13 | 14 | ## 4. How to Patch 15 | - Validate decoded block handle fields before allocating memory: 16 | - Ensure `offset + size` does not overflow and stays within the file bounds (`file_size`). 17 | - Impose a reasonable maximum block size threshold or fail gracefully on suspicious values. 18 | - Return an error status from `Table::Open`/`ReadBlock` instead of proceeding to allocate if validation fails. -------------------------------------------------------------------------------- /harness_generator/src/__init__.py: -------------------------------------------------------------------------------- 1 | #──────────── 2 | # 3 | # Copyright 2025 Artificial Intelligence Cyber Challenge 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | # this software and associated documentation files (the “Software”), to deal in the 7 | # Software without restriction, including without limitation the rights to use, 8 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 9 | # Software, and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 16 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 17 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | # 22 | # ──────────── 23 | 24 | """Harness generation toolkit (stand-alone release).""" 25 | 26 | from .codex_helper import CodexHelper # re-export for convenience 27 | 28 | __all__ = [ 29 | "CodexHelper", 30 | ] 31 | -------------------------------------------------------------------------------- /setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # setup-env.sh for sherpa project 3 | # Usage: source ./setup-env.sh 4 | 5 | VENV_DIR=".venv" 6 | REQ_FILE="harness_generator/requirements.txt" 7 | PYTHON_BIN="python3" 8 | 9 | # Detect Apple Silicon and recommend Homebrew Python if needed 10 | if [[ $(uname -m) == "arm64" ]]; then 11 | echo "Detected Apple Silicon (arm64)." 12 | if ! command -v $PYTHON_BIN &> /dev/null; then 13 | echo "$PYTHON_BIN not found. Please install Python 3 via Homebrew: brew install python3" 14 | exit 1 15 | fi 16 | fi 17 | 18 | # Install codex binary if missing 19 | if ! command -v codex &> /dev/null; then 20 | echo "codex not found. Installing..." 
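    # Choose an installer for the host OS: Homebrew on macOS, npm on Linux.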
21 | if [[ "$(uname)" == "Darwin" ]]; then 22 | # macOS 23 | if command -v brew &> /dev/null; then 24 | brew install codex 25 | else 26 | echo "Homebrew not found. Please install Homebrew first: https://brew.sh" 27 | exit 1 28 | fi 29 | elif [[ "$(uname)" == "Linux" ]]; then 30 | # Linux 31 | if command -v apt &> /dev/null; then 32 | sudo apt update && sudo apt install -y codex 33 | else 34 | echo "apt not found. Please install codex manually." 35 | exit 1 36 | fi 37 | else 38 | echo "Unsupported OS. Please install codex manually." 39 | exit 1 40 | fi 41 | fi 42 | 43 | # Create virtual environment if it doesn't exist 44 | if [ ! -d "$VENV_DIR" ]; then 45 | echo "Creating virtual environment in $VENV_DIR..." 46 | $PYTHON_BIN -m venv $VENV_DIR 47 | fi 48 | 49 | # Activate virtual environment 50 | source $VENV_DIR/bin/activate 51 | 52 | # Upgrade pip and install dependencies 53 | pip install --upgrade pip 54 | pip install -r $REQ_FILE 55 | 56 | echo "Environment setup complete." 57 | -------------------------------------------------------------------------------- /leveldb_writeup/artifacts/crash_info.md: -------------------------------------------------------------------------------- 1 | # Crash Info 2 | 3 | ## Reproducer command 4 | ```bash 5 | python infra/helper.py reproduce leveldb fuzz_table_open build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3 6 | ``` 7 | 8 | ## Reproducer log 9 | ```text 10 | # python infra/helper.py reproduce leveldb fuzz_table_open build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3 11 | + FUZZER=fuzz_table_open 12 | + shift 13 | + '[' '!' -v TESTCASE ']' 14 | + TESTCASE=/testcase 15 | + '[' '!' -f /testcase ']' 16 | + export RUN_FUZZER_MODE=interactive 17 | + RUN_FUZZER_MODE=interactive 18 | + export FUZZING_ENGINE=libfuzzer 19 | + FUZZING_ENGINE=libfuzzer 20 | + export SKIP_SEED_CORPUS=1 21 | + SKIP_SEED_CORPUS=1 22 | + run_fuzzer fuzz_table_open -runs=100 /testcase 23 | vm.mmap_rnd_bits = 28 24 | /out/fuzz_table_open -rss_limit_mb=2560 -timeout=25 -runs=100 /testcase < /dev/null 25 | INFO: Running with entropic power schedule (0xFF, 100). 26 | INFO: Seed: 1983861041 27 | INFO: Loaded 1 modules (1554 inline 8-bit counters): 1554 [0x5591fc773288, 0x5591fc77389a), 28 | INFO: Loaded 1 PC tables (1554 PCs): 1554 [0x5591fc7738a0,0x5591fc7799c0), 29 | /out/fuzz_table_open: Running 1 inputs 100 time(s) each. 
30 | Running: /testcase
31 | ==14==WARNING: AddressSanitizer failed to allocate 0xffffffffffe0 bytes
32 | =================================================================
33 | ==14==ERROR: AddressSanitizer: out of memory: allocator is trying to allocate 0xffffffffffe0 bytes
34 |     #0 0x5591fc64a44d in operator new[](unsigned long) /src/llvm-project/compiler-rt/lib/asan/asan_new_delete.cpp:89:3
35 |     #1 0x5591fc66bac0 in leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*) /src/leveldb/table/format.cc:78:15
36 |     #2 0x5591fc64d960 in leveldb::Table::Open(leveldb::Options const&, leveldb::RandomAccessFile*, unsigned long, leveldb::Table**) /src/leveldb/table/table.cc:61:7
37 |     #3 0x5591fc64ca56 in LLVMFuzzerTestOneInput /src/leveldb/build/../fuzz_table_open.cc:29:7
38 |     #4 0x5591fc5011a0 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:614:13
39 |     #5 0x5591fc4ec415 in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:327:6
40 |     #6 0x5591fc4f1eaf in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:862:9
41 |     #7 0x5591fc51d152 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
42 |     #8 0x7eff7acd7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 5792732f783158c66fb4f3756458ca24e46e827d)
43 |
44 | DEDUP_TOKEN: operator new[](unsigned long)--leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*)--leveldb::Table::Open(leveldb::Options const&, leveldb::RandomAccessFile*, unsigned long, leveldb::Table**)
45 | ==14==HINT: if you don't care about these errors you may set allocator_may_return_null=1
46 | SUMMARY: AddressSanitizer: out-of-memory /src/leveldb/table/format.cc:78:15 in leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*)
47 | ==14==ABORTING
48 |
49 | === STDERR ===
50 | INFO:__main__:Running: docker run --privileged --shm-size=2g --platform linux/amd64 --rm -i -e HELPER=True -e ARCHITECTURE=x86_64 -v /home/ubuntu/workspace/friday/tools/generate-harnesses/output/leveldb_a9fcfd3fbc7d492282c714b6e0b46723/build/out/leveldb:/out -v /home/ubuntu/workspace/friday/tools/generate-harnesses/output/leveldb_a9fcfd3fbc7d492282c714b6e0b46723/build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3:/testcase -t gcr.io/oss-fuzz-base/base-runner reproduce fuzz_table_open -runs=100.
51 |
52 | ```
53 |
54 | ## Harness source
55 | ```c
56 | #include <cstddef>
57 | #include <cstdint>
58 | #include <cstdio>
59 | #include <fstream>
60 | #include <string>
61 | #include "leveldb/env.h"
62 | #include "leveldb/table.h"
63 | #include "leveldb/options.h"
64 | #include "leveldb/status.h"
65 | #include "leveldb/iterator.h"
66 |
67 | // Table::Open reads SST files from disk (e.g. user-supplied .sst files).
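// The harness below funnels each fuzz input through that path: it writes the
// raw bytes to a temporary .sst file, asks Table::Open() to parse it, and
// iterates over whatever table the parser accepts, so the footer, block
// handles and block reads all operate on attacker-controlled data.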
68 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { 69 | const char* fname = "/tmp/fuzz_table_open.sst"; 70 | std::ofstream out(fname, std::ios::binary); 71 | if (!out) 72 | return 0; 73 | out.write(reinterpret_cast(data), size); 74 | out.close(); 75 | 76 | leveldb::Options options; 77 | options.env = leveldb::Env::Default(); 78 | leveldb::RandomAccessFile* file = nullptr; 79 | leveldb::Status s = options.env->NewRandomAccessFile(fname, &file); 80 | if (!s.ok()) 81 | return 0; 82 | 83 | leveldb::Table* table = nullptr; 84 | s = leveldb::Table::Open(options, file, size, &table); 85 | if (!s.ok()) { 86 | delete file; 87 | return 0; 88 | } 89 | 90 | leveldb::Iterator* it = table->NewIterator(leveldb::ReadOptions()); 91 | for (it->SeekToFirst(); it->Valid(); it->Next()) {} 92 | delete it; 93 | delete table; 94 | delete file; 95 | std::remove(fname); 96 | return 0; 97 | } 98 | ``` 99 | 100 | ## Crashing input (hexdump) 101 | ```text 102 | 00000000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ 103 | 00000010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ................ 104 | 00000020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff fe ................ 105 | 00000030: ff 57 fb 80 8b 24 75 47 db ff ff ff ff ff 3f 3d .W...$uG......?= 106 | 00000040: 00 00 ff ff ff ff ff ff ff ff f7 ff ff ff ff ff ................ 107 | 00000050: 02 80 a8 0e 80 8b ff ff ff ff ff 57 fb 80 8b 24 ...........W...$ 108 | 00000060: 75 47 db uG. 109 | 110 | ``` 111 | -------------------------------------------------------------------------------- /harness_generator/setup-env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Simple environment bootstrapper for this repository. 4 | # 5 | # Features 6 | # • Confirms that basic system tools (git, docker) are available – offering to 7 | # install them via apt when missing. 8 | # • Optionally installs the libxapian-dev development headers. 9 | # • Optionally creates (or re-uses) a virtual-environment in ./.sherpa-venv. 10 | # • Installs Python dependencies from requirements.txt. 11 | # 12 | # The script is intentionally interactive so it can be re-run safely. 13 | 14 | #──────────── 15 | # 16 | # Copyright 2025 Artificial Intelligence Cyber Challenge 17 | # 18 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 19 | # this software and associated documentation files (the “Software”), to deal in the 20 | # Software without restriction, including without limitation the rights to use, 21 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 22 | # Software, and to permit persons to whom the Software is furnished to do so, 23 | # subject to the following conditions: 24 | # 25 | # The above copyright notice and this permission notice shall be included in all 26 | # copies or substantial portions of the Software. 27 | # 28 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 29 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 30 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 31 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 32 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 33 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
34 | #
35 | # ────────────
36 |
37 | set -euo pipefail
38 |
39 | #------------------------------------------------------------------------------
40 | # Ensure required command-line tools are present
41 | #------------------------------------------------------------------------------
42 |
43 | # ensure_tool <cmd_name> <pkg_name>
44 | # If <cmd_name> is non-empty the function checks for its presence in PATH. If
45 | # the command is absent (or <cmd_name> is empty) it falls back to verifying
46 | # that the corresponding apt package is installed via dpkg. When missing it
47 | # offers an interactive prompt to install it.
48 |
49 | ensure_tool() {
50 |     local cmd_name="$1"  # may be empty string for header-only libs like libxapian-dev
51 |     local pkg_name="$2"
52 |
53 |     local cmd_missing=false
54 |     if [[ -n "$cmd_name" ]]; then
55 |         if ! command -v "$cmd_name" >/dev/null 2>&1; then
56 |             cmd_missing=true
57 |         fi
58 |     fi
59 |
60 |     # If we didn't check a command or the command is present, still ensure the
61 |     # package is installed (covers header-only deps).
62 |     if dpkg -s "$pkg_name" >/dev/null 2>&1; then
63 |         # Package present, and command (if any) is present—nothing to do.
64 |         $cmd_missing && echo "'$cmd_name' will become available after reopening the shell." >&2
65 |         return 0
66 |     fi
67 |
68 |     echo "The package '$pkg_name' is required${cmd_name:+ (provides '$cmd_name')}." >&2
69 |     read -rp "Install '$pkg_name' now? [y/N]: " _install_pkg
70 |     case "${_install_pkg:-N}" in
71 |         [yY]|[yY][eE][sS])
72 |             echo "Installing $pkg_name (requires sudo)..."
73 |             sudo apt update && sudo apt install -y "$pkg_name"
74 |             ;;
75 |         *)
76 |             echo "Cannot continue without '$pkg_name'. Please install it and re-run the script." >&2
77 |             exit 1
78 |             ;;
79 |     esac
80 | }
81 |
82 | # Verify core dependencies
83 | ensure_tool git git
84 | ensure_tool docker docker.io
85 | ensure_tool "" libxapian-dev
86 |
87 | # The repository relies on the "codex" command-line tool.
88 | # Detect Codex – offer instructions for installing when missing.
89 | if ! command -v codex >/dev/null 2>&1; then
90 |     echo "Codex CLI not detected in PATH. It is required for harness generation."
91 |     echo "Follow the instructions in the Codex CLI repository for installation: https://github.com/openai/codex"
92 | fi
93 |
94 | # libxapian-dev is handled by ensure_tool above.
95 |
96 | PROJECT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
97 | cd "$PROJECT_ROOT"
98 |
99 | # Detect python executable
100 | detect_python() {
101 |     if command -v python3 > /dev/null 2>&1; then
102 |         echo python3
103 |     elif command -v python > /dev/null 2>&1; then
104 |         echo python
105 |     else
106 |         echo "Error: Python interpreter not found in PATH." >&2
107 |         exit 1
108 |     fi
109 | }
110 |
111 | PYTHON_BIN="$(detect_python)"
112 |
113 | VENV_DIR="${PROJECT_ROOT}/.sherpa-venv"
114 |
115 | #------------------------------------------------------------------------------
116 | # Virtual-environment handling
117 | #------------------------------------------------------------------------------
118 |
119 | activate_venv=false
120 |
121 | if [[ -d "$VENV_DIR" ]]; then
122 |     echo "Found existing virtual environment at $VENV_DIR"
123 |     activate_venv=true
124 | else
125 |     read -rp "No virtual environment found. Create one at ./.sherpa-venv? [y/N]: " _create
126 |     case "${_create:-N}" in
127 |         [yY]|[yY][eE][sS])
128 |             echo "Creating virtual environment..."
129 |             "$PYTHON_BIN" -m venv "$VENV_DIR"
130 |             activate_venv=true
131 |             ;;
132 |         *)
133 |             echo "Proceeding without a dedicated virtual environment.
Ensure you have the right permissions." 134 | ;; 135 | esac 136 | fi 137 | 138 | # Determine pip invocation (always via python -m pip to avoid PATH issues) 139 | 140 | if $activate_venv; then 141 | source "$VENV_DIR/bin/activate" 142 | fi 143 | 144 | # After potential activation re-detect python so it points to venv interpreter 145 | PYTHON_BIN="$(detect_python)" 146 | 147 | PIP_CMD=("$PYTHON_BIN" -m pip) 148 | 149 | #------------------------------------------------------------------------------ 150 | # Requirements installation 151 | #------------------------------------------------------------------------------ 152 | 153 | if [[ -f "$PROJECT_ROOT/requirements.txt" ]]; then 154 | echo "Installing dependencies from requirements.txt..." 155 | "${PIP_CMD[@]}" install --upgrade -r "$PROJECT_ROOT/requirements.txt" 156 | else 157 | echo "requirements.txt not found – skipping dependency installation." >&2 158 | fi 159 | 160 | echo && echo "Environment setup complete. Ready for harness generation! 🚀" 161 | 162 | if $activate_venv && [[ "${BASH_SOURCE[0]}" == "$0" ]]; then 163 | echo -e "\nExecute \`source ./.sherpa-venv/bin/activate\` to enter the virtual environment." 164 | fi 165 | -------------------------------------------------------------------------------- /harness_generator/scripts/sort_jobs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | #──────────── 3 | # 4 | # Copyright 2025 Artificial Intelligence Cyber Challenge 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | # this software and associated documentation files (the “Software”), to deal in the 8 | # Software without restriction, including without limitation the rights to use, 9 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 10 | # Software, and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 17 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 20 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | # 23 | # ──────────── 24 | """ 25 | sort_jobs.py 26 | ──────────── 27 | Classify and move harness-run job directories into three buckets: 28 | 29 | • crashes - at least one crash/OOM/timeout file produced **and** 30 | the crash *does not* appear to be harness-induced. 31 | • false_positives - crash_analysis.md contains the marker *"HARNESS ERROR"*. 32 | • no_crashes - build/out/** contains no crash, oom or timeout files. 33 | 34 | The script replaces the previous trio of helper utilities 35 | (*sort_crashes.py*, *sort_false_positives.py*, *sort_non_crashing.py*) with a 36 | single, more ergonomic command. 
37 | 38 | Usage examples 39 | ────────────── 40 | # Use defaults (./jobs → ./sorted) 41 | ./sort_jobs.py 42 | 43 | # Custom locations 44 | ./sort_jobs.py --input batch_runs --output triaged 45 | 46 | Directory layout 47 | ──────────────── 48 | All job directories directly under *input* are inspected. They are **moved** 49 | to one of the following sub-directories inside *output* (created if absent): 50 | 51 | sorted/ 52 | crashes/ 53 | false_positives/ 54 | no_crashes/ 55 | 56 | If a target directory already exists a numeric suffix ("_1", "_2", …) is 57 | automatically appended to avoid overwriting previous runs. 58 | """ 59 | 60 | from __future__ import annotations 61 | 62 | import argparse 63 | import shutil 64 | import sys 65 | from pathlib import Path 66 | from typing import Iterable, List, Tuple 67 | 68 | 69 | # --------------------------------------------------------------------------- 70 | # Helper functions 71 | # --------------------------------------------------------------------------- 72 | 73 | 74 | def _unique_dest(dest_root: Path, name: str) -> Path: 75 | """Return a unique destination path inside *dest_root* (adds _N if needed).""" 76 | 77 | candidate = dest_root / name 78 | idx = 1 79 | while candidate.exists(): 80 | candidate = dest_root / f"{name}_{idx}" 81 | idx += 1 82 | return candidate 83 | 84 | 85 | def _list_matching(root: Path, prefixes: Iterable[str]) -> List[Path]: 86 | """Return immediate children of *root* whose names start with any prefix.""" 87 | 88 | return [ 89 | p 90 | for p in root.glob("*") 91 | if p.is_file() and any(p.name.startswith(pre) for pre in prefixes) 92 | ] 93 | 94 | 95 | def _detect_bug_files(run_dir: Path) -> bool: 96 | """Return *True* if the run directory contains any crash/oom/timeout files.""" 97 | 98 | build_out_root = run_dir / "build" / "out" 99 | 100 | # There should be exactly one project sub-directory under build/out/ 101 | subdirs = ( 102 | [d for d in build_out_root.iterdir() if d.is_dir()] 103 | if build_out_root.is_dir() 104 | else [] 105 | ) 106 | 107 | if len(subdirs) != 1: 108 | return False 109 | 110 | project_out = subdirs[0] 111 | bug_files = _list_matching(project_out, ("crash", "oom", "timeout")) 112 | return bool(bug_files) 113 | 114 | 115 | def _has_harness_error(run_dir: Path) -> bool: 116 | """Return *True* if crash_analysis.md mentions a harness error marker.""" 117 | 118 | analysis = run_dir / "crash_analysis.md" 119 | if not analysis.is_file(): 120 | return False 121 | 122 | try: 123 | content = analysis.read_text(encoding="utf-8", errors="replace") 124 | except Exception: 125 | return False 126 | 127 | return "harness error" in content.lower() 128 | 129 | 130 | def classify(run_dir: Path) -> str: 131 | """Return the classification label for *run_dir* (crashes/false_positives/no_crashes).""" 132 | 133 | # False positives have crash docs *and* the harness error marker. 134 | if _has_harness_error(run_dir): 135 | return "false_positives" 136 | 137 | if _detect_bug_files(run_dir): 138 | return "crashes" 139 | 140 | return "no_crashes" 141 | 142 | 143 | def sort_jobs(src_root: Path, dst_root: Path) -> Tuple[int, int, int]: 144 | """Move job directories from *src_root* into bucketed sub-directories under *dst_root*. 145 | 146 | Returns a tuple (crashes, false_positives, no_crashes) with the number of 147 | directories moved into each bucket. 148 | """ 149 | 150 | if not src_root.is_dir(): 151 | sys.exit(f"Input directory not found: {src_root}") 152 | 153 | # Ensure bucket directories exist. 
154 |     crashes_dir = dst_root / "crashes"
155 |     fp_dir = dst_root / "false_positives"
156 |     nc_dir = dst_root / "no_crashes"
157 |
158 |     for d in (crashes_dir, fp_dir, nc_dir):
159 |         d.mkdir(parents=True, exist_ok=True)
160 |
161 |     counts = {"crashes": 0, "false_positives": 0, "no_crashes": 0}
162 |
163 |     for run_dir in sorted(src_root.iterdir()):
164 |         if not run_dir.is_dir():
165 |             continue
166 |
167 |         label = classify(run_dir)
168 |
169 |         dest_root = {
170 |             "crashes": crashes_dir,
171 |             "false_positives": fp_dir,
172 |             "no_crashes": nc_dir,
173 |         }[label]
174 |
175 |         dest = _unique_dest(dest_root, run_dir.name)
176 |         print(f"[+] {run_dir.name} → {label}/{dest.name}")
177 |         shutil.move(str(run_dir), dest)
178 |         counts[label] += 1
179 |
180 |     return counts["crashes"], counts["false_positives"], counts["no_crashes"]
181 |
182 |
183 | # ---------------------------------------------------------------------------
184 | # CLI
185 | # ---------------------------------------------------------------------------
186 |
187 |
188 | def main() -> None:
189 |     ap = argparse.ArgumentParser(
190 |         description="Sort job run directories into crashes/false_positives/no_crashes.",
191 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
192 |     )
193 |
194 |     ap.add_argument(
195 |         "--input",
196 |         type=Path,
197 |         default=Path("./jobs"),
198 |         help="Directory produced by batch_generate.py (default: ./jobs)",
199 |     )
200 |     ap.add_argument(
201 |         "--output",
202 |         type=Path,
203 |         default=Path("./sorted"),
204 |         help="Destination root (buckets will be created here).",
205 |     )
206 |
207 |     args = ap.parse_args()
208 |
209 |     src = args.input.resolve()
210 |     dst = args.output.resolve()
211 |
212 |     crashes, fps, ncs = sort_jobs(src, dst)
213 |
214 |     print(
215 |         f"\nFinished. Crashes: {crashes}, False-positives: {fps}, No-crash: {ncs}."
216 |     )
217 |
218 |
219 | if __name__ == "__main__":
220 |     main()
221 |
--------------------------------------------------------------------------------
/harness_generator/README.md:
--------------------------------------------------------------------------------
1 | # OSS-Fuzz Harness Generation Toolkit
2 |
3 | The **Harness Generation Toolkit** automates the entire workflow of adding
4 | new *libFuzzer* harnesses to existing [OSS-Fuzz](https://github.com/google/oss-fuzz) projects, executing the
5 | resulting fuzzers and producing polished vulnerability reports when crashes
6 | are identified.
7 |
8 | ---
9 |
10 | ## Contents
11 |
12 | ```
13 | harness_generator/
14 | ├── batch_generate.py        # batch driver (multiple targets)
15 | ├── src/                     # Python package with core logic
16 | │   ├── codex_helper.py      # Codex CLI wrapper (sentinel + retry logic)
17 | │   └── harness_generator.py # single-project orchestrator
18 | ├── scripts/                 # triage & reporting utilities
19 | │   ├── sort_jobs.py         # classify jobs → ./sorted/[buckets]
20 | │   ├── summarize.py         # Markdown summary of findings (no LLM usage)
21 | │   ├── generate_reports.py  # create disclosure-style bug_report.md
22 | │   └── gather_reports.py    # collect final artifacts into one folder
23 | └── yamls/                   # sample target lists consumed by batch_generate.py
24 | ```
25 |
26 | ---
27 |
28 | ## 1. Core Workflow Overview
29 |
30 | For **day-to-day usage** you will typically launch *batch_generate.py* – it
31 | drives the end-to-end process and drops every run into `./jobs/`.
32 |
33 | ```bash
34 | # Example: fuzz 40 C projects, eight rounds each, using 32 Codex workers
35 | python batch_generate.py --targets ./yamls/c-projects.yaml \
36 |     --threads 32 --rounds 8
37 | ```
38 |
39 | Behind the scenes *batch_generate.py* clones the target repository, prunes
40 | unrelated project folders under `oss-fuzz/projects/`, then invokes
41 | `harness_generator.py` one or more times (**rounds**) for that project. All
42 | stdout/stderr is tee’d to `harness_round_<n>.log` so nothing is lost if the
43 | main process is interrupted.
44 |
45 | `harness_generator.py` itself encapsulates the following high-level steps:
46 |
47 | 1. **Baseline build** – build the project’s Docker image & existing fuzzers
48 |    (via `infra/helper.py`) to record the current binary set.
49 | 2. **Archive extraction** – unpack any source bundles (tar/zip) so Codex can
50 |    edit the real files.
51 | 3. **Harness creation** – Codex is instructed to add one new
52 |    `LLVMFuzzerTestOneInput` harness and adjust build scripts accordingly.
53 | 4. **Re-package archives** – re-create any bundles touched by Codex.
54 | 5. **Rebuild with retries** – rebuild image & fuzzers; compiler errors are
55 |    forwarded to Codex for minimal fixes (configurable retry count).
56 | 6. **Seed corpus** – before each *new* fuzzer is executed, Codex populates a
57 |    seed corpus directory with meaningful inputs.
58 | 7. **Fuzzer execution** – every new fuzzer is run; crash / OOM / timeout
59 |    artifacts are detected and logged.
60 | 8. **Crash analysis** – the first crash is reproduced; the harness source,
61 |    reproducer log and hexdump are combined into *crash_info.md*.
62 |    Codex then writes *crash_analysis.md* explaining root cause, impact and
63 |    patch guidance. Finally a `crash_reproducer.sh` PoC script is authored.
64 |
65 | All Codex interactions are handled by **CodexHelper**. It runs the Codex CLI
66 | in a pseudo-terminal, watches for a sentinel file (`./done`), retries on
67 | transient errors, and only returns once a *git diff* confirms that edits were
68 | made.
69 |
70 | ### Running a single project
71 |
72 | ```bash
73 | python -m src.harness_generator \
74 |     --sanitizer address --codex-cli codex --max-retries 3
75 | ```
76 | ---
77 |
78 | ## 2. Batch Generation
79 |
80 | `batch_generate.py` reads a YAML file whose `projects:` list describes
81 | multiple targets (name + fuzz-tooling repo URL + git ref). For every entry
82 | it clones the repository into **./jobs/**`<project>_<uuid>` and invokes
83 | `harness_generator.py` *n* times ("rounds"). All stdout/stderr is tee’d to
84 | `harness_round_<n>.log` inside the job directory.
85 |
86 | The default output tree therefore looks like:
87 |
88 | ```
89 | jobs/
90 |   libpng_16f7f21a/
91 |     crash_analysis.md
92 |     crash_info.md
93 |     ...
94 |   freetype2_51c9ea11/
95 |     ...
96 | ```
97 |
98 | ---
99 |
100 | ## 3. Triage & Reporting Utilities (scripts/)
101 |
102 | | Script | Purpose |
103 | |--------|---------|
104 | | **sort_jobs.py** | Move each job directory from `./jobs` into `./sorted/`:<br>
• `crashes/` – real crash files present, *no* `HARNESS ERROR` marker.
• `false_positives/` – `HARNESS ERROR` appears in *crash_analysis.md*.
• `no_crashes/` – no crash/oom/timeout produced. |
105 | | **generate_reports.py** | For every job that has *crash_info.md* **and** *crash_analysis.md*, ask Codex to create a polished `bug_report.md` following the embedded disclosure template. |
106 | | **gather_reports.py** | Copy `{crash_info,crash_analysis,bug_report}.md` (+ optional PoC scripts) for each job into a flat structure under `./sorted/reports/` for easy export. |
107 | | **summarize.py** | Build a Markdown overview of all jobs (counts, per-project sections embedding analysis & info). |
108 |
109 | All helper CLIs expose `--help` with full documentation; defaults are chosen
110 | so running them in order without arguments *just works*:
111 |
112 | ```
113 | # 1. Sort raw jobs into buckets
114 | python scripts/sort_jobs.py
115 |
116 | # 2. Generate bug_report.md for each real crash
117 | python scripts/generate_reports.py --input ./sorted/crashes
118 |
119 | # 3. Collect artifacts for disclosure upload
120 | python scripts/gather_reports.py --input ./sorted/crashes --output ./sorted/reports
121 |
122 | # 4. Produce a human-readable summary
123 | python scripts/summarize.py --input ./sorted/crashes > triage_summary.md
124 | ```
125 |
126 | ---
127 |
128 | ## 4. Installation & Requirements
129 |
130 | 1. **Provide an API key** – either export it directly:
131 |
132 |    ```bash
133 |    export OPENAI_API_KEY="sk-your-key"
134 |    ```
135 |
136 |    or create a `.env` file (anywhere) with
137 |
138 |    ```ini
139 |    OPENAI_API_KEY=sk-your-key
140 |    ```
141 |
142 |    and pass the path via `--ai-key-path`.
143 |
144 | 2. **System packages** – Docker, git, clang/llvm, etc. as required by
145 |    OSS-Fuzz’s `infra/helper.py` build process.
146 |
147 |
148 | ### Codex CLI
149 |
150 | The repository relies on the **Codex CLI**. `setup-env.sh` will detect its
151 | absence and point you to the installation instructions in the Codex CLI
152 | repository. The simplest route is via npm:
153 |
154 | ```bash
155 | npm install -g @openai/codex
156 | ```
157 |
158 | ### Python environment
159 |
160 | 1. Create & activate a virtual environment (recommended):
161 |
162 |    ```bash
163 |    python3 -m venv .venv
164 |    source .venv/bin/activate
165 |    ```
166 |
167 | 2. Install the required Python packages:
168 |
169 |    ```bash
170 |    pip install -r requirements.txt
171 |    ```
172 |
173 |    The toolkit depends on only three third-party libraries – *GitPython*,
174 |    *PyYAML* and *python-dotenv*. They are listed in **requirements.txt** so
175 |    the above command resolves everything in one go.
176 |
177 |    **Note:** The codebase uses modern type-hinting features introduced in
178 |    Python 3.9 – please make sure you run it on Python ≥ 3.9.
179 |
180 | 3. Ensure the **git** command-line tool itself is present. Several modules
181 |    shell out to `git` for repository operations; missing it will result in
182 |    runtime errors such as `FileNotFoundError: [Errno 2] No such file or directory: 'git'`.
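For reference, a minimal sketch of how the API key from step 1 above is typically resolved with *python-dotenv* (the toolkit's actual loading code may differ, and the `.env` path below is illustrative):

```python
# Sketch only: resolve OPENAI_API_KEY from the environment or a .env file.
import os

from dotenv import load_dotenv

# Variables already present in the environment take precedence over the file;
# the path stands in for whatever is passed via --ai-key-path.
load_dotenv(".env")

api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise SystemExit("OPENAI_API_KEY is not set – export it or add it to a .env file")
```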
183 | 184 | Other prerequisites 185 | ------------------- 186 | 187 | * Docker + OSS-Fuzz build dependencies 188 | * Codex CLI in `$PATH` (or specify via `--codex-cli`) 189 | * OpenAI-compatible API key (environment variable **OPENAI_API_KEY** or a 190 | path passed with `--ai-key-path`) 191 | -------------------------------------------------------------------------------- /harness_generator/scripts/summarize.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #──────────── 4 | # 5 | # Copyright 2025 Artificial Intelligence Cyber Challenge 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | # this software and associated documentation files (the “Software”), to deal in the 9 | # Software without restriction, including without limitation the rights to use, 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 11 | # Software, and to permit persons to whom the Software is furnished to do so, 12 | # subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # ──────────── 25 | 26 | """ 27 | summarize.py 28 | ──────────── 29 | 30 | Generate a Markdown overview of Codex harness runs stored in an *output* 31 | directory (default: **./jobs**). 32 | 33 | The report contains: 34 | 35 | • Total run directories processed and count of *unique* OSS-Fuzz projects. 36 | • Counts of run directories that include `crash_analysis.md`, `crash_info.md`, 37 | and those flagged as **false positives** (i.e. `crash_analysis.md` contains 38 | the string *“HARNESS ERROR”*). 39 | • **Only** projects whose crashes are **not** false positives get a section 40 | embedding: 41 | - Full path to every *real* crashing run directory. 42 | - Contents of `crash_analysis.md` and `crash_info.md`. 43 | 44 | False-positive runs are tallied but *omitted* from the detailed sections. 
45 | 46 | Usage examples 47 | -------------- 48 | # Print report to stdout 49 | ./summarize.py 50 | 51 | # Custom output root and write to file 52 | ./summarize.py --output /tmp/my_runs --report triage_summary.md 53 | """ 54 | 55 | from __future__ import annotations 56 | 57 | import argparse 58 | import re 59 | from pathlib import Path 60 | from typing import Dict, List, Tuple 61 | 62 | 63 | # ────────────────────────── helpers ────────────────────────── 64 | 65 | 66 | def _project_name(run_dir: Path) -> str: 67 | """Best-effort project name inference from directory layout.""" 68 | build_out = run_dir / "build" / "out" 69 | if build_out.is_dir(): 70 | subs = [d.name for d in build_out.iterdir() if d.is_dir()] 71 | if len(subs) == 1: 72 | return subs[0] 73 | parts = run_dir.name.rsplit("_", 1) 74 | return parts[0] if len(parts) == 2 else run_dir.name 75 | 76 | 77 | def _safe_code(text: str) -> str: 78 | """Prevent premature closing of code fences in embedded markdown.""" 79 | return text.replace("```", "```​") 80 | 81 | 82 | def _is_false_positive(analysis_path: Path) -> bool: 83 | """Return True if crash_analysis.md contains 'HARNESS ERROR' (case-insensitive).""" 84 | if not analysis_path.is_file(): 85 | return False 86 | return bool(re.search(r"harness\s+error", analysis_path.read_text(errors="ignore"), re.I)) 87 | 88 | 89 | # ───────────────────────── summariser ───────────────────────── 90 | 91 | 92 | def build_summary(output_root: Path) -> str: 93 | run_dirs = [d for d in output_root.iterdir() if d.is_dir()] 94 | 95 | total_runs = len(run_dirs) 96 | unique_projects = {_project_name(d) for d in run_dirs} 97 | 98 | info_total = 0 99 | analysis_total = 0 100 | fp_total = 0 # false positives 101 | 102 | # Stores (run_dir, is_false_positive) 103 | project_runs: Dict[str, List[Tuple[Path, bool]]] = {} 104 | 105 | for run_dir in run_dirs: 106 | analysis_path = run_dir / "crash_analysis.md" 107 | info_path = run_dir / "crash_info.md" 108 | 109 | has_info = info_path.is_file() 110 | has_analysis = analysis_path.is_file() 111 | is_fp = _is_false_positive(analysis_path) 112 | 113 | if has_info or has_analysis: 114 | proj = _project_name(run_dir) 115 | project_runs.setdefault(proj, []).append((run_dir, is_fp)) 116 | 117 | if has_info: 118 | info_total += 1 119 | if has_analysis: 120 | analysis_total += 1 121 | if is_fp: 122 | fp_total += 1 123 | 124 | # ───────────────────── build markdown ───────────────────── 125 | md_lines: List[str] = [ 126 | "# Codex Harness Run Summary", 127 | f"Scan directory: {output_root}", 128 | "", 129 | "## Totals", 130 | f"- Run directories scanned: {total_runs}", 131 | f"- Unique projects: {len(unique_projects)}", 132 | f"- Directories with crash_analysis.md: {analysis_total}", 133 | f"- Directories with crash_info.md: {info_total}", 134 | f"- **False positives (HARNESS ERROR): {fp_total}**", 135 | "", 136 | ] 137 | 138 | # Only include detailed sections for *real* crashes 139 | real_project_sections_written = False 140 | 141 | for proj, runs in sorted(project_runs.items()): 142 | # Filter out false-positive runs 143 | real_runs = [r for r, is_fp in runs if not is_fp] 144 | if not real_runs: 145 | continue # nothing real to show for this project 146 | 147 | real_project_sections_written = True 148 | md_lines.extend([f"## {proj}", ""]) 149 | 150 | for run_dir in real_runs: 151 | md_lines.append(f"### {run_dir}") 152 | 153 | # ---- Crash Analysis -------------------------------------- 154 | analysis_path = run_dir / "crash_analysis.md" 155 | if analysis_path.is_file(): 
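                # Embed the analysis verbatim; _safe_code() appends an
                # invisible zero-width space to each inner ``` so an embedded
                # fence cannot close this markdown block early.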
156 |                 md_lines.extend(
157 |                     [
158 |                         "#### Crash Analysis",
159 |                         "```markdown",
160 |                         _safe_code(
161 |                             analysis_path.read_text(
162 |                                 encoding="utf-8", errors="replace"
163 |                             )
164 |                         ),
165 |                         "```",
166 |                         "",
167 |                     ]
168 |                 )
169 |
170 |             # ---- Crash Info -----------------------------------------
171 |             info_path = run_dir / "crash_info.md"
172 |             if info_path.is_file():
173 |                 md_lines.extend(
174 |                     [
175 |                         "#### Crash Info",
176 |                         "```markdown",
177 |                         _safe_code(
178 |                             info_path.read_text(
179 |                                 encoding="utf-8", errors="replace"
180 |                             )
181 |                         ),
182 |                         "```",
183 |                         "",
184 |                     ]
185 |                 )
186 |
187 |     if not real_project_sections_written:
188 |         md_lines.append("_All detected crashes are marked as false positives._\n")
189 |
190 |     return "\n".join(md_lines).rstrip() + "\n"
191 |
192 |
193 | def main() -> None:
194 |     ap = argparse.ArgumentParser(
195 |         description="Produce a Markdown summary of harness run results.",
196 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
197 |     )
198 |     ap.add_argument(
199 |         "--input",
200 |         type=Path,
201 |         default=Path("./jobs"),
202 |         help="Root directory containing harness run directories.",
203 |         dest="jobs",
204 |     )
205 |     ap.add_argument(
206 |         "--report",
207 |         type=Path,
208 |         help="Write report to this file instead of stdout.",
209 |     )
210 |     args = ap.parse_args()
211 |
212 |     root = args.jobs.expanduser().resolve()
213 |     if not root.is_dir():
214 |         raise SystemExit(f"Jobs directory not found: {root}")
215 |
216 |     md_doc = build_summary(root)
217 |
218 |     if args.report:
219 |         args.report.write_text(md_doc, encoding="utf-8")
220 |         print(f"✓ Summary written to {args.report}")
221 |     else:
222 |         print(md_doc)
223 |
224 |
225 | if __name__ == "__main__":
226 |     main()
227 |
--------------------------------------------------------------------------------
/harness_generator/scripts/gather_reports.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | #────────────
4 | #
5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | # this software and associated documentation files (the “Software”), to deal in the
9 | # Software without restriction, including without limitation the rights to use,
10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | # Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | #
24 | # ────────────
25 |
26 | """
27 | gather_reports.py
28 | ────────────
29 | Gather bug-report artifacts from a triage directory.
30 |
31 | Usage:
32 |     ./gather_reports.py [--input <dir>] [--output <dir>]
33 |
34 | By default the script scans ./jobs for job folders that follow the pattern
35 | "<project>_<uuid>".
If a job folder (anywhere in its subtree) contains the
36 | three markdown files *crash_analysis.md*, *crash_info.md*, and
37 | *bug_report.md*, they are copied into a new sub-directory under the output
38 | root (default ./sorted/reports).
39 |
40 | The output directory will now contain sub-directories with the same
41 | names as the corresponding job folders found in *triage_dir* (for example
42 | ``apache-httpd_1234abcd``), preserving the project name instead of keeping
43 | only the raw UUID.
44 |
45 | Directory layout variants
46 | ------------------------
47 | Historically, *triage_dir* contained only the per-job folders themselves::
48 |
49 |     triage/
50 |         <project>_<uuid>/
51 |
52 | With the introduction of *categories* an additional level may be present::
53 |
54 |     triage/
55 |         <category>/
56 |             <project>_<uuid>/
57 |
58 | `gather_reports` now transparently handles **both** layouts by examining the
59 | immediate children of *triage_dir* **and**, if they are not job directories,
60 | their own direct sub-directories.
61 |
62 | If category directories are detected the script replicates this structure
63 | under the output directory so that artifacts remain grouped::
64 |
65 |     triage/
66 |         asan/
67 |             project_1111aaaa/
68 |
69 |     → reports/
70 |         asan/
71 |             project_1111aaaa/
72 |
73 | When a job directory contains a *poc.sh* or *poc.py* file, it is copied along
74 | with the three required markdown files.
75 | """
76 |
77 | from __future__ import annotations
78 |
79 | import argparse
80 | import os
81 | import shutil
82 | import sys
83 | from pathlib import Path
84 |
85 |
86 | # ---------------------------------------------------------------------------
87 | # Configuration
88 | # ---------------------------------------------------------------------------
89 |
90 | # These files must be present for a job directory to be considered complete.
91 | REQUIRED_FILES = {
92 |     "crash_analysis.md",
93 |     "crash_info.md",
94 |     "bug_report.md",
95 | }
96 |
97 | # Optional proof-of-concept files that are copied alongside the required
98 | # markdown artifacts when present. Only the **first** occurrence of each file
99 | # name within a job directory is taken into account.
100 |
101 | OPTIONAL_POC_FILES = [
102 |     "poc.sh",
103 |     "poc.py",
104 | ]
105 |
106 |
107 | def extract_uuid(job_dir_name: str) -> str | None:
108 |     """Return the substring after the final underscore in *job_dir_name*.
109 |
110 |     Example::
111 |
112 |         >>> extract_uuid('apache-httpd_1234abcd')
113 |         '1234abcd'
114 |     """
115 |
116 |     if "_" not in job_dir_name:
117 |         return None
118 |
119 |     return job_dir_name.split("_")[-1]
120 |
121 |
122 | # ---------------------------------------------------------------------------
123 | # Helper functions
124 | # ---------------------------------------------------------------------------
125 |
126 |
127 | def _find_first(root: Path, filename: str) -> Path | None:
128 |     """Return the **first** occurrence of *filename* under *root* or *None*."""
129 |
130 |     try:
131 |         return next(root.rglob(filename))
132 |     except StopIteration:
133 |         return None
134 |
135 |
136 | def find_required_files(root: Path) -> dict[str, Path] | None:
137 |     """Search *root* recursively for all REQUIRED_FILES.
138 |
139 |     Returns a mapping *filename → Path* for the first occurrence of every
140 |     required file or *None* if any file is missing.
141 | """ 142 | 143 | found: dict[str, Path] = {} 144 | 145 | for name in REQUIRED_FILES: 146 | path = _find_first(root, name) 147 | if path is None: 148 | return None 149 | found[name] = path 150 | 151 | return found 152 | 153 | 154 | def gather_reports(triage_dir: Path, output_dir: Path) -> None: 155 | """Populate *output_dir* with consolidated bug-report artifacts.""" 156 | 157 | if not triage_dir.is_dir(): 158 | sys.exit(f"Error: '{triage_dir}' is not a directory") 159 | 160 | output_dir.mkdir(exist_ok=True) 161 | 162 | def _process_job_dir(job_dir: Path, *, category: str | None = None) -> bool: 163 | """Copy artifacts from *job_dir* to *output_dir*. 164 | 165 | Returns True if the directory was handled successfully, False 166 | otherwise (e.g. not a job dir or missing files). 167 | """ 168 | 169 | if not job_dir.is_dir(): 170 | return False 171 | 172 | uuid = extract_uuid(job_dir.name) 173 | if uuid is None: 174 | return False # not a job directory 175 | 176 | artifacts = find_required_files(job_dir) 177 | if artifacts is None: 178 | return False # incomplete job – skip 179 | 180 | # Preserve categories in the output directory if requested. 181 | dest = output_dir / job_dir.name if category is None else output_dir / category / job_dir.name 182 | 183 | if dest.exists(): 184 | print( 185 | f"[!] Destination '{dest}' already exists – skipping duplicate job from '{job_dir.name}'", 186 | file=sys.stderr, 187 | ) 188 | return True # already processed, treat as handled to avoid deeper fallback 189 | 190 | dest.mkdir(parents=True) 191 | 192 | 193 | for name, src in artifacts.items(): 194 | shutil.copy2(src, dest / name) 195 | 196 | # Copy optional PoC files if they exist. 197 | for poc_name in OPTIONAL_POC_FILES: 198 | poc_path = _find_first(job_dir, poc_name) 199 | if poc_path is not None: 200 | shutil.copy2(poc_path, dest / poc_name) 201 | 202 | print(f"[+] Collected reports for job '{job_dir.name}' → '{dest}'") 203 | return True 204 | 205 | # Iterate over immediate children; if a child isn't processed try its sub-dirs. 206 | for child in triage_dir.iterdir(): 207 | if not child.is_dir(): 208 | continue 209 | 210 | handled = _process_job_dir(child) 211 | if handled: 212 | continue 213 | 214 | # Treat *child* as category and look one level deeper. 215 | for grandchild in child.iterdir(): 216 | _process_job_dir(grandchild, category=child.name) 217 | 218 | 219 | def main(argv: list[str] | None = None) -> None: # noqa: D401 220 | parser = argparse.ArgumentParser( 221 | description=( 222 | "Collect crash_analysis.md, crash_info.md and bug_report.md files " 223 | "from each job directory under --input and copy them into " 224 | "--output (default ./sorted/reports) preserving job folder names." 
225 | ), 226 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 227 | ) 228 | 229 | parser.add_argument( 230 | "--input", 231 | default="./jobs", 232 | help="Root directory containing job run folders.", 233 | ) 234 | parser.add_argument( 235 | "--output", 236 | default="./sorted/reports", 237 | help="Destination where consolidated reports will be written.", 238 | ) 239 | 240 | args = parser.parse_args(argv) 241 | 242 | gather_reports( 243 | Path(args.input).expanduser().resolve(), 244 | Path(args.output).expanduser().resolve(), 245 | ) 246 | 247 | 248 | if __name__ == "__main__": 249 | main() 250 | -------------------------------------------------------------------------------- /leveldb_writeup/workflow.md: -------------------------------------------------------------------------------- 1 | LevelDB – Automated Harness Generation & Bug Discovery Workflow 2 | ============================================================== 3 | 4 | This document is a case-study of the **end-to-end workflow** that the 5 | LLM-powered harness generator followed to find, trigger and triage a 6 | previously-undiscovered bug in "leveldb". 7 | 8 | Contents 9 | -------- 10 | 1. High-level timeline 11 | 2. Environment bootstrap 12 | 3. How the new target was chosen 13 | 4. Codex harness synthesis (`fuzz_table_open.cc`) 14 | 5. Building & running the new target 15 | 6. Crash detection & reproduction 16 | 7. Automated crash triage and false positive detection (→ `crash_analysis.md`) 17 | 8. Resulting artifacts 18 | 9. Harness quality & model intentionality 19 | 10. Key take-aways 20 | 21 | ## 1. High-level timeline 22 | 23 | | Step | Actor | What happened | 24 | |------|-------|---------------| 25 | | 1 | *cli wrapper* | Clone LevelDB OSS-Fuzz project and build the **baseline fuzzers** (only `fuzz_db`). | 26 | | 2 | *Codex* | Prompted with high-level instructions to “add one **new** fuzz target that reaches previously unfuzzed code”. | 27 | | 3 | *builder* | Codex edits the tree, creating `projects/leveldb/fuzz_table_open.cc` and adjusting build scripts. | 28 | | 4 | *fuzzer* | libFuzzer starts; within seconds hits an **OOM in `ReadBlock()`**. | 29 | | 5 | *runner* | Detects a new `oom-*` file, reproduces the issue and copies the logs → `crash_info.md`. | 30 | | 6 | *Codex* | Second prompt: *“Analyse this stack trace & produce human report.”* Output stored in `crash_analysis.md`. | 31 | 32 | 33 | ## 2. Environment bootstrap 34 | 35 | The harness generator launches the standard OSS-Fuzz helper scripts: 36 | 37 | ```text 38 | $ python infra/helper.py build_image leveldb 39 | $ python infra/helper.py build_fuzzers leveldb --sanitizer address --clean 40 | ``` 41 | 42 | The log excerpt below shows that **only one 43 | baseline target** was discovered: 44 | 45 | ```text 46 | [*] Baseline has 1 fuzzer(s): { 'fuzz_db' } 47 | ``` 48 | 49 | `fuzz_db` exercises the public database API with randomly generated keys 50 | and values, but *never loads SSTable files from disk* – a gap our 51 | analysis will soon exploit. 52 | 53 | 54 | ## 3. How the new target was chosen 55 | 56 | After the baseline build, the workflow jumps straight to a Codex 57 | invocation that is given high-level instructions to create a new fuzzer 58 | harness for the project. 59 | (see [`harness_generator.py`](../harness_generator/src/harness_generator.py) → `_invoke_codex_for_harness`) 60 | 61 | Codex is free to inspect any file in the working copy, clone the main 62 | repository, or rely on its own training data. 
The **selection logic is
63 | therefore internal to the LLM** – the Python driver makes *no* attempt
64 | to parse ELF symbol tables, ASTs, or code coverage reports.
65 |
66 | For LevelDB, Codex picked
67 | ```c++
68 | Status Table::Open(const Options&, RandomAccessFile*, uint64_t file_size,
69 |                    Table**);
70 | ```
71 |
72 | from `table/table.cc`. This function parses on-disk SSTable files and
73 | was not reached by the existing `fuzz_db` target, making it a sensible
74 | choice even without a pre-computed coverage map.
75 |
76 |
77 | ## 4. Codex harness synthesis
78 |
79 | ### Codex instructions (excerpt)
80 |
81 | ```text
82 | **Objective (high-value fuzz target)**
83 | Create a **new libFuzzer harness** for the **leveldb** OSS-Fuzz project that
84 | exercises a *public* or *documented* API reachable with **user-supplied input**
85 | (e.g. files, packets, strings) and therefore has real-world security impact.
86 |
87 | ────────────────────────────────────────
88 | **Target-selection rules**
89 |
90 | 1. **Start at the top**: pick the *highest-level* function that
91 |    *directly* consumes attacker-controlled data.
92 |    • Good examples: `exif_data_load()`, `freerdp_peer_context_new()`,
93 |      `curl_url_set()`, `png_read_info()`.
94 |    • **Avoid** low-level helpers (`*_parse_int()`, `*_read_field()` etc.)
95 |      unless *no higher layer* validates input.
96 | ```
97 |
98 | ### What Codex does
99 |
100 | 1. Analyzes `projects/leveldb/` to learn how LevelDB objects are
101 |    initialised, how the corpus input is written to disk, and how the
102 |    harness is compiled (compiler flags, build.sh edits, etc.).
103 | 2. Generates `projects/leveldb/fuzz_table_open.cc`, re-using the helper
104 |    functions and error handling patterns it saw in `fuzz_db.cc`.
105 | 3. Opens `projects/leveldb/build.sh` (or `Dockerfile` / `CMakeLists.txt`
106 |    depending on the project) and appends a single `compile_cc` line so
107 |    the new `.cc` file is compiled into a `fuzz_table_open` binary next
108 |    to the existing `fuzz_db` target.
109 | 4. Writes the path of the new harness to a sentinel file called `./done`
110 |    so the Python driver knows that edits are complete.
111 |
112 | The produced harness is short enough to show in full:
113 |
114 | ```c++
115 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
116 |   const char *fname = "/tmp/fuzz_table_open.sst";
117 |   std::ofstream out(fname, std::ios::binary);
118 |   if (!out) return 0;
119 |   out.write(reinterpret_cast<const char *>(data), size);
120 |
121 |   leveldb::Options options;
122 |   options.env = leveldb::Env::Default();
123 |
124 |   leveldb::RandomAccessFile *file = nullptr;
125 |   if (!options.env->NewRandomAccessFile(fname, &file).ok()) return 0;
126 |
127 |   leveldb::Table *table = nullptr;
128 |   if (!leveldb::Table::Open(options, file, size, &table).ok()) {
129 |     delete file;
130 |     return 0;
131 |   }
132 |
133 |   std::unique_ptr<leveldb::Iterator> it(
134 |       table->NewIterator(leveldb::ReadOptions()));
135 |   for (it->SeekToFirst(); it->Valid(); it->Next()) {
136 |     /* no-op – iteration alone is enough to exercise the parser */
137 |   }
138 |
139 |   delete table;
140 |   delete file;
141 |   std::remove(fname);
142 |   return 0;
143 | }
144 | ```
145 |
146 | ### Build-until-green loop
147 |
148 | The driver now calls `_build_with_retries()` which attempts to rebuild
149 | the project.
If compilation fails, the captured stderr is forwarded to
150 | `_invoke_codex_to_fix_build`, and Codex applies the smallest possible
151 | patch (typically adding a missing header include or fixing a compiler
152 | flag). This loop repeats until the build succeeds or the retry budget
153 | is exhausted. For LevelDB the very first build already succeeded
154 | because the harness was modelled closely after `fuzz_db.cc`.
155 |
156 |
157 | ## 5. Building & running the new target
158 |
159 | After compilation the runner automatically executed a short sanity fuzz
160 | session (1k coverage-guided iterations). Within ~2000 execs libFuzzer
161 | emitted:
162 |
163 | ```text
164 | ==14==ERROR: AddressSanitizer: out of memory: allocator is trying to
165 | allocate 0xffffffffffe0 bytes
166 |     #0 0x... in operator new[](unsigned long)
167 |     #1 0x... in leveldb::ReadBlock(...)
168 |     #2 0x... in leveldb::Table::Open(...)
169 | ```
170 |
171 | Because the crash originates **inside project code (`format.cc:78`)** and
172 | the requested allocation is clearly bogus (`0xffffffffffe0`), the signal
173 | is marked as a *real* bug and saved. Reproducer and log were copied to
174 | `build/out/leveldb/crash-<hash>`.
175 |
176 |
177 | ## 6. Crash detection & reproduction
178 |
179 | After the fuzzer exits, `harness_generator.py` walks the
180 | `build/out/leveldb` directory tree and records every file whose name
181 | begins with one of the libFuzzer prefixes `crash`, `oom`, or `timeout`
182 | (see `_find_bug_files`). Any *new* file is assumed to be a genuine
183 | finding. The very first one is reproduced with the standard
184 | `infra/helper.py reproduce` command and its artifacts are collected into
185 | `crash_info.md`.
186 |
187 |
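A minimal sketch of that scan (the function name and signature below are assumptions for illustration; the real implementation is `_find_bug_files` in `harness_generator.py`):

```python
from pathlib import Path

# libFuzzer artifact prefixes the runner looks for.
BUG_PREFIXES = ("crash", "oom", "timeout")

def find_bug_files(out_dir: Path, known: set[str]) -> list[Path]:
    """Collect previously unseen libFuzzer artifacts under *out_dir*."""
    return [
        p
        for p in out_dir.rglob("*")
        if p.is_file()
        and p.name.startswith(BUG_PREFIXES)
        and p.name not in known
    ]
```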
188 | ## 7. Automated crash triage
189 |
190 | A second Codex invocation is fed the **raw ASan log plus the offending
191 | source lines** to produce a human-readable crash analysis.
192 |
193 | Codex produced `crash_analysis.md`, identifying the issue as *unbounded
194 | memory allocation due to unchecked block handle size* and suggesting to
195 | validate `offset + size` before allocation – exactly the fix a human
196 | would write.
197 |
198 |
199 | ## 8. Resulting artifacts
200 |
201 | All important files live under `leveldb_writeup/artifacts/`:
202 |
203 | * `crash_info.md` – Reproducer command, ASan log, harness snippet & hex-dump.
204 | * `crash_analysis.md` – High-level vulnerability assessment.
205 |
206 |
207 | ## 9. Harness quality & model intentionality
208 |
209 | On the **first attempt**, Codex (o3) selected `leveldb::Table::Open()`, a
210 | high-level, attacker-controlled file parser that the baseline target never
211 | touched, produced a compiling harness without retries, and triggered an OOM in
212 | `ReadBlock()` within \~2k execs. This happened without any external coverage,
213 | static analysis, or symbol/AST reasoning.
214 |
215 | That outcome indicates the model is not merely emitting syntactically correct
216 | code; it is **prioritizing code patterns that historically harbor bugs**
217 | (complex, input-driven parsers). This reflects a learned, security-relevant
218 | **inductive prior**. In practice, this displaces a large portion of the manual
219 | front-end work (enumerating and ranking candidate entry points, drafting an
220 | initial harness, and iterating to green): the model did that prioritization and
221 | delivered a crash-inducing target in a single pass.
222 |
223 |
224 | ## 10. Key take-aways
225 |
226 | 1. Even mature OSS-Fuzz projects often have *format-parsing* code paths
227 |    left unfuzzed – here the SSTable reader.
228 | 2. Even without an external coverage map, a single well-crafted prompt
229 |    is often enough for the LLM to identify an unfuzzed API and deliver a
230 |    working harness on the first attempt.
231 | 3. The same LLM that wrote the harness can immediately explain the bug –
232 |    closing the loop from discovery to actionable triage with no human in
233 |    the middle.
234 | 4. Codex (o3) showed a learned bias toward high-risk parsing entry points,
235 |    producing a crash-inducing harness on the first try without coverage or
236 |    static guidance — evidence that LLMs can assume much of the early triage
237 |    and target-selection workload with surprising precision.
238 |
--------------------------------------------------------------------------------
/harness_generator/batch_generate.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | #────────────
4 | #
5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | # this software and associated documentation files (the “Software”), to deal in the
9 | # Software without restriction, including without limitation the rights to use,
10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | # Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | #
24 | # ────────────
25 |
26 | """
27 | batch_generate.py
28 | ─────────────────
29 |
30 | Orchestrates batch execution of `harness_generator.py` against multiple
31 | OSS-Fuzz projects.
32 |
33 | • Consumes a YAML file whose top-level `projects` list describes the project
34 |   name, git URL and commit/reference of each fuzz-tooling repository.
35 | • For every entry it clones the repository into an *output* directory,
36 |   then invokes HarnessGenerator one or more times ("rounds").
37 | • All stdout/stderr from each invocation is tee'd to
38 |   `harness_round_<N>.log` inside that project's run directory so logs are
39 |   preserved even if the main process is interrupted.
40 |
41 | Most HarnessGenerator CLI flags are surfaced so the batch driver can choose
42 | sanitiser, Codex binary, scratch space, etc. Projects are processed
43 | sequentially by default (raise --threads for concurrency) and any clone/build
44 | failure simply skips the affected target, keeping the batch run going.
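
Example invocation (from the repository root, mirroring the Makefile's
`leveldb` target; all flags below are defined in `main()`):

    python harness_generator/batch_generate.py \
        --targets harness_generator/yamls/leveldb.yaml --rounds 1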
45 | """ 46 | 47 | from __future__ import annotations 48 | 49 | import argparse 50 | import concurrent.futures 51 | import logging 52 | import os 53 | import shutil 54 | import subprocess 55 | import sys 56 | import uuid 57 | from pathlib import Path 58 | from typing import Dict, List 59 | 60 | import yaml 61 | from dotenv import load_dotenv 62 | from git import Repo, exc as git_exc 63 | 64 | # ---------------------------------------------------------------------------# 65 | # Constants & global state 66 | # ---------------------------------------------------------------------------# 67 | # Default location for all job run directories created by this batch driver. 68 | # The original internal tooling wrote to an NFS mount; we switch to a local 69 | # folder so the released version works out-of-the-box. 70 | 71 | OUTPUT_ROOT = Path("./jobs").resolve() 72 | OUTPUT_ROOT.mkdir(parents=True, exist_ok=True) 73 | 74 | # ---------------------------------------------------------------------------# 75 | # YAML helper 76 | # ---------------------------------------------------------------------------# 77 | 78 | 79 | def load_targets_yaml(path: Path) -> list[dict[str, str]]: 80 | """Return the list under `projects:` from a YAML file.""" 81 | with open(path, "r", encoding="utf-8") as fh: 82 | data = yaml.safe_load(fh) 83 | if not data or "projects" not in data: 84 | raise ValueError("YAML must contain a top-level 'projects' list") 85 | return data["projects"] 86 | 87 | 88 | # ---------------------------------------------------------------------------# 89 | # Git helper 90 | # ---------------------------------------------------------------------------# 91 | 92 | 93 | def clone_and_checkout(url: str, ref: str, dest: Path) -> Repo: 94 | logging.info("Cloning %s → %s", url, dest) 95 | repo = Repo.clone_from(url, dest) 96 | try: 97 | repo.git.checkout(ref) 98 | except git_exc.GitCommandError: 99 | repo.git.fetch("origin", ref) 100 | repo.git.checkout("FETCH_HEAD") 101 | logging.info("Checked-out commit %s", repo.head.commit.hexsha) 102 | return repo 103 | 104 | 105 | # ---------------------------------------------------------------------------# 106 | # Worker function 107 | # ---------------------------------------------------------------------------# 108 | 109 | 110 | def process_project( 111 | target: Dict[str, str], 112 | *, 113 | rounds: int, 114 | ai_key_path: Path, 115 | sanitizer: str, 116 | codex_cli: str, 117 | scratch_space: Path | None, 118 | copy_repo: bool, 119 | no_build: bool, 120 | smoke: bool, 121 | max_retries: int, 122 | ) -> None: 123 | """ 124 | Clone the project and run HarnessGenerator `rounds` times in sequence. 125 | All stdout/stderr from each round is captured to a file. 
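    Each round's log lands in `<run_dir>/harness_round_<N>.log`; a non-zero
    exit code from a round is logged as an error, but subsequent rounds (and
    other projects) still run.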
126 | """ 127 | project = target["project_name"] 128 | url = target["fuzz_tooling_url"] 129 | ref = target["fuzz_tooling_ref"] 130 | 131 | run_dir = OUTPUT_ROOT / f"{project}_{uuid.uuid4().hex}" 132 | run_dir.mkdir(parents=True, exist_ok=True) 133 | 134 | try: 135 | clone_and_checkout(url, ref, run_dir) 136 | except Exception as err: 137 | logging.error("[SKIP] %s - clone/checkout failed: %s", project, err) 138 | return 139 | 140 | # ── Keep only the target project directory under oss-fuzz/projects/ ── 141 | projects_root = run_dir / "projects" 142 | if projects_root.is_dir(): 143 | for sub in projects_root.iterdir(): 144 | if sub.is_dir() and sub.name != project: 145 | try: 146 | shutil.rmtree(sub) 147 | except Exception as exc: 148 | logging.warning( 149 | "[%s] Failed to remove directory %s: %s", 150 | project, 151 | sub, 152 | exc, 153 | ) 154 | 155 | script_path = Path(__file__).parent / "src" / "harness_generator.py" 156 | 157 | for round_idx in range(1, rounds + 1): 158 | log_path = run_dir / f"harness_round_{round_idx}.log" 159 | logging.info( 160 | "[%s] Round %d/%d → %s", project, round_idx, rounds, log_path 161 | ) 162 | 163 | cmd = [ 164 | sys.executable, 165 | str(script_path), 166 | project, 167 | str(run_dir), 168 | str(ai_key_path), 169 | "--sanitizer", 170 | sanitizer, 171 | "--codex-cli", 172 | codex_cli, 173 | "--max-retries", 174 | str(max_retries), 175 | ] 176 | 177 | if scratch_space: 178 | cmd += ["--scratch-space", str(scratch_space)] 179 | if copy_repo: 180 | cmd.append("--copy-repo") 181 | if no_build: 182 | cmd.append("--no-build") 183 | if smoke: 184 | cmd.append("--smoke") 185 | 186 | # Capture combined stdout/stderr into the log file 187 | with open(log_path, "w", encoding="utf-8") as lf: 188 | proc = subprocess.run( 189 | cmd, 190 | stdout=lf, 191 | stderr=subprocess.STDOUT, 192 | text=True, 193 | ) 194 | if proc.returncode != 0: 195 | logging.error( 196 | "[%s] Round %d failed (rc=%d). " "See %s for details.", 197 | project, 198 | round_idx, 199 | proc.returncode, 200 | log_path, 201 | ) 202 | 203 | logging.info("[%s] All rounds complete → %s", project, run_dir) 204 | 205 | 206 | # ---------------------------------------------------------------------------# 207 | # Main 208 | # ---------------------------------------------------------------------------# 209 | 210 | 211 | def main() -> None: 212 | parser = argparse.ArgumentParser( 213 | description="Batch-generate OSS-Fuzz harnesses concurrently.", 214 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 215 | ) 216 | 217 | parser.add_argument( 218 | "--targets", 219 | type=Path, 220 | required=True, 221 | help="YAML file listing projects to process.", 222 | ) 223 | parser.add_argument( 224 | "--rounds", 225 | type=int, 226 | default=1, 227 | help="Successive Codex rounds per project.", 228 | ) 229 | parser.add_argument( 230 | "--threads", 231 | type=int, 232 | default=1, 233 | help="Maximum number of concurrent HarnessGenerator runs.", 234 | ) 235 | parser.add_argument( 236 | "--ai-key-path", 237 | type=Path, 238 | default="./.env", 239 | help="Path to .env or file containing OPENAI key.", 240 | ) 241 | parser.add_argument( 242 | "--sanitizer", 243 | default="address", 244 | help="Sanitizer to use when building fuzzers.", 245 | ) 246 | parser.add_argument( 247 | "--codex-cli", default="codex", help="Codex CLI executable." 
248 |     )
249 |     parser.add_argument(
250 |         "--scratch-space",
251 |         type=Path,
252 |         help="Directory for HarnessGenerator temp copies.",
253 |     )
254 |     parser.add_argument(
255 |         "--copy-repo",
256 |         action="store_true",
257 |         help="Tell HarnessGenerator to copy the repo before edits.",
258 |     )
259 |     parser.add_argument(
260 |         "--no-build",
261 |         action="store_true",
262 |         help="Skip docker rebuild step (debug only).",
263 |     )
264 |     parser.add_argument(
265 |         "--smoke",
266 |         action="store_true",
267 |         help="Run a quick smoke test before Codex edits.",
268 |     )
269 |     parser.add_argument(
270 |         "--max-retries",
271 |         type=int,
272 |         default=3,
273 |         help="Max build-fix rounds inside HarnessGenerator.",
274 |     )
275 |     parser.add_argument(
276 |         "--randomize",
277 |         action="store_true",
278 |         help="Randomize the order of projects before processing.",
279 |     )
280 |     parser.add_argument(
281 |         "--verbose",
282 |         action="store_true",
283 |         help="Enable DEBUG logging.",
284 |     )
285 |
286 |     args = parser.parse_args()
287 |
288 |     logging.basicConfig(
289 |         level=logging.DEBUG if args.verbose else logging.INFO,
290 |         format="[%(levelname)s] %(message)s",
291 |     )
292 |
293 |     load_dotenv(os.path.expanduser(str(args.ai_key_path)))
294 |
295 |     targets: List[Dict[str, str]] = load_targets_yaml(args.targets)
296 |     if args.randomize:
297 |         import random
298 |
299 |         random.shuffle(targets)
300 |         logging.info("--randomize is set; target list shuffled.")
301 |
302 |     logging.info("Loaded %d project(s) from %s", len(targets), args.targets)
303 |     logging.info("Running with up to %d concurrent job(s)…", args.threads)
304 |
305 |     with concurrent.futures.ThreadPoolExecutor(
306 |         max_workers=args.threads
307 |     ) as pool:
308 |         futures = [
309 |             pool.submit(
310 |                 process_project,
311 |                 t,
312 |                 rounds=args.rounds,
313 |                 ai_key_path=args.ai_key_path.expanduser(),
314 |                 sanitizer=args.sanitizer,
315 |                 codex_cli=args.codex_cli,
316 |                 scratch_space=args.scratch_space,
317 |                 copy_repo=args.copy_repo,
318 |                 no_build=args.no_build,
319 |                 smoke=args.smoke,
320 |                 max_retries=args.max_retries,
321 |             )
322 |             for t in targets
323 |         ]
324 |
325 |         # Wait for all tasks to finish, logging any exceptions workers raised.
326 |         for f in concurrent.futures.as_completed(futures):
327 |             try:
328 |                 f.result()
329 |             except Exception as exc:
330 |                 logging.error("Worker raised: %s", exc)
331 |
332 |     logging.info("All work complete.")
333 |
334 |
335 | if __name__ == "__main__":
336 |     try:
337 |         main()
338 |     except KeyboardInterrupt:
339 |         print("\nInterrupted - exiting.")
340 |
--------------------------------------------------------------------------------
/harness_generator/scripts/generate_reports.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | #────────────
4 | #
5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | # this software and associated documentation files (the “Software”), to deal in the
9 | # Software without restriction, including without limitation the rights to use,
10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | # Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | #
24 | # ────────────
25 |
26 | """
27 | generate_reports.py
28 | ───────────────────
29 | For every harness run directory that contains both `crash_analysis.md` and
30 | `crash_info.md`, invoke the Codex CLI to author a polished disclosure-style
31 | `bug_report.md` using the template supplied by the user.
32 |
33 | The script mirrors the Codex interaction pattern used in `harness_generator.py`:
34 |
35 | 1. Aggregate the contents of `crash_analysis.md` and `crash_info.md` into a
36 |    single context blob that is provided to Codex.
37 | 2. Send high-level instructions asking Codex to create **exactly one new file
38 |    called `bug_report.md`** in the same directory, following the required
39 |    section layout verbatim (see `REPORT_TEMPLATE` below).
40 | 3. Repeat for every qualifying run directory found under the *input* root
41 |    (default: `./jobs`).
42 |
43 | Like the other tooling, the script expects an OpenAI-compatible API key via
44 | `OPENAI_API_KEY` **or** a path to a dotenv file containing it.
45 | """
46 |
47 | from __future__ import annotations
48 |
49 | import argparse
50 | import os
51 | import textwrap
52 | from pathlib import Path
53 | from typing import List
54 |
55 | from dotenv import load_dotenv
56 |
57 | load_dotenv(dotenv_path="./.env")
58 |
59 | # Re-use the Codex helper that is already part of this repository
60 | # Add src/ to import path then import CodexHelper
61 | import sys
62 | from pathlib import Path as _Path
63 |
64 | _REPO_ROOT = _Path(__file__).resolve().parent.parent
65 | _SRC_DIR = _REPO_ROOT / "src"
66 | sys.path.insert(0, str(_SRC_DIR))
67 |
68 | from codex_helper import CodexHelper  # type: ignore
69 |
70 |
71 | # ---------------------------------------------------------------------------
72 | # Constants
73 | # ---------------------------------------------------------------------------
74 |
75 | DEFAULT_INPUT_ROOT = Path("./jobs")
76 |
77 | CODEX_ANALYSIS_MODEL = os.environ.get("CODEX_ANALYSIS_MODEL", "o4-mini")
78 | CODEX_APPROVAL_MODE = os.environ.get("CODEX_APPROVAL_MODE", "full-auto")
79 |
80 |
81 | REPORT_TEMPLATE = textwrap.dedent(
82 |     r"""
83 | # <Vulnerability title>
84 | _Disclosure date: <YYYY-MM-DD>_ (use the current date)
85 |
86 | ---
87 |
88 | ## 1 Overview
89 | Brief, one-sentence statement of the flaw and why it matters.
90 |
91 | ## 2 Affected product(s) and version(s)
92 | * <project name> - <affected versions> (check the git history and use origin/HEAD)
93 |
94 | ## 3 Impact
95 | Describe what an attacker can do (RCE, DoS, info-leak, privilege escalation, etc.).
96 | _Add CVSS v3.1 vector & score here if you have one._
97 |
98 | ## 4 Technical details
99 | 1. **Root cause** – where in the code / design the issue lives.
100 | 2. **Trigger** – how malformed input or an attacker’s action reaches that code path (include the harness and the crashing input)
101 | 3. **Why it fails safely/unsafely** – memory corruption, missing auth check, etc.
102 | 4. **Reproduction** – step-by-step commands or minimal PoC (link to file if large).
103 |
104 | ## 5 Mitigation / Patch guidance
105 | * Short-term workaround (e.g., config flag, WAF rule).
106 | * Long-term fix suggestion (code change, input validation, size check).
107 |
108 | ## 6 Timeline
109 | | Date | Event |
110 | |------|-------|
111 | | YYYY-MM-DD | Vulnerability discovered | (use the date when crash_info.md was created)
112 |
113 | ## 7 Credits
114 | _Reported by SHERPA_
115 |
116 | ## 8 References
117 | * ISO/IEC 29147 section 5.4 (Disclosure contents)
118 | * CERT/CC Vulnerability Note style
119 | * CVE entry (reserved) – CVE-YYYY-NNNN
120 | """
121 | ).strip()
122 |
123 |
124 | # ---------------------------------------------------------------------------
125 | # Helper functions
126 | # ---------------------------------------------------------------------------
127 |
128 |
129 | def _find_run_dirs(root: Path) -> List[Path]:
130 |     """Return run-directory candidates located one **or two** levels below *root*.
131 |
132 |     The original implementation only considered the immediate children of
133 |     *root*::
134 |
135 |         output/<run_dir>/crash_analysis.md
136 |
137 |     Newer triage layouts introduce an additional *category* layer so the
138 |     structure now looks like::
139 |
140 |         output/<category>/<run_dir>/crash_analysis.md
141 |
142 |     To stay backward-compatible while supporting the new layout the function
143 |     operates in two steps:
144 |
145 |     1. Collect every *direct* sub-directory of *root*.
146 |     2. For each direct child that itself is **not** a run directory (i.e.
147 |        lacks the required crash documents), collect its own sub-directories.
148 |
149 |     The resulting list therefore contains
150 |
151 |     • output/<run_dir>/
152 |     • output/<category>/<run_dir>/
153 |
154 |     leaving the subsequent `_has_crash_docs` filter to decide which candidates
155 |     actually qualify as run directories.
156 |     """
157 |
158 |     run_dirs: List[Path] = []
159 |
160 |     # First pass — look at immediate children of *root*.
161 |     for first_level in root.iterdir():
162 |         if not first_level.is_dir():
163 |             continue
164 |
165 |         if _has_crash_docs(first_level):
166 |             # Classic layout: the run directory sits directly under *root*.
167 |             run_dirs.append(first_level)
168 |             continue
169 |
170 |         # Second pass — treat *first_level* as a category and inspect its
171 |         # direct sub-directories. We intentionally do **not** recurse further
172 |         # to avoid unexpectedly deep walks while still covering the new
173 |         # two-level layout.
174 | for candidate in first_level.iterdir(): 175 | if candidate.is_dir() and _has_crash_docs(candidate): 176 | run_dirs.append(candidate) 177 | 178 | return run_dirs 179 | 180 | 181 | def _has_crash_docs(run_dir: Path) -> bool: 182 | return (run_dir / "crash_analysis.md").is_file() and ( 183 | run_dir / "crash_info.md" 184 | ).is_file() 185 | 186 | 187 | # --------------------------------------------------------------------------- 188 | # Codex interaction per run directory 189 | # --------------------------------------------------------------------------- 190 | 191 | 192 | def _invoke_codex_for_report( 193 | run_dir: Path, codex_cli: str, ai_key_path: Path 194 | ) -> None: 195 | """Ask Codex to create *bug_report.md* inside *run_dir*.""" 196 | 197 | analysis_path = run_dir / "crash_analysis.md" 198 | info_path = run_dir / "crash_info.md" 199 | 200 | # Combine the two markdown files into one context blob 201 | context_blob = textwrap.dedent( 202 | """ 203 | === crash_analysis.md === 204 | {analysis} 205 | 206 | === crash_info.md === 207 | {info} 208 | """ 209 | ).format( 210 | analysis=analysis_path.read_text(encoding="utf-8", errors="replace"), 211 | info=info_path.read_text(encoding="utf-8", errors="replace"), 212 | ) 213 | 214 | instructions = textwrap.dedent( 215 | f""" 216 | You are an experienced vulnerability disclosure author. 217 | 218 | Using the *context* provided (crash analysis and crash info), write a 219 | **new file** called `bug_report.md` inside the same directory. 220 | 221 | If `bug_report.md` already exists, just create the ./done file and exit. 222 | 223 | Requirements: 224 | • Follow the exact section headings and formatting shown below. 225 | • Where possible, extract details from the analysis/info; otherwise 226 | leave concise TODO placeholders for a human analyst. 227 | • Do **not** modify existing files. 228 | 229 | Important: If the bug is caused by a harness error (bad library usage, wrong params, etc.) 230 | then you must put "HARNESS ERROR" somewhere in your report to flag this false positive. 231 | 232 | --- 233 | BEGIN TEMPLATE (copy verbatim, then fill) 234 | {REPORT_TEMPLATE} 235 | END TEMPLATE 236 | """ 237 | ).strip() 238 | 239 | patcher = CodexHelper( 240 | repo_path=run_dir, 241 | ai_key_path=str(ai_key_path), 242 | copy_repo=False, 243 | codex_cli=codex_cli, 244 | codex_model=CODEX_ANALYSIS_MODEL, 245 | approval_mode=CODEX_APPROVAL_MODE, 246 | ) 247 | 248 | stdout = patcher.run_codex_command( 249 | instructions, additional_context=context_blob 250 | ) 251 | 252 | if stdout is None: 253 | print(f"[!] 
Codex did not create bug_report.md in {run_dir}")
254 |     else:
255 |         print(f"✓ bug_report.md generated for {run_dir}")
256 |
257 |
258 | # ---------------------------------------------------------------------------
259 | # CLI
260 | # ---------------------------------------------------------------------------
261 |
262 |
263 | def main() -> None:
264 |     parser = argparse.ArgumentParser(
265 |         description="Generate bug_report.md for each crash-containing run directory via Codex.",
266 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
267 |     )
268 |
269 |     parser.add_argument(
270 |         "--input",
271 |         type=Path,
272 |         default=DEFAULT_INPUT_ROOT,
273 |         help="Root directory containing harness run directories (default: ./jobs)",
274 |     )
275 |     parser.add_argument(
276 |         "--ai-key-path",
277 |         type=Path,
278 |         default=Path("./.env"),
279 |         help="Path to .env file holding your OPENAI-compatible API key.",
280 |     )
281 |     parser.add_argument(
282 |         "--codex-cli",
283 |         default="codex",
284 |         help="Codex CLI executable path (default: codex)",
285 |     )
286 |
287 |     parser.add_argument(
288 |         "--threads",
289 |         type=int,
290 |         default=1,
291 |         help="Maximum number of parallel Codex report generations.",
292 |     )
293 |
294 |     args = parser.parse_args()
295 |
296 |     # Ensure API key is exported exactly like harness_generator does
297 |     load_dotenv(dotenv_path=os.path.expanduser(str(args.ai_key_path)))
298 |
299 |     root = args.input.expanduser().resolve()
300 |     if not root.is_dir():
301 |         raise SystemExit(f"Output directory not found: {root}")
302 |
303 |     run_dirs = _find_run_dirs(root)
304 |
305 |     if not run_dirs:
306 |         print("[!] No run directories found – nothing to do.")
307 |         return
308 |
309 |     todo = [d for d in run_dirs if _has_crash_docs(d)]
310 |
311 |     if not todo:
312 |         print("[!] No crash_analysis.md found under", root)
313 |         return
314 |
315 |     print(
316 |         f"[*] Found {len(todo)} run directory(ies) with crashes. Using up to {args.threads} thread(s)."
317 |     )
318 |
319 |     import concurrent.futures as _cf
320 |
321 |     with _cf.ThreadPoolExecutor(max_workers=args.threads) as pool:
322 |         futures = [
323 |             pool.submit(
324 |                 _invoke_codex_for_report,
325 |                 run_dir,
326 |                 codex_cli=args.codex_cli,
327 |                 ai_key_path=args.ai_key_path.expanduser(),
328 |             )
329 |             for run_dir in todo
330 |         ]
331 |
332 |         # Wait for completion, surface exceptions early
333 |         for f in _cf.as_completed(futures):
334 |             try:
335 |                 f.result()
336 |             except Exception as exc:
337 |                 print(f"[!] Worker raised exception: {exc}")
338 |
339 |
340 | if __name__ == "__main__":
341 |     main()
342 |
--------------------------------------------------------------------------------
/harness_generator/src/codex_helper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | #────────────
4 | #
5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of
8 | # this software and associated documentation files (the “Software”), to deal in the
9 | # Software without restriction, including without limitation the rights to use,
10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
11 | # Software, and to permit persons to whom the Software is furnished to do so,
12 | # subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | # 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # ──────────── 25 | 26 | """ 27 | codex_helper.py 28 | ──────────────── 29 | • Runs the Codex CLI inside a pseudo-terminal so coloured output streams live 30 | to stdout while still being captured for later inspection. 31 | • Watches for the sentinel file `./done` that Codex is instructed to write 32 | once it has applied all edits. The session is terminated as soon as the 33 | file appears. 34 | • Retries the **CLI invocation** on common transient failure strings. 35 | • Retries the **whole patch generation attempt** when no diff was produced. 36 | • Enforces a hard wall-clock timeout and performs a 3-stage 37 | (SIGINT→SIGTERM→SIGKILL) shutdown sequence. 38 | • Returns *None* if Codex made no edits; otherwise returns full captured 39 | stdout so callers can inspect or log the conversation. 40 | • Optional *ai_key_path* can point to a file containing the OpenAI key; the 41 | helper sets the OPENAI_API_KEY environment variable if it was not yet 42 | defined. 43 | """ 44 | 45 | from __future__ import annotations 46 | 47 | import errno 48 | import logging 49 | import os 50 | import pty 51 | import select 52 | import shutil 53 | import signal 54 | import subprocess 55 | import tempfile 56 | import textwrap 57 | import time 58 | from pathlib import Path 59 | from typing import List, Sequence 60 | 61 | from git import Repo, exc as git_exc 62 | 63 | # --------------------------------------------------------------------------- 64 | # Logging setup 65 | # --------------------------------------------------------------------------- 66 | 67 | 68 | LOGGER = logging.getLogger(__name__) 69 | 70 | 71 | # --------------------------------------------------------------------------- 72 | # Helpers 73 | # --------------------------------------------------------------------------- 74 | 75 | 76 | def _ensure_git_repo(path: Path) -> Repo: 77 | """Return a *Repo* object, initialising a new repository if needed.""" 78 | 79 | try: 80 | repo = Repo(path) 81 | except git_exc.InvalidGitRepositoryError: 82 | repo = Repo.init(path) 83 | 84 | # Make sure at least one commit exists so `git diff` behaves. 85 | if not repo.head.is_valid(): 86 | repo.git.add(A=True) 87 | try: 88 | repo.git.commit(m="Initial commit", allow_empty=True) 89 | except git_exc.GitCommandError: 90 | # Happens when there is literally nothing to commit yet. 
91 | pass 92 | return repo 93 | 94 | 95 | # --------------------------------------------------------------------------- 96 | # Core helper class 97 | # --------------------------------------------------------------------------- 98 | 99 | 100 | class CodexHelper: 101 | """Light-weight wrapper around the Codex CLI with robust retry logic.""" 102 | 103 | def __init__( 104 | self, 105 | *, 106 | repo_path: Path, 107 | ai_key_path: str | None = None, 108 | copy_repo: bool = True, 109 | scratch_space: Path | None = None, 110 | codex_cli: str = "codex", 111 | codex_model: str = "o3", 112 | approval_mode: str = "full-auto", 113 | dangerous_bypass: bool = False, 114 | sandbox_mode: str | None = None, 115 | ) -> None: 116 | 117 | self.repo_path = Path(repo_path).expanduser().resolve() 118 | if not self.repo_path.is_dir(): 119 | raise FileNotFoundError(f"Repository not found: {self.repo_path}") 120 | 121 | self.scratch_space = scratch_space or Path("/tmp") 122 | self.codex_cli = str(codex_cli) 123 | self.codex_model = codex_model 124 | self.approval_mode = approval_mode 125 | 126 | if sandbox_mode: 127 | self.sandbox_mode = sandbox_mode 128 | else: 129 | self.sandbox_mode = "workspace-write" 130 | 131 | if dangerous_bypass: 132 | self.approval_mode = "never" 133 | self.sandbox_mode = "danger-full-access" 134 | 135 | 136 | # Work on an isolated copy when requested so Codex can freely modify. 137 | if copy_repo: 138 | self.working_dir = Path( 139 | tempfile.mkdtemp(prefix="codex-helper-", dir=str(self.scratch_space)) 140 | ) 141 | shutil.copytree(self.repo_path, self.working_dir, dirs_exist_ok=True) 142 | else: 143 | self.working_dir = self.repo_path 144 | 145 | self.repo = _ensure_git_repo(self.working_dir) 146 | 147 | # Provide API key via env var if a path was supplied. 148 | if ai_key_path and "OPENAI_API_KEY" not in os.environ: 149 | key_path = Path(ai_key_path).expanduser() 150 | if key_path.is_file(): 151 | key = key_path.read_text(encoding="utf-8", errors="ignore").strip() 152 | if key: 153 | os.environ["OPENAI_API_KEY"] = key 154 | 155 | LOGGER.debug("CodexHelper working directory: %s", self.working_dir) 156 | 157 | # ------------------------------------------------------------------ 158 | # Public API 159 | # ------------------------------------------------------------------ 160 | 161 | def run_codex_command( 162 | self, 163 | instructions: str | Sequence[str], 164 | *, 165 | additional_context: str | None = None, 166 | max_attempts: int = 3, 167 | timeout: int = 1800, 168 | max_cli_retries: int = 3, 169 | initial_backoff: float = 3.0, 170 | ) -> str | None: 171 | """Execute Codex with robust retry logic and return its stdout or *None*.""" 172 | 173 | SENTINEL = "done" 174 | RETRY_ERRORS = ( 175 | "Connection closed prematurely", 176 | "internal error", 177 | "failed to send request", 178 | "model failed to respond", 179 | "Network error while contacting OpenAI", 180 | ) 181 | 182 | done_path = self.working_dir / SENTINEL 183 | 184 | # Build prompt body once (mirrors original behaviour). 185 | if isinstance(instructions, (list, tuple)): 186 | tasks = "\n".join(str(i) for i in instructions) 187 | else: 188 | tasks = str(instructions) 189 | 190 | prompt_parts: List[str] = [ 191 | "You are an expert engineer. 
Apply the edits below - no refactors.", 192 | "When ALL tasks are complete, output a summary of your changes,", 193 | "then populate a file called **done** in the repo root (`./done`).", 194 | "Write the relative path to the **single** most relevant file you created or modified into `./done`.", 195 | f"## Tasks\n{tasks}", 196 | ] 197 | 198 | if additional_context: 199 | prompt_parts.append( 200 | textwrap.dedent( 201 | f""" 202 | --- 203 | ### Additional context 204 | {additional_context.strip()} 205 | --- 206 | """ 207 | ) 208 | ) 209 | 210 | prompt = "\n".join(prompt_parts).strip() 211 | 212 | # ---------------------------------------------------------------- 213 | # Outer loop – retry full patch attempt if no diff produced. 214 | # ---------------------------------------------------------------- 215 | 216 | for attempt in range(1, max_attempts + 1): 217 | LOGGER.info("[CodexHelper] patch attempt %d/%d", attempt, max_attempts) 218 | 219 | done_path.unlink(missing_ok=True) 220 | 221 | # ---------------------------------------------------------------- 222 | # Inner loop – retry CLI invocation on transient errors. 223 | # ---------------------------------------------------------------- 224 | 225 | cli_try = 0 226 | backoff = initial_backoff 227 | 228 | while cli_try < max_cli_retries: 229 | cli_try += 1 230 | LOGGER.info("[CodexHelper] launch #%d (backoff=%.1fs)", cli_try, backoff) 231 | 232 | cmd = [ 233 | self.codex_cli, 234 | "exec", 235 | "-m", 236 | self.codex_model, 237 | "-c model_reasoning_effort=high", 238 | "-c disable_response_storage=true", 239 | "-c sandbox_mode="+self.sandbox_mode, 240 | "--full-auto" if self.approval_mode == "full-auto" else "-c approval_policy="+self.approval_mode, 241 | prompt, 242 | ] 243 | 244 | master_fd, slave_fd = pty.openpty() 245 | proc = subprocess.Popen( 246 | cmd, 247 | cwd=self.working_dir, 248 | stdin=slave_fd, 249 | stdout=slave_fd, 250 | stderr=slave_fd, 251 | env=os.environ.copy(), 252 | text=False, 253 | close_fds=True, 254 | ) 255 | os.close(slave_fd) 256 | 257 | captured_chunks: List[str] = [] 258 | start_time = time.time() 259 | saw_retry_error = False 260 | 261 | # Helper to perform 3-stage kill. 262 | def _kill_proc(grace: float = 4.0) -> None: 263 | for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL): 264 | if proc.poll() is not None: 265 | return 266 | try: 267 | proc.send_signal(sig) 268 | proc.wait(timeout=grace) 269 | except subprocess.TimeoutExpired: 270 | continue 271 | 272 | # Use non-blocking read with select() similar to original. 273 | try: 274 | with os.fdopen(master_fd, "rb", buffering=0) as stream: 275 | while True: 276 | # Wall-clock timeout. 277 | if time.time() - start_time > timeout: 278 | LOGGER.error("[CodexHelper] hard timeout") 279 | raise TimeoutError 280 | 281 | # Sentinel detected? 282 | if done_path.exists(): 283 | LOGGER.info("[CodexHelper] done flag detected") 284 | _kill_proc() 285 | break 286 | 287 | ready, *_ = select.select([stream], [], [], 1.0) 288 | if ready: 289 | try: 290 | chunk = stream.read(4096) 291 | except OSError as e: 292 | if e.errno == errno.EIO: # PTY closed 293 | break 294 | raise 295 | 296 | if not chunk: 297 | break # EOF 298 | 299 | text = chunk.decode("utf-8", errors="replace") 300 | print(text, end="") # live pass-through to caller 301 | captured_chunks.append(text) 302 | 303 | # Check for retryable error messages on the fly. 
304 | if any(err in text for err in RETRY_ERRORS): 305 | LOGGER.warning("[CodexHelper] retryable error detected → abort") 306 | saw_retry_error = True 307 | _kill_proc() 308 | break 309 | 310 | if proc.poll() is not None and not ready: 311 | break 312 | except TimeoutError: 313 | _kill_proc() 314 | saw_retry_error = True 315 | LOGGER.warning("[CodexHelper] Codex timeout; will retry") 316 | 317 | # Decide if we should relaunch the CLI. 318 | if saw_retry_error: 319 | time.sleep(backoff) 320 | backoff *= 2 321 | continue # restart inner CLI loop 322 | 323 | # CLI completed without retryable error; break inner loop. 324 | break 325 | 326 | # After inner loop – did Codex create the sentinel and produce diff? 327 | 328 | if not done_path.exists(): 329 | LOGGER.warning("[CodexHelper] sentinel not created; next attempt") 330 | continue # outer attempt loop 331 | 332 | # Refresh repo to ensure it sees new changes. 333 | self.repo.git.add(A=True) 334 | 335 | if self.repo.git.diff('HEAD'): 336 | LOGGER.info("[CodexHelper] diff produced — success") 337 | return "".join(captured_chunks) 338 | 339 | LOGGER.info("[CodexHelper] sentinel present but no diff; next attempt") 340 | 341 | LOGGER.warning("[CodexHelper] exhausted attempts — no edits produced") 342 | return None 343 | 344 | 345 | # --------------------------------------------------------------------------- 346 | # Backwards-compat alias – internal code may still import CodexPatcher. 347 | # --------------------------------------------------------------------------- 348 | 349 | 350 | CodexPatcher = CodexHelper 351 | -------------------------------------------------------------------------------- /harness_generator/yamls/c-projects.yaml: -------------------------------------------------------------------------------- 1 | projects: 2 | - project_name: apache-httpd 3 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 4 | fuzz_tooling_ref: master 5 | - project_name: bc-gh 6 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 7 | fuzz_tooling_ref: master 8 | - project_name: bind9 9 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 10 | fuzz_tooling_ref: master 11 | - project_name: bluez 12 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 13 | fuzz_tooling_ref: master 14 | - project_name: cairo 15 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 16 | fuzz_tooling_ref: master 17 | - project_name: cgif 18 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 19 | fuzz_tooling_ref: master 20 | - project_name: civetweb 21 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 22 | fuzz_tooling_ref: master 23 | - project_name: clib 24 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 25 | fuzz_tooling_ref: master 26 | - project_name: cmake 27 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 28 | fuzz_tooling_ref: master 29 | - project_name: coturn 30 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 31 | fuzz_tooling_ref: master 32 | - project_name: cpuinfo 33 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 34 | fuzz_tooling_ref: master 35 | - project_name: croaring 36 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 37 | fuzz_tooling_ref: master 38 | - project_name: cryptsetup 39 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 40 | fuzz_tooling_ref: master 41 | - project_name: cups 42 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 43 | fuzz_tooling_ref: master 44 | - project_name: cyclonedds 45 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 46 | fuzz_tooling_ref: master 47 | - 
project_name: dbus-broker 48 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 49 | fuzz_tooling_ref: master 50 | - project_name: dnsmasq 51 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 52 | fuzz_tooling_ref: master 53 | - project_name: dovecot 54 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 55 | fuzz_tooling_ref: master 56 | - project_name: e2fsprogs 57 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 58 | fuzz_tooling_ref: master 59 | - project_name: edk2 60 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 61 | fuzz_tooling_ref: master 62 | - project_name: faad2 63 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 64 | fuzz_tooling_ref: master 65 | - project_name: flex 66 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 67 | fuzz_tooling_ref: master 68 | - project_name: freerdp 69 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 70 | fuzz_tooling_ref: master 71 | - project_name: fribidi 72 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 73 | fuzz_tooling_ref: master 74 | - project_name: fwupd 75 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 76 | fuzz_tooling_ref: master 77 | - project_name: gdbm 78 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 79 | fuzz_tooling_ref: master 80 | - project_name: gdk-pixbuf 81 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 82 | fuzz_tooling_ref: master 83 | - project_name: gnucobol 84 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 85 | fuzz_tooling_ref: master 86 | - project_name: gpac 87 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 88 | fuzz_tooling_ref: master 89 | - project_name: gpsd 90 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 91 | fuzz_tooling_ref: master 92 | - project_name: gss-ntlmssp 93 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 94 | fuzz_tooling_ref: master 95 | - project_name: h3 96 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 97 | fuzz_tooling_ref: master 98 | - project_name: hdf5 99 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 100 | fuzz_tooling_ref: master 101 | - project_name: hiredis 102 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 103 | fuzz_tooling_ref: master 104 | - project_name: hwloc 105 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 106 | fuzz_tooling_ref: master 107 | - project_name: igraph 108 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 109 | fuzz_tooling_ref: master 110 | - project_name: inchi 111 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 112 | fuzz_tooling_ref: master 113 | - project_name: inih 114 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 115 | fuzz_tooling_ref: master 116 | - project_name: jpegoptim 117 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 118 | fuzz_tooling_ref: master 119 | - project_name: jq 120 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 121 | fuzz_tooling_ref: master 122 | - project_name: kamailio 123 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 124 | fuzz_tooling_ref: master 125 | - project_name: krb5 126 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 127 | fuzz_tooling_ref: master 128 | - project_name: libbpf 129 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 130 | fuzz_tooling_ref: master 131 | - project_name: libcacard 132 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 133 | fuzz_tooling_ref: master 134 | - project_name: libconfig 135 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 136 | fuzz_tooling_ref: master 137 | - project_name: libcue 138 | 
fuzz_tooling_url: git@github.com:google/oss-fuzz.git 139 | fuzz_tooling_ref: master 140 | - project_name: libdwarf 141 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 142 | fuzz_tooling_ref: master 143 | - project_name: libfuse 144 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 145 | fuzz_tooling_ref: master 146 | - project_name: libiec61850 147 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 148 | fuzz_tooling_ref: master 149 | - project_name: libjpeg-turbo 150 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 151 | fuzz_tooling_ref: master 152 | - project_name: liblouis 153 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 154 | fuzz_tooling_ref: master 155 | - project_name: libmodbus 156 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 157 | fuzz_tooling_ref: master 158 | - project_name: liboqs 159 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 160 | fuzz_tooling_ref: master 161 | - project_name: libpg_query 162 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 163 | fuzz_tooling_ref: master 164 | - project_name: libproxy 165 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 166 | fuzz_tooling_ref: master 167 | - project_name: libredwg 168 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 169 | fuzz_tooling_ref: master 170 | - project_name: libsndfile 171 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 172 | fuzz_tooling_ref: master 173 | - project_name: libspdm 174 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 175 | fuzz_tooling_ref: master 176 | - project_name: libssh 177 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 178 | fuzz_tooling_ref: master 179 | - project_name: libucl 180 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 181 | fuzz_tooling_ref: master 182 | - project_name: libunwind 183 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 184 | fuzz_tooling_ref: master 185 | - project_name: libwebsockets 186 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 187 | fuzz_tooling_ref: master 188 | - project_name: libxlsxwriter 189 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 190 | fuzz_tooling_ref: master 191 | - project_name: libyal 192 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 193 | fuzz_tooling_ref: master 194 | - project_name: libyang 195 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 196 | fuzz_tooling_ref: master 197 | - project_name: lighttpd 198 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 199 | fuzz_tooling_ref: master 200 | - project_name: lldpd 201 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 202 | fuzz_tooling_ref: master 203 | - project_name: llhttp 204 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 205 | fuzz_tooling_ref: master 206 | - project_name: lua 207 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 208 | fuzz_tooling_ref: master 209 | - project_name: lxc 210 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 211 | fuzz_tooling_ref: master 212 | - project_name: md4c 213 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 214 | fuzz_tooling_ref: master 215 | - project_name: mdbtools 216 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 217 | fuzz_tooling_ref: master 218 | - project_name: memcached 219 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 220 | fuzz_tooling_ref: master 221 | - project_name: miniz 222 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 223 | fuzz_tooling_ref: master 224 | - project_name: mosquitto 225 | fuzz_tooling_url: 
git@github.com:google/oss-fuzz.git 226 | fuzz_tooling_ref: master 227 | - project_name: mpv 228 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 229 | fuzz_tooling_ref: master 230 | - project_name: ms-tpm-20-ref 231 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 232 | fuzz_tooling_ref: master 233 | - project_name: msquic 234 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 235 | fuzz_tooling_ref: master 236 | - project_name: networkmanager 237 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 238 | fuzz_tooling_ref: master 239 | - project_name: nginx 240 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 241 | fuzz_tooling_ref: master 242 | - project_name: nokogiri 243 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 244 | fuzz_tooling_ref: master 245 | - project_name: ntpsec 246 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 247 | fuzz_tooling_ref: master 248 | - project_name: numactl 249 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 250 | fuzz_tooling_ref: master 251 | - project_name: oniguruma 252 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 253 | fuzz_tooling_ref: master 254 | - project_name: open5gs 255 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 256 | fuzz_tooling_ref: master 257 | - project_name: opendds 258 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 259 | fuzz_tooling_ref: master 260 | - project_name: opensips 261 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 262 | fuzz_tooling_ref: master 263 | - project_name: openslide 264 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 265 | fuzz_tooling_ref: master 266 | - project_name: openvpn 267 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 268 | fuzz_tooling_ref: master 269 | - project_name: opusfile 270 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 271 | fuzz_tooling_ref: master 272 | - project_name: oss-fuzz-example 273 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 274 | fuzz_tooling_ref: master 275 | - project_name: ostree 276 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 277 | fuzz_tooling_ref: master 278 | - project_name: p11-kit 279 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 280 | fuzz_tooling_ref: master 281 | - project_name: pacemaker 282 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 283 | fuzz_tooling_ref: master 284 | - project_name: pidgin 285 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 286 | fuzz_tooling_ref: master 287 | - project_name: pjsip 288 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 289 | fuzz_tooling_ref: master 290 | - project_name: plan9port 291 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 292 | fuzz_tooling_ref: master 293 | - project_name: pngquant 294 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 295 | fuzz_tooling_ref: master 296 | - project_name: postfix 297 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 298 | fuzz_tooling_ref: master 299 | - project_name: postgresql 300 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 301 | fuzz_tooling_ref: master 302 | - project_name: proftpd 303 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 304 | fuzz_tooling_ref: master 305 | - project_name: protobuf-c 306 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 307 | fuzz_tooling_ref: master 308 | - project_name: pupnp 309 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 310 | fuzz_tooling_ref: master 311 | - project_name: pycryptodome 312 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 313 | 
fuzz_tooling_ref: master 314 | - project_name: python3-libraries 315 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 316 | fuzz_tooling_ref: master 317 | - project_name: qemu 318 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 319 | fuzz_tooling_ref: master 320 | - project_name: quickjs 321 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 322 | fuzz_tooling_ref: master 323 | - project_name: rabbitmq-c 324 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 325 | fuzz_tooling_ref: master 326 | - project_name: rauc 327 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 328 | fuzz_tooling_ref: master 329 | - project_name: rtpproxy 330 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 331 | fuzz_tooling_ref: master 332 | - project_name: samba 333 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 334 | fuzz_tooling_ref: master 335 | - project_name: selinux 336 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 337 | fuzz_tooling_ref: master 338 | - project_name: sound-open-firmware 339 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 340 | fuzz_tooling_ref: master 341 | - project_name: spdk 342 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 343 | fuzz_tooling_ref: master 344 | - project_name: sudoers 345 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 346 | fuzz_tooling_ref: master 347 | - project_name: tarantool 348 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 349 | fuzz_tooling_ref: master 350 | - project_name: tdengine 351 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 352 | fuzz_tooling_ref: master 353 | - project_name: tinysparql 354 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 355 | fuzz_tooling_ref: master 356 | - project_name: tmux 357 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 358 | fuzz_tooling_ref: master 359 | - project_name: unbound 360 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 361 | fuzz_tooling_ref: master 362 | - project_name: unit 363 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 364 | fuzz_tooling_ref: master 365 | - project_name: utf8proc 366 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 367 | fuzz_tooling_ref: master 368 | - project_name: util-linux 369 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 370 | fuzz_tooling_ref: master 371 | - project_name: varnish 372 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 373 | fuzz_tooling_ref: master 374 | - project_name: vlc 375 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 376 | fuzz_tooling_ref: master 377 | - project_name: vulkan-loader 378 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 379 | fuzz_tooling_ref: master 380 | - project_name: w3m 381 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 382 | fuzz_tooling_ref: master 383 | - project_name: wamr 384 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 385 | fuzz_tooling_ref: master 386 | - project_name: wasm3 387 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 388 | fuzz_tooling_ref: master 389 | - project_name: wazuh 390 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 391 | fuzz_tooling_ref: master 392 | - project_name: wolfmqtt 393 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 394 | fuzz_tooling_ref: master 395 | - project_name: xbps 396 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 397 | fuzz_tooling_ref: master 398 | - project_name: xen 399 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 400 | fuzz_tooling_ref: master 401 | - project_name: xs 402 | 
fuzz_tooling_url: git@github.com:google/oss-fuzz.git 403 | fuzz_tooling_ref: master 404 | - project_name: zip 405 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 406 | fuzz_tooling_ref: master 407 | - project_name: zydis 408 | fuzz_tooling_url: git@github.com:google/oss-fuzz.git 409 | fuzz_tooling_ref: master -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SHERPA — Security Harness Engineering for Robust Program Analysis 2 | 3 |
4 | 5 | SHERPA - Security Harness Engineering for Robust Program Analysis 6 | 7 | **Revolutionary LLM-powered fuzzing that targets attacker-controlled entry points** 8 | *Finding real bugs by moving fuzzing up the stack to where attackers actually hit production systems* 9 | 10 | Developed as part of DARPA's AI Cyber Challenge (AIxCC) 11 | 12 |
13 | 14 | --- 15 | 16 | ## 🎯 Impact: Real Bugs in Production Software 17 | 18 |
19 | 20 | | 💥 **Raw Crashes Found** | 🔍 **Auto-Filtered** | ✅ **Validated CVE-Class Bugs** | 🎯 **Precision Rate** | 21 | |:---:|:---:|:---:|:---:| 22 | | **127+** | **100+** | **18** | **67%** | 23 | 24 |
25 | 26 | **What makes this significant:** 27 | * 🎯 **Targeted Discovery**: 18 validated bugs from just 27 high-signal crashes (vs. typical fuzzing's 1-5% success rate) 28 | * 🚀 **Speed**: Minutes of targeted fuzzing vs. weeks of traditional coverage-guided fuzzing 29 | * 🔍 **Quality**: LLM crash analysis automatically filters 80% of false positives before human review 30 | * 📦 **Production Ready**: Every bug comes with reproducer, analysis, and patch guidance 31 | * 🔒 **Responsible Process**: Bugs are disclosed responsibly to maintainers with coordinated timelines 32 | 33 | 34 | 35 | 44 | 53 | 54 |
36 | 37 | **The Problem:** Traditional fuzzing targets low-level APIs that attackers rarely reach directly. Real exploits happen at high-level parsers processing attacker-controlled files, packets, and streams. 38 | 39 | **SHERPA's Solution:** An LLM pipeline that automatically identifies and generates fuzz harnesses for these high-value, attacker-controlled entry points in OSS-Fuzz projects—the exact functions that parse malicious PDFs, process network packets, and handle file uploads. 40 | 41 | **🚀 Key Innovation:** Moving fuzzing *up the stack* to where real attacks happen, with LLM agents that understand security impact and generate maintainer-ready artifacts. 42 | 43 | 45 | 46 | 47 | 48 | 49 | Kudu Logo 50 | 51 | 52 |
55 | 
56 | * 🧪 **27 crash‑inducing inputs auto‑produced** across multiple OSS‑Fuzz projects
57 | * ✅ **18 validated bugs** after human triage (**\~67%** of auto‑produced crashes)
58 | * 🧹 **100+ false‑positive crashes auto‑filtered** by our LLM crash‑analysis agent
59 | * 🧵 **New high‑value harnesses**: focus on top‑level parsers and public APIs that consume untrusted data
60 | 
61 | ## 🚀 Quick Start
62 | 
63 | ```bash
64 | # Clone the repository
65 | git clone https://github.com/AIxCyberChallenge/sherpa.git
66 | 
67 | # Run environment setup (supports macOS & Linux)
68 | make setup
69 | 
70 | # Generate harnesses for a target project (leveldb)
71 | # On Apple Silicon (M1/M2/M4), ensure Docker uses amd64 architecture:
72 | export DOCKER_DEFAULT_PLATFORM=linux/amd64
73 | # Set your OpenAI API key (required for harness generation)
74 | export OPENAI_API_KEY="your-api-key-here"
75 | # Run an example project against the existing harness_generator/yamls/leveldb.yaml file
76 | make leveldb
77 | ```
78 | 
79 | ## 🏗️ How It Works
80 | 
81 | ```mermaid
82 | flowchart TD
83 |     A[📦 OSS-Fuzz Project] --> B{🔍 Coverage Analysis}
84 |     B --> C[🎯 Identify Unfuzzed Entry Points]
85 | 
86 |     C --> D[🤖 LLM Harness Generation]
87 |     D --> E[🔨 Build & Test Harness]
88 |     E --> F{✅ Build Success?}
89 |     F -->|No| G[🛠️ Auto-Fix Build Issues]
90 |     G --> E
91 | 
92 |     F -->|Yes| H[🎲 Fuzzing Campaign]
93 |     H --> I[💥 Crash Detection]
94 |     I --> J[🧠 LLM Crash Analysis]
95 |     J --> K{🔬 Valid Bug?}
96 | 
97 |     K -->|No| L[❌ Filter False Positive]
98 |     K -->|Yes| M[📋 Generate Bug Report]
99 | 
100 |     M --> N[✅ Validated Bug + Artifacts]
101 |     L --> O[📊 Metrics Update]
102 |     N --> O
103 | 
104 |     %% Position nodes to use horizontal space better
105 |     B ~~~ D
106 |     E ~~~ H
107 |     J ~~~ M
108 |     L ~~~ N
109 | 
110 |     style A fill:#e1f5fe
111 |     style D fill:#f3e5f5
112 |     style J fill:#f3e5f5
113 |     style N fill:#e8f5e8
114 |     style L fill:#ffebee
115 | 
116 |     classDef aiNode fill:#f3e5f5,stroke:#9c27b0,stroke-width:2px
117 |     classDef successNode fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
118 |     classDef errorNode fill:#ffebee,stroke:#f44336,stroke-width:2px
119 | 
120 |     class D,J aiNode
121 |     class N successNode
122 |     class L errorNode
123 | ```
124 | 
125 | **Pipeline Stages:**
126 | 
127 | 1. **📊 Coverage Gap Analysis**: Identify unfuzzed, attacker-controlled entry points
128 | 2. **🤖 LLM Harness Generation**: Generate targeted fuzz harnesses using LLM agents
129 | 3. **🔨 Build-Until-Green**: Automatically fix compilation and build issues
130 | 4. **🎯 Targeted Fuzzing**: Run focused fuzzing campaigns on new harnesses
131 | 5. **🧠 LLM Crash Triage**: Automatically filter false positives and analyze crashes
132 | 6. **📋 Maintainer Artifacts**: Deliver actionable bug reports with repro steps
133 | 
134 | ---
135 | 
136 | Through these techniques we were able to produce **new harnesses** as well as **corresponding crashing fuzzer inputs**. Each auto‑produced crash is summarized below.
137 | 138 | # Automatically Produced Crashes 139 | 140 | | Bug Type | CWE | Repo Count | 141 | |--------------------------------------|---------|------------| 142 | | Uncontrolled memory allocation (DoS) | CWE-770 | 12 | 143 | | Heap buffer overflow | CWE-122 | 4 | 144 | | Off-by-one heap buffer overflow | CWE-193 | 3 | 145 | | Null pointer dereference | CWE-476 | 2 | 146 | | Out-of-bounds string replacement | CWE-787 | 1 | 147 | | Performance hang | CWE-834 | 1 | 148 | | Infinite loop (DoS) | CWE-835 | 1 | 149 | | Double-free | CWE-415 | 1 | 150 | | Uninitialized memory read | CWE-908 | 1 | 151 | | Stack buffer underflow | CWE-124 | 1 | 152 | 153 | From these crashes, human engineers performed triage and validation to produce a set of **legitimate bugs**. The validated set is shown below. 154 | 155 | # Validated Bugs 156 | 157 | | Bug Type | CWE | Repo Count | 158 | |--------------------------------------|---------|------------| 159 | | Uncontrolled memory allocation (DoS) | CWE-770 | 8 | 160 | | Off-by-one heap buffer overflow | CWE-193 | 3 | 161 | | Heap buffer overflow | CWE-122 | 2 | 162 | | Infinite loop (DoS) | CWE-835 | 1 | 163 | | Null pointer dereference | CWE-476 | 1 | 164 | | Stack buffer underflow | CWE-124 | 1 | 165 | | Double-free | CWE-415 | 1 | 166 | | Uninitialized memory read | CWE-908 | 1 | 167 | 168 | --- 169 | 170 |

171 | 172 | 173 | Validated Bugs by Category (n=18) 174 | 175 |
Figure: Validated bugs by category (n=18). 176 |

177 | 178 | > 179 | --- 180 | **📋 Responsible Disclosure Note** 181 | 182 | We provide a detailed [LevelDB case study](leveldb_writeup/workflow.md) as a complete example of SHERPA's methodology. The remaining 17 validated bugs are currently undergoing responsible disclosure with their respective maintainers. Full technical details and attribution to SHERPA will be published upon completion of the disclosure process, ensuring maintainers have adequate time to develop and deploy patches. 183 | 184 | --- 185 | 186 | ## False Positives & Quality Control 187 | 188 | We encountered **>100 raw false positives** (e.g., harness errors, misuse of library APIs, or practically unreachable conditions). These were **automatically triaged** by an LLM crash‑analysis agent and **excluded** from the *Automatically Produced Crashes* table. The remaining items were then **manually triaged** to produce the *Validated Bugs* table above. 189 | 190 | **How we mitigate false positives (two layers):** 191 | 192 | 1. **Ex‑ante safeguards in the harness‑generator prompt** 193 | The generator is instructed to: 194 | 195 | * Target **public/documented APIs** and **high‑level parsers** that ingest attacker‑controlled inputs. 196 | * **Mirror canonical initialization** and teardown patterns from project examples. 197 | * **Honor documented preconditions** (sizes, flags, state); avoid undefined behavior and unrealistic call sequences. 198 | * Prefer **file/stream entry points** and add a small, meaningful seed corpus. 199 | * Compile with sanitizers and ensure resource hygiene (close files, free objects). 200 | 201 | 2. **Ex‑post LLM crash analysis (automatic triage)** 202 | For each crash, the agent: 203 | 204 | * Parses sanitizer logs and **attributes the fault** to project/library vs. harness code. 205 | * Detects **harness anti‑patterns** (calling private/internal functions, skipping required init, invalid lifetimes, wrong buffer sizes/flags). 206 | * Labels likely non‑bugs as false-positives and filters them out. 207 | * Produces `crash_analysis.md` with root cause, impact, and patch guidance for retained findings. 208 | 209 | This two‑stage process yields a **high‑signal set** of auto‑produced crashes (first table), which then undergo **human validation** (second table) to confirm exploitability and practical relevance. 210 | 211 | --- 212 | 213 | ## 🚀 Why This Changes Everything 214 | 215 | **Traditional fuzzing finds bugs that attackers can't reach. SHERPA finds bugs that attackers exploit.** 216 | 217 | ### **The Fundamental Problem** 218 | Current fuzzing tools excel at hitting internal parsing functions, but attackers don't call `parse_header_field()` directly. They upload malicious PDFs, send crafted network packets, or submit malformed form data. There's a massive gap between what we fuzz and what attackers actually hit. 
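
To make the gap concrete, here is a minimal sketch of the kind of harness SHERPA aims to generate. The API names (`pdf_document_open_from_memory`, `pdf_document_free`) are hypothetical stand-ins for a real library's top-level parser entry point; only the `LLVMFuzzerTestOneInput` signature is fixed:

```c++
// Illustrative entry-point-level harness (hypothetical API names).
// The raw fuzzer bytes go to the same top-level parser an attacker
// reaches via a file upload - not to an internal helper such as
// parse_header_field().
#include <stddef.h>
#include <stdint.h>

// Hypothetical top-level parser API, declared here for illustration only.
struct pdf_document;
extern "C" pdf_document *pdf_document_open_from_memory(const uint8_t *buf, size_t len);
extern "C" void pdf_document_free(pdf_document *doc);

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  // Reachable from untrusted input: file uploads, email attachments, etc.
  pdf_document *doc = pdf_document_open_from_memory(data, size);
  if (doc != nullptr) {
    pdf_document_free(doc);  // resource hygiene between iterations
  }
  return 0;
}
```

Everything except the harness signature is what SHERPA varies per project: mirroring the library's canonical init/teardown and documenting why the entry point is attacker-reachable.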
219 | 220 | ### **SHERPA's Paradigm Shift** 221 | 222 | | Traditional Approach | SHERPA Approach | 223 | |---------------------|---------------| 224 | | 🎯 **Target**: Low-level internal APIs | 🎯 **Target**: Attacker-controlled entry points | 225 | | ⏱️ **Speed**: Weeks of coverage-guided fuzzing | ⏱️ **Speed**: Minutes of targeted fuzzing | 226 | | 🎲 **Success**: 1-5% of crashes are real bugs | 🎲 **Success**: 67% precision rate | 227 | | 👨‍💻 **Triage**: Manual analysis of 100+ crashes | 👨‍💻 **Triage**: AI pre-filters to 18 validated bugs | 228 | | 📋 **Output**: Raw crash dumps | 📋 **Output**: CVE-ready reports with patches | 229 | 230 | ### **Real-World Impact** 231 | - **Security Teams**: Find exploitable bugs, not just fuzzing artifacts 232 | - **OSS Maintainers**: Receive actionable reports with reproduction steps and patch guidance 233 | - **Red Teams**: Discover attack surfaces that traditional tools miss 234 | - **Researchers**: Scale expert-level security analysis using LLM reasoning 235 | 236 | **Bottom Line**: We're not just improving fuzzing efficiency—we're changing what gets fuzzed to match real attack patterns. 237 | 238 | --- 239 | 240 | ## 🤖 LLM Architecture & AI Integration 241 | 242 | ### **Model Selection & Configuration** 243 | - **Primary Model**: OpenAI o3 (latest reasoning model) for complex code generation and analysis 244 | - **Fallback Model**: o4-mini for lighter tasks like report generation 245 | - **API Integration**: Standard OpenAI-compatible endpoints with robust retry logic 246 | - **Execution Environment**: Custom `CodexHelper` wrapper with pseudo-terminal integration 247 | 248 | ### **Multi-Stage AI Pipeline** 249 | 250 | **Stage 1: Intelligent Target Selection** 251 | - Analyzes OSS-Fuzz project structure and existing harnesses 252 | - Identifies high-value, unfuzzed entry points using security heuristics 253 | - Prioritizes public APIs that process attacker-controlled data 254 | 255 | **Stage 2: Context-Aware Harness Generation** 256 | - Clones target repositories for API signature validation 257 | - Generates libFuzzer harnesses with proper library initialization 258 | - Includes realistic setup patterns to prevent false positives 259 | 260 | **Stage 3: Automated Build Debugging** 261 | - Captures compiler errors and automatically generates minimal fixes 262 | - Iterative build-fix cycles (configurable retry limit) 263 | - Preserves project structure while ensuring compilation success 264 | 265 | **Stage 4: Semantic Corpus Generation** 266 | - Creates meaningful seed inputs based on harness analysis 267 | - Generates both text and binary test cases as appropriate 268 | - Focuses on edge cases and boundary conditions 269 | 270 | **Stage 5: Intelligent Crash Triage** 271 | - Distinguishes genuine bugs from harness implementation errors 272 | - Uses sentinel patterns (`HARNESS ERROR`) for automatic filtering 273 | - Performs root cause analysis with impact assessment 274 | 275 | ### **Key Prompt Engineering Innovations** 276 | 277 | 1. **Security-First Target Selection**: AI agents prioritize real attack surfaces over internal APIs 278 | 2. **Anti-False-Positive Design**: Built-in validation prevents common harness implementation errors 279 | 3. **Automated Quality Control**: Two-layer filtering (prompt guidance + post-crash analysis) 280 | 4. 
**Maintainer-Ready Output**: Structured reports with CWE mapping and actionable patch guidance
281 | 
282 | ### **Technical Implementation Details**
283 | 
284 | ```python
285 | # Core architecture components
286 | class HarnessGenerator:
287 |     def _invoke_codex_for_harness(self) -> None:
288 |         """Targets highest-level APIs with attacker-controlled input,
289 |         validates function signatures against cloned repositories,
290 |         and ensures realistic library usage patterns."""
291 | 
292 |     def _generate_bug_report(self) -> None:
293 |         """Analyzes crashes for genuine vs. harness-induced bugs,
294 |         generates CVE-ready reports with impact assessment,
295 |         and creates reproduction scripts for maintainers."""
296 | ```
297 | 
298 | **Advanced Features:**
299 | - **Robust Error Handling**: Automatic retry with exponential backoff for API failures
300 | - **Git Integration**: Tracks changes and ensures clean diffs for each AI intervention
301 | - **Resource Management**: Configurable timeouts and memory limits for long-running operations
302 | - **Parallel Processing**: Concurrent harness generation across multiple projects
303 | 
304 | **Full implementation details available in source code under the MIT license.**
305 | 
306 | ---
307 | 
308 | ## 🛠️ Configuration & Advanced Usage
309 | 
310 | ### Environment Setup
311 | ```bash
312 | # Set your OpenAI API key (required for harness generation)
313 | export OPENAI_API_KEY="your-api-key-here"
314 | 
315 | # Optional: Configure target projects
316 | export OSS_FUZZ_PATH="/path/to/oss-fuzz"
317 | ```
318 | 
319 | ### Project Configuration
320 | Projects are configured via YAML files in `harness_generator/yamls/`:
321 | - `leveldb.yaml` - LevelDB-specific settings
322 | 
323 | ### Batch Processing
324 | ```bash
325 | # Generate harnesses for multiple projects
326 | cd harness_generator
327 | python batch_generate.py --targets yamls/c-projects.yaml --threads 4
328 | ```
329 | 
330 | ---
331 | 
332 | ## 🤝 Contributing
333 | 
334 | We welcome contributions! This project was developed for the security research community.
335 | 
336 | ### Ways to Contribute:
337 | - **🐛 Bug Reports**: Found an issue? Open a GitHub issue
338 | - **🎯 New Target Projects**: Add YAML configs for additional OSS-Fuzz projects (see the example config after this section)
339 | - **🧠 LLM Improvements**: Enhance prompt engineering or crash analysis
340 | - **📊 Evaluation**: Run SHERPA on new projects and share results
341 | 
342 | ### Development Setup:
343 | ```bash
344 | git clone https://github.com/AIxCyberChallenge/sherpa.git
345 | cd sherpa/harness_generator
346 | pip install -r requirements.txt
347 | bash setup-env.sh
348 | ```
349 | 
350 | ### Responsible Disclosure:
351 | SHERPA follows industry-standard responsible disclosure practices:
352 | - **Coordinated timelines** with maintainers (typically 90-day disclosure window)
353 | - **Patch assistance** provided to development teams when requested
354 | - **Public attribution** to SHERPA methodology upon disclosure completion
355 | - **CVE coordination** through appropriate channels (MITRE, project security teams)
356 | 
357 | For detailed examples of our methodology and results, see the [LevelDB case study](leveldb_writeup/workflow.md).
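
For reference, a new target config is a small YAML file with the same schema as the existing `leveldb.yaml` and `c-projects.yaml`. The file name and project name below are placeholders, not a config shipped with this repo:

```yaml
# Hypothetical example: harness_generator/yamls/my-target.yaml
projects:
  - project_name: your-project   # must be an existing OSS-Fuzz project name
    fuzz_tooling_url: git@github.com:google/oss-fuzz.git
    fuzz_tooling_ref: master
```

Run it the same way as the batch example above: `python batch_generate.py --targets yamls/my-target.yaml`.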
358 | 
359 | ---
360 | 
361 | ### Unharnessed Fuzzing
362 | 
363 | This project also includes a utility to generate OSS-Fuzz-compliant fuzzing harnesses for projects
364 | that are not currently supported by OSS-Fuzz.
365 | 
366 | #### Fuzz a single repository
367 | ```
368 | cd harness_generator/src
369 | python fuzz_unharnessed_repo.py --repo <repo-url>
370 | 
371 | # for example
372 | python fuzz_unharnessed_repo.py --repo https://github.com/syoyo/tinyexr.git
373 | ```
374 | 
375 | ## 📜 License & Citation
376 | 
377 | **License**: This project is released under the MIT License - see `LICENSE` for details.
378 | 
379 | **Citation**: If you use SHERPA in your research, please cite:
380 | ```bibtex
381 | @misc{sherpa2025,
382 |   title={SHERPA: Security Harness Engineering for Robust Program Analysis},
383 |   author={Kudu Dynamics},
384 |   year={2025},
385 |   url={https://github.com/AIxCyberChallenge/sherpa.git},
386 |   note={Developed as part of DARPA's AI Cyber Challenge (AIxCC)}
387 | }
388 | ```
389 | 
390 | **Acknowledgments**:
391 | This work was developed as part of **DARPA's AI Cyber Challenge (AIxCC)**, which brings together leading experts in LLMs and cybersecurity to safeguard software critical to national infrastructure. Learn more at [aicyberchallenge.com](https://aicyberchallenge.com).
392 | 
393 | ---
394 | 
395 | <div align="center">
396 | 397 | **🔒 Built for Security Researchers, by Security Researchers** 398 | 399 | *Developed by [Kudu Dynamics](https://kududyn.com) as part of [DARPA's AI Cyber Challenge](https://aicyberchallenge.com)* 400 | 401 | [![GitHub stars](https://img.shields.io/github/stars/aixcyberchallenge/sherpa?style=social)](https://github.com/AIxCyberChallenge/sherpa/stargazers) 402 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 403 | [![AIxCC](https://img.shields.io/badge/DARPA-AIxCC-blue.svg)](https://aicyberchallenge.com) 404 | 405 |
406 | -------------------------------------------------------------------------------- /harness_generator/src/harness_generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | #──────────── 4 | # 5 | # Copyright 2025 Artificial Intelligence Cyber Challenge 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 8 | # this software and associated documentation files (the “Software”), to deal in the 9 | # Software without restriction, including without limitation the rights to use, 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 11 | # Software, and to permit persons to whom the Software is furnished to do so, 12 | # subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # ──────────── 25 | 26 | """ 27 | harness_generator.py 28 | ──────────────────── 29 | 30 | Automates the lifecycle of adding an extra libFuzzer harness to an 31 | OSS-Fuzz project and running it end-to-end. Operations are performed in a 32 | working copy of the project's oss-fuzz directory and rely on the **Codex CLI** 33 | for all code-writing tasks. 34 | 35 | High-level flow 36 | =============== 37 | 1. Baseline build - compile the docker image and existing fuzzers to record 38 | the current binary set. 39 | 2. Extract archives - unpack any tar/zip bundles so the source can be edited 40 | directly. 41 | 3. Harness creation - ask Codex to write a new `LLVMFuzzerTestOneInput` and 42 | update build scripts. 43 | 4. Repack archives - re-create any bundles that were unpacked and edited. 44 | 5. Rebuild with retries - rebuild image & fuzzers; on compiler errors, send 45 | the diagnostics back to Codex for minimal fixes (configurable retries). 46 | 6. Seed corpus - before each new fuzzer is executed, instruct Codex to 47 | populate the corresponding corpus directory with one or more meaningful 48 | seed inputs, using the harness source as context. 49 | 7. Fuzzer execution - run every newly-built fuzzer, capture stdout/stderr and 50 | detect any crash / OOM / timeout artifacts. 51 | 8. Crash handling - for the first crash found: 52 | • reproduce it with `infra/helper.py reproduce` and write 53 | `crash_reproduction.log` (commented with the exact command); 54 | • gather the reproducer log, harness source and a hexdump of the input 55 | into `crash_info.md`; 56 | • pass the same context to Codex and request `crash_analysis.md` that 57 | summarises bug type, impact and patch guidance. 58 | 59 | Command-line flags allow skipping the rebuild, running a smoke test first, or 60 | changing the maximum fix-retry count. The script requires Python ≥ 3.9, 61 | GitPython, python-dotenv, the Codex CLI, Docker and a functional oss-fuzz 62 | checkout. 
63 | """ 64 | 65 | from __future__ import annotations 66 | 67 | import argparse 68 | import logging 69 | import os 70 | import re 71 | import shutil 72 | import subprocess 73 | import sys 74 | import tarfile 75 | import tempfile 76 | import textwrap 77 | import time 78 | import uuid 79 | from dotenv import load_dotenv 80 | from git import Repo, exc as git_exc 81 | from pathlib import Path 82 | from typing import Dict, Sequence 83 | 84 | # Make the helper discoverable whether this module is executed as a script 85 | # or imported as part of the *src* package. 86 | try: 87 | from .codex_helper import CodexHelper # type: ignore 88 | except ImportError: # pragma: no cover 89 | import sys 90 | from pathlib import Path as _Path 91 | 92 | _SRC_DIR = _Path(__file__).resolve().parent 93 | sys.path.insert(0, str(_SRC_DIR)) 94 | from codex_helper import CodexHelper # type: ignore 95 | 96 | # --------------------------------------------------------------------------- # 97 | # Constants 98 | # --------------------------------------------------------------------------- # 99 | DEFAULT_SANITIZER = "address" 100 | MAX_BUILD_RETRIES = 3 101 | CODEX_ANALYSIS_MODEL = os.environ.get("CODEX_ANALYSIS_MODEL", "o3") 102 | CODEX_APPROVAL_MODE = os.environ.get("CODEX_APPROVAL_MODE", "full-auto") 103 | 104 | 105 | class HarnessGeneratorError(RuntimeError): 106 | """Raised for any harness-generation failure.""" 107 | 108 | 109 | class HarnessGenerator: 110 | """Automate Codex-assisted creation of an additional OSS-Fuzz harness.""" 111 | 112 | # ───────────────────────── INITIALIZATION ───────────────────────── # 113 | 114 | def __init__( 115 | self, 116 | project_name: str, 117 | oss_fuzz_path: Path, 118 | *, 119 | ai_key_path: str, 120 | sanitizer: str = DEFAULT_SANITIZER, 121 | codex_cli: str = "codex", 122 | scratch_space: Path | None = None, 123 | copy_repo: bool = False, 124 | ) -> None: 125 | # Basic fields 126 | self.project = project_name.strip() 127 | self.oss_fuzz_path = oss_fuzz_path.expanduser().resolve() 128 | self.ai_key_path = Path(ai_key_path).expanduser() 129 | self.sanitizer = sanitizer 130 | self.codex_cli = codex_cli 131 | self.scratch_space = scratch_space or Path("/tmp") 132 | self.copy_repo = copy_repo 133 | self.logger = logging.getLogger(__name__) 134 | 135 | if not self.oss_fuzz_path.is_dir(): 136 | raise FileNotFoundError( 137 | f"OSS-Fuzz path not found: {self.oss_fuzz_path}" 138 | ) 139 | 140 | # Optionally copy the oss-fuzz tree so Codex works on a throw-away copy 141 | self.repo_path = ( 142 | self._copy_repo(self.oss_fuzz_path) 143 | if copy_repo 144 | else self.oss_fuzz_path 145 | ) 146 | self.repo = self._ensure_git_repo(self.repo_path) 147 | 148 | print(f"[*] Ready (project={self.project}, repo={self.repo_path})") 149 | 150 | # Mapping of extracted_dir → original_archive_path 151 | self._archives: Dict[Path, Path] = {} 152 | 153 | # ───────────────────────── PUBLIC ENTRY-POINT ────────────────────── # 154 | def generate_harness( 155 | self, 156 | *, 157 | build: bool = True, 158 | run_smoke: bool = False, 159 | max_iterations: int = MAX_BUILD_RETRIES, 160 | ) -> None: 161 | """Run the full workflow end-to-end.""" 162 | 163 | # 1. 
Baseline build (with automatic Codex-assisted fixes) 164 | print("[*] Building docker image and fuzzers...") 165 | self._build_with_retries(clean=True, max_iterations=1) 166 | 167 | baseline_fuzzers = self._list_fuzzer_binaries() 168 | print( 169 | f"[*] Baseline has {len(baseline_fuzzers)} fuzzer(s):\n{baseline_fuzzers}\n" 170 | ) 171 | if run_smoke: 172 | self._run_any_fuzzer_once() 173 | 174 | # 2. Extract any archives 175 | print("[*] Extracting any project archives...") 176 | self._extract_archives() 177 | 178 | # 3. Ask Codex to add a harness 179 | print("[*] Running Codex to generate a new harness...") 180 | self._invoke_codex_for_harness() 181 | 182 | # 4. Re-pack archives (Codex may have edited files inside them) 183 | print("[*] Repackaging any project archives...") 184 | self._repack_archives() 185 | 186 | # 5. Rebuild after harness has been added (again with retries) 187 | if build: 188 | print("[*] Attempting image/fuzzer rebuild...") 189 | self._build_with_retries(clean=False, max_iterations=max_iterations) 190 | 191 | # 6. Detect which fuzzers are new and run them 192 | final_fuzzers = self._list_fuzzer_binaries() 193 | new_fuzzers = sorted(final_fuzzers - baseline_fuzzers) 194 | 195 | if not new_fuzzers: 196 | print("[!] No new fuzzer binaries detected after Codex run.") 197 | return 198 | 199 | print( 200 | f"[*] Detected {len(new_fuzzers)} new fuzzer(s): {', '.join(new_fuzzers)}" 201 | ) 202 | for fuzzer in new_fuzzers: 203 | # ── Generate seed corpus files before running ── 204 | try: 205 | self._invoke_codex_to_generate_seeds(fuzzer) 206 | except HarnessGeneratorError as err: 207 | print(f"[!] Failed to generate seeds for {fuzzer}: {err}") 208 | 209 | print(f"[*] ➤ Running {fuzzer} …") 210 | time.sleep(5) 211 | try: 212 | # ── Record existing crash/timeout/oom files before run 213 | baseline_bug_files = self._find_bug_files() 214 | 215 | output = self._run_fuzzer(fuzzer) 216 | 217 | # ── Detect newly-generated bug files 218 | new_bug_files = self._find_bug_files() - baseline_bug_files 219 | if new_bug_files: 220 | print( 221 | f"[!] Detected {len(new_bug_files)} crash/oom/timeout file(s):" 222 | ) 223 | for p in new_bug_files: 224 | print(f" • {p.relative_to(self.repo_path)}") 225 | 226 | # Reproduce only the first file (additional files can be handled later) 227 | bug_path = sorted(new_bug_files)[0] 228 | try: 229 | repro_log, repro_cmd = self._reproduce_crash( 230 | fuzzer, bug_path 231 | ) 232 | self._generate_bug_report( 233 | fuzzer, bug_path, repro_log, repro_cmd 234 | ) 235 | except HarnessGeneratorError as err: 236 | print( 237 | f"[!] Failed to reproduce or analyse crash: {err}" 238 | ) 239 | 240 | except HarnessGeneratorError as err: 241 | print(f"[!] 
{fuzzer} failed: {err}") 242 | 243 | # ───────────────────────── INTERNAL HELPERS ─────────────────────── # 244 | 245 | # ---- Git helpers -------------------------------------------------- # 246 | def _copy_repo(self, src: Path) -> Path: 247 | dst = Path( 248 | tempfile.mkdtemp( 249 | prefix="oss-fuzz-harness-", dir=str(self.scratch_space) 250 | ) 251 | ) 252 | shutil.copytree(src, dst, dirs_exist_ok=True) 253 | return dst 254 | 255 | def _ensure_git_repo(self, path: Path) -> Repo: 256 | try: 257 | repo = Repo(path) 258 | except git_exc.InvalidGitRepositoryError: 259 | repo = Repo.init(path) 260 | repo.git.add(A=True) 261 | try: 262 | repo.git.commit(m="Initial commit (baseline)", allow_empty=True) 263 | except git_exc.GitCommandError: 264 | pass 265 | return repo 266 | 267 | # ---- Build helpers ------------------------------------------------- # 268 | def _build_image_and_fuzzers(self, *, clean: bool) -> None: 269 | helper = self.repo_path / "infra" / "helper.py" 270 | if not helper.is_file(): 271 | raise HarnessGeneratorError( 272 | "infra/helper.py not found - invalid checkout?" 273 | ) 274 | 275 | env = os.environ.copy() 276 | env.setdefault("OSSFUZZ_SKIP_UNSHALLOW", "1") 277 | 278 | # Build image (auto-confirm y/n prompt) 279 | self._run_cmd( 280 | ["python3", str(helper), "build_image", self.project], 281 | cwd=self.repo_path, 282 | env=env, 283 | input="y\n", 284 | ) 285 | 286 | # Build fuzzers 287 | cmd = [ 288 | "python3", 289 | str(helper), 290 | "build_fuzzers", 291 | self.project, 292 | "--sanitizer", 293 | self.sanitizer, 294 | ] 295 | if clean: 296 | cmd.append("--clean") 297 | self._run_cmd(cmd, cwd=self.repo_path, env=env) 298 | 299 | # ---- Fuzzer discovery -------------------------------------------- # 300 | def _list_fuzzer_binaries(self) -> set[str]: 301 | """Return the names of all executable fuzzer binaries for this project.""" 302 | out_dir = self.repo_path / "build" / "out" / self.project 303 | if not out_dir.is_dir(): 304 | return set() 305 | return { 306 | p.name 307 | for p in out_dir.iterdir() 308 | if p.is_file() 309 | and os.access(p, os.X_OK) 310 | and not p.name.endswith(".dict") 311 | } 312 | 313 | # ---- Build with retries (Codex-assisted) ------------------------- # 314 | def _build_with_retries( 315 | self, 316 | *, 317 | clean: bool, 318 | max_iterations: int = MAX_BUILD_RETRIES, 319 | ) -> None: 320 | """Attempt to build image & fuzzers, asking Codex to fix failures. 321 | 322 | This consolidates the repeated logic used for both the initial 323 | baseline build **and** the post-harness rebuild. On every failure we 324 | forward the compiler diagnostics to Codex, let it apply minimal 325 | patches, optionally re-package any modified archives, and then retry 326 | the build until it succeeds or *max_iterations* is reached. 327 | """ 328 | 329 | for attempt in range(1, max_iterations + 1): 330 | try: 331 | # Only pass the --clean flag on the *first* attempt – subsequent 332 | # iterations should reuse the prior build cache to save time. 333 | self._build_image_and_fuzzers(clean=clean and attempt == 1) 334 | print(f"[*] Fuzzer build succeeded on attempt {attempt}!") 335 | return 336 | except HarnessGeneratorError as err: 337 | if attempt == max_iterations: 338 | raise 339 | 340 | print( 341 | f"[!] Build failed (attempt {attempt}/{max_iterations}). " 342 | "Sending compiler stderr back to Codex..." 343 | ) 344 | 345 | # Ask Codex for a minimal patch based on the compiler output. 
346 | self._invoke_codex_to_fix_build(str(err)) 347 | 348 | # If the project uses bundled source archives we may have to 349 | # regenerate them after Codex edits. 350 | self._repack_archives() 351 | 352 | # ---- Archive extraction / repack ---------------------------------- # 353 | ARCHIVE_REGEX = re.compile(r"\.(?:tar\.gz|tgz|tar|zip)$", re.IGNORECASE) 354 | 355 | def _extract_archives(self) -> None: 356 | proj_dir = self.repo_path / "projects" / self.project 357 | if not proj_dir.is_dir(): 358 | return 359 | 360 | for arch in proj_dir.rglob("*"): 361 | if arch.is_file() and self.ARCHIVE_REGEX.search(arch.name): 362 | if arch.name.endswith(".tar.gz"): 363 | extract_root = arch.with_name( 364 | arch.stem[:-4] 365 | ) # Remove .tar from .tar.gz 366 | elif arch.name.endswith(".tgz"): 367 | extract_root = arch.with_name(arch.stem) 368 | else: 369 | extract_root = arch.with_suffix("") 370 | 371 | if extract_root.exists(): 372 | continue 373 | 374 | print(f"[*] Extracting {arch.relative_to(self.repo_path)}") 375 | 376 | tmp_dir = tempfile.mkdtemp(dir=self.scratch_space) 377 | tmp_path = Path(tmp_dir) 378 | 379 | # Extract to temp location 380 | if arch.name.endswith(".zip"): 381 | shutil.unpack_archive(str(arch), str(tmp_path)) 382 | else: 383 | with tarfile.open(arch, mode="r:*") as tf: 384 | tf.extractall(tmp_path) 385 | 386 | # Move contents into extract_root (flatten, don't preserve temp dir) 387 | extract_root.mkdir(parents=True, exist_ok=True) 388 | for item in tmp_path.iterdir(): 389 | shutil.move(str(item), extract_root / item.name) 390 | 391 | shutil.rmtree(tmp_path, ignore_errors=True) 392 | self._archives[extract_root] = arch 393 | 394 | def _repack_archives(self) -> None: 395 | for src_dir, arch in self._archives.items(): 396 | print(f"[*] Re-packing {arch.relative_to(self.repo_path)}") 397 | 398 | # Remove old archive 399 | arch.unlink(missing_ok=True) 400 | 401 | parent = arch.parent 402 | base_name = arch.name 403 | if base_name.endswith(".tar.gz"): 404 | base = arch.with_suffix("").with_suffix( 405 | "" 406 | ) # Remove .gz then .tar 407 | mode = "w:gz" 408 | archive_path = parent / f"{base.name}.tar.gz" 409 | elif base_name.endswith(".tgz"): 410 | base = arch.with_suffix("") # Remove .tgz 411 | mode = "w:gz" 412 | archive_path = parent / f"{base.name}.tgz" 413 | elif base_name.endswith(".tar"): 414 | base = arch.with_suffix("") 415 | mode = "w" 416 | archive_path = parent / f"{base.name}.tar" 417 | elif base_name.endswith(".zip"): 418 | base = arch.with_suffix("") 419 | archive_path = shutil.make_archive( 420 | str(base), "zip", root_dir=src_dir 421 | ) 422 | continue 423 | else: 424 | raise HarnessGeneratorError( 425 | f"Unsupported archive format: {arch}" 426 | ) 427 | 428 | with tarfile.open(archive_path, mode) as tf: 429 | for item in sorted(src_dir.rglob("*")): 430 | tf.add(item, arcname=item.relative_to(src_dir)) 431 | 432 | # ---- Codex interaction -------------------------------------------- # 433 | def _invoke_codex_for_harness(self) -> None: 434 | patcher = CodexHelper( 435 | repo_path=self.repo_path, 436 | ai_key_path=str(self.ai_key_path), 437 | copy_repo=False, 438 | codex_cli=self.codex_cli, 439 | codex_model=CODEX_ANALYSIS_MODEL, 440 | approval_mode=CODEX_APPROVAL_MODE, 441 | ) 442 | 443 | # High-level tasks for Codex 444 | # IMPROVEME: extend prompt for java support 445 | instructions = textwrap.dedent( 446 | f""" 447 | **Objective (high-value fuzz target)** 448 | Create a **new libFuzzer harness** for the **{self.project}** OSS-Fuzz project that 449 | 
exercises a *public* or *documented* API reachable with **user-supplied input** 450 | (e.g. files, packets, strings) and therefore has real-world security impact. 451 | 452 | ──────────────────────────────────────── 453 | **Target-selection rules** 454 | 455 | 1. **Start at the top**: pick the *highest-level* function that 456 | *directly* consumes attacker-controlled data. 457 | • Good examples: `exif_data_load()`, `freerdp_peer_context_new()`, 458 | `curl_url_set()`, `png_read_info()`. 459 | • **Avoid** low-level helpers (`*_parse_int()`, `*_read_field()` etc.) 460 | unless *no higher layer* validates input. 461 | 462 | 2. **Document reachability** 463 | Add a one-line comment in the harness explaining why the chosen API 464 | is reachable from untrusted input in real software (file upload, 465 | network packet, etc.). 466 | 467 | 3. **Minimal realistic setup** 468 | If the API needs a context/handle, initialise it exactly as a real 469 | app would (e.g. `exif_data = exif_data_new_from_file(data, size)`). 470 | Don't stub out internal structs—use official constructors. 471 | 472 | 4. **One API per harness** 473 | If multiple candidate APIs exist, pick the single best one that is 474 | *not already fuzzed* (check existing harnesses + binaries). 475 | 476 | 5. **ENSURE HARNESS USES THE LIBRARY CORRECTLY** 477 | Many false positives are the result of the generated harness code failing 478 | to exercise the library properly (passing a ptr instead of an int, etc.) 479 | Ensure all calls performed by the harness match the library signatures 480 | and use the library in the way it was intended to be used. Our goal is to 481 | only uncover bugs that are true positives with real world implications. 482 | 483 | ──────────────────────────────────────── 484 | **Implementation requirements** 485 | 486 | * Harness signature 487 | ```c++ 488 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); 489 | ```` 490 | 491 | * Keep everything in C/C++ (follow project style). 492 | * **Do not** remove or refactor existing code; just add the harness and 493 | tweak build scripts so it is compiled. 494 | * Place the harness source next to similar existing harnesses. 495 | 496 | Extracted archive directories (may be empty if none): 497 | {', '.join(str(p.relative_to(self.repo_path)) for p in self._archives) or 'None'} 498 | 499 | **NO** build/run commands—just write code + build recipe edits. 500 | When finished, write the path to the new harness into `./done` 501 | 502 | Notes: 503 | - The oss-fuzz project typically contains a Dockerfile, build.sh, and project.yaml. 504 | - The repo source is typically not included, but can be cloned to assist in analysis. 505 | - It may be specified in project.yaml as `main_repo`. It may be cloned as part of the docker build. 506 | - When you clone the repo source, you must clone it within your working directory (don't use /tmp) 507 | - Carefully analyze the existing build structure to fully understand what is needed to successfully include your new harness in the build. 508 | 509 | VERY IMPORTANT: You must clone the repo so that you can validate the function signatures of every library function you put in the new harness. 510 | You must ensure that the library is being used correctly to mitigate false-positive crashes caused by errors in the harness. 511 | 512 | This task is very important! Every bug we trigger will be responsibly disclosed to make the world a safer place. 513 | Have fun and do your very best! 
514 | """ 515 | ).strip() 516 | 517 | stdout = patcher.run_codex_command(instructions) 518 | if stdout is None: 519 | raise HarnessGeneratorError( 520 | "Codex produced no edits when adding harness." 521 | ) 522 | print( 523 | f"[*] Codex stdout (truncated):\n{stdout[:1200]}", 524 | ) 525 | 526 | def _invoke_codex_to_fix_build(self, build_stderr: str) -> None: 527 | patcher = CodexHelper( 528 | repo_path=self.repo_path, 529 | ai_key_path=str(self.ai_key_path), 530 | copy_repo=False, 531 | codex_cli=self.codex_cli, 532 | codex_model=CODEX_ANALYSIS_MODEL, 533 | approval_mode=CODEX_APPROVAL_MODE, 534 | ) 535 | instructions = [ 536 | "Compilation failed. Read the compiler output below and make only " 537 | "the minimal edits necessary to fix build-blocking errors. " 538 | "Do not add features or refactor unrelated code." 539 | "Do not execute any commands to build or run any fuzzers, just correct the build statically." 540 | ] 541 | stdout = patcher.run_codex_command( 542 | instructions, additional_context=build_stderr 543 | ) 544 | if stdout is None: 545 | raise HarnessGeneratorError("Codex failed to resolve build errors.") 546 | 547 | # ---- Run New Fuzzer ------------------------------------------------ # 548 | def _run_fuzzer( 549 | self, 550 | fuzzer_name: str, 551 | *, 552 | timeout_seconds: int = 600, 553 | engine: str = "libfuzzer", 554 | sanitizer: str | None = None, 555 | architecture: str = "x86_64", 556 | rss_limit_mb: int = 16_384, 557 | max_len: int = 1024, 558 | ) -> str: 559 | """ 560 | Run a single fuzzer and return its combined stdout + stderr. 561 | 562 | • Captures raw bytes → decodes with errors='backslashreplace' 563 | • If stderr is not empty, appends it under "=== STDERR ===" in log. 564 | • Prints the last ≈200 lines and writes the full log to fuzzer_run_.txt 565 | • Never raises on non-zero exit codes (crash/OOM/timeout are findings). 566 | """ 567 | helper = self.repo_path / "infra" / "helper.py" 568 | if not helper.is_file(): 569 | raise HarnessGeneratorError( 570 | "infra/helper.py not found - invalid checkout?" 571 | ) 572 | 573 | corpus_dir = ( 574 | self.repo_path 575 | / "build" 576 | / "out" 577 | / self.project 578 | / "corpus" 579 | / fuzzer_name 580 | ) 581 | corpus_dir.mkdir(parents=True, exist_ok=True) 582 | 583 | env = os.environ.copy() 584 | env.setdefault("RSS_LIMIT_MB", str(rss_limit_mb)) 585 | env.setdefault("TIMEOUT", "45") 586 | 587 | cmd = [ 588 | "python3", 589 | str(helper), 590 | "run_fuzzer", 591 | "--architecture", 592 | architecture, 593 | "--engine", 594 | engine, 595 | "--sanitizer", 596 | sanitizer or self.sanitizer, 597 | "--corpus-dir", 598 | str(corpus_dir), 599 | self.project, 600 | fuzzer_name, 601 | "--", 602 | f"-max_total_time={timeout_seconds}", 603 | f"-max_len={max_len}", 604 | "-print_final_stats=1", 605 | ] 606 | 607 | print(f"[*] ➜ {' '.join(cmd)}") 608 | proc = subprocess.Popen( 609 | cmd, 610 | cwd=self.repo_path, 611 | env=env, 612 | stdout=subprocess.PIPE, # raw bytes 613 | stderr=subprocess.PIPE, # keep stderr separate 614 | text=False, # important: capture bytes 615 | ) 616 | 617 | try: 618 | raw_stdout, raw_stderr = proc.communicate( 619 | timeout=timeout_seconds + 30 620 | ) 621 | except subprocess.TimeoutExpired: 622 | proc.kill() 623 | raw_stdout, raw_stderr = proc.communicate() 624 | print("[!] 
Fuzzer process exceeded hard timeout; killed.") 625 | except Exception: 626 | import traceback 627 | 628 | traceback.print_exc() 629 | raw_stdout = b"" 630 | raw_stderr = traceback.format_exc().encode() 631 | 632 | # Decode safely 633 | stdout_dec = raw_stdout.decode("utf-8", errors="backslashreplace") 634 | stderr_dec = raw_stderr.decode("utf-8", errors="backslashreplace") 635 | 636 | # Combine, adding labelled section if needed 637 | if stderr_dec.strip(): 638 | full_output = f"{stdout_dec}\n\n=== STDERR ===\n{stderr_dec}" 639 | else: 640 | full_output = stdout_dec 641 | 642 | # Normalise CRs 643 | full_output = full_output.replace("\r", "\n") 644 | 645 | # Persist full log 646 | log_path = self.repo_path / f"fuzzer_run_{uuid.uuid4().hex}.txt" 647 | with open(log_path, "w", encoding="utf-8") as fh: 648 | fh.write(full_output) 649 | 650 | # Pretty-print the last 200 lines 651 | tail_lines = full_output.splitlines()[-200:] 652 | print("\n".join(tail_lines)) 653 | print(f"\n[*] Full fuzzer log saved to: {log_path}") 654 | 655 | if proc.returncode != 0: 656 | print( 657 | f"[!] Fuzzer exited with rc={proc.returncode} " 658 | "(non-zero is expected for crash/timeout/OOM)." 659 | ) 660 | 661 | return full_output 662 | 663 | # ────────────────────── Crash-handling helpers ─────────────────────── # 664 | 665 | BUG_PREFIXES = ("crash", "oom", "timeout") 666 | 667 | def _find_bug_files(self) -> set[Path]: 668 | """Return a *set* of Paths matching crash/oom/timeout files for project.""" 669 | root = self.repo_path / "build" / "out" / self.project 670 | if not root.is_dir(): 671 | return set() 672 | return { 673 | p 674 | for p in root.rglob("*") 675 | if p.is_file() 676 | and any(p.name.startswith(pref) for pref in self.BUG_PREFIXES) 677 | } 678 | 679 | # ------------------------------------------------------------------ # 680 | def _reproduce_crash( 681 | self, fuzzer_name: str, crash_path: Path 682 | ) -> tuple[str, str]: 683 | """Run `helper.py reproduce` and persist output → crash_reproduction.log. 684 | 685 | Returns a tuple (full_log, command_line). 
686 | """ 687 | 688 | helper = self.repo_path / "infra" / "helper.py" 689 | if not helper.is_file(): 690 | raise HarnessGeneratorError( 691 | "infra/helper.py not found - cannot reproduce crash" 692 | ) 693 | 694 | cmd_list = [ 695 | "python3", 696 | str(helper), 697 | "reproduce", 698 | self.project, 699 | fuzzer_name, 700 | str(crash_path), 701 | ] 702 | 703 | cmd_str = " ".join(cmd_list) 704 | print(f"[*] ➜ {cmd_str} (reproducing crash)") 705 | 706 | proc = subprocess.run( 707 | cmd_list, 708 | cwd=self.repo_path, 709 | capture_output=True, 710 | text=True, 711 | env=os.environ.copy(), 712 | ) 713 | 714 | repro_output = proc.stdout + ( 715 | "\n=== STDERR ===\n" + proc.stderr if proc.stderr else "" 716 | ) 717 | 718 | # ── Strip ANSI colour / control codes for readability ────────── 719 | repro_output = self._strip_ansi(repro_output) 720 | 721 | # Build comment line with relative paths for readability 722 | try: 723 | crash_rel = crash_path.relative_to(self.repo_path) 724 | except ValueError: 725 | crash_rel = Path(crash_path).name 726 | helper_rel = helper.relative_to(self.repo_path) 727 | command_line = f"python {helper_rel} reproduce {self.project} {fuzzer_name} {crash_rel}" 728 | 729 | comment_line = f"# {command_line}\n" 730 | 731 | log_path = self.repo_path / "crash_reproduction.log" 732 | with open(log_path, "w", encoding="utf-8", errors="replace") as fh: 733 | fh.write(comment_line) 734 | fh.write(repro_output) 735 | 736 | print( 737 | f"[*] Crash reproduction log written to {log_path.relative_to(self.repo_path)}" 738 | ) 739 | 740 | full_log = comment_line + repro_output 741 | return full_log, command_line 742 | 743 | # ------------------------------------------------------------------ # 744 | def _hexdump(self, path: Path, limit_bytes: int = 512) -> str: 745 | """Return an xxd -g1 style hexdump (≤limit_bytes) of a file.""" 746 | try: 747 | return subprocess.check_output( 748 | [ 749 | "xxd", 750 | "-g1", 751 | "-l", 752 | str(limit_bytes), 753 | str(path), 754 | ], 755 | text=True, 756 | ) 757 | except Exception: 758 | data = path.read_bytes()[:limit_bytes] 759 | lines = [] 760 | for off in range(0, len(data), 16): 761 | chunk = data[off : off + 16] 762 | hex_bytes = " ".join(f"{b:02x}" for b in chunk) 763 | ascii = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk) 764 | lines.append(f"{off:08x}: {hex_bytes:<47} {ascii}") 765 | return "\n".join(lines) 766 | 767 | # ------------------------------------------------------------------ # 768 | _ANSI_ESCAPE_RE = re.compile( 769 | r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", re.MULTILINE 770 | ) 771 | 772 | @classmethod 773 | def _strip_ansi(cls, text: str) -> str: 774 | """Return *text* with any ANSI colour/control sequences removed.""" 775 | 776 | # A pre-compiled regex is used for efficiency as logs can be large. 777 | # The pattern aims to match the majority of common ANSI escape 778 | # sequences produced by oss-fuzz tooling (colour, cursor movement, 779 | # screen erasing etc.). If a sequence slips through it will simply 780 | # render as an innocuous control code in the markdown, which is still 781 | # preferable to the unreadable colour gibberish. 
782 | return cls._ANSI_ESCAPE_RE.sub("", text) 783 | 784 | # ------------------------------------------------------------------ # 785 | def _locate_harness_source(self, fuzzer_name: str) -> Path | None: 786 | """Locate the harness source file, primarily via the ./done marker.""" 787 | 788 | done_file = self.repo_path / "done" 789 | if done_file.is_file(): 790 | try: 791 | rel_path = ( 792 | done_file.read_text(encoding="utf-8", errors="replace") 793 | .splitlines()[0] 794 | .strip() 795 | ) 796 | if rel_path: 797 | abs_path = (self.repo_path / rel_path).resolve() 798 | if abs_path.is_file(): 799 | return abs_path 800 | except Exception: 801 | pass 802 | # First: look for file name containing fuzzer_name with typical C/C++ suffix 803 | exts = {".c", ".cc", ".cpp", ".cxx", ".h", ".hpp"} 804 | for p in self.repo_path.rglob("*"): 805 | if p.suffix.lower() in exts and fuzzer_name in p.name: 806 | return p 807 | 808 | # Fallback: any file containing LLVMFuzzerTestOneInput token 809 | for p in self.repo_path.rglob("*"): 810 | if p.suffix.lower() in exts: 811 | try: 812 | txt = p.read_text(encoding="utf-8", errors="ignore") 813 | except Exception: 814 | continue 815 | if "LLVMFuzzerTestOneInput" in txt: 816 | return p 817 | 818 | return None 819 | 820 | # ------------------------------------------------------------------ # 821 | def _generate_bug_report( 822 | self, 823 | fuzzer_name: str, 824 | crash_path: Path, 825 | reproducer_log: str, 826 | reproducer_cmd: str, 827 | ) -> None: 828 | """Invoke Codex to write crash_analysis.md at repo root.""" 829 | 830 | harness_path = self._locate_harness_source(fuzzer_name) 831 | harness_source = ( 832 | harness_path.read_text(encoding="utf-8", errors="replace") 833 | if harness_path and harness_path.is_file() 834 | else "*Harness source not found*" 835 | ) 836 | 837 | hexdump_text = self._hexdump(crash_path) 838 | 839 | # Build context block (text, not markdown) for Codex 840 | context_parts = [ 841 | "=== Reproducer Log ===\n", 842 | reproducer_log, 843 | "\n\n=== Harness Source ===\n", 844 | harness_source, 845 | "\n\n=== Crashing Input (hexdump) ===\n", 846 | hexdump_text, 847 | "\n", 848 | ] 849 | additional_context = "".join(context_parts) 850 | 851 | # ── Write crash_info.md ──────────────────────────────────────── 852 | def _md_safe(text: str) -> str: 853 | return text.replace("```", "```​") # no early fence close 854 | 855 | md_lines = [ 856 | "# Crash Info", 857 | "", 858 | "## Reproducer command", 859 | "```bash", 860 | reproducer_cmd, 861 | "```", 862 | "", 863 | "## Reproducer log", 864 | "```text", 865 | _md_safe(reproducer_log), 866 | "```", 867 | "", 868 | "## Harness source", 869 | "```c", 870 | _md_safe(harness_source), 871 | "```", 872 | "", 873 | "## Crashing input (hexdump)", 874 | "```text", 875 | hexdump_text, 876 | "```", 877 | "", 878 | ] 879 | 880 | (self.repo_path / "crash_info.md").write_text( 881 | "\n".join(md_lines), encoding="utf-8" 882 | ) 883 | print("[*] crash_info.md written") 884 | 885 | instructions = textwrap.dedent( 886 | """ 887 | You are an experienced security researcher. 888 | 889 | Using the context provided, write a **new file** called `crash_analysis.md` in the repository root with the following top-level sections: 890 | 891 | 1. Bug Type 892 | 2. Bug Summary 893 | 3. Bug Impact (real world reachability/exploitability/constraints) 894 | 4. How to Patch 895 | 896 | Requirements: 897 | • Provide concise yet complete analysis (markdown). 
898 | • If the bug could not be reproduced (the reproducer exited cleanly) then indicate this in your analysis. 899 | • These harnesses were *just generated*. Carefully consider whether the crash is due to a genuine bug in the target project or a mistake in the harness. 900 | If it is harness-induced, explicitly state this in your analysis and use **severity: None** in the *bug impact* section. 901 | Look out for harness mistakes like erroneous frees, misuse of the target library, incorrect function arguments / types, or anything else indicating this is not a genuine bug in the target library. 902 | For these cases, you must also include the sentinel "HARNESS ERROR" somewhere in your analysis. 903 | """ 904 | ).strip() 905 | 906 | print("[*] Calling Codex to generate crash_analysis.md …") 907 | 908 | patcher = CodexHelper( 909 | repo_path=self.repo_path, 910 | ai_key_path=str(self.ai_key_path), 911 | copy_repo=False, 912 | codex_cli=self.codex_cli, 913 | codex_model=CODEX_ANALYSIS_MODEL, 914 | approval_mode=CODEX_APPROVAL_MODE, 915 | ) 916 | 917 | stdout = patcher.run_codex_command( 918 | instructions, 919 | additional_context=additional_context, 920 | ) 921 | 922 | if stdout is None: 923 | print("[!] Codex did not produce crash_analysis.md") 924 | else: 925 | print( 926 | "[*] Codex generated crash_analysis.md (truncated output below):" 927 | ) 928 | print(stdout[:1000]) 929 | 930 | # ── Reproducer script generation ───────────────────────────── 931 | try: 932 | self._generate_reproducer_script() 933 | except HarnessGeneratorError as err: 934 | print(f"[!] Failed to generate crash_reproducer.sh: {err}") 935 | 936 | # ---- Seed corpus generation ------------------------------------ # 937 | def _invoke_codex_to_generate_seeds(self, fuzzer_name: str) -> None: 938 | """Ask Codex to create initial corpus seeds for the new harness.""" 939 | 940 | corpus_dir = ( 941 | self.repo_path 942 | / "build" 943 | / "out" 944 | / self.project 945 | / "corpus" 946 | / fuzzer_name 947 | ) 948 | corpus_dir.mkdir(parents=True, exist_ok=True) 949 | 950 | harness_path = self._locate_harness_source(fuzzer_name) 951 | if not harness_path or not harness_path.is_file(): 952 | raise HarnessGeneratorError( 953 | f"Unable to locate harness source for {fuzzer_name} when generating seeds" 954 | ) 955 | 956 | harness_source = harness_path.read_text( 957 | encoding="utf-8", errors="replace" 958 | ) 959 | 960 | instructions = textwrap.dedent( 961 | f""" 962 | The directory `{corpus_dir.relative_to(self.repo_path)}` is the **initial corpus** for the newly created libFuzzer harness `{fuzzer_name}`. 963 | 964 | You will receive the *full harness source code* as additional context. 965 | 966 | Task: create one or more **meaningful seed inputs** (at least one, up to five) and write them as **files** inside that corpus directory. 967 | 968 | Guidelines: 969 | • Inputs should be small yet exercise realistic code paths. 970 | • Prefer simple human-readable examples when possible; otherwise use `.bin` files. 971 | • Do **NOT** modify any existing source or build scripts. 972 | • Use appropriate file extensions if the target expects a specific format. 973 | • Binary content can be expressed via hex literals or base64 in the patch - whichever is most convenient. 974 | 975 | Write the files directly - no commentary - using the standard Codex patch instructions. 
976 | """ 977 | ).strip() 978 | 979 | patcher = CodexHelper( 980 | repo_path=self.repo_path, 981 | ai_key_path=str(self.ai_key_path), 982 | copy_repo=False, 983 | codex_cli=self.codex_cli, 984 | codex_model=CODEX_ANALYSIS_MODEL, 985 | approval_mode=CODEX_APPROVAL_MODE, 986 | ) 987 | 988 | stdout = patcher.run_codex_command( 989 | instructions, 990 | additional_context=harness_source, 991 | ) 992 | 993 | if stdout is None: 994 | raise HarnessGeneratorError("Codex did not generate any seed files") 995 | 996 | print("[*] Codex seed-generation output (truncated):") 997 | print(stdout[:800]) 998 | 999 | # ------------------------------------------------------------------ # 1000 | def _generate_reproducer_script(self) -> None: 1001 | """Invoke Codex to create crash_reproducer.sh after crash analysis.""" 1002 | 1003 | info_path = self.repo_path / "crash_info.md" 1004 | analysis_path = self.repo_path / "crash_analysis.md" 1005 | 1006 | if not info_path.is_file() or not analysis_path.is_file(): 1007 | raise HarnessGeneratorError( 1008 | "Required markdown files not found for reproducer script generation." 1009 | ) 1010 | 1011 | context_blob = ( 1012 | "=== crash_info.md ===\n" 1013 | + info_path.read_text(encoding="utf-8", errors="replace") 1014 | + "\n\n=== crash_analysis.md ===\n" 1015 | + analysis_path.read_text(encoding="utf-8", errors="replace") 1016 | ) 1017 | 1018 | instructions = textwrap.dedent( 1019 | """ 1020 | Using the context provided, create a robust, idempotent Bash script named `crash_reproducer.sh` in the repository root that demonstrates the vulnerability described. 1021 | 1022 | The script must: 1023 | • Install any required build/runtime dependencies non-interactively (e.g. apt-get -y, pip install) and skip if already present. 1024 | • Build the vulnerable project with AddressSanitizer enabled (or another memory sanitizer that will surface the bug). 1025 | • Fetch or construct the proof-of-concept input that triggers the crash - ideally the same data that appears in the fuzzing crash file, but adapted for a real-world invocation path (command-line tool, library API, etc.). 1026 | • Construct the proof-of-concept script so that it reproduces the harness bug, but do not call the harness directly. 1027 | • Apply reasonable execution limits (timeout, ulimit) so it never hangs. 1028 | • Exit with non-zero status if the bug is reproduced; otherwise exit 0. 1029 | • Contain clear comments for every major section. 1030 | 1031 | Notes: 1032 | • You can run `git status --porcelain` to discover which harness source file was added or modified. Use this knowledge to understand the target API. 1033 | • The script should work when executed from the repository root on a clean Ubuntu container. 1034 | • Only create `crash_reproducer.sh`. Do not modify existing files. 
1035 | """ 1036 | ).strip() 1037 | 1038 | patcher = CodexHelper( 1039 | repo_path=self.repo_path, 1040 | ai_key_path=str(self.ai_key_path), 1041 | copy_repo=False, 1042 | codex_cli=self.codex_cli, 1043 | codex_model=CODEX_ANALYSIS_MODEL, 1044 | approval_mode=CODEX_APPROVAL_MODE, 1045 | ) 1046 | 1047 | stdout = patcher.run_codex_command( 1048 | instructions, 1049 | additional_context=context_blob, 1050 | ) 1051 | 1052 | if stdout is None: 1053 | raise HarnessGeneratorError( 1054 | "Codex did not create crash_reproducer.sh" 1055 | ) 1056 | 1057 | print("[*] Codex reproduce script output (truncated):") 1058 | print(stdout[:800]) 1059 | 1060 | # ---- Smoke test ---------------------------------------------------- # 1061 | def _run_any_fuzzer_once(self, timeout: int = 60) -> None: 1062 | out_dir = self.repo_path / "build" / "out" / self.project 1063 | fuzzers = [ 1064 | p 1065 | for p in out_dir.iterdir() 1066 | if p.is_file() 1067 | and os.access(p, os.X_OK) 1068 | and not p.name.endswith(".dict") 1069 | ] 1070 | if not fuzzers: 1071 | print("[*] No fuzzer binaries found.") 1072 | return 1073 | fuzzer = fuzzers[0].name 1074 | print( 1075 | f"[*] Smoke-testing fuzzer {fuzzer} …", 1076 | ) 1077 | helper = self.repo_path / "infra" / "helper.py" 1078 | corpus = out_dir / "corpus" / fuzzer 1079 | corpus.mkdir(parents=True, exist_ok=True) 1080 | self._run_cmd( 1081 | [ 1082 | "python3", 1083 | str(helper), 1084 | "run_fuzzer", 1085 | "--engine", 1086 | "libfuzzer", 1087 | "--sanitizer", 1088 | self.sanitizer, 1089 | "--corpus-dir", 1090 | str(corpus), 1091 | self.project, 1092 | fuzzer, 1093 | "--", 1094 | f"-max_total_time={timeout}", 1095 | "-timeout=120", 1096 | "-print_final_stats=1", 1097 | ], 1098 | cwd=self.repo_path, 1099 | env=os.environ.copy(), 1100 | ) 1101 | 1102 | # ---- Shell helper -------------------------------------------------- # 1103 | def _run_cmd( 1104 | self, 1105 | cmd: Sequence[str], 1106 | *, 1107 | cwd: Path, 1108 | env: dict[str, str], 1109 | input: str | None = None, 1110 | ) -> None: 1111 | """Run a subprocess and raise HarnessGeneratorError on failure.""" 1112 | cmd_str = " ".join(cmd) 1113 | print(f"[*] ➜ {cmd_str}") 1114 | proc = subprocess.Popen( 1115 | cmd, 1116 | cwd=cwd, 1117 | env=env, 1118 | stdin=subprocess.PIPE if input else None, 1119 | stdout=subprocess.PIPE, 1120 | stderr=subprocess.PIPE, 1121 | text=True, 1122 | ) 1123 | try: 1124 | stdout, stderr = proc.communicate(input=input, timeout=7200) 1125 | except subprocess.TimeoutExpired: 1126 | proc.kill() 1127 | raise HarnessGeneratorError("Command timed out: " + " ".join(cmd)) 1128 | 1129 | if proc.returncode != 0: 1130 | print( 1131 | f"[*] Command failed (rc={proc.returncode})\nSTDOUT:\n{stdout}\n---\nSTDERR:\n{stderr}" 1132 | ) 1133 | raise HarnessGeneratorError(stderr) 1134 | 1135 | print(f"[*] Command succeeded. Truncated stdout:\n{stdout[:600]}") 1136 | 1137 | 1138 | # --------------------------------------------------------------------------- # 1139 | # CLI entry-point # 1140 | # --------------------------------------------------------------------------- # 1141 | if __name__ == "__main__": 1142 | parser = argparse.ArgumentParser( 1143 | description="Generate and integrate a new OSS-Fuzz harness with Codex." 1144 | ) 1145 | parser.add_argument( 1146 | "project_name", help="OSS-Fuzz project name (e.g. 
freerdp)" 1147 | ) 1148 | parser.add_argument( 1149 | "oss_fuzz_path", 1150 | type=Path, 1151 | default="./oss-fuzz", 1152 | help="Path to local oss-fuzz checkout (root directory)", 1153 | ) 1154 | parser.add_argument( 1155 | "ai_key_path", 1156 | type=Path, 1157 | default="./.env", 1158 | help="Path to file containing your OpenAI-compatible API key", 1159 | ) 1160 | 1161 | # Optional knobs 1162 | parser.add_argument( 1163 | "--sanitizer", 1164 | default=DEFAULT_SANITIZER, 1165 | help="Sanitizer to use when building fuzzers (default: address)", 1166 | ) 1167 | parser.add_argument( 1168 | "--codex-cli", 1169 | default="codex", 1170 | help="Executable name or path for the Codex CLI", 1171 | ) 1172 | parser.add_argument( 1173 | "--scratch-space", 1174 | type=Path, 1175 | help="Directory for temp working copies (defaults to /tmp)", 1176 | ) 1177 | parser.add_argument( 1178 | "--copy-repo", 1179 | action="store_true", 1180 | help="Work on a temporary copy of the oss-fuzz tree (safer, slower)", 1181 | ) 1182 | parser.add_argument( 1183 | "--no-build", 1184 | action="store_true", 1185 | help="Skip rebuilding image/fuzzers after adding the harness", 1186 | ) 1187 | parser.add_argument( 1188 | "--smoke", 1189 | action="store_true", 1190 | help="Run a 60-second smoke test with one fuzzer at the beginning", 1191 | ) 1192 | parser.add_argument( 1193 | "--max-retries", 1194 | type=int, 1195 | default=MAX_BUILD_RETRIES, 1196 | help=f"Maximum build-retry attempts (default: {MAX_BUILD_RETRIES})", 1197 | ) 1198 | 1199 | args = parser.parse_args() 1200 | load_dotenv(dotenv_path=os.path.expanduser(args.ai_key_path)) 1201 | 1202 | try: 1203 | hg = HarnessGenerator( 1204 | project_name=args.project_name, 1205 | oss_fuzz_path=args.oss_fuzz_path, 1206 | ai_key_path=args.ai_key_path, 1207 | sanitizer=args.sanitizer, 1208 | codex_cli=args.codex_cli, 1209 | scratch_space=args.scratch_space, 1210 | copy_repo=args.copy_repo, 1211 | ) 1212 | hg.generate_harness( 1213 | build=not args.no_build, 1214 | run_smoke=args.smoke, 1215 | max_iterations=args.max_retries, 1216 | ) 1217 | except HarnessGeneratorError as e: 1218 | print(f"[harness_generator] ERROR: {e}", file=sys.stderr) 1219 | sys.exit(1) 1220 | --------------------------------------------------------------------------------