├── .gitignore
├── assets
    ├── sherpa.jpg
    ├── logo_black.png
    ├── logo_white.png
    └── validated_bugs.png
├── harness_generator
    ├── yamls
    │   ├── leveldb.yaml
    │   └── c-projects.yaml
    ├── requirements.txt
    ├── src
    │   ├── __init__.py
    │   ├── codex_helper.py
    │   └── harness_generator.py
    ├── setup-env.sh
    ├── scripts
    │   ├── sort_jobs.py
    │   ├── summarize.py
    │   ├── gather_reports.py
    │   └── generate_reports.py
    ├── README.md
    └── batch_generate.py
├── LICENSE
├── Makefile
├── leveldb_writeup
    ├── artifacts
    │   ├── crash_analysis.md
    │   └── crash_info.md
    └── workflow.md
├── setup-env.sh
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .venv
3 | 
4 | jobs/


--------------------------------------------------------------------------------
/assets/sherpa.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/sherpa.jpg


--------------------------------------------------------------------------------
/assets/logo_black.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/logo_black.png


--------------------------------------------------------------------------------
/assets/logo_white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/logo_white.png


--------------------------------------------------------------------------------
/assets/validated_bugs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AIxCyberChallenge/sherpa/HEAD/assets/validated_bugs.png


--------------------------------------------------------------------------------
/harness_generator/yamls/leveldb.yaml:
--------------------------------------------------------------------------------
1 | projects:
2 |   - project_name: leveldb
3 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
4 |     fuzz_tooling_ref: master


--------------------------------------------------------------------------------
/harness_generator/requirements.txt:
--------------------------------------------------------------------------------
 1 | # Python dependencies for the harness-generation toolkit.
 2 | # -----------------------------------------------
 3 | # Versions are intentionally left open but pegged to reasonably
 4 | # recent releases that are available on PyPI.
 5 | #
 6 | # • GitPython      – git wrapper used by CodexHelper & batch scripts
 7 | # • PyYAML         – parsing of <targets>.yaml files
 8 | # • python-dotenv  – loading OPENAI_API_KEY from .env files
 9 | 
10 | PyYAML>=5.4
11 | GitPython>=3.1
12 | python-dotenv>=1.0
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Artificial Intelligence Cyber Challenge
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Makefile for sherpa project
 2 | # Usage:
 3 | #   make setup      # Set up Python venv and install dependencies
 4 | #   make clean      # Remove venv and __pycache__
 5 | #   make run-script SCRIPT=script_name.py # Run a script from harness_generator/scripts
 6 | 
 7 | VENV_DIR := .venv
 8 | JOBS_DIR := ./jobs
 9 | PYTHON := python3
10 | REQ_FILE := harness_generator/requirements.txt
11 | 
12 | setup:
13 | 	$(PYTHON) -m venv $(VENV_DIR)
14 | 	. $(VENV_DIR)/bin/activate && pip install --upgrade pip && pip install -r $(REQ_FILE)
15 | 
16 | clean:
17 | 	rm -rf $(VENV_DIR)
18 | 	rm -rf $(JOBS_DIR)
19 | 	find . -type d -name "__pycache__" -exec rm -rf {} +
20 | 
21 | run-script:
22 | 	. $(VENV_DIR)/bin/activate && python harness_generator/scripts/$(SCRIPT)
23 | 
24 | 
25 | leveldb:
26 | 	@if [ -z "$$OPENAI_API_KEY" ]; then \
27 | 	  echo "Error: OPENAI_API_KEY is not set. Please export your OpenAI API key before running make leveldb."; \
28 | 	  exit 1; \
29 | 	fi
30 | 	@docker info > /dev/null 2>&1 || (echo "Error: Docker is not running or not accessible. Please start Docker and try again." && exit 1)
31 | 	. $(VENV_DIR)/bin/activate && python harness_generator/batch_generate.py --targets harness_generator/yamls/leveldb.yaml
32 | 
33 | .PHONY: setup clean run-script leveldb
34 | 


--------------------------------------------------------------------------------
/leveldb_writeup/artifacts/crash_analysis.md:
--------------------------------------------------------------------------------
 1 | # Crash Analysis for fuzz_table_open Crash
 2 | 
 3 | ## 1. Bug Type
 4 | - Denial-of-Service (DoS) via unbounded memory allocation
 5 | 
 6 | ## 2. Bug Summary
 7 | Feeding arbitrary data as an SSTable file to `leveldb::Table::Open` can trigger an out-of-memory crash. The fuzzer input coincidentally contains the valid LevelDB table magic value, causing the parser to proceed. A malformed block handle is decoded with an extremely large `size` field, leading to a huge allocation request in `ReadBlock` and an AddressSanitizer OOM abort.
 8 | 
 9 | ## 3. Bug Impact (real world reachability/exploitability/constraints)
10 | - An attacker controlling SSTable input can cause the library to abort or consume excessive memory (denial-of-service).
11 | - Requires supplying a crafted `.sst` file; not exploitable via normal database operations unless untrusted SST files are loaded.
12 | - **severity:** Medium
13 | 
14 | ## 4. How to Patch
15 | - Validate decoded block handle fields before allocating memory:
16 |   - Ensure `offset + size` does not overflow and stays within the file bounds (`file_size`).
17 |   - Impose a reasonable maximum block size threshold or fail gracefully on suspicious values.
18 | - Return an error status from `Table::Open`/`ReadBlock` instead of proceeding to allocate if validation fails.


--------------------------------------------------------------------------------
/harness_generator/src/__init__.py:
--------------------------------------------------------------------------------
 1 | #────────────
 2 | #
 3 | # Copyright 2025 Artificial Intelligence Cyber Challenge
 4 | #
 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
 6 | # this software and associated documentation files (the “Software”), to deal in the 
 7 | # Software without restriction, including without limitation the rights to use, 
 8 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 9 | # Software, and to permit persons to whom the Software is furnished to do so, 
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included in all 
13 | # copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
16 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
17 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
18 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
19 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
20 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | #
22 | # ────────────
23 | 
24 | """Harness generation toolkit (stand-alone release)."""
25 | 
26 | from .codex_helper import CodexHelper  # re-export for convenience
27 | 
28 | __all__ = [
29 |     "CodexHelper",
30 | ]
31 | 


--------------------------------------------------------------------------------
/setup-env.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # setup-env.sh for sherpa project
 3 | # Usage: source ./setup-env.sh
 4 | 
 5 | VENV_DIR=".venv"
 6 | REQ_FILE="harness_generator/requirements.txt"
 7 | PYTHON_BIN="python3"
 8 | 
 9 | # Detect Apple Silicon and recommend Homebrew Python if needed
10 | if [[ $(uname -m) == "arm64" ]]; then
11 |     echo "Detected Apple Silicon (arm64)."
12 |     if ! command -v $PYTHON_BIN &> /dev/null; then
13 |         echo "$PYTHON_BIN not found. Please install Python 3 via Homebrew: brew install python3"
14 |         exit 1
15 |     fi
16 | fi
17 | 
18 | # Install codex binary if missing
19 | if ! command -v codex &> /dev/null; then
20 |     echo "codex not found. Installing..."
21 |     if [[ "$(uname)" == "Darwin" ]]; then
22 |         # macOS
23 |         if command -v brew &> /dev/null; then
24 |             brew install codex
25 |         else
26 |             echo "Homebrew not found. Please install Homebrew first: https://brew.sh"
27 |             exit 1
28 |         fi
29 |     elif [[ "$(uname)" == "Linux" ]]; then
30 |         # Linux
31 |         if command -v apt &> /dev/null; then
32 |             sudo apt update && sudo apt install -y codex
33 |         else
34 |             echo "apt not found. Please install codex manually."
35 |             exit 1
36 |         fi
37 |     else
38 |         echo "Unsupported OS. Please install codex manually."
39 |         exit 1
40 |     fi
41 | fi
42 | 
43 | # Create virtual environment if it doesn't exist
44 | if [ ! -d "$VENV_DIR" ]; then
45 |     echo "Creating virtual environment in $VENV_DIR..."
46 |     $PYTHON_BIN -m venv $VENV_DIR
47 | fi
48 | 
49 | # Activate virtual environment
50 | source $VENV_DIR/bin/activate
51 | 
52 | # Upgrade pip and install dependencies
53 | pip install --upgrade pip
54 | pip install -r $REQ_FILE
55 | 
56 | echo "Environment setup complete."
57 | 


--------------------------------------------------------------------------------
/leveldb_writeup/artifacts/crash_info.md:
--------------------------------------------------------------------------------
  1 | # Crash Info
  2 | 
  3 | ## Reproducer command
  4 | ```bash
  5 | python infra/helper.py reproduce leveldb fuzz_table_open build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3
  6 | ```
  7 | 
  8 | ## Reproducer log
  9 | ```text
 10 | # python infra/helper.py reproduce leveldb fuzz_table_open build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3
 11 | + FUZZER=fuzz_table_open
 12 | + shift
 13 | + '[' '!' -v TESTCASE ']'
 14 | + TESTCASE=/testcase
 15 | + '[' '!' -f /testcase ']'
 16 | + export RUN_FUZZER_MODE=interactive
 17 | + RUN_FUZZER_MODE=interactive
 18 | + export FUZZING_ENGINE=libfuzzer
 19 | + FUZZING_ENGINE=libfuzzer
 20 | + export SKIP_SEED_CORPUS=1
 21 | + SKIP_SEED_CORPUS=1
 22 | + run_fuzzer fuzz_table_open -runs=100 /testcase
 23 | vm.mmap_rnd_bits = 28
 24 | /out/fuzz_table_open -rss_limit_mb=2560 -timeout=25 -runs=100 /testcase < /dev/null
 25 | INFO: Running with entropic power schedule (0xFF, 100).
 26 | INFO: Seed: 1983861041
 27 | INFO: Loaded 1 modules   (1554 inline 8-bit counters): 1554 [0x5591fc773288, 0x5591fc77389a), 
 28 | INFO: Loaded 1 PC tables (1554 PCs): 1554 [0x5591fc7738a0,0x5591fc7799c0), 
 29 | /out/fuzz_table_open: Running 1 inputs 100 time(s) each.
 30 | Running: /testcase
 31 | ==14==WARNING: AddressSanitizer failed to allocate 0xffffffffffe0 bytes
 32 | =================================================================
 33 | ==14==ERROR: AddressSanitizer: out of memory: allocator is trying to allocate 0xffffffffffe0 bytes
 34 |     #0 0x5591fc64a44d in operator new[](unsigned long) /src/llvm-project/compiler-rt/lib/asan/asan_new_delete.cpp:89:3
 35 |     #1 0x5591fc66bac0 in leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*) /src/leveldb/table/format.cc:78:15
 36 |     #2 0x5591fc64d960 in leveldb::Table::Open(leveldb::Options const&, leveldb::RandomAccessFile*, unsigned long, leveldb::Table**) /src/leveldb/table/table.cc:61:7
 37 |     #3 0x5591fc64ca56 in LLVMFuzzerTestOneInput /src/leveldb/build/../fuzz_table_open.cc:29:7
 38 |     #4 0x5591fc5011a0 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:614:13
 39 |     #5 0x5591fc4ec415 in fuzzer::RunOneTest(fuzzer::Fuzzer*, char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:327:6
 40 |     #6 0x5591fc4f1eaf in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:862:9
 41 |     #7 0x5591fc51d152 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10
 42 |     #8 0x7eff7acd7082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 5792732f783158c66fb4f3756458ca24e46e827d)
 43 | 
 44 | DEDUP_TOKEN: operator new[](unsigned long)--leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*)--leveldb::Table::Open(leveldb::Options const&, leveldb::RandomAccessFile*, unsigned long, leveldb::Table**)
 45 | ==14==HINT: if you don't care about these errors you may set allocator_may_return_null=1
 46 | SUMMARY: AddressSanitizer: out-of-memory /src/leveldb/table/format.cc:78:15 in leveldb::ReadBlock(leveldb::RandomAccessFile*, leveldb::ReadOptions const&, leveldb::BlockHandle const&, leveldb::BlockContents*)
 47 | ==14==ABORTING
 48 | 
 49 | === STDERR ===
 50 | INFO:__main__:Running: docker run --privileged --shm-size=2g --platform linux/amd64 --rm -i -e HELPER=True -e ARCHITECTURE=x86_64 -v /home/ubuntu/workspace/friday/tools/generate-harnesses/output/leveldb_a9fcfd3fbc7d492282c714b6e0b46723/build/out/leveldb:/out -v /home/ubuntu/workspace/friday/tools/generate-harnesses/output/leveldb_a9fcfd3fbc7d492282c714b6e0b46723/build/out/leveldb/crash-eb318a4efc67ba9452a00fc1e8bec0fd4bc8ecd3:/testcase -t gcr.io/oss-fuzz-base/base-runner reproduce fuzz_table_open -runs=100.
 51 | 
 52 | ```
 53 | 
 54 | ## Harness source
 55 | ```c
 56 | #include <cstddef>
 57 | #include <cstdint>
 58 | #include <string>
 59 | #include <fstream>
 60 | #include <cstdio>
 61 | #include "leveldb/env.h"
 62 | #include "leveldb/table.h"
 63 | #include "leveldb/options.h"
 64 | #include "leveldb/status.h"
 65 | #include "leveldb/iterator.h"
 66 | 
 67 | // Table::Open reads SST files from disk (e.g. user-supplied .sst files).
 68 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
 69 |   const char* fname = "/tmp/fuzz_table_open.sst";
 70 |   std::ofstream out(fname, std::ios::binary);
 71 |   if (!out)
 72 |     return 0;
 73 |   out.write(reinterpret_cast<const char*>(data), size);
 74 |   out.close();
 75 | 
 76 |   leveldb::Options options;
 77 |   options.env = leveldb::Env::Default();
 78 |   leveldb::RandomAccessFile* file = nullptr;
 79 |   leveldb::Status s = options.env->NewRandomAccessFile(fname, &file);
 80 |   if (!s.ok())
 81 |     return 0;
 82 | 
 83 |   leveldb::Table* table = nullptr;
 84 |   s = leveldb::Table::Open(options, file, size, &table);
 85 |   if (!s.ok()) {
 86 |     delete file;
 87 |     return 0;
 88 |   }
 89 | 
 90 |   leveldb::Iterator* it = table->NewIterator(leveldb::ReadOptions());
 91 |   for (it->SeekToFirst(); it->Valid(); it->Next()) {}
 92 |   delete it;
 93 |   delete table;
 94 |   delete file;
 95 |   std::remove(fname);
 96 |   return 0;
 97 | }
 98 | ```
 99 | 
100 | ## Crashing input (hexdump)
101 | ```text
102 | 00000000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff  ................
103 | 00000010: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff  ................
104 | 00000020: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff fe  ................
105 | 00000030: ff 57 fb 80 8b 24 75 47 db ff ff ff ff ff 3f 3d  .W...$uG......?=
106 | 00000040: 00 00 ff ff ff ff ff ff ff ff f7 ff ff ff ff ff  ................
107 | 00000050: 02 80 a8 0e 80 8b ff ff ff ff ff 57 fb 80 8b 24  ...........W...$
108 | 00000060: 75 47 db                                         uG.
109 | 
110 | ```
111 | 


--------------------------------------------------------------------------------
/harness_generator/setup-env.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | # Simple environment bootstrapper for this repository.
  4 | #
  5 | # Features
  6 | #   • Confirms that basic system tools (git, docker) are available – offering to
  7 | #     install them via apt when missing.
  8 | #   • Optionally installs the libxapian-dev development headers.
  9 | #   • Optionally creates (or re-uses) a virtual-environment in ./.sherpa-venv.
 10 | #   • Installs Python dependencies from requirements.txt.
 11 | #
 12 | # The script is intentionally interactive so it can be re-run safely.
 13 | 
 14 | #────────────
 15 | #
 16 | # Copyright 2025 Artificial Intelligence Cyber Challenge
 17 | #
 18 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
 19 | # this software and associated documentation files (the “Software”), to deal in the 
 20 | # Software without restriction, including without limitation the rights to use, 
 21 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 22 | # Software, and to permit persons to whom the Software is furnished to do so, 
 23 | # subject to the following conditions:
 24 | #
 25 | # The above copyright notice and this permission notice shall be included in all 
 26 | # copies or substantial portions of the Software.
 27 | #
 28 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 29 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 30 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 31 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 32 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 33 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 34 | #
 35 | # ────────────
 36 | 
 37 | set -euo pipefail
 38 | 
 39 | #------------------------------------------------------------------------------
 40 | # Ensure required command-line tools are present
 41 | #------------------------------------------------------------------------------
 42 | 
 43 | # ensure_tool <command|""> <apt-package>
 44 | #   If <command> is non-empty the function checks for its presence in PATH. If
 45 | #   the command is absent (or <command> is empty) it falls back to verifying
 46 | #   that the corresponding apt package is installed via dpkg. When missing it
 47 | #   offers an interactive prompt to install it.
 48 | 
 49 | ensure_tool() {
 50 |     local cmd_name="$1"   # may be empty string for header-only libs like libxapian-dev
 51 |     local pkg_name="$2"
 52 | 
 53 |     local cmd_missing=false
 54 |     if [[ -n "$cmd_name" ]]; then
 55 |         if ! command -v "$cmd_name" >/dev/null 2>&1; then
 56 |             cmd_missing=true
 57 |         fi
 58 |     fi
 59 | 
 60 |     # If we didn't check a command or the command is present, still ensure the
 61 |     # package is installed (covers header-only deps).
 62 |     if dpkg -s "$pkg_name" >/dev/null 2>&1; then
 63 |         # Package present, and command (if any) is present—nothing to do.
 64 |         $cmd_missing && echo "'$cmd_name' will become available after reopening the shell." >&2
 65 |         return 0
 66 |     fi
 67 | 
 68 |     echo "The package '$pkg_name' is required${cmd_name:+ (provides '$cmd_name')}." >&2
 69 |     read -rp "Install '$pkg_name' now? [y/N]: " _install_pkg
 70 |     case "${_install_pkg:-N}" in
 71 |         [yY]|[yY][eE][sS])
 72 |             echo "Installing $pkg_name (requires sudo)..."
 73 |             sudo apt update && sudo apt install -y "$pkg_name"
 74 |             ;;
 75 |         *)
 76 |             echo "Cannot continue without '$pkg_name'. Please install it and re-run the script." >&2
 77 |             exit 1
 78 |             ;;
 79 |     esac
 80 | }
 81 | 
 82 | # Verify core dependencies
 83 | ensure_tool git git
 84 | ensure_tool docker docker.io
 85 | ensure_tool "" libxapian-dev
 86 | 
 87 | # The repository relies on the "codex" command-line tool.
 88 | # Detect Codex – offer instructions for installing when missing.
 89 | if ! command -v codex >/dev/null 2>&1; then
 90 |     echo "Codex CLI not detected in PATH. It is required for harness generation."
 91 |     echo "Follow the instructions in the Codex CLI repository for installation: https://github.com/openai/codex"
 92 | fi
 93 | 
 94 | # libxapian-dev is handled by ensure_tool above.
 95 | 
 96 | PROJECT_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 97 | cd "$PROJECT_ROOT"
 98 | 
 99 | # Detect python executable
100 | detect_python() {
101 |     if command -v python3 > /dev/null 2>&1; then
102 |         echo python3
103 |     elif command -v python > /dev/null 2>&1; then
104 |         echo python
105 |     else
106 |         echo "Error: Python interpreter not found in PATH." >&2
107 |         exit 1
108 |     fi
109 | }
110 | 
111 | PYTHON_BIN="$(detect_python)"
112 | 
113 | VENV_DIR="${PROJECT_ROOT}/.sherpa-venv"
114 | 
115 | #------------------------------------------------------------------------------
116 | # Virtual-environment handling
117 | #------------------------------------------------------------------------------
118 | 
119 | activate_venv=false
120 | 
121 | if [[ -d "$VENV_DIR" ]]; then
122 |     echo "Found existing virtual environment at $VENV_DIR"
123 |     activate_venv=true
124 | else
125 |     read -rp "No virtual environment found. Create one at ./.sherpa-venv? [y/N]: " _create
126 |     case "${_create:-N}" in
127 |         [yY]|[yY][eE][sS])
128 |             echo "Creating virtual environment..."
129 |             "$PYTHON_BIN" -m venv "$VENV_DIR"
130 |             activate_venv=true
131 |             ;;
132 |         *)
133 |             echo "Proceeding without a dedicated virtual environment. Ensure you have the right permissions."
134 |             ;;
135 |     esac
136 | fi
137 | 
138 | # Determine pip invocation (always via python -m pip to avoid PATH issues)
139 | 
140 | if $activate_venv; then
141 |     source "$VENV_DIR/bin/activate"
142 | fi
143 | 
144 | # After potential activation re-detect python so it points to venv interpreter
145 | PYTHON_BIN="$(detect_python)"
146 | 
147 | PIP_CMD=("$PYTHON_BIN" -m pip)
148 | 
149 | #------------------------------------------------------------------------------
150 | # Requirements installation
151 | #------------------------------------------------------------------------------
152 | 
153 | if [[ -f "$PROJECT_ROOT/requirements.txt" ]]; then
154 |     echo "Installing dependencies from requirements.txt..."
155 |     "${PIP_CMD[@]}" install --upgrade -r "$PROJECT_ROOT/requirements.txt"
156 | else
157 |     echo "requirements.txt not found – skipping dependency installation." >&2
158 | fi
159 | 
160 | echo && echo "Environment setup complete. Ready for harness generation! 🚀"
161 | 
162 | if $activate_venv && [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
163 |     echo -e "\nExecute \`source ./.sherpa-venv/bin/activate\` to enter the virtual environment."
164 | fi
165 | 


--------------------------------------------------------------------------------
/harness_generator/scripts/sort_jobs.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | #────────────
  3 | #
  4 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  5 | #
  6 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  7 | # this software and associated documentation files (the “Software”), to deal in the 
  8 | # Software without restriction, including without limitation the rights to use, 
  9 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 10 | # Software, and to permit persons to whom the Software is furnished to do so, 
 11 | # subject to the following conditions:
 12 | #
 13 | # The above copyright notice and this permission notice shall be included in all 
 14 | # copies or substantial portions of the Software.
 15 | #
 16 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 17 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 18 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 19 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 20 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 21 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 22 | #
 23 | # ────────────
 24 | """
 25 | sort_jobs.py
 26 | ────────────
 27 | Classify and move harness-run job directories into three buckets:
 28 | 
 29 |     • crashes          - at least one crash/OOM/timeout file produced **and**
 30 |                         the crash *does not* appear to be harness-induced.
 31 |     • false_positives  - crash_analysis.md contains the marker *"HARNESS ERROR"*.
 32 |     • no_crashes       - build/out/** contains no crash, oom or timeout files.
 33 | 
 34 | The script replaces the previous trio of helper utilities
 35 | (*sort_crashes.py*, *sort_false_positives.py*, *sort_non_crashing.py*) with a
 36 | single, more ergonomic command.
 37 | 
 38 | Usage examples
 39 | ──────────────
 40 |     # Use defaults (./jobs  →  ./sorted)
 41 |     ./sort_jobs.py
 42 | 
 43 |     # Custom locations
 44 |     ./sort_jobs.py --input batch_runs --output triaged
 45 | 
 46 | Directory layout
 47 | ────────────────
 48 | All job directories directly under *input* are inspected.  They are **moved**
 49 | to one of the following sub-directories inside *output* (created if absent):
 50 | 
 51 |     sorted/
 52 |         crashes/
 53 |         false_positives/
 54 |         no_crashes/
 55 | 
 56 | If a target directory already exists a numeric suffix ("_1", "_2", …) is
 57 | automatically appended to avoid overwriting previous runs.
 58 | """
 59 | 
 60 | from __future__ import annotations
 61 | 
 62 | import argparse
 63 | import shutil
 64 | import sys
 65 | from pathlib import Path
 66 | from typing import Iterable, List, Tuple
 67 | 
 68 | 
 69 | # ---------------------------------------------------------------------------
 70 | # Helper functions
 71 | # ---------------------------------------------------------------------------
 72 | 
 73 | 
 74 | def _unique_dest(dest_root: Path, name: str) -> Path:
 75 |     """Return a unique destination path inside *dest_root* (adds _N if needed)."""
 76 | 
 77 |     candidate = dest_root / name
 78 |     idx = 1
 79 |     while candidate.exists():
 80 |         candidate = dest_root / f"{name}_{idx}"
 81 |         idx += 1
 82 |     return candidate
 83 | 
 84 | 
 85 | def _list_matching(root: Path, prefixes: Iterable[str]) -> List[Path]:
 86 |     """Return immediate children of *root* whose names start with any prefix."""
 87 | 
 88 |     return [
 89 |         p
 90 |         for p in root.glob("*")
 91 |         if p.is_file() and any(p.name.startswith(pre) for pre in prefixes)
 92 |     ]
 93 | 
 94 | 
 95 | def _detect_bug_files(run_dir: Path) -> bool:
 96 |     """Return *True* if the run directory contains any crash/oom/timeout files."""
 97 | 
 98 |     build_out_root = run_dir / "build" / "out"
 99 | 
100 |     # There should be exactly one project sub-directory under build/out/<project>
101 |     subdirs = (
102 |         [d for d in build_out_root.iterdir() if d.is_dir()]
103 |         if build_out_root.is_dir()
104 |         else []
105 |     )
106 | 
107 |     if len(subdirs) != 1:
108 |         return False
109 | 
110 |     project_out = subdirs[0]
111 |     bug_files = _list_matching(project_out, ("crash", "oom", "timeout"))
112 |     return bool(bug_files)
113 | 
114 | 
115 | def _has_harness_error(run_dir: Path) -> bool:
116 |     """Return *True* if crash_analysis.md mentions a harness error marker."""
117 | 
118 |     analysis = run_dir / "crash_analysis.md"
119 |     if not analysis.is_file():
120 |         return False
121 | 
122 |     try:
123 |         content = analysis.read_text(encoding="utf-8", errors="replace")
124 |     except Exception:
125 |         return False
126 | 
127 |     return "harness error" in content.lower()
128 | 
129 | 
130 | def classify(run_dir: Path) -> str:
131 |     """Return the classification label for *run_dir* (crashes/false_positives/no_crashes)."""
132 | 
133 |     # False positives have crash docs *and* the harness error marker.
134 |     if _has_harness_error(run_dir):
135 |         return "false_positives"
136 | 
137 |     if _detect_bug_files(run_dir):
138 |         return "crashes"
139 | 
140 |     return "no_crashes"
141 | 
142 | 
143 | def sort_jobs(src_root: Path, dst_root: Path) -> Tuple[int, int, int]:
144 |     """Move job directories from *src_root* into bucketed sub-directories under *dst_root*.
145 | 
146 |     Returns a tuple (crashes, false_positives, no_crashes) with the number of
147 |     directories moved into each bucket.
148 |     """
149 | 
150 |     if not src_root.is_dir():
151 |         sys.exit(f"Input directory not found: {src_root}")
152 | 
153 |     # Ensure bucket directories exist.
154 |     crashes_dir = dst_root / "crashes"
155 |     fp_dir = dst_root / "false_positives"
156 |     nc_dir = dst_root / "no_crashes"
157 | 
158 |     for d in (crashes_dir, fp_dir, nc_dir):
159 |         d.mkdir(parents=True, exist_ok=True)
160 | 
161 |     counts = {"crashes": 0, "false_positives": 0, "no_crashes": 0}
162 | 
163 |     for run_dir in sorted(src_root.iterdir()):
164 |         if not run_dir.is_dir():
165 |             continue
166 | 
167 |         label = classify(run_dir)
168 | 
169 |         dest_root = {
170 |             "crashes": crashes_dir,
171 |             "false_positives": fp_dir,
172 |             "no_crashes": nc_dir,
173 |         }[label]
174 | 
175 |         dest = _unique_dest(dest_root, run_dir.name)
176 |         print(f"[+] {run_dir.name}  →  {label}/{dest.name}")
177 |         shutil.move(str(run_dir), dest)
178 |         counts[label] += 1
179 | 
180 |     return counts["crashes"], counts["false_positives"], counts["no_crashes"]
181 | 
182 | 
183 | # ---------------------------------------------------------------------------
184 | # CLI
185 | # ---------------------------------------------------------------------------
186 | 
187 | 
188 | def main() -> None:
189 |     ap = argparse.ArgumentParser(
190 |         description="Sort job run directories into crashes/false_positives/no_crashes.",
191 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
192 |     )
193 | 
194 |     ap.add_argument(
195 |         "--input",
196 |         type=Path,
197 |         default=Path("./jobs"),
198 |         help="Directory produced by batch_generate.py (default: ./jobs)",
199 |     )
200 |     ap.add_argument(
201 |         "--output",
202 |         type=Path,
203 |         default=Path("./sorted"),
204 |         help="Destination root (buckets will be created here).",
205 |     )
206 | 
207 |     args = ap.parse_args()
208 | 
209 |     src = args.input.resolve()
210 |     dst = args.output.resolve()
211 | 
212 |     crashes, fps, ncs = sort_jobs(src, dst)
213 | 
214 |     print(
215 |         f"\nFinished.  Crashes: {crashes},  False-positives: {fps},  No-crash: {ncs}."
216 |     )
217 | 
218 | 
219 | if __name__ == "__main__":
220 |     main()
221 | 


--------------------------------------------------------------------------------
/harness_generator/README.md:
--------------------------------------------------------------------------------
  1 | # OSS-Fuzz Harness Generation Toolkit
  2 | 
  3 | The **Harness Generation Toolkit** automates the entire workflow of adding
  4 | new *libFuzzer* harnesses to existing [OSS-Fuzz] projects, executing the
  5 | resulting fuzzers and producing polished vulnerability reports when crashes
  6 | are identified.
  7 | 
  8 | ---
  9 | 
 10 | ## Contents
 11 | 
 12 | ```
 13 | harness-generator/
 14 | ├── batch_generate.py          # batch driver (multiple targets)
 15 | ├── src/                       # Python package with core logic
 16 | │   ├── codex_helper.py        # Codex CLI wrapper (sentinel + retry logic)
 17 | │   └── harness_generator.py   # single-project orchestrator
 18 | └── scripts/                   # triage & reporting utilities
 19 |     ├── sort_jobs.py           # classify jobs → ./sorted/[buckets]
 20 |     ├── summarize.py           # Markdown summary of findings (no LLM usage)
 21 |     ├── generate_reports.py    # create disclosure-style bug_report.md
 22 |     └── gather_reports.py      # collect final artifacts into one folder
 23 | └── yamls/                     # sample target lists consumed by batch_generate.py
 24 | ```
 25 | 
 26 | ---
 27 | 
 28 | ## 1. Core Workflow Overview
 29 | 
 30 | For **day-to-day usage** you will typically launch *batch_generate.py* – it
 31 | drives the end-to-end process and drops every run into `./jobs/`.
 32 | 
 33 | ```bash
 34 | # Example: fuzz 40 C projects, eight rounds each, using 32 Codex workers
 35 | python batch_generate.py --targets ./yamls/c-projects.yaml \
 36 |                        --threads 32 --rounds 8
 37 | ```
 38 | 
 39 | Behind the scenes *batch_generate.py* clones the target repository, prunes
 40 | unrelated project folders under `oss-fuzz/projects/`, then invokes
 41 | `harness_generator.py` one or more times (**rounds**) for that project.  All
 42 | stdout/stderr is tee’d to `harness_round_<n>.log` so nothing is lost if the
 43 | main process is interrupted.
 44 | 
 45 | `harness_generator.py` itself encapsulates the following high-level steps:
 46 | 
 47 | 1. **Baseline build** – build the project’s Docker image & existing fuzzers
 48 |    (via `infra/helper.py`) to record the current binary set.
 49 | 2. **Archive extraction** – unpack any source bundles (tar/zip) so Codex can
 50 |    edit the real files.
 51 | 3. **Harness creation** – Codex is instructed to add one new
 52 |    `LLVMFuzzerTestOneInput` harness and adjust build scripts accordingly.
 53 | 4. **Re-package archives** – re-create any bundles touched by Codex.
 54 | 5. **Rebuild with retries** – rebuild image & fuzzers; compiler errors are
 55 |    forwarded to Codex for minimal fixes (configurable retry count).
 56 | 6. **Seed corpus** – before each *new* fuzzer is executed, Codex populates a
 57 |    seed corpus directory with meaningful inputs.
 58 | 7. **Fuzzer execution** – every new fuzzer is run; crash / OOM / timeout
 59 |    artifacts are detected and logged.
 60 | 8. **Crash analysis** – the first crash is reproduced; the harness source,
 61 |    reproducer log and hexdump are combined into *crash_info.md*.
 62 |    Codex then writes *crash_analysis.md* explaining root cause, impact and
 63 |    patch guidance.  Finally a `crash_reproducer.sh` PoC script is authored.
 64 | 
 65 | All Codex interactions are handled by **CodexHelper**.  It runs the Codex CLI
 66 | in a pseudo-terminal, watches for a sentinel file (`./done`), retries on
 67 | transient errors, and only returns once a *git diff* confirms that edits were
 68 | made.
 69 | 
 70 | ### Running a single project
 71 | 
 72 | ```bash
 73 | python -m src.harness_generator <project> <path/to/oss-fuzz/checkout> <key.env> \
 74 |        --sanitizer address --codex-cli codex --max-retries 3
 75 | ```
 76 | ---
 77 | 
 78 | ## 2. Batch Generation
 79 | 
 80 | `batch_generate.py` reads a YAML file whose `projects:` list describes
 81 | multiple targets (name + fuzz-tooling repo URL + git ref).  For every entry
 82 | it clones the repository into **./jobs/**`<project>_<uuid>` and invokes
 83 | `harness_generator.py` *n* times ("rounds").  All stdout/stderr is tee’d to
 84 | `harness_round_<n>.log` inside the job directory.
 85 | 
 86 | The default output tree therefore looks like:
 87 | 
 88 | ```
 89 | jobs/
 90 |     libpng_16f7f21a/
 91 |         crash_analysis.md
 92 |         crash_info.md
 93 |         ...
 94 |     freetype2_51c9ea11/
 95 |         ...
 96 | ```
 97 | 
 98 | ---
 99 | 
100 | ## 3. Triage & Reporting Utilities (scripts/)
101 | 
102 | | Script | Purpose |
103 | |--------|---------|
104 | | **sort_jobs.py** | Move each job directory from `./jobs` into `./sorted/<bucket>`:<br>• `crashes/` – real crash files present, *no* `HARNESS ERROR` marker.<br>• `false_positives/` – `HARNESS ERROR` appears in *crash_analysis.md*.<br>• `no_crashes/` – no crash/oom/timeout produced. |
105 | | **generate_reports.py** | For every job that has *crash_info.md* **and** *crash_analysis.md*, ask Codex to create a polished `bug_report.md` following the embedded disclosure template. |
106 | | **gather_reports.py** | Copy `{crash_info,crash_analysis,bug_report}.md` (+ optional PoC scripts) for each job into a flat structure under `./sorted/reports/` for easy export. |
107 | | **summarize.py** | Build a Markdown overview of all jobs (counts, per-project sections embedding analysis & info). |
108 | 
109 | All helper CLIs expose `--help` with full documentation; defaults are chosen
110 | so running them in order without arguments *just works*:
111 | 
112 | ```
113 | # 1. Sort raw jobs into buckets
114 | python scripts/sort_jobs.py
115 | 
116 | # 2. Generate bug_report.md for each real crash
117 | python scripts/generate_reports.py --input ./sorted/crashes
118 | 
119 | # 3. Collect artifacts for disclosure upload
120 | python scripts/gather_reports.py --input ./sorted/crashes --output ./sorted/reports
121 | 
122 | # 4. Produce a human-readable summary
123 | python scripts/summarize.py --input ./sorted/crashes > triage_summary.md
124 | ```
125 | 
126 | ---
127 | 
128 | ## 4. Installation & Requirements
129 | 
130 | 1. **Provide an API key** – either export it directly:
131 | 
132 |    ```bash
133 |    export OPENAI_API_KEY="sk-your-key"
134 |    ```
135 | 
136 |    or create a `.env` file (anywhere) with
137 | 
138 |    ```ini
139 |    OPENAI_API_KEY=sk-your-key
140 |    ```
141 | 
142 |    and pass the path via `--ai-key-path`.
143 | 
144 | 2. **System packages** – Docker, git, clang/llvm, etc. as required by
145 |    OSS-Fuzz’s `infra/helper.py` build process.
146 | 
147 | 
148 | ### Codex CLI
149 | 
150 | The repository relies on the **Codex CLI**.  `setup-env.sh` will detect its
151 | absence and offer to build & install it automatically (requires `go` and
152 | `sudo`).  If you prefer manual installation:
153 | 
154 | ```bash
155 | npm install -g @openai/codex
156 | ```
157 | 
158 | ### Python environment
159 | 
160 | 1. Create & activate a virtual environment (recommended):
161 | 
162 |    ```bash
163 |    python3 -m venv .venv
164 |    source .venv/bin/activate
165 |    ```
166 | 
167 | 2. Install the required Python packages:
168 | 
169 |    ```bash
170 |    pip install -r requirements.txt
171 |    ```
172 | 
173 |    The toolkit depends on only three third-party libraries – *GitPython*,
174 |    *PyYAML* and *python-dotenv*. They are listed in **requirements.txt** so
175 |    the above command resolves everything in one go.
176 | 
177 |    **Note:** The codebase uses modern type-hinting features introduced in
178 |    Python 3.9 – please make sure you run it on Python ≥ 3.9.
179 | 
180 | 3. Ensure the **git** command-line tool itself is present.  Several modules
181 |    shell out to `git` for repository operations; missing it will result in
182 |    runtime errors such as `FileNotFoundError: [Errno 2] No such file or directory: 'git'`.
183 | 
184 | Other prerequisites
185 | -------------------
186 | 
187 | * Docker + OSS-Fuzz build dependencies
188 | * Codex CLI in `$PATH` (or specify via `--codex-cli`)
189 | * OpenAI-compatible API key (environment variable **OPENAI_API_KEY** or a
190 |   path passed with `--ai-key-path`)
191 | 


--------------------------------------------------------------------------------
/harness_generator/scripts/summarize.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #────────────
  4 | #
  5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  6 | #
  7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  8 | # this software and associated documentation files (the “Software”), to deal in the 
  9 | # Software without restriction, including without limitation the rights to use, 
 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 11 | # Software, and to permit persons to whom the Software is furnished to do so, 
 12 | # subject to the following conditions:
 13 | #
 14 | # The above copyright notice and this permission notice shall be included in all 
 15 | # copies or substantial portions of the Software.
 16 | #
 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | #
 24 | # ────────────
 25 | 
 26 | """
 27 | summarize.py
 28 | ────────────
 29 | 
 30 | Generate a Markdown overview of Codex harness runs stored in an *output*
 31 | directory (default: **./jobs**).
 32 | 
 33 | The report contains:
 34 | 
 35 | • Total run directories processed and count of *unique* OSS-Fuzz projects.  
 36 | • Counts of run directories that include `crash_analysis.md`, `crash_info.md`,
 37 |   and those flagged as **false positives** (i.e. `crash_analysis.md` contains
 38 |   the string *“HARNESS ERROR”*).  
 39 | • **Only** projects whose crashes are **not** false positives get a section
 40 |   embedding:
 41 |     - Full path to every *real* crashing run directory.  
 42 |     - Contents of `crash_analysis.md` and `crash_info.md`.
 43 | 
 44 | False-positive runs are tallied but *omitted* from the detailed sections.
 45 | 
 46 | Usage examples
 47 | --------------
 48 |     # Print report to stdout
 49 |     ./summarize.py
 50 | 
 51 |     # Custom output root and write to file
 52 |     ./summarize.py --output /tmp/my_runs --report triage_summary.md
 53 | """
 54 | 
 55 | from __future__ import annotations
 56 | 
 57 | import argparse
 58 | import re
 59 | from pathlib import Path
 60 | from typing import Dict, List, Tuple
 61 | 
 62 | 
 63 | # ────────────────────────── helpers ──────────────────────────
 64 | 
 65 | 
 66 | def _project_name(run_dir: Path) -> str:
 67 |     """Best-effort project name inference from directory layout."""
 68 |     build_out = run_dir / "build" / "out"
 69 |     if build_out.is_dir():
 70 |         subs = [d.name for d in build_out.iterdir() if d.is_dir()]
 71 |         if len(subs) == 1:
 72 |             return subs[0]
 73 |     parts = run_dir.name.rsplit("_", 1)
 74 |     return parts[0] if len(parts) == 2 else run_dir.name
 75 | 
 76 | 
 77 | def _safe_code(text: str) -> str:
 78 |     """Prevent premature closing of code fences in embedded markdown."""
 79 |     return text.replace("```", "```​")
 80 | 
 81 | 
 82 | def _is_false_positive(analysis_path: Path) -> bool:
 83 |     """Return True if crash_analysis.md contains 'HARNESS ERROR' (case-insensitive)."""
 84 |     if not analysis_path.is_file():
 85 |         return False
 86 |     return bool(re.search(r"harness\s+error", analysis_path.read_text(errors="ignore"), re.I))
 87 | 
 88 | 
 89 | # ───────────────────────── summariser ─────────────────────────
 90 | 
 91 | 
 92 | def build_summary(output_root: Path) -> str:
 93 |     run_dirs = [d for d in output_root.iterdir() if d.is_dir()]
 94 | 
 95 |     total_runs = len(run_dirs)
 96 |     unique_projects = {_project_name(d) for d in run_dirs}
 97 | 
 98 |     info_total = 0
 99 |     analysis_total = 0
100 |     fp_total = 0  # false positives
101 | 
102 |     # Stores (run_dir, is_false_positive)
103 |     project_runs: Dict[str, List[Tuple[Path, bool]]] = {}
104 | 
105 |     for run_dir in run_dirs:
106 |         analysis_path = run_dir / "crash_analysis.md"
107 |         info_path = run_dir / "crash_info.md"
108 | 
109 |         has_info = info_path.is_file()
110 |         has_analysis = analysis_path.is_file()
111 |         is_fp = _is_false_positive(analysis_path)
112 | 
113 |         if has_info or has_analysis:
114 |             proj = _project_name(run_dir)
115 |             project_runs.setdefault(proj, []).append((run_dir, is_fp))
116 | 
117 |         if has_info:
118 |             info_total += 1
119 |         if has_analysis:
120 |             analysis_total += 1
121 |         if is_fp:
122 |             fp_total += 1
123 | 
124 |     # ───────────────────── build markdown ─────────────────────
125 |     md_lines: List[str] = [
126 |         "# Codex Harness Run Summary",
127 |         f"Scan directory: {output_root}",
128 |         "",
129 |         "## Totals",
130 |         f"- Run directories scanned: {total_runs}",
131 |         f"- Unique projects: {len(unique_projects)}",
132 |         f"- Directories with crash_analysis.md: {analysis_total}",
133 |         f"- Directories with crash_info.md:    {info_total}",
134 |         f"- **False positives (HARNESS ERROR): {fp_total}**",
135 |         "",
136 |     ]
137 | 
138 |     # Only include detailed sections for *real* crashes
139 |     real_project_sections_written = False
140 | 
141 |     for proj, runs in sorted(project_runs.items()):
142 |         # Filter out false-positive runs
143 |         real_runs = [r for r, is_fp in runs if not is_fp]
144 |         if not real_runs:
145 |             continue  # nothing real to show for this project
146 | 
147 |         real_project_sections_written = True
148 |         md_lines.extend([f"## {proj}", ""])
149 | 
150 |         for run_dir in real_runs:
151 |             md_lines.append(f"### {run_dir}")
152 | 
153 |             # ---- Crash Analysis --------------------------------------
154 |             analysis_path = run_dir / "crash_analysis.md"
155 |             if analysis_path.is_file():
156 |                 md_lines.extend(
157 |                     [
158 |                         "#### Crash Analysis",
159 |                         "```markdown",
160 |                         _safe_code(
161 |                             analysis_path.read_text(
162 |                                 encoding="utf-8", errors="replace"
163 |                             )
164 |                         ),
165 |                         "```",
166 |                         "",
167 |                     ]
168 |                 )
169 | 
170 |             # ---- Crash Info -----------------------------------------
171 |             info_path = run_dir / "crash_info.md"
172 |             if info_path.is_file():
173 |                 md_lines.extend(
174 |                     [
175 |                         "#### Crash Info",
176 |                         "```markdown",
177 |                         _safe_code(
178 |                             info_path.read_text(
179 |                                 encoding="utf-8", errors="replace"
180 |                             )
181 |                         ),
182 |                         "```",
183 |                         "",
184 |                     ]
185 |                 )
186 | 
187 |     if not real_project_sections_written:
188 |         md_lines.append("_All detected crashes are marked as false positives._\n")
189 | 
190 |     return "\n".join(md_lines).rstrip() + "\n"
191 | 
192 | 
193 | def main() -> None:
194 |     ap = argparse.ArgumentParser(
195 |         description="Produce a Markdown summary of harness run results.",
196 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
197 |     )
198 |     ap.add_argument(
199 |         "--input",
200 |         type=Path,
201 |         default=Path("./jobs"),
202 |         help="Root directory containing harness run directories.",
203 |         dest="jobs",
204 |     )
205 |     ap.add_argument(
206 |         "--report",
207 |         type=Path,
208 |         help="Write report to this file instead of stdout.",
209 |     )
210 |     args = ap.parse_args()
211 | 
212 |     root = args.jobs.expanduser().resolve()
213 |     if not root.is_dir():
214 |         raise SystemExit(f"Jobs directory not found: {root}")
215 | 
216 |     md_doc = build_summary(root)
217 | 
218 |     if args.report:
219 |         args.report.write_text(md_doc, encoding="utf-8")
220 |         print(f"✓ Summary written to {args.report}")
221 |     else:
222 |         print(md_doc)
223 | 
224 | 
225 | if __name__ == "__main__":
226 |     main()
227 | 


--------------------------------------------------------------------------------
/harness_generator/scripts/gather_reports.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #────────────
  4 | #
  5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  6 | #
  7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  8 | # this software and associated documentation files (the “Software”), to deal in the 
  9 | # Software without restriction, including without limitation the rights to use, 
 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 11 | # Software, and to permit persons to whom the Software is furnished to do so, 
 12 | # subject to the following conditions:
 13 | #
 14 | # The above copyright notice and this permission notice shall be included in all 
 15 | # copies or substantial portions of the Software.
 16 | #
 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | #
 24 | # ────────────
 25 | 
 26 | """
 27 | gather_reports.py
 28 | ────────────
 29 | Gather bug-report artifacts from a triage directory.
 30 | 
 31 | Usage:
 32 |     ./gather_reports [<triage_dir>] [--output-dir <path>]
 33 | 
 34 | By default the script scans ./triage for job folders that follow the pattern
 35 | "<project>_<uuid>".  If a job folder (anywhere in its subtree) contains the
 36 | three markdown files *crash_analysis.md*, *crash_info.md*, and
 37 | *bug_report.md*, they are copied into a new directory named after the **uuid**
 38 | under ./bug_reports.
 39 | 
 40 | The output directory will now contain sub-directories with the same
 41 | names as the corresponding job folders found in *triage_dir* (for example
 42 | ``apache-httpd_1234abcd``), preserving the project name instead of keeping
 43 | only the raw UUID.
 44 | 
 45 | Directory layout variants
 46 | ------------------------
 47 | Historically, *triage_dir* contained only the per-job folders themselves::
 48 | 
 49 |     triage/
 50 |         <project>_<uuid>/
 51 | 
 52 | With the introduction of *categories* an additional level may be present::
 53 | 
 54 |     triage/
 55 |         <category>/
 56 |             <project>_<uuid>/
 57 | 
 58 | `gather_reports` now transparently handles **both** layouts by examining the
 59 | immediate children of *triage_dir* **and**, if they are not job directories,
 60 | their own direct sub-directories.
 61 | 
 62 | If category directories are detected the script replicates this structure
 63 | under the output directory so that artifacts remain grouped::
 64 | 
 65 |     triage/
 66 |         asan/
 67 |             project_1111aaaa/
 68 | 
 69 |     → bug_reports/
 70 |         asan/
 71 |             project_1111aaaa/
 72 | 
 73 | When a job directory contains a *poc.sh* or *poc.py* file, it is copied along
 74 | with the three required markdown files.
 75 | """
 76 | 
 77 | from __future__ import annotations
 78 | 
 79 | import argparse
 80 | import os
 81 | import shutil
 82 | import sys
 83 | from pathlib import Path
 84 | 
 85 | 
 86 | # ---------------------------------------------------------------------------
 87 | # Configuration
 88 | # ---------------------------------------------------------------------------
 89 | 
 90 | # These files must be present for a job directory to be considered complete.
 91 | REQUIRED_FILES = {
 92 |     "crash_analysis.md",
 93 |     "crash_info.md",
 94 |     "bug_report.md",
 95 | }
 96 | 
 97 | # Optional proof-of-concept files that are copied alongside the required
 98 | # markdown artifacts when present.  Only the **first** occurrence of each file
 99 | # name within a job directory is taken into account.
100 | 
101 | OPTIONAL_POC_FILES = [
102 |     "poc.sh",
103 |     "poc.py",
104 | ]
105 | 
106 | 
107 | def extract_uuid(job_dir_name: str) -> str | None:
108 |     """Return the substring after the final underscore in *job_dir_name*.
109 | 
110 |     Example::
111 | 
112 |         >>> extract_uuid('apache-httpd_1234abcd')
113 |         '1234abcd'
114 |     """
115 | 
116 |     if "_" not in job_dir_name:
117 |         return None
118 | 
119 |     return job_dir_name.split("_")[-1]
120 | 
121 | 
122 | # ---------------------------------------------------------------------------
123 | # Helper functions
124 | # ---------------------------------------------------------------------------
125 | 
126 | 
127 | def _find_first(root: Path, filename: str) -> Path | None:
128 |     """Return the **first** occurrence of *filename* under *root* or *None*."""
129 | 
130 |     try:
131 |         return next(root.rglob(filename))
132 |     except StopIteration:
133 |         return None
134 | 
135 | 
136 | def find_required_files(root: Path) -> dict[str, Path] | None:
137 |     """Search *root* recursively for all REQUIRED_FILES.
138 | 
139 |     Returns a mapping *filename → Path* for the first occurrence of every
140 |     required file or *None* if any file is missing.
141 |     """
142 | 
143 |     found: dict[str, Path] = {}
144 | 
145 |     for name in REQUIRED_FILES:
146 |         path = _find_first(root, name)
147 |         if path is None:
148 |             return None
149 |         found[name] = path
150 | 
151 |     return found
152 | 
153 | 
154 | def gather_reports(triage_dir: Path, output_dir: Path) -> None:
155 |     """Populate *output_dir* with consolidated bug-report artifacts."""
156 | 
157 |     if not triage_dir.is_dir():
158 |         sys.exit(f"Error: '{triage_dir}' is not a directory")
159 | 
160 |     output_dir.mkdir(exist_ok=True)
161 | 
162 |     def _process_job_dir(job_dir: Path, *, category: str | None = None) -> bool:
163 |         """Copy artifacts from *job_dir* to *output_dir*.
164 | 
165 |         Returns True if the directory was handled successfully, False
166 |         otherwise (e.g. not a job dir or missing files).
167 |         """
168 | 
169 |         if not job_dir.is_dir():
170 |             return False
171 | 
172 |         uuid = extract_uuid(job_dir.name)
173 |         if uuid is None:
174 |             return False  # not a job directory
175 | 
176 |         artifacts = find_required_files(job_dir)
177 |         if artifacts is None:
178 |             return False  # incomplete job – skip
179 | 
180 |         # Preserve categories in the output directory if requested.
181 |         dest = output_dir / job_dir.name if category is None else output_dir / category / job_dir.name
182 | 
183 |         if dest.exists():
184 |             print(
185 |                 f"[!] Destination '{dest}' already exists – skipping duplicate job from '{job_dir.name}'",
186 |                 file=sys.stderr,
187 |             )
188 |             return True  # already processed, treat as handled to avoid deeper fallback
189 | 
190 |         dest.mkdir(parents=True)
191 | 
192 | 
193 |         for name, src in artifacts.items():
194 |             shutil.copy2(src, dest / name)
195 | 
196 |         # Copy optional PoC files if they exist.
197 |         for poc_name in OPTIONAL_POC_FILES:
198 |             poc_path = _find_first(job_dir, poc_name)
199 |             if poc_path is not None:
200 |                 shutil.copy2(poc_path, dest / poc_name)
201 | 
202 |         print(f"[+] Collected reports for job '{job_dir.name}' → '{dest}'")
203 |         return True
204 | 
205 |     # Iterate over immediate children; if a child isn't processed try its sub-dirs.
206 |     for child in triage_dir.iterdir():
207 |         if not child.is_dir():
208 |             continue
209 | 
210 |         handled = _process_job_dir(child)
211 |         if handled:
212 |             continue
213 | 
214 |         # Treat *child* as category and look one level deeper.
215 |         for grandchild in child.iterdir():
216 |             _process_job_dir(grandchild, category=child.name)
217 | 
218 | 
219 | def main(argv: list[str] | None = None) -> None:  # noqa: D401
220 |     parser = argparse.ArgumentParser(
221 |         description=(
222 |             "Collect crash_analysis.md, crash_info.md and bug_report.md files "
223 |             "from each job directory under --input and copy them into "
224 |             "--output (default ./sorted/reports) preserving job folder names."
225 |         ),
226 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
227 |     )
228 | 
229 |     parser.add_argument(
230 |         "--input",
231 |         default="./jobs",
232 |         help="Root directory containing job run folders.",
233 |     )
234 |     parser.add_argument(
235 |         "--output",
236 |         default="./sorted/reports",
237 |         help="Destination where consolidated reports will be written.",
238 |     )
239 | 
240 |     args = parser.parse_args(argv)
241 | 
242 |     gather_reports(
243 |         Path(args.input).expanduser().resolve(),
244 |         Path(args.output).expanduser().resolve(),
245 |     )
246 | 
247 | 
248 | if __name__ == "__main__":
249 |     main()
250 | 


--------------------------------------------------------------------------------
/leveldb_writeup/workflow.md:
--------------------------------------------------------------------------------
  1 | LevelDB – Automated Harness Generation & Bug Discovery Workflow
  2 | ==============================================================
  3 | 
  4 | This document is a case-study of the **end-to-end workflow** that the
  5 | LLM-powered harness generator followed to find, trigger and triage a
  6 | previously-undiscovered bug in "leveldb".
  7 | 
  8 | Contents
  9 | --------
 10 | 1.  High-level timeline
 11 | 2.  Environment bootstrap
 12 | 3.  How the new target was chosen
 13 | 4.  Codex harness synthesis (`fuzz_table_open.cc`)
 14 | 5.  Building & running the new target
 15 | 6.  Crash detection & reproduction
 16 | 7.  Automated crash triage and false positive detection (→ `crash_analysis.md`)
 17 | 8.  Resulting artifacts
 18 | 9.  Harness quality & model intentionality 
 19 | 10. Key take-aways
 20 | 
 21 | ## 1. High-level timeline
 22 | 
 23 | | Step | Actor | What happened |
 24 | |------|-------|---------------|
 25 | | 1 | *cli wrapper* | Clone LevelDB OSS-Fuzz project and build the **baseline fuzzers** (only `fuzz_db`). |
 26 | | 2 | *Codex* | Prompted with high-level instructions to “add one **new** fuzz target that reaches previously unfuzzed code”. |
 27 | | 3 | *builder* | Codex edits the tree, creating `projects/leveldb/fuzz_table_open.cc` and adjusting build scripts. |
 28 | | 4 | *fuzzer* | libFuzzer starts; within seconds hits an **OOM in `ReadBlock()`**. |
 29 | | 5 | *runner* | Detects a new `oom-*` file, reproduces the issue and copies the logs →  `crash_info.md`. |
 30 | | 6 | *Codex* | Second prompt: *“Analyse this stack trace & produce human report.”*  Output stored in `crash_analysis.md`. |
 31 | 
 32 | 
 33 | ## 2. Environment bootstrap
 34 | 
 35 | The harness generator launches the standard OSS-Fuzz helper scripts:
 36 | 
 37 | ```text
 38 | $ python infra/helper.py build_image leveldb
 39 | $ python infra/helper.py build_fuzzers leveldb --sanitizer address --clean
 40 | ```
 41 | 
 42 | The log excerpt below shows that **only one
 43 | baseline target** was discovered:
 44 | 
 45 | ```text
 46 | [*] Baseline has 1 fuzzer(s): { 'fuzz_db' }
 47 | ```
 48 | 
 49 | `fuzz_db` exercises the public database API with randomly generated keys
 50 | and values, but *never loads SSTable files from disk* – a gap our
 51 | analysis will soon exploit.
 52 | 
 53 | 
 54 | ## 3. How the new target was chosen
 55 | 
 56 | After the baseline build, the workflow jumps straight to a Codex
 57 | invocation that is given high-level instructions to create a new fuzzer
 58 | harness for the project.  
 59 | (see [`harness_generator.py`](../harness_generator/src/harness_generator.py) → `_invoke_codex_for_harness`)
 60 | 
 61 | Codex is free to inspect any file in the working copy, clone the main
 62 | repository, or rely on its own training data.  The **selection logic is
 63 | therefore internal to the LLM** – the Python driver makes *no* attempt
 64 | to parse ELF symbol tables, ASTs, or code coverage reports.
 65 | 
 66 | For LevelDB, Codex picked
 67 | ```c++
 68 | Status Table::Open(const Options&, RandomAccessFile*, uint64_t file_size,
 69 |                    Table**);
 70 | ```
 71 | 
 72 | from `table/table.cc`.  This function parses on-disk SSTable files and
 73 | was not reached by the existing `fuzz_db` target, making it a sensible
 74 | choice even without a pre-computed coverage map.
 75 | 
 76 | 
 77 | ## 4. Codex harness synthesis
 78 | 
 79 | ### Codex instructions (excerpt)
 80 | 
 81 | ```text
 82 | **Objective (high-value fuzz target)**  
 83 | Create a **new libFuzzer harness** for the **leveldb** OSS-Fuzz project that
 84 | exercises a *public* or *documented* API reachable with **user-supplied input**
 85 | (e.g. files, packets, strings) and therefore has real-world security impact.
 86 | 
 87 | ────────────────────────────────────────
 88 | **Target-selection rules**
 89 | 
 90 | 1. **Start at the top**: pick the *highest-level* function that
 91 | *directly* consumes attacker-controlled data.  
 92 | • Good examples: `exif_data_load()`, `freerdp_peer_context_new()`,  
 93 |   `curl_url_set()`, `png_read_info()`.  
 94 | • **Avoid** low-level helpers (`*_parse_int()`, `*_read_field()` etc.)
 95 |   unless *no higher layer* validates input.
 96 | ```
 97 | 
 98 | ### What Codex does
 99 | 
100 | 1. Analyzes `projects/leveldb/` to learn how LevelDB objects are
101 |    initialised, how the corpus input is written to disk, and how the
102 |    harness is compiled (compiler flags, build.sh edits, etc.).
103 | 2. Generates `projects/leveldb/fuzz_table_open.cc`, re-using the helper
104 |    functions and error handling patterns it saw in `fuzz_db.cc`.
105 | 3. Opens `projects/leveldb/build.sh` (or `Dockerfile` / `CMakeLists.txt`
106 |    depending on the project) and appends a single `compile_cc` line so
107 |    the new `.cc` file is compiled into a `fuzz_table_open` binary next
108 |    to the existing `fuzz_db` target.
109 | 4. Writes the path of the new harness to a sentinel file called `./done`
110 |    so the Python driver knows that edits are complete.
111 | 
112 | The produced harness is short enough to show in full:
113 | 
114 | ```c++
115 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
116 |   const char *fname = "/tmp/fuzz_table_open.sst";
117 |   std::ofstream out(fname, std::ios::binary);
118 |   if (!out) return 0;
119 |   out.write(reinterpret_cast<const char *>(data), size);
120 | 
121 |   leveldb::Options options;
122 |   options.env = leveldb::Env::Default();
123 | 
124 |   leveldb::RandomAccessFile *file = nullptr;
125 |   if (!options.env->NewRandomAccessFile(fname, &file).ok()) return 0;
126 | 
127 |   leveldb::Table *table = nullptr;
128 |   if (!leveldb::Table::Open(options, file, size, &table).ok()) {
129 |     delete file;
130 |     return 0;
131 |   }
132 | 
133 |   std::unique_ptr<leveldb::Iterator> it(
134 |       table->NewIterator(leveldb::ReadOptions()));
135 |   for (it->SeekToFirst(); it->Valid(); it->Next()) {
136 |     /* no-op – iteration alone is enough to exercise the parser */
137 |   }
138 | 
139 |   delete table;
140 |   delete file;
141 |   std::remove(fname);
142 |   return 0;
143 | }
144 | ```
145 | 
146 | ### Build-until-green loop
147 | 
148 | The driver now calls `_build_with_retries()` which attempts to rebuild
149 | the project.  If compilation fails, the captured stderr is forwarded to
150 | `_invoke_codex_to_fix_build`, and Codex applies the smallest possible
151 | patch (typically adding a missing header include or fixing a compiler
152 | flag).  This loop repeats until the build succeeds or the retry budget
153 | is exhausted.  For LevelDB the very first build already succeeded
154 | because the harness was modelled closely after `fuzz_db.cc`.
155 | 
156 | 
157 | ## 5.  Building & running the new target
158 | 
159 | After compilation the runner automatically executed a short sanity fuzz
160 | session (1 k coverage-guided iterations).  Within ~2000 execs libFuzzer
161 | emitted:
162 | 
163 | ```text
164 | ==14==ERROR: AddressSanitizer: out of memory: allocator is trying to
165 | allocate 0xffffffffffe0 bytes
166 |     #0 0x... in operator new[](unsigned long)
167 |     #1 0x... in leveldb::ReadBlock(...)
168 |     #2 0x... in leveldb::Table::Open(...)
169 | ```
170 | 
171 | Because the crash originates **inside project code (`format.cc:78`)** and
172 | the requested allocation is clearly bogus (`0xffffffffffe0`), the signal
173 | is marked as a *real* bug and saved.  Reproducer and log were copied to
174 | `build/out/leveldb/crash-<SHA>`.
175 | 
176 | 
177 | ## 6.  Crash detection & reproduction
178 | 
179 | After the fuzzer exits, `harness_generator.py` walks the
180 | `build/out/leveldb` directory tree and records every file whose name
181 | begins with one of the libFuzzer prefixes `crash`, `oom`, or `timeout`
182 | (see `_find_bug_files`).  Any *new* file is assumed to be a genuine
183 | finding.  The very first one is reproduced with the standard
184 | `infra/helper.py reproduce` command and its artifacts are collected into
185 | `crash_info.md`.
186 | 
187 | 
188 | ## 7.  Automated crash triage
189 | 
190 | A second Codex invocation is fed the **raw ASan log plus the offending
191 | source lines** to produce a human-readable crash analysis.
192 | 
193 | Codex produced `crash_analysis.md`, identifying the issue as *unbounded
194 | memory allocation due to unchecked block handle size* and suggesting to
195 | validate `offset + size` before allocation – exactly the fix a human
196 | would write.
197 | 
198 | 
199 | ## 8.  Resulting artifacts
200 | 
201 | All important files live under `leveldb_writeup/artifacts/`:
202 | 
203 | * `crash_info.md` – Reproducer command, ASan log, harness snippet & hex-dump.
204 | * `crash_analysis.md` – High-level vulnerability assessment.
205 | 
206 | 
207 | ## 9. Harness quality & model intentionality
208 | 
209 | On the **first attempt**, Codex (o3) selected `leveldb::Table::Open()`, a
210 | high-level, attacker-controlled file parser that the baseline target never
211 | touched, produced a compiling harness without retries, and triggered an OOM in
212 | `ReadBlock()` within \~2k execs. This happened without any external coverage,
213 | static analysis, or symbol/AST reasoning.
214 | 
215 | That outcome indicates the model is not merely emitting syntactically correct
216 | code; it is **prioritizing code patterns that historically harbor bugs**
217 | (complex, input-driven parsers). This reflects a learned, security-relevant
218 | **inductive prior**. In practice, this displaces a large portion of the manual
219 | front-end work (enumerating and ranking candidate entry points, drafting an
220 | initial harness, and iterating to green): the model did that prioritization and
221 | delivered a crash-inducing target in a single pass.
222 | 
223 | 
224 | ## 10. Key take-aways
225 | 
226 | 1. Even mature OSS-Fuzz projects often have *format-parsing* code paths
227 |    left unfuzzed – here the SSTable reader.
228 | 2. Even without an external coverage map, a single well-crafted prompt
229 |    is often enough for the LLM to identify an unfuzzed API and deliver a
230 |    working harness on the first attempt.
231 | 3. The same LLM that wrote the harness can immediately explain the bug –
232 |    closing the loop from discovery to actionable triage with no human in
233 |    the middle.
234 | 4. Codex (o3) showed a learned bias toward high-risk parsing entry points,
235 |    producing a crash-inducing harness on the first try without coverage or
236 |    static guidance — evidence that LLMs can assume much of the early triage
237 |    and target-selection workload with surprising precision.
238 | 


--------------------------------------------------------------------------------
/harness_generator/batch_generate.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #────────────
  4 | #
  5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  6 | #
  7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  8 | # this software and associated documentation files (the “Software”), to deal in the 
  9 | # Software without restriction, including without limitation the rights to use, 
 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 11 | # Software, and to permit persons to whom the Software is furnished to do so, 
 12 | # subject to the following conditions:
 13 | #
 14 | # The above copyright notice and this permission notice shall be included in all 
 15 | # copies or substantial portions of the Software.
 16 | #
 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | #
 24 | # ────────────
 25 | 
 26 | """
 27 | batch_generate.py
 28 | ─────────────────
 29 | 
 30 | Orchestrates batch execution of `harness_generator.py` against multiple
 31 | OSS-Fuzz projects.
 32 | 
 33 | • Consumes a YAML file whose top-level `projects` list describes the project
 34 |   name, git URL and commit/reference of each fuzz-tooling repository.
 35 | • For every entry it clones the repository into an *output* directory,
 36 |   then invokes HarnessGenerator one or more times ("rounds").
 37 | • All stdout/stderr from each invocation is tee'd to
 38 |   `harness_round_<n>.log` inside that project's run directory so logs are
 39 |   preserved even if the main process is interrupted.
 40 | 
 41 | Most HarnessGenerator CLI flags are surfaced so the batch driver can choose
 42 | sanitiser, Codex binary, scratch space, etc.  Work is done sequentially and
 43 | any clone/build failure simply skips the affected target, keeping the batch
 44 | run going.
 45 | """
 46 | 
 47 | from __future__ import annotations
 48 | 
 49 | import argparse
 50 | import concurrent.futures
 51 | import logging
 52 | import os
 53 | import shutil
 54 | import subprocess
 55 | import sys
 56 | import uuid
 57 | from pathlib import Path
 58 | from typing import Dict, List
 59 | 
 60 | import yaml
 61 | from dotenv import load_dotenv
 62 | from git import Repo, exc as git_exc
 63 | 
 64 | # ---------------------------------------------------------------------------#
 65 | # Constants & global state
 66 | # ---------------------------------------------------------------------------#
 67 | # Default location for all job run directories created by this batch driver.
 68 | # The original internal tooling wrote to an NFS mount; we switch to a local
 69 | # folder so the released version works out-of-the-box.
 70 | 
 71 | OUTPUT_ROOT = Path("./jobs").resolve()
 72 | OUTPUT_ROOT.mkdir(parents=True, exist_ok=True)
 73 | 
 74 | # ---------------------------------------------------------------------------#
 75 | # YAML helper
 76 | # ---------------------------------------------------------------------------#
 77 | 
 78 | 
 79 | def load_targets_yaml(path: Path) -> list[dict[str, str]]:
 80 |     """Return the list under `projects:` from a YAML file."""
 81 |     with open(path, "r", encoding="utf-8") as fh:
 82 |         data = yaml.safe_load(fh)
 83 |     if not data or "projects" not in data:
 84 |         raise ValueError("YAML must contain a top-level 'projects' list")
 85 |     return data["projects"]
 86 | 
 87 | 
 88 | # ---------------------------------------------------------------------------#
 89 | # Git helper
 90 | # ---------------------------------------------------------------------------#
 91 | 
 92 | 
 93 | def clone_and_checkout(url: str, ref: str, dest: Path) -> Repo:
 94 |     logging.info("Cloning %s → %s", url, dest)
 95 |     repo = Repo.clone_from(url, dest)
 96 |     try:
 97 |         repo.git.checkout(ref)
 98 |     except git_exc.GitCommandError:
 99 |         repo.git.fetch("origin", ref)
100 |         repo.git.checkout("FETCH_HEAD")
101 |     logging.info("Checked-out commit %s", repo.head.commit.hexsha)
102 |     return repo
103 | 
104 | 
105 | # ---------------------------------------------------------------------------#
106 | # Worker function
107 | # ---------------------------------------------------------------------------#
108 | 
109 | 
110 | def process_project(
111 |     target: Dict[str, str],
112 |     *,
113 |     rounds: int,
114 |     ai_key_path: Path,
115 |     sanitizer: str,
116 |     codex_cli: str,
117 |     scratch_space: Path | None,
118 |     copy_repo: bool,
119 |     no_build: bool,
120 |     smoke: bool,
121 |     max_retries: int,
122 | ) -> None:
123 |     """
124 |     Clone the project and run HarnessGenerator `rounds` times in sequence.
125 |     All stdout/stderr from each round is captured to a file.
126 |     """
127 |     project = target["project_name"]
128 |     url = target["fuzz_tooling_url"]
129 |     ref = target["fuzz_tooling_ref"]
130 | 
131 |     run_dir = OUTPUT_ROOT / f"{project}_{uuid.uuid4().hex}"
132 |     run_dir.mkdir(parents=True, exist_ok=True)
133 | 
134 |     try:
135 |         clone_and_checkout(url, ref, run_dir)
136 |     except Exception as err:
137 |         logging.error("[SKIP] %s - clone/checkout failed: %s", project, err)
138 |         return
139 | 
140 |     # ── Keep only the target project directory under oss-fuzz/projects/ ──
141 |     projects_root = run_dir / "projects"
142 |     if projects_root.is_dir():
143 |         for sub in projects_root.iterdir():
144 |             if sub.is_dir() and sub.name != project:
145 |                 try:
146 |                     shutil.rmtree(sub)
147 |                 except Exception as exc:
148 |                     logging.warning(
149 |                         "[%s] Failed to remove directory %s: %s",
150 |                         project,
151 |                         sub,
152 |                         exc,
153 |                     )
154 | 
155 |     script_path = Path(__file__).parent / "src" / "harness_generator.py"
156 | 
157 |     for round_idx in range(1, rounds + 1):
158 |         log_path = run_dir / f"harness_round_{round_idx}.log"
159 |         logging.info(
160 |             "[%s] Round %d/%d → %s", project, round_idx, rounds, log_path
161 |         )
162 | 
163 |         cmd = [
164 |             sys.executable,
165 |             str(script_path),
166 |             project,
167 |             str(run_dir),
168 |             str(ai_key_path),
169 |             "--sanitizer",
170 |             sanitizer,
171 |             "--codex-cli",
172 |             codex_cli,
173 |             "--max-retries",
174 |             str(max_retries),
175 |         ]
176 | 
177 |         if scratch_space:
178 |             cmd += ["--scratch-space", str(scratch_space)]
179 |         if copy_repo:
180 |             cmd.append("--copy-repo")
181 |         if no_build:
182 |             cmd.append("--no-build")
183 |         if smoke:
184 |             cmd.append("--smoke")
185 | 
186 |         # Capture combined stdout/stderr into the log file
187 |         with open(log_path, "w", encoding="utf-8") as lf:
188 |             proc = subprocess.run(
189 |                 cmd,
190 |                 stdout=lf,
191 |                 stderr=subprocess.STDOUT,
192 |                 text=True,
193 |             )
194 |         if proc.returncode != 0:
195 |             logging.error(
196 |                 "[%s] Round %d failed (rc=%d). " "See %s for details.",
197 |                 project,
198 |                 round_idx,
199 |                 proc.returncode,
200 |                 log_path,
201 |             )
202 | 
203 |     logging.info("[%s] All rounds complete → %s", project, run_dir)
204 | 
205 | 
206 | # ---------------------------------------------------------------------------#
207 | # Main
208 | # ---------------------------------------------------------------------------#
209 | 
210 | 
211 | def main() -> None:
212 |     parser = argparse.ArgumentParser(
213 |         description="Batch-generate OSS-Fuzz harnesses concurrently.",
214 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
215 |     )
216 | 
217 |     parser.add_argument(
218 |         "--targets",
219 |         type=Path,
220 |         required=True,
221 |         help="YAML file listing projects to process.",
222 |     )
223 |     parser.add_argument(
224 |         "--rounds",
225 |         type=int,
226 |         default=1,
227 |         help="Successive Codex rounds per project.",
228 |     )
229 |     parser.add_argument(
230 |         "--threads",
231 |         type=int,
232 |         default=1,
233 |         help="Maximum number of concurrent HarnessGenerator runs.",
234 |     )
235 |     parser.add_argument(
236 |         "--ai-key-path",
237 |         type=Path,
238 |         default="./.env",
239 |         help="Path to .env or file containing OPENAI key.",
240 |     )
241 |     parser.add_argument(
242 |         "--sanitizer",
243 |         default="address",
244 |         help="Sanitizer to use when building fuzzers.",
245 |     )
246 |     parser.add_argument(
247 |         "--codex-cli", default="codex", help="Codex CLI executable."
248 |     )
249 |     parser.add_argument(
250 |         "--scratch-space",
251 |         type=Path,
252 |         help="Directory for HarnessGenerator temp copies.",
253 |     )
254 |     parser.add_argument(
255 |         "--copy-repo",
256 |         action="store_true",
257 |         help="Tell HarnessGenerator to copy the repo before edits.",
258 |     )
259 |     parser.add_argument(
260 |         "--no-build",
261 |         action="store_true",
262 |         help="Skip docker rebuild step (debug only).",
263 |     )
264 |     parser.add_argument(
265 |         "--smoke",
266 |         action="store_true",
267 |         help="Run a quick smoke test before Codex edits.",
268 |     )
269 |     parser.add_argument(
270 |         "--max-retries",
271 |         type=int,
272 |         default=3,
273 |         help="Max build-fix rounds inside HarnessGenerator.",
274 |     )
275 |     parser.add_argument(
276 |         "--randomize",
277 |         action="store_true",
278 |         help="Randomize the order of projects before processing.",
279 |     )
280 |     parser.add_argument(
281 |         "--verbose",
282 |         action="store_true",
283 |         default=True,
284 |         help="Enable DEBUG logging.",
285 |     )
286 | 
287 |     args = parser.parse_args()
288 | 
289 |     logging.basicConfig(
290 |         level=logging.DEBUG if args.verbose else logging.INFO,
291 |         format="[%(levelname)s] %(message)s",
292 |     )
293 | 
294 |     load_dotenv(os.path.expanduser(str(args.ai_key_path)))
295 | 
296 |     targets: List[Dict[str, str]] = load_targets_yaml(args.targets)
297 |     if args.randomize:
298 |         import random
299 | 
300 |         random.shuffle(targets)
301 |         logging.info("--randomize is set; target list shuffled.")
302 | 
303 |     logging.info("Loaded %d project(s) from %s", len(targets), args.targets)
304 |     logging.info("Running with up to %d concurrent job(s)…", args.threads)
305 | 
306 |     with concurrent.futures.ThreadPoolExecutor(
307 |         max_workers=args.threads
308 |     ) as pool:
309 |         futures = [
310 |             pool.submit(
311 |                 process_project,
312 |                 t,
313 |                 rounds=args.rounds,
314 |                 ai_key_path=args.ai_key_path.expanduser(),
315 |                 sanitizer=args.sanitizer,
316 |                 codex_cli=args.codex_cli,
317 |                 scratch_space=args.scratch_space,
318 |                 copy_repo=args.copy_repo,
319 |                 no_build=args.no_build,
320 |                 smoke=args.smoke,
321 |                 max_retries=args.max_retries,
322 |             )
323 |             for t in targets
324 |         ]
325 | 
326 |         # wait for all tasks to finish, raising exceptions if any occurred
327 |         for f in concurrent.futures.as_completed(futures):
328 |             try:
329 |                 f.result()
330 |             except Exception as exc:
331 |                 logging.error("Worker raised: %s", exc)
332 | 
333 |     logging.info("All work complete.")
334 | 
335 | 
336 | if __name__ == "__main__":
337 |     try:
338 |         main()
339 |     except KeyboardInterrupt:
340 |         print("\nInterrupted - exiting.")
341 | 


--------------------------------------------------------------------------------
/harness_generator/scripts/generate_reports.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #────────────
  4 | #
  5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  6 | #
  7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  8 | # this software and associated documentation files (the “Software”), to deal in the 
  9 | # Software without restriction, including without limitation the rights to use, 
 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 11 | # Software, and to permit persons to whom the Software is furnished to do so, 
 12 | # subject to the following conditions:
 13 | #
 14 | # The above copyright notice and this permission notice shall be included in all 
 15 | # copies or substantial portions of the Software.
 16 | #
 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | #
 24 | # ────────────
 25 | 
 26 | """
 27 | generate_reports.py
 28 | ────────────
 29 | For every harness run directory that contains both `crash_analysis.md` and
 30 | `crash_info.md`, invoke the Codex CLI to author a polished disclosure-style
 31 | `bug_report.md` using the template supplied by the user.
 32 | 
 33 | The script mirrors the Codex interaction pattern used in `harness_generator.py`:
 34 | 
 35 | 1. Aggregate the contents of `crash_analysis.md` and `crash_info.md` into a
 36 |    single context blob that is provided to Codex.
 37 | 2. Send high-level instructions asking Codex to create **exactly one new file
 38 |    called `bug_report.md`** in the same directory, following the required
 39 |    section layout verbatim (see `REPORT_TEMPLATE` below).
 40 | 3. Repeat for every qualifying run directory found under the *input* root
 41 |    (default: `./jobs`).
 42 | 
 43 | Like the other tooling, the script expects an OpenAI-compatible API key via
 44 | `OPENAI_API_KEY` **or** a path to a dotenv file containing it.
 45 | """
 46 | 
 47 | from __future__ import annotations
 48 | 
 49 | import argparse
 50 | import os
 51 | import textwrap
 52 | from pathlib import Path
 53 | from typing import List
 54 | 
 55 | from dotenv import load_dotenv
 56 | 
 57 | load_dotenv(dotenv_path="./.env")
 58 | 
 59 | # Re-use the Codex helper that is already part of this repository
 60 | # Add src/ to import path then import CodexHelper
 61 | import sys
 62 | from pathlib import Path as _Path
 63 | 
 64 | _REPO_ROOT = _Path(__file__).resolve().parent.parent
 65 | _SRC_DIR = _REPO_ROOT / "src"
 66 | sys.path.insert(0, str(_SRC_DIR))
 67 | 
 68 | from codex_helper import CodexHelper  # type: ignore
 69 | 
 70 | 
 71 | # ---------------------------------------------------------------------------
 72 | # Constants
 73 | # ---------------------------------------------------------------------------
 74 | 
 75 | DEFAULT_INPUT_ROOT = Path("./jobs")
 76 | 
 77 | CODEX_ANALYSIS_MODEL = os.environ.get("CODEX_ANALYSIS_MODEL", "o4-mini")
 78 | CODEX_APPROVAL_MODE = os.environ.get("CODEX_APPROVAL_MODE", "full-auto")
 79 | 
 80 | 
 81 | REPORT_TEMPLATE = textwrap.dedent(
 82 |     r"""
 83 |     # <Project / Component> – <Short Bug Title>
 84 |     _Disclosure date: <YYYY-MM-DD>_ (use the current date)
 85 | 
 86 |     ---
 87 | 
 88 |     ## 1 Overview
 89 |     Brief, one-sentence statement of the flaw and why it matters.
 90 | 
 91 |     ## 2 Affected product(s) and version(s)
 92 |     * <Product 1> - <version / git SHA / build> (check the git history and use origin/HEAD)
 93 | 
 94 |     ## 3 Impact
 95 |     Describe what an attacker can do (RCE, DoS, info-leak, privilege escalation, etc.).  
 96 |     _Add CVSS v3.1 vector & score here if you have one._
 97 | 
 98 |     ## 4 Technical details
 99 |     1. **Root cause** – where in the code / design the issue lives.  
100 |     2. **Trigger** – how malformed input or an attacker’s action reaches that code path (include the harness and the crashing input)
101 |     3. **Why it fails safely/unsafely** – memory corruption, missing auth check, etc.  
102 |     4. **Reproduction** – step-by-step commands or minimal PoC (link to file if large).
103 | 
104 |     ## 5 Mitigation / Patch guidance
105 |     * Short-term workaround (e.g., config flag, WAF rule).  
106 |     * Long-term fix suggestion (code change, input validation, size check).
107 | 
108 |     ## 6 Timeline
109 |     | Date | Event |
110 |     |------|-------|
111 |     | YYYY-MM-DD | Vulnerability discovered | (use the date when crash_info.md was created)
112 | 
113 |     ## 7 Credits
114 |     _Reported by SHERPA_
115 | 
116 |     ## 8 References
117 |     * ISO/IEC 29147 section 5.4 (Disclosure contents)  
118 |     * CERT/CC Vulnerability Note style  
119 |     * CVE entry (reserved) – CVE-YYYY-NNNN
120 |     """
121 | ).strip()
122 | 
123 | 
124 | # ---------------------------------------------------------------------------
125 | # Helper functions
126 | # ---------------------------------------------------------------------------
127 | 
128 | 
129 | def _find_run_dirs(root: Path) -> List[Path]:
130 |     """Return run-directory candidates located one **or two** levels below *root*.
131 | 
132 |     The original implementation only considered the immediate children of
133 |     *root*::
134 | 
135 |         output/<target>/crash_analysis.md
136 | 
137 |     Newer triage layouts introduce an additional *category* layer so the
138 |     structure now looks like::
139 | 
140 |         output/<category>/<target>/crash_analysis.md
141 | 
142 |     To stay backward-compatible while supporting the new layout the function
143 |     operates in two steps:
144 | 
145 |     1.  Collect every *direct* sub-directory of *root*.
146 |     2.  For each direct child that itself is **not** a run directory (i.e.
147 |         lacks the required crash documents), collect its own sub-directories.
148 | 
149 |     The resulting list therefore contains
150 | 
151 |         • output/<target>
152 |         • output/<category>/<target>
153 | 
154 |     leaving the subsequent `_has_crash_docs` filter to decide which candidates
155 |     actually qualify as run directories.
156 |     """
157 | 
158 |     run_dirs: List[Path] = []
159 | 
160 |     # First pass — look at immediate children of *root*.
161 |     for first_level in root.iterdir():
162 |         if not first_level.is_dir():
163 |             continue
164 | 
165 |         if _has_crash_docs(first_level):
166 |             # Classic layout: the run directory sits directly under *root*.
167 |             run_dirs.append(first_level)
168 |             continue
169 | 
170 |         # Second pass — treat *first_level* as a category and inspect its
171 |         # direct sub-directories.  We intentionally do **not** recurse further
172 |         # to avoid unexpectedly deep walks while still covering the new
173 |         # two-level layout.
174 |         for candidate in first_level.iterdir():
175 |             if candidate.is_dir() and _has_crash_docs(candidate):
176 |                 run_dirs.append(candidate)
177 | 
178 |     return run_dirs
179 | 
180 | 
181 | def _has_crash_docs(run_dir: Path) -> bool:
182 |     return (run_dir / "crash_analysis.md").is_file() and (
183 |         run_dir / "crash_info.md"
184 |     ).is_file()
185 | 
186 | 
187 | # ---------------------------------------------------------------------------
188 | # Codex interaction per run directory
189 | # ---------------------------------------------------------------------------
190 | 
191 | 
192 | def _invoke_codex_for_report(
193 |     run_dir: Path, codex_cli: str, ai_key_path: Path
194 | ) -> None:
195 |     """Ask Codex to create *bug_report.md* inside *run_dir*."""
196 | 
197 |     analysis_path = run_dir / "crash_analysis.md"
198 |     info_path = run_dir / "crash_info.md"
199 | 
200 |     # Combine the two markdown files into one context blob
201 |     context_blob = textwrap.dedent(
202 |         """
203 |         === crash_analysis.md ===
204 |         {analysis}
205 | 
206 |         === crash_info.md ===
207 |         {info}
208 |         """
209 |     ).format(
210 |         analysis=analysis_path.read_text(encoding="utf-8", errors="replace"),
211 |         info=info_path.read_text(encoding="utf-8", errors="replace"),
212 |     )
213 | 
214 |     instructions = textwrap.dedent(
215 |         f"""
216 |         You are an experienced vulnerability disclosure author.
217 | 
218 |         Using the *context* provided (crash analysis and crash info), write a
219 |         **new file** called `bug_report.md` inside the same directory.
220 | 
221 |         If `bug_report.md` already exists, just create the ./done file and exit.
222 | 
223 |         Requirements:
224 |         • Follow the exact section headings and formatting shown below.
225 |         • Where possible, extract details from the analysis/info; otherwise
226 |           leave concise TODO placeholders for a human analyst.
227 |         • Do **not** modify existing files.
228 | 
229 |         Important: If the bug is caused by a harness error (bad library usage, wrong params, etc.)
230 |           then you must put "HARNESS ERROR" somewhere in your report to flag this false positive.
231 | 
232 |         ---
233 |         BEGIN TEMPLATE (copy verbatim, then fill)
234 |         {REPORT_TEMPLATE}
235 |         END TEMPLATE
236 |         """
237 |     ).strip()
238 | 
239 |     patcher = CodexHelper(
240 |         repo_path=run_dir,
241 |         ai_key_path=str(ai_key_path),
242 |         copy_repo=False,
243 |         codex_cli=codex_cli,
244 |         codex_model=CODEX_ANALYSIS_MODEL,
245 |         approval_mode=CODEX_APPROVAL_MODE,
246 |     )
247 | 
248 |     stdout = patcher.run_codex_command(
249 |         instructions, additional_context=context_blob
250 |     )
251 | 
252 |     if stdout is None:
253 |         print(f"[!] Codex did not create bug_report.md in {run_dir}")
254 |     else:
255 |         print(f"✓ bug_report.md generated for {run_dir}")
256 | 
257 | 
258 | # ---------------------------------------------------------------------------
259 | # CLI
260 | # ---------------------------------------------------------------------------
261 | 
262 | 
263 | def main() -> None:
264 |     parser = argparse.ArgumentParser(
265 |         description="Generate bug_report.md for each crash-containing run directory via Codex.",
266 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
267 |     )
268 | 
269 |     parser.add_argument(
270 |         "--input",
271 |         type=Path,
272 |         default=DEFAULT_INPUT_ROOT,
273 |         help="Root directory containing harness run directories (default: ./jobs)",
274 |     )
275 |     parser.add_argument(
276 |         "--ai-key-path",
277 |         type=Path,
278 |         default=Path("./.env"),
279 |         help="Path to .env file holding your OPENAI-compatible API key.",
280 |     )
281 |     parser.add_argument(
282 |         "--codex-cli",
283 |         default="codex",
284 |         help="Codex CLI executable path (default: codex)",
285 |     )
286 | 
287 |     parser.add_argument(
288 |         "--threads",
289 |         type=int,
290 |         default=1,
291 |         help="Maximum number of parallel Codex report generations.",
292 |     )
293 | 
294 |     args = parser.parse_args()
295 | 
296 |     # Ensure API key is exported exactly like harness_generator does
297 |     load_dotenv(dotenv_path=os.path.expanduser(str(args.ai_key_path)))
298 | 
299 |     root = args.input.expanduser().resolve()
300 |     if not root.is_dir():
301 |         raise SystemExit(f"Output directory not found: {root}")
302 | 
303 |     run_dirs = _find_run_dirs(root)
304 | 
305 |     if not run_dirs:
306 |         print("[!] No run directories found – nothing to do.")
307 |         return
308 | 
309 |     todo = [d for d in run_dirs if _has_crash_docs(d)]
310 | 
311 |     if not todo:
312 |         print("[!] No crash_analysis.md found under", root)
313 |         return
314 | 
315 |     print(
316 |         f"[*] Found {len(todo)} run directorie(s) with crashes. Using up to {args.threads} thread(s)."
317 |     )
318 | 
319 |     import concurrent.futures as _cf
320 | 
321 |     with _cf.ThreadPoolExecutor(max_workers=args.threads) as pool:
322 |         futures = [
323 |             pool.submit(
324 |                 _invoke_codex_for_report,
325 |                 run_dir,
326 |                 codex_cli=args.codex_cli,
327 |                 ai_key_path=args.ai_key_path.expanduser(),
328 |             )
329 |             for run_dir in todo
330 |         ]
331 | 
332 |         # Wait for completion, surface exceptions early
333 |         for f in _cf.as_completed(futures):
334 |             try:
335 |                 f.result()
336 |             except Exception as exc:
337 |                 print(f"[!] Worker raised exception: {exc}")
338 | 
339 | 
340 | if __name__ == "__main__":
341 |     main()
342 | 


--------------------------------------------------------------------------------
/harness_generator/src/codex_helper.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | #────────────
  4 | #
  5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
  6 | #
  7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
  8 | # this software and associated documentation files (the “Software”), to deal in the 
  9 | # Software without restriction, including without limitation the rights to use, 
 10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
 11 | # Software, and to permit persons to whom the Software is furnished to do so, 
 12 | # subject to the following conditions:
 13 | #
 14 | # The above copyright notice and this permission notice shall be included in all 
 15 | # copies or substantial portions of the Software.
 16 | #
 17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
 18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
 19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
 21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
 22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23 | #
 24 | # ────────────
 25 | 
 26 | """
 27 | codex_helper.py
 28 | ────────────────
 29 | • Runs the Codex CLI inside a pseudo-terminal so coloured output streams live
 30 |   to stdout while still being captured for later inspection.
 31 | • Watches for the sentinel file `./done` that Codex is instructed to write
 32 |   once it has applied all edits.  The session is terminated as soon as the
 33 |   file appears.
 34 | • Retries the **CLI invocation** on common transient failure strings.
 35 | • Retries the **whole patch generation attempt** when no diff was produced.
 36 | • Enforces a hard wall-clock timeout and performs a 3-stage
 37 |   (SIGINT→SIGTERM→SIGKILL) shutdown sequence.
 38 | • Returns *None* if Codex made no edits; otherwise returns full captured
 39 |   stdout so callers can inspect or log the conversation.
 40 | • Optional *ai_key_path* can point to a file containing the OpenAI key; the
 41 |   helper sets the OPENAI_API_KEY environment variable if it was not yet
 42 |   defined.
 43 | """
 44 | 
 45 | from __future__ import annotations
 46 | 
 47 | import errno
 48 | import logging
 49 | import os
 50 | import pty
 51 | import select
 52 | import shutil
 53 | import signal
 54 | import subprocess
 55 | import tempfile
 56 | import textwrap
 57 | import time
 58 | from pathlib import Path
 59 | from typing import List, Sequence
 60 | 
 61 | from git import Repo, exc as git_exc
 62 | 
 63 | # ---------------------------------------------------------------------------
 64 | # Logging setup
 65 | # ---------------------------------------------------------------------------
 66 | 
 67 | 
 68 | LOGGER = logging.getLogger(__name__)
 69 | 
 70 | 
 71 | # ---------------------------------------------------------------------------
 72 | # Helpers
 73 | # ---------------------------------------------------------------------------
 74 | 
 75 | 
 76 | def _ensure_git_repo(path: Path) -> Repo:
 77 |     """Return a *Repo* object, initialising a new repository if needed."""
 78 | 
 79 |     try:
 80 |         repo = Repo(path)
 81 |     except git_exc.InvalidGitRepositoryError:
 82 |         repo = Repo.init(path)
 83 | 
 84 |     # Make sure at least one commit exists so `git diff` behaves.
 85 |     if not repo.head.is_valid():
 86 |         repo.git.add(A=True)
 87 |         try:
 88 |             repo.git.commit(m="Initial commit", allow_empty=True)
 89 |         except git_exc.GitCommandError:
 90 |             # Happens when there is literally nothing to commit yet.
 91 |             pass
 92 |     return repo
 93 | 
 94 | 
 95 | # ---------------------------------------------------------------------------
 96 | # Core helper class
 97 | # ---------------------------------------------------------------------------
 98 | 
 99 | 
100 | class CodexHelper:
101 |     """Light-weight wrapper around the Codex CLI with robust retry logic."""
102 | 
103 |     def __init__(
104 |         self,
105 |         *,
106 |         repo_path: Path,
107 |         ai_key_path: str | None = None,
108 |         copy_repo: bool = True,
109 |         scratch_space: Path | None = None,
110 |         codex_cli: str = "codex",
111 |         codex_model: str = "o3",
112 |         approval_mode: str = "full-auto",
113 |         dangerous_bypass: bool = False,
114 |         sandbox_mode: str | None = None,
115 |     ) -> None:
116 | 
117 |         self.repo_path = Path(repo_path).expanduser().resolve()
118 |         if not self.repo_path.is_dir():
119 |             raise FileNotFoundError(f"Repository not found: {self.repo_path}")
120 | 
121 |         self.scratch_space = scratch_space or Path("/tmp")
122 |         self.codex_cli = str(codex_cli)
123 |         self.codex_model = codex_model
124 |         self.approval_mode = approval_mode
125 | 
126 |         if sandbox_mode:
127 |             self.sandbox_mode = sandbox_mode
128 |         else:
129 |             self.sandbox_mode = "workspace-write"
130 |         
131 |         if dangerous_bypass:
132 |             self.approval_mode = "never"
133 |             self.sandbox_mode = "danger-full-access"
134 |         
135 | 
136 |         # Work on an isolated copy when requested so Codex can freely modify.
137 |         if copy_repo:
138 |             self.working_dir = Path(
139 |                 tempfile.mkdtemp(prefix="codex-helper-", dir=str(self.scratch_space))
140 |             )
141 |             shutil.copytree(self.repo_path, self.working_dir, dirs_exist_ok=True)
142 |         else:
143 |             self.working_dir = self.repo_path
144 | 
145 |         self.repo = _ensure_git_repo(self.working_dir)
146 | 
147 |         # Provide API key via env var if a path was supplied.
148 |         if ai_key_path and "OPENAI_API_KEY" not in os.environ:
149 |             key_path = Path(ai_key_path).expanduser()
150 |             if key_path.is_file():
151 |                 key = key_path.read_text(encoding="utf-8", errors="ignore").strip()
152 |                 if key:
153 |                     os.environ["OPENAI_API_KEY"] = key
154 | 
155 |         LOGGER.debug("CodexHelper working directory: %s", self.working_dir)
156 | 
157 |     # ------------------------------------------------------------------
158 |     # Public API
159 |     # ------------------------------------------------------------------
160 | 
161 |     def run_codex_command(
162 |         self,
163 |         instructions: str | Sequence[str],
164 |         *,
165 |         additional_context: str | None = None,
166 |         max_attempts: int = 3,
167 |         timeout: int = 1800,
168 |         max_cli_retries: int = 3,
169 |         initial_backoff: float = 3.0,
170 |     ) -> str | None:
171 |         """Execute Codex with robust retry logic and return its stdout or *None*."""
172 | 
173 |         SENTINEL = "done"
174 |         RETRY_ERRORS = (
175 |             "Connection closed prematurely",
176 |             "internal error",
177 |             "failed to send request",
178 |             "model failed to respond",
179 |             "Network error while contacting OpenAI",
180 |         )
181 | 
182 |         done_path = self.working_dir / SENTINEL
183 | 
184 |         # Build prompt body once (mirrors original behaviour).
185 |         if isinstance(instructions, (list, tuple)):
186 |             tasks = "\n".join(str(i) for i in instructions)
187 |         else:
188 |             tasks = str(instructions)
189 | 
190 |         prompt_parts: List[str] = [
191 |             "You are an expert engineer. Apply the edits below - no refactors.",
192 |             "When ALL tasks are complete, output a summary of your changes,",
193 |             "then populate a file called **done** in the repo root (`./done`).",
194 |             "Write the relative path to the **single** most relevant file you created or modified into `./done`.",
195 |             f"## Tasks\n{tasks}",
196 |         ]
197 | 
198 |         if additional_context:
199 |             prompt_parts.append(
200 |                 textwrap.dedent(
201 |                     f"""
202 |                     ---
203 |                     ### Additional context
204 |                     {additional_context.strip()}
205 |                     ---
206 |                     """
207 |                 )
208 |             )
209 | 
210 |         prompt = "\n".join(prompt_parts).strip()
211 | 
212 |         # ----------------------------------------------------------------
213 |         # Outer loop – retry full patch attempt if no diff produced.
214 |         # ----------------------------------------------------------------
215 | 
216 |         for attempt in range(1, max_attempts + 1):
217 |             LOGGER.info("[CodexHelper] patch attempt %d/%d", attempt, max_attempts)
218 | 
219 |             done_path.unlink(missing_ok=True)
220 | 
221 |             # ----------------------------------------------------------------
222 |             # Inner loop – retry CLI invocation on transient errors.
223 |             # ----------------------------------------------------------------
224 | 
225 |             cli_try = 0
226 |             backoff = initial_backoff
227 | 
228 |             while cli_try < max_cli_retries:
229 |                 cli_try += 1
230 |                 LOGGER.info("[CodexHelper] launch #%d (backoff=%.1fs)", cli_try, backoff)
231 | 
232 |                 cmd = [
233 |                     self.codex_cli,
234 |                     "exec",
235 |                     "-m",
236 |                     self.codex_model,
237 |                     "-c model_reasoning_effort=high",
238 |                     "-c disable_response_storage=true",
239 |                     "-c sandbox_mode="+self.sandbox_mode,
240 |                     "--full-auto" if self.approval_mode == "full-auto" else "-c approval_policy="+self.approval_mode,
241 |                     prompt,
242 |                 ]
243 | 
244 |                 master_fd, slave_fd = pty.openpty()
245 |                 proc = subprocess.Popen(
246 |                     cmd,
247 |                     cwd=self.working_dir,
248 |                     stdin=slave_fd,
249 |                     stdout=slave_fd,
250 |                     stderr=slave_fd,
251 |                     env=os.environ.copy(),
252 |                     text=False,
253 |                     close_fds=True,
254 |                 )
255 |                 os.close(slave_fd)
256 | 
257 |                 captured_chunks: List[str] = []
258 |                 start_time = time.time()
259 |                 saw_retry_error = False
260 | 
261 |                 # Helper to perform 3-stage kill.
262 |                 def _kill_proc(grace: float = 4.0) -> None:
263 |                     for sig in (signal.SIGINT, signal.SIGTERM, signal.SIGKILL):
264 |                         if proc.poll() is not None:
265 |                             return
266 |                         try:
267 |                             proc.send_signal(sig)
268 |                             proc.wait(timeout=grace)
269 |                         except subprocess.TimeoutExpired:
270 |                             continue
271 | 
272 |                 # Use non-blocking read with select() similar to original.
273 |                 try:
274 |                     with os.fdopen(master_fd, "rb", buffering=0) as stream:
275 |                         while True:
276 |                             # Wall-clock timeout.
277 |                             if time.time() - start_time > timeout:
278 |                                 LOGGER.error("[CodexHelper] hard timeout")
279 |                                 raise TimeoutError
280 | 
281 |                             # Sentinel detected?
282 |                             if done_path.exists():
283 |                                 LOGGER.info("[CodexHelper] done flag detected")
284 |                                 _kill_proc()
285 |                                 break
286 | 
287 |                             ready, *_ = select.select([stream], [], [], 1.0)
288 |                             if ready:
289 |                                 try:
290 |                                     chunk = stream.read(4096)
291 |                                 except OSError as e:
292 |                                     if e.errno == errno.EIO:  # PTY closed
293 |                                         break
294 |                                     raise
295 | 
296 |                                 if not chunk:
297 |                                     break  # EOF
298 | 
299 |                                 text = chunk.decode("utf-8", errors="replace")
300 |                                 print(text, end="")  # live pass-through to caller
301 |                                 captured_chunks.append(text)
302 | 
303 |                                 # Check for retryable error messages on the fly.
304 |                                 if any(err in text for err in RETRY_ERRORS):
305 |                                     LOGGER.warning("[CodexHelper] retryable error detected → abort")
306 |                                     saw_retry_error = True
307 |                                     _kill_proc()
308 |                                     break
309 | 
310 |                             if proc.poll() is not None and not ready:
311 |                                 break
312 |                 except TimeoutError:
313 |                     _kill_proc()
314 |                     saw_retry_error = True
315 |                     LOGGER.warning("[CodexHelper] Codex timeout; will retry")
316 | 
317 |                 # Decide if we should relaunch the CLI.
318 |                 if saw_retry_error:
319 |                     time.sleep(backoff)
320 |                     backoff *= 2
321 |                     continue  # restart inner CLI loop
322 | 
323 |                 # CLI completed without retryable error; break inner loop.
324 |                 break
325 | 
326 |             # After inner loop – did Codex create the sentinel and produce diff?
327 | 
328 |             if not done_path.exists():
329 |                 LOGGER.warning("[CodexHelper] sentinel not created; next attempt")
330 |                 continue  # outer attempt loop
331 | 
332 |             # Refresh repo to ensure it sees new changes.
333 |             self.repo.git.add(A=True)
334 | 
335 |             if self.repo.git.diff('HEAD'):
336 |                 LOGGER.info("[CodexHelper] diff produced — success")
337 |                 return "".join(captured_chunks)
338 | 
339 |             LOGGER.info("[CodexHelper] sentinel present but no diff; next attempt")
340 | 
341 |         LOGGER.warning("[CodexHelper] exhausted attempts — no edits produced")
342 |         return None
343 | 
344 | 
345 | # ---------------------------------------------------------------------------
346 | # Backwards-compat alias – internal code may still import CodexPatcher.
347 | # ---------------------------------------------------------------------------
348 | 
349 | 
350 | CodexPatcher = CodexHelper
351 | 


--------------------------------------------------------------------------------
/harness_generator/yamls/c-projects.yaml:
--------------------------------------------------------------------------------
  1 | projects:
  2 |   - project_name: apache-httpd
  3 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
  4 |     fuzz_tooling_ref: master
  5 |   - project_name: bc-gh
  6 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
  7 |     fuzz_tooling_ref: master
  8 |   - project_name: bind9
  9 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 10 |     fuzz_tooling_ref: master
 11 |   - project_name: bluez
 12 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 13 |     fuzz_tooling_ref: master
 14 |   - project_name: cairo
 15 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 16 |     fuzz_tooling_ref: master
 17 |   - project_name: cgif
 18 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 19 |     fuzz_tooling_ref: master
 20 |   - project_name: civetweb
 21 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 22 |     fuzz_tooling_ref: master
 23 |   - project_name: clib
 24 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 25 |     fuzz_tooling_ref: master
 26 |   - project_name: cmake
 27 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 28 |     fuzz_tooling_ref: master
 29 |   - project_name: coturn
 30 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 31 |     fuzz_tooling_ref: master
 32 |   - project_name: cpuinfo
 33 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 34 |     fuzz_tooling_ref: master
 35 |   - project_name: croaring
 36 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 37 |     fuzz_tooling_ref: master
 38 |   - project_name: cryptsetup
 39 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 40 |     fuzz_tooling_ref: master
 41 |   - project_name: cups
 42 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 43 |     fuzz_tooling_ref: master
 44 |   - project_name: cyclonedds
 45 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 46 |     fuzz_tooling_ref: master
 47 |   - project_name: dbus-broker
 48 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 49 |     fuzz_tooling_ref: master
 50 |   - project_name: dnsmasq
 51 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 52 |     fuzz_tooling_ref: master
 53 |   - project_name: dovecot
 54 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 55 |     fuzz_tooling_ref: master
 56 |   - project_name: e2fsprogs
 57 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 58 |     fuzz_tooling_ref: master
 59 |   - project_name: edk2
 60 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 61 |     fuzz_tooling_ref: master
 62 |   - project_name: faad2
 63 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 64 |     fuzz_tooling_ref: master
 65 |   - project_name: flex
 66 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 67 |     fuzz_tooling_ref: master
 68 |   - project_name: freerdp
 69 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 70 |     fuzz_tooling_ref: master
 71 |   - project_name: fribidi
 72 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 73 |     fuzz_tooling_ref: master
 74 |   - project_name: fwupd
 75 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 76 |     fuzz_tooling_ref: master
 77 |   - project_name: gdbm
 78 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 79 |     fuzz_tooling_ref: master
 80 |   - project_name: gdk-pixbuf
 81 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 82 |     fuzz_tooling_ref: master
 83 |   - project_name: gnucobol
 84 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 85 |     fuzz_tooling_ref: master
 86 |   - project_name: gpac
 87 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 88 |     fuzz_tooling_ref: master
 89 |   - project_name: gpsd
 90 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 91 |     fuzz_tooling_ref: master
 92 |   - project_name: gss-ntlmssp
 93 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 94 |     fuzz_tooling_ref: master
 95 |   - project_name: h3
 96 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
 97 |     fuzz_tooling_ref: master
 98 |   - project_name: hdf5
 99 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
100 |     fuzz_tooling_ref: master
101 |   - project_name: hiredis
102 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
103 |     fuzz_tooling_ref: master
104 |   - project_name: hwloc
105 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
106 |     fuzz_tooling_ref: master
107 |   - project_name: igraph
108 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
109 |     fuzz_tooling_ref: master
110 |   - project_name: inchi
111 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
112 |     fuzz_tooling_ref: master
113 |   - project_name: inih
114 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
115 |     fuzz_tooling_ref: master
116 |   - project_name: jpegoptim
117 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
118 |     fuzz_tooling_ref: master
119 |   - project_name: jq
120 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
121 |     fuzz_tooling_ref: master
122 |   - project_name: kamailio
123 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
124 |     fuzz_tooling_ref: master
125 |   - project_name: krb5
126 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
127 |     fuzz_tooling_ref: master
128 |   - project_name: libbpf
129 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
130 |     fuzz_tooling_ref: master
131 |   - project_name: libcacard
132 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
133 |     fuzz_tooling_ref: master
134 |   - project_name: libconfig
135 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
136 |     fuzz_tooling_ref: master
137 |   - project_name: libcue
138 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
139 |     fuzz_tooling_ref: master
140 |   - project_name: libdwarf
141 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
142 |     fuzz_tooling_ref: master
143 |   - project_name: libfuse
144 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
145 |     fuzz_tooling_ref: master
146 |   - project_name: libiec61850
147 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
148 |     fuzz_tooling_ref: master
149 |   - project_name: libjpeg-turbo
150 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
151 |     fuzz_tooling_ref: master
152 |   - project_name: liblouis
153 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
154 |     fuzz_tooling_ref: master
155 |   - project_name: libmodbus
156 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
157 |     fuzz_tooling_ref: master
158 |   - project_name: liboqs
159 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
160 |     fuzz_tooling_ref: master
161 |   - project_name: libpg_query
162 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
163 |     fuzz_tooling_ref: master
164 |   - project_name: libproxy
165 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
166 |     fuzz_tooling_ref: master
167 |   - project_name: libredwg
168 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
169 |     fuzz_tooling_ref: master
170 |   - project_name: libsndfile
171 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
172 |     fuzz_tooling_ref: master
173 |   - project_name: libspdm
174 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
175 |     fuzz_tooling_ref: master
176 |   - project_name: libssh
177 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
178 |     fuzz_tooling_ref: master
179 |   - project_name: libucl
180 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
181 |     fuzz_tooling_ref: master
182 |   - project_name: libunwind
183 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
184 |     fuzz_tooling_ref: master
185 |   - project_name: libwebsockets
186 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
187 |     fuzz_tooling_ref: master
188 |   - project_name: libxlsxwriter
189 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
190 |     fuzz_tooling_ref: master
191 |   - project_name: libyal
192 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
193 |     fuzz_tooling_ref: master
194 |   - project_name: libyang
195 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
196 |     fuzz_tooling_ref: master
197 |   - project_name: lighttpd
198 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
199 |     fuzz_tooling_ref: master
200 |   - project_name: lldpd
201 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
202 |     fuzz_tooling_ref: master
203 |   - project_name: llhttp
204 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
205 |     fuzz_tooling_ref: master
206 |   - project_name: lua
207 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
208 |     fuzz_tooling_ref: master
209 |   - project_name: lxc
210 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
211 |     fuzz_tooling_ref: master
212 |   - project_name: md4c
213 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
214 |     fuzz_tooling_ref: master
215 |   - project_name: mdbtools
216 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
217 |     fuzz_tooling_ref: master
218 |   - project_name: memcached
219 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
220 |     fuzz_tooling_ref: master
221 |   - project_name: miniz
222 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
223 |     fuzz_tooling_ref: master
224 |   - project_name: mosquitto
225 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
226 |     fuzz_tooling_ref: master
227 |   - project_name: mpv
228 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
229 |     fuzz_tooling_ref: master
230 |   - project_name: ms-tpm-20-ref
231 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
232 |     fuzz_tooling_ref: master
233 |   - project_name: msquic
234 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
235 |     fuzz_tooling_ref: master
236 |   - project_name: networkmanager
237 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
238 |     fuzz_tooling_ref: master
239 |   - project_name: nginx
240 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
241 |     fuzz_tooling_ref: master
242 |   - project_name: nokogiri
243 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
244 |     fuzz_tooling_ref: master
245 |   - project_name: ntpsec
246 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
247 |     fuzz_tooling_ref: master
248 |   - project_name: numactl
249 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
250 |     fuzz_tooling_ref: master
251 |   - project_name: oniguruma
252 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
253 |     fuzz_tooling_ref: master
254 |   - project_name: open5gs
255 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
256 |     fuzz_tooling_ref: master
257 |   - project_name: opendds
258 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
259 |     fuzz_tooling_ref: master
260 |   - project_name: opensips
261 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
262 |     fuzz_tooling_ref: master
263 |   - project_name: openslide
264 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
265 |     fuzz_tooling_ref: master
266 |   - project_name: openvpn
267 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
268 |     fuzz_tooling_ref: master
269 |   - project_name: opusfile
270 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
271 |     fuzz_tooling_ref: master
272 |   - project_name: oss-fuzz-example
273 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
274 |     fuzz_tooling_ref: master
275 |   - project_name: ostree
276 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
277 |     fuzz_tooling_ref: master
278 |   - project_name: p11-kit
279 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
280 |     fuzz_tooling_ref: master
281 |   - project_name: pacemaker
282 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
283 |     fuzz_tooling_ref: master
284 |   - project_name: pidgin
285 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
286 |     fuzz_tooling_ref: master
287 |   - project_name: pjsip
288 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
289 |     fuzz_tooling_ref: master
290 |   - project_name: plan9port
291 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
292 |     fuzz_tooling_ref: master
293 |   - project_name: pngquant
294 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
295 |     fuzz_tooling_ref: master
296 |   - project_name: postfix
297 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
298 |     fuzz_tooling_ref: master
299 |   - project_name: postgresql
300 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
301 |     fuzz_tooling_ref: master
302 |   - project_name: proftpd
303 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
304 |     fuzz_tooling_ref: master
305 |   - project_name: protobuf-c
306 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
307 |     fuzz_tooling_ref: master
308 |   - project_name: pupnp
309 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
310 |     fuzz_tooling_ref: master
311 |   - project_name: pycryptodome
312 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
313 |     fuzz_tooling_ref: master
314 |   - project_name: python3-libraries
315 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
316 |     fuzz_tooling_ref: master
317 |   - project_name: qemu
318 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
319 |     fuzz_tooling_ref: master
320 |   - project_name: quickjs
321 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
322 |     fuzz_tooling_ref: master
323 |   - project_name: rabbitmq-c
324 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
325 |     fuzz_tooling_ref: master
326 |   - project_name: rauc
327 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
328 |     fuzz_tooling_ref: master
329 |   - project_name: rtpproxy
330 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
331 |     fuzz_tooling_ref: master
332 |   - project_name: samba
333 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
334 |     fuzz_tooling_ref: master
335 |   - project_name: selinux
336 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
337 |     fuzz_tooling_ref: master
338 |   - project_name: sound-open-firmware
339 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
340 |     fuzz_tooling_ref: master
341 |   - project_name: spdk
342 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
343 |     fuzz_tooling_ref: master
344 |   - project_name: sudoers
345 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
346 |     fuzz_tooling_ref: master
347 |   - project_name: tarantool
348 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
349 |     fuzz_tooling_ref: master
350 |   - project_name: tdengine
351 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
352 |     fuzz_tooling_ref: master
353 |   - project_name: tinysparql
354 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
355 |     fuzz_tooling_ref: master
356 |   - project_name: tmux
357 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
358 |     fuzz_tooling_ref: master
359 |   - project_name: unbound
360 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
361 |     fuzz_tooling_ref: master
362 |   - project_name: unit
363 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
364 |     fuzz_tooling_ref: master
365 |   - project_name: utf8proc
366 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
367 |     fuzz_tooling_ref: master
368 |   - project_name: util-linux
369 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
370 |     fuzz_tooling_ref: master
371 |   - project_name: varnish
372 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
373 |     fuzz_tooling_ref: master
374 |   - project_name: vlc
375 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
376 |     fuzz_tooling_ref: master
377 |   - project_name: vulkan-loader
378 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
379 |     fuzz_tooling_ref: master
380 |   - project_name: w3m
381 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
382 |     fuzz_tooling_ref: master
383 |   - project_name: wamr
384 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
385 |     fuzz_tooling_ref: master
386 |   - project_name: wasm3
387 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
388 |     fuzz_tooling_ref: master
389 |   - project_name: wazuh
390 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
391 |     fuzz_tooling_ref: master
392 |   - project_name: wolfmqtt
393 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
394 |     fuzz_tooling_ref: master
395 |   - project_name: xbps
396 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
397 |     fuzz_tooling_ref: master
398 |   - project_name: xen
399 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
400 |     fuzz_tooling_ref: master
401 |   - project_name: xs
402 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
403 |     fuzz_tooling_ref: master
404 |   - project_name: zip
405 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
406 |     fuzz_tooling_ref: master
407 |   - project_name: zydis
408 |     fuzz_tooling_url: git@github.com:google/oss-fuzz.git
409 |     fuzz_tooling_ref: master


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SHERPA — Security Harness Engineering for Robust Program Analysis
  2 | 
  3 | <div align="center">
  4 | 
  5 | <img src="assets/sherpa.jpg" alt="SHERPA - Security Harness Engineering for Robust Program Analysis" width="400">
  6 | 
  7 | **Revolutionary LLM-powered fuzzing that targets attacker-controlled entry points**  
  8 | *Finding real bugs by moving fuzzing up the stack to where attackers actually hit production systems*
  9 | 
 10 | <sub><em>Developed as part of DARPA's <strong><a href="https://aicyberchallenge.com">AI Cyber Challenge (AIxCC)</a></strong></em></sub>
 11 | 
 12 | </div>
 13 | 
 14 | ---
 15 | 
 16 | ## 🎯 Impact: Real Bugs in Production Software
 17 | 
 18 | <div align="center">
 19 | 
 20 | | 💥 **Raw Crashes Found** | 🔍 **Auto-Filtered** | ✅ **Validated CVE-Class Bugs** | 🎯 **Precision Rate** |
 21 | |:---:|:---:|:---:|:---:|
 22 | | **127+** | **100+** | **18** | **67%** |
 23 | 
 24 | </div>
 25 | 
 26 | **What makes this significant:**
 27 | * 🎯 **Targeted Discovery**: 18 validated bugs from just 27 high-signal crashes (vs. typical fuzzing's 1-5% success rate)
 28 | * 🚀 **Speed**: Minutes of targeted fuzzing vs. weeks of traditional coverage-guided fuzzing
 29 | * 🔍 **Quality**: LLM crash analysis automatically filters 80% of false positives before human review
 30 | * 📦 **Production Ready**: Every bug comes with reproducer, analysis, and patch guidance
 31 | * 🔒 **Responsible Process**: Bugs are disclosed responsibly to maintainers with coordinated timelines
 32 | 
 33 | <table>
 34 | <tr>
 35 | <td width="60%">
 36 | 
 37 | **The Problem:** Traditional fuzzing targets low-level APIs that attackers rarely reach directly. Real exploits happen at high-level parsers processing attacker-controlled files, packets, and streams.
 38 | 
 39 | **SHERPA's Solution:** An LLM pipeline that automatically identifies and generates fuzz harnesses for these high-value, attacker-controlled entry points in OSS-Fuzz projects—the exact functions that parse malicious PDFs, process network packets, and handle file uploads.
 40 | 
 41 | **🚀 Key Innovation:** Moving fuzzing *up the stack* to where real attacks happen, with LLM agents that understand security impact and generate maintainer-ready artifacts.
 42 | 
 43 | </td>
 44 | <td width="40%" align="center">
 45 | 
 46 | <picture>
 47 |   <source media="(prefers-color-scheme: dark)" srcset="assets/logo_white.png">
 48 |   <source media="(prefers-color-scheme: light)" srcset="assets/logo_black.png">
 49 |   <img src="assets/logo_white.png" alt="Kudu Logo" width="300">
 50 | </picture>
 51 | 
 52 | </td>
 53 | </tr>
 54 | </table>
 55 | 
 56 | * 🧪 **27 crash‑inducing inputs auto‑produced** across multiple OSS‑Fuzz projects
 57 | * ✅ **18 validated bugs** after human triage (**\~67%** of auto‑produced crashes)
 58 | * 🧹 **100+ false positives crashes auto‑filtered** by our LLM crash‑analysis agent
 59 | * 🧵 **New high‑value harnesses**: focus on top‑level parsers and public APIs that consume untrusted data
 60 | 
 61 | ## 🚀 Quick Start
 62 | 
 63 | ```bash
 64 | # Clone the repository
 65 | git clone https://github.com/AIxCyberChallenge/sherpa.git
 66 | 
 67 | # Run environment setup (Supports macOS & Linux)
 68 | make setup
 69 | 
 70 | # Generate harnesses for a target project (leveldb)
 71 | # On Apple Silicon (M1/M2/M4), ensure Docker uses amd64 architecture:
 72 | export DOCKER_DEFAULT_PLATFORM=linux/amd64
 73 | # Set your OpenAI API key (required for harness generation)
 74 | export OPENAI_API_KEY="your-api-key-here"
 75 | # Run an example repo against the existing harness_generator_yamls/leveldb.yaml file
 76 | make leveldb
 77 | ```
 78 | 
 79 | ## 🏗️ How It Works
 80 | 
 81 | ```mermaid
 82 | flowchart TD
 83 |     A[📦 OSS-Fuzz Project] --> B{🔍 Coverage Analysis}
 84 |     B --> C[🎯 Identify Unfuzzed Entry Points]
 85 |     
 86 |     C --> D[🤖 LLM Harness Generation]
 87 |     D --> E[🔨 Build & Test Harness]
 88 |     E --> F{✅ Build Success?}
 89 |     F -->|No| G[🛠️ Auto-Fix Build Issues]
 90 |     G --> E
 91 |     
 92 |     F -->|Yes| H[🎲 Fuzzing Campaign]
 93 |     H --> I[💥 Crash Detection]
 94 |     I --> J[🧠 LLM Crash Analysis]
 95 |     J --> K{🔬 Valid Bug?}
 96 |     
 97 |     K -->|No| L[❌ Filter False Positive]
 98 |     K -->|Yes| M[📋 Generate Bug Report]
 99 |     
100 |     M --> N[✅ Validated Bug + Artifacts]
101 |     L --> O[📊 Metrics Update]
102 |     N --> O
103 |     
104 |     %% Position nodes to use horizontal space better
105 |     B ~~~ D
106 |     E ~~~ H
107 |     J ~~~ M
108 |     L ~~~ N
109 |     
110 |     style A fill:#e1f5fe
111 |     style D fill:#f3e5f5
112 |     style J fill:#f3e5f5
113 |     style N fill:#e8f5e8
114 |     style L fill:#ffebee
115 |     
116 |     classDef aiNode fill:#f3e5f5,stroke:#9c27b0,stroke-width:2px
117 |     classDef successNode fill:#e8f5e8,stroke:#4caf50,stroke-width:2px
118 |     classDef errorNode fill:#ffebee,stroke:#f44336,stroke-width:2px
119 |     
120 |     class D,J aiNode
121 |     class N successNode
122 |     class L errorNode
123 | ```
124 | 
125 | **Pipeline Stages:**
126 | 
127 | 1. **📊 Coverage Gap Analysis**: Identify unfuzzed, attacker-controlled entry points
128 | 2. **🤖 LLM Harness Generation**: Generate targeted fuzz harnesses using LLM agents  
129 | 3. **🔨 Build-Until-Green**: Automatically fix compilation and build issues
130 | 4. **🎯 Targeted Fuzzing**: Run focused fuzzing campaigns on new harnesses
131 | 5. **🧠 LLM Crash Triage**: Automatically filter false positives and analyze crashes
132 | 6. **📋 Maintainer Artifacts**: Deliver actionable bug reports with repro steps
133 | 
134 | ---
135 | 
136 | Through these techniques we were able to produce **new harnesses** as well as **corresponding crashing fuzzer inputs**. Each auto‑produced crash is summarized below.
137 | 
138 | # Automatically Produced Crashes
139 | 
140 | | Bug Type                             | CWE     | Repo Count |
141 | |--------------------------------------|---------|------------|
142 | | Uncontrolled memory allocation (DoS) | CWE-770 | 12         |
143 | | Heap buffer overflow                 | CWE-122 | 4          |
144 | | Off-by-one heap buffer overflow      | CWE-193 | 3          |
145 | | Null pointer dereference             | CWE-476 | 2          |
146 | | Out-of-bounds string replacement     | CWE-787 | 1          |
147 | | Performance hang                     | CWE-834 | 1          |
148 | | Infinite loop (DoS)                  | CWE-835 | 1          |
149 | | Double-free                          | CWE-415 | 1          |
150 | | Uninitialized memory read            | CWE-908 | 1          |
151 | | Stack buffer underflow               | CWE-124 | 1          |
152 | 
153 | From these crashes, human engineers performed triage and validation to produce a set of **legitimate bugs**. The validated set is shown below.
154 | 
155 | # Validated Bugs
156 | 
157 | | Bug Type                             | CWE     | Repo Count |
158 | |--------------------------------------|---------|------------|
159 | | Uncontrolled memory allocation (DoS) | CWE-770 | 8          |
160 | | Off-by-one heap buffer overflow      | CWE-193 | 3          |
161 | | Heap buffer overflow                 | CWE-122 | 2          |
162 | | Infinite loop (DoS)                  | CWE-835 | 1          |
163 | | Null pointer dereference             | CWE-476 | 1          |
164 | | Stack buffer underflow               | CWE-124 | 1          |
165 | | Double-free                          | CWE-415 | 1          |
166 | | Uninitialized memory read            | CWE-908 | 1          |
167 | 
168 | ---
169 | <!-- Embedded chart (SVG with PNG fallback). Left-aligned -->
170 | <p>
171 |   <picture>
172 |     <source type="image/svg+xml" srcset="assets/validated_bugs.png">
173 |     <img src="assets/validated_bugs.png" alt="Validated Bugs by Category (n=18)" width="600">
174 |   </picture>
175 |   <br><sub>Figure: Validated bugs by category (n=18).</sub>
176 | </p>
177 | 
178 | > 
179 | ---
180 | **📋 Responsible Disclosure Note**
181 | 
182 | We provide a detailed [LevelDB case study](leveldb_writeup/workflow.md) as a complete example of SHERPA's methodology. The remaining 17 validated bugs are currently undergoing responsible disclosure with their respective maintainers. Full technical details and attribution to SHERPA will be published upon completion of the disclosure process, ensuring maintainers have adequate time to develop and deploy patches.
183 | 
184 | ---
185 | 
186 | ## False Positives & Quality Control
187 | 
188 | We encountered **>100 raw false positives** (e.g., harness errors, misuse of library APIs, or practically unreachable conditions). These were **automatically triaged** by an LLM crash‑analysis agent and **excluded** from the *Automatically Produced Crashes* table. The remaining items were then **manually triaged** to produce the *Validated Bugs* table above.
189 | 
190 | **How we mitigate false positives (two layers):**
191 | 
192 | 1. **Ex‑ante safeguards in the harness‑generator prompt**
193 |    The generator is instructed to:
194 | 
195 |    * Target **public/documented APIs** and **high‑level parsers** that ingest attacker‑controlled inputs.
196 |    * **Mirror canonical initialization** and teardown patterns from project examples.
197 |    * **Honor documented preconditions** (sizes, flags, state); avoid undefined behavior and unrealistic call sequences.
198 |    * Prefer **file/stream entry points** and add a small, meaningful seed corpus.
199 |    * Compile with sanitizers and ensure resource hygiene (close files, free objects).
200 | 
201 | 2. **Ex‑post LLM crash analysis (automatic triage)**
202 |    For each crash, the agent:
203 | 
204 |    * Parses sanitizer logs and **attributes the fault** to project/library vs. harness code.
205 |    * Detects **harness anti‑patterns** (calling private/internal functions, skipping required init, invalid lifetimes, wrong buffer sizes/flags).
206 |    * Labels likely non‑bugs as false-positives and filters them out.
207 |    * Produces `crash_analysis.md` with root cause, impact, and patch guidance for retained findings.
208 | 
209 | This two‑stage process yields a **high‑signal set** of auto‑produced crashes (first table), which then undergo **human validation** (second table) to confirm exploitability and practical relevance.
210 | 
211 | ---
212 | 
213 | ## 🚀 Why This Changes Everything
214 | 
215 | **Traditional fuzzing finds bugs that attackers can't reach. SHERPA finds bugs that attackers exploit.**
216 | 
217 | ### **The Fundamental Problem**
218 | Current fuzzing tools excel at hitting internal parsing functions, but attackers don't call `parse_header_field()` directly. They upload malicious PDFs, send crafted network packets, or submit malformed form data. There's a massive gap between what we fuzz and what attackers actually hit.
219 | 
220 | ### **SHERPA's Paradigm Shift**
221 | 
222 | | Traditional Approach | SHERPA Approach |
223 | |---------------------|---------------|
224 | | 🎯 **Target**: Low-level internal APIs | 🎯 **Target**: Attacker-controlled entry points |
225 | | ⏱️ **Speed**: Weeks of coverage-guided fuzzing | ⏱️ **Speed**: Minutes of targeted fuzzing |
226 | | 🎲 **Success**: 1-5% of crashes are real bugs | 🎲 **Success**: 67% precision rate |
227 | | 👨‍💻 **Triage**: Manual analysis of 100+ crashes | 👨‍💻 **Triage**: AI pre-filters to 18 validated bugs |
228 | | 📋 **Output**: Raw crash dumps | 📋 **Output**: CVE-ready reports with patches |
229 | 
230 | ### **Real-World Impact**
231 | - **Security Teams**: Find exploitable bugs, not just fuzzing artifacts
232 | - **OSS Maintainers**: Receive actionable reports with reproduction steps and patch guidance  
233 | - **Red Teams**: Discover attack surfaces that traditional tools miss
234 | - **Researchers**: Scale expert-level security analysis using LLM reasoning
235 | 
236 | **Bottom Line**: We're not just improving fuzzing efficiency—we're changing what gets fuzzed to match real attack patterns.
237 | 
238 | ---
239 | 
240 | ## 🤖 LLM Architecture & AI Integration
241 | 
242 | ### **Model Selection & Configuration**
243 | - **Primary Model**: OpenAI o3 (latest reasoning model) for complex code generation and analysis
244 | - **Fallback Model**: o4-mini for lighter tasks like report generation  
245 | - **API Integration**: Standard OpenAI-compatible endpoints with robust retry logic
246 | - **Execution Environment**: Custom `CodexHelper` wrapper with pseudo-terminal integration
247 | 
248 | ### **Multi-Stage AI Pipeline**
249 | 
250 | **Stage 1: Intelligent Target Selection**
251 | - Analyzes OSS-Fuzz project structure and existing harnesses
252 | - Identifies high-value, unfuzzed entry points using security heuristics
253 | - Prioritizes public APIs that process attacker-controlled data
254 | 
255 | **Stage 2: Context-Aware Harness Generation**  
256 | - Clones target repositories for API signature validation
257 | - Generates libFuzzer harnesses with proper library initialization
258 | - Includes realistic setup patterns to prevent false positives
259 | 
260 | **Stage 3: Automated Build Debugging**
261 | - Captures compiler errors and automatically generates minimal fixes
262 | - Iterative build-fix cycles (configurable retry limit)
263 | - Preserves project structure while ensuring compilation success
264 | 
265 | **Stage 4: Semantic Corpus Generation**
266 | - Creates meaningful seed inputs based on harness analysis
267 | - Generates both text and binary test cases as appropriate
268 | - Focuses on edge cases and boundary conditions
269 | 
270 | **Stage 5: Intelligent Crash Triage**
271 | - Distinguishes genuine bugs from harness implementation errors
272 | - Uses sentinel patterns (`HARNESS ERROR`) for automatic filtering
273 | - Performs root cause analysis with impact assessment
274 | 
275 | ### **Key Prompt Engineering Innovations**
276 | 
277 | 1. **Security-First Target Selection**: AI agents prioritize real attack surfaces over internal APIs
278 | 2. **Anti-False-Positive Design**: Built-in validation prevents common harness implementation errors
279 | 3. **Automated Quality Control**: Two-layer filtering (prompt guidance + post-crash analysis)
280 | 4. **Maintainer-Ready Output**: Structured reports with CWE mapping and actionable patch guidance
281 | 
282 | ### **Technical Implementation Details**
283 | 
284 | ```python
285 | # Core architecture components
286 | class HarnessGenerator:
287 |     def _invoke_codex_for_harness(self) -> None:
288 |         # Targets highest-level APIs with attacker-controlled input
289 |         # Validates function signatures against cloned repositories
290 |         # Ensures realistic library usage patterns
291 |         
292 |     def _generate_bug_report(self) -> None:
293 |         # Analyzes crashes for genuine vs. harness-induced bugs
294 |         # Generates CVE-ready reports with impact assessment
295 |         # Creates reproduction scripts for maintainers
296 | ```
297 | 
298 | **Advanced Features:**
299 | - **Robust Error Handling**: Automatic retry with exponential backoff for API failures
300 | - **Git Integration**: Tracks changes and ensures clean diffs for each AI intervention
301 | - **Resource Management**: Configurable timeouts and memory limits for long-running operations
302 | - **Parallel Processing**: Concurrent harness generation across multiple projects
303 | 
304 | **Full implementation details available in source code under GPL v3 license.**
305 | 
306 | ---
307 | 
308 | ## 🛠️ Configuration & Advanced Usage
309 | 
310 | ### Environment Setup
311 | ```bash
312 | # Set your OpenAI API key (required for harness generation)
313 | export OPENAI_API_KEY="your-api-key-here"
314 | 
315 | # Optional: Configure target projects
316 | export OSS_FUZZ_PATH="/path/to/oss-fuzz"
317 | ```
318 | 
319 | ### Project Configuration
320 | Projects are configured via YAML files in `harness_generator/yamls/`:
321 | - `leveldb.yaml` - LevelDB-specific settings
322 | 
323 | ### Batch Processing
324 | ```bash
325 | # Generate harnesses for multiple projects
326 | cd harness_generator
327 | python batch_generate.py --targets yamls/c-projects.yaml --threads 4
328 | ```
329 | 
330 | ---
331 | 
332 | ## 🤝 Contributing
333 | 
334 | We welcome contributions! This project was developed for the security research community.
335 | 
336 | ### Ways to Contribute:
337 | - **🐛 Bug Reports**: Found an issue? Open a GitHub issue
338 | - **🎯 New Target Projects**: Add YAML configs for additional OSS-Fuzz projects
339 | - **🧠 LLM Improvements**: Enhance prompt engineering or crash analysis
340 | - **📊 Evaluation**: Run SHERPA on new projects and share results
341 | 
342 | ### Development Setup:
343 | ```bash
344 | git clone https://github.com/AIxCyberChallenge/sherpa.git
345 | cd sherpa/harness_generator
346 | pip install -r requirements.txt
347 | bash setup-env.sh
348 | ```
349 | 
350 | ### Responsible Disclosure:
351 | SHERPA follows industry-standard responsible disclosure practices:
352 | - **Coordinated timelines** with maintainers (typically 90-day disclosure window)
353 | - **Patch assistance** provided to development teams when requested  
354 | - **Public attribution** to SHERPA methodology upon disclosure completion
355 | - **CVE coordination** through appropriate channels (MITRE, project security teams)
356 | 
357 | For detailed examples of our methodology and results, see the [LevelDB case study](leveldb_writeup/workflow.md).
358 | 
359 | ---
360 | 
361 | ### Unharnessed Fuzzing 
362 | 
363 | This project also includes a utility to generate OSS-Fuzz compliant fuzzing harnesses for projects
364 | that are not currently supported by OSS-FUZZ
365 | 
366 | #### Fuzz a single repository
367 | ```
368 | cd harness_generator/src
369 | python fuzz_unharnessed_repo.py --repo <git-url>
370 | 
371 | # for example
372 | python fuzz_unharnessed_repo.py --repo https://github.com/syoyo/tinyexr.git
373 | ```
374 | 
375 | ## 📜 License & Citation
376 | 
377 | **License**: This project is released under the MIT License - see `LICENSE` for details.
378 | 
379 | **Citation**: If you use SHERPA in your research, please cite:
380 | ```bibtex
381 | @misc{sherpa2024,
382 |   title={SHERPA: Security Harness Engineering for Robust Program Analysis},
383 |   author={Kudu Dynamics},
384 |   year={2025},
385 |   url={https://github.com/AIxCyberChallenge/sherpa.git},
386 |   note={Developed as part of DARPA's AI Cyber Challenge (AIxCC)}
387 | }
388 | ```
389 | 
390 | **Acknowledgments**: 
391 | This work was developed as part of **DARPA's AI Cyber Challenge (AIxCC)**, which brings together leading experts in LLMs and cybersecurity to safeguard software critical to national infrastructure. Learn more at [aicyberchallenge.com](https://aicyberchallenge.com).
392 | 
393 | ---
394 | 
395 | <div align="center">
396 | 
397 | **🔒 Built for Security Researchers, by Security Researchers**
398 | 
399 | *Developed by [Kudu Dynamics](https://kududyn.com) as part of [DARPA's AI Cyber Challenge](https://aicyberchallenge.com)*
400 | 
401 | [![GitHub stars](https://img.shields.io/github/stars/aixcyberchallenge/sherpa?style=social)](https://github.com/AIxCyberChallenge/sherpa/stargazers)
402 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
403 | [![AIxCC](https://img.shields.io/badge/DARPA-AIxCC-blue.svg)](https://aicyberchallenge.com)
404 | 
405 | </div>
406 | 


--------------------------------------------------------------------------------
/harness_generator/src/harness_generator.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/env python3
   2 | 
   3 | #────────────
   4 | #
   5 | # Copyright 2025 Artificial Intelligence Cyber Challenge
   6 | #
   7 | # Permission is hereby granted, free of charge, to any person obtaining a copy of 
   8 | # this software and associated documentation files (the “Software”), to deal in the 
   9 | # Software without restriction, including without limitation the rights to use, 
  10 | # copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the 
  11 | # Software, and to permit persons to whom the Software is furnished to do so, 
  12 | # subject to the following conditions:
  13 | #
  14 | # The above copyright notice and this permission notice shall be included in all 
  15 | # copies or substantial portions of the Software.
  16 | #
  17 | # THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 
  18 | # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 
  19 | # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
  20 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 
  21 | # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22 | # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23 | #
  24 | # ────────────
  25 | 
  26 | """
  27 | harness_generator.py
  28 | ────────────────────
  29 | 
  30 | Automates the lifecycle of adding an extra libFuzzer harness to an
  31 | OSS-Fuzz project and running it end-to-end.  Operations are performed in a
  32 | working copy of the project's oss-fuzz directory and rely on the **Codex CLI**
  33 | for all code-writing tasks.
  34 | 
  35 | High-level flow
  36 | ===============
  37 | 1. Baseline build - compile the docker image and existing fuzzers to record
  38 |    the current binary set.
  39 | 2. Extract archives - unpack any tar/zip bundles so the source can be edited
  40 |    directly.
  41 | 3. Harness creation - ask Codex to write a new `LLVMFuzzerTestOneInput` and
  42 |    update build scripts.
  43 | 4. Repack archives - re-create any bundles that were unpacked and edited.
  44 | 5. Rebuild with retries - rebuild image & fuzzers; on compiler errors, send
  45 |    the diagnostics back to Codex for minimal fixes (configurable retries).
  46 | 6. Seed corpus - before each new fuzzer is executed, instruct Codex to
  47 |    populate the corresponding corpus directory with one or more meaningful
  48 |    seed inputs, using the harness source as context.
  49 | 7. Fuzzer execution - run every newly-built fuzzer, capture stdout/stderr and
  50 |    detect any crash / OOM / timeout artifacts.
  51 | 8. Crash handling - for the first crash found:
  52 |       • reproduce it with `infra/helper.py reproduce` and write
  53 |         `crash_reproduction.log` (commented with the exact command);
  54 |       • gather the reproducer log, harness source and a hexdump of the input
  55 |         into `crash_info.md`;
  56 |       • pass the same context to Codex and request `crash_analysis.md` that
  57 |         summarises bug type, impact and patch guidance.
  58 | 
  59 | Command-line flags allow skipping the rebuild, running a smoke test first, or
  60 | changing the maximum fix-retry count.  The script requires Python ≥ 3.9,
  61 | GitPython, python-dotenv, the Codex CLI, Docker and a functional oss-fuzz
  62 | checkout.
  63 | """
  64 | 
  65 | from __future__ import annotations
  66 | 
  67 | import argparse
  68 | import logging
  69 | import os
  70 | import re
  71 | import shutil
  72 | import subprocess
  73 | import sys
  74 | import tarfile
  75 | import tempfile
  76 | import textwrap
  77 | import time
  78 | import uuid
  79 | from dotenv import load_dotenv
  80 | from git import Repo, exc as git_exc
  81 | from pathlib import Path
  82 | from typing import Dict, Sequence
  83 | 
  84 | # Make the helper discoverable whether this module is executed as a script
  85 | # or imported as part of the *src* package.
  86 | try:
  87 |     from .codex_helper import CodexHelper  # type: ignore
  88 | except ImportError:  # pragma: no cover
  89 |     import sys
  90 |     from pathlib import Path as _Path
  91 | 
  92 |     _SRC_DIR = _Path(__file__).resolve().parent
  93 |     sys.path.insert(0, str(_SRC_DIR))
  94 |     from codex_helper import CodexHelper  # type: ignore
  95 | 
  96 | # --------------------------------------------------------------------------- #
  97 | # Constants
  98 | # --------------------------------------------------------------------------- #
  99 | DEFAULT_SANITIZER = "address"
 100 | MAX_BUILD_RETRIES = 3
 101 | CODEX_ANALYSIS_MODEL = os.environ.get("CODEX_ANALYSIS_MODEL", "o3")
 102 | CODEX_APPROVAL_MODE = os.environ.get("CODEX_APPROVAL_MODE", "full-auto")
 103 | 
 104 | 
 105 | class HarnessGeneratorError(RuntimeError):
 106 |     """Raised for any harness-generation failure."""
 107 | 
 108 | 
 109 | class HarnessGenerator:
 110 |     """Automate Codex-assisted creation of an additional OSS-Fuzz harness."""
 111 | 
 112 |     # ───────────────────────── INITIALIZATION ───────────────────────── #
 113 | 
 114 |     def __init__(
 115 |         self,
 116 |         project_name: str,
 117 |         oss_fuzz_path: Path,
 118 |         *,
 119 |         ai_key_path: str,
 120 |         sanitizer: str = DEFAULT_SANITIZER,
 121 |         codex_cli: str = "codex",
 122 |         scratch_space: Path | None = None,
 123 |         copy_repo: bool = False,
 124 |     ) -> None:
 125 |         # Basic fields
 126 |         self.project = project_name.strip()
 127 |         self.oss_fuzz_path = oss_fuzz_path.expanduser().resolve()
 128 |         self.ai_key_path = Path(ai_key_path).expanduser()
 129 |         self.sanitizer = sanitizer
 130 |         self.codex_cli = codex_cli
 131 |         self.scratch_space = scratch_space or Path("/tmp")
 132 |         self.copy_repo = copy_repo
 133 |         self.logger = logging.getLogger(__name__)
 134 | 
 135 |         if not self.oss_fuzz_path.is_dir():
 136 |             raise FileNotFoundError(
 137 |                 f"OSS-Fuzz path not found: {self.oss_fuzz_path}"
 138 |             )
 139 | 
 140 |         # Optionally copy the oss-fuzz tree so Codex works on a throw-away copy
 141 |         self.repo_path = (
 142 |             self._copy_repo(self.oss_fuzz_path)
 143 |             if copy_repo
 144 |             else self.oss_fuzz_path
 145 |         )
 146 |         self.repo = self._ensure_git_repo(self.repo_path)
 147 | 
 148 |         print(f"[*] Ready (project={self.project}, repo={self.repo_path})")
 149 | 
 150 |         # Mapping of extracted_dir  →  original_archive_path
 151 |         self._archives: Dict[Path, Path] = {}
 152 | 
 153 |     # ───────────────────────── PUBLIC ENTRY-POINT ────────────────────── #
 154 |     def generate_harness(
 155 |         self,
 156 |         *,
 157 |         build: bool = True,
 158 |         run_smoke: bool = False,
 159 |         max_iterations: int = MAX_BUILD_RETRIES,
 160 |     ) -> None:
 161 |         """Run the full workflow end-to-end."""
 162 | 
 163 |         # 1. Baseline build (with automatic Codex-assisted fixes)
 164 |         print("[*] Building docker image and fuzzers...")
 165 |         self._build_with_retries(clean=True, max_iterations=1)
 166 | 
 167 |         baseline_fuzzers = self._list_fuzzer_binaries()
 168 |         print(
 169 |             f"[*] Baseline has {len(baseline_fuzzers)} fuzzer(s):\n{baseline_fuzzers}\n"
 170 |         )
 171 |         if run_smoke:
 172 |             self._run_any_fuzzer_once()
 173 | 
 174 |         # 2. Extract any archives
 175 |         print("[*] Extracting any project archives...")
 176 |         self._extract_archives()
 177 | 
 178 |         # 3. Ask Codex to add a harness
 179 |         print("[*] Running Codex to generate a new harness...")
 180 |         self._invoke_codex_for_harness()
 181 | 
 182 |         # 4. Re-pack archives (Codex may have edited files inside them)
 183 |         print("[*] Repackaging any project archives...")
 184 |         self._repack_archives()
 185 | 
 186 |         # 5. Rebuild after harness has been added (again with retries)
 187 |         if build:
 188 |             print("[*] Attempting image/fuzzer rebuild...")
 189 |             self._build_with_retries(clean=False, max_iterations=max_iterations)
 190 | 
 191 |         # 6. Detect which fuzzers are new and run them
 192 |         final_fuzzers = self._list_fuzzer_binaries()
 193 |         new_fuzzers = sorted(final_fuzzers - baseline_fuzzers)
 194 | 
 195 |         if not new_fuzzers:
 196 |             print("[!] No new fuzzer binaries detected after Codex run.")
 197 |             return
 198 | 
 199 |         print(
 200 |             f"[*] Detected {len(new_fuzzers)} new fuzzer(s): {', '.join(new_fuzzers)}"
 201 |         )
 202 |         for fuzzer in new_fuzzers:
 203 |             # ── Generate seed corpus files before running ──
 204 |             try:
 205 |                 self._invoke_codex_to_generate_seeds(fuzzer)
 206 |             except HarnessGeneratorError as err:
 207 |                 print(f"[!] Failed to generate seeds for {fuzzer}: {err}")
 208 | 
 209 |             print(f"[*] ➤ Running {fuzzer} …")
 210 |             time.sleep(5)
 211 |             try:
 212 |                 # ── Record existing crash/timeout/oom files before run
 213 |                 baseline_bug_files = self._find_bug_files()
 214 | 
 215 |                 output = self._run_fuzzer(fuzzer)
 216 | 
 217 |                 # ── Detect newly-generated bug files
 218 |                 new_bug_files = self._find_bug_files() - baseline_bug_files
 219 |                 if new_bug_files:
 220 |                     print(
 221 |                         f"[!] Detected {len(new_bug_files)} crash/oom/timeout file(s):"
 222 |                     )
 223 |                     for p in new_bug_files:
 224 |                         print(f"    • {p.relative_to(self.repo_path)}")
 225 | 
 226 |                     # Reproduce only the first file (additional files can be handled later)
 227 |                     bug_path = sorted(new_bug_files)[0]
 228 |                     try:
 229 |                         repro_log, repro_cmd = self._reproduce_crash(
 230 |                             fuzzer, bug_path
 231 |                         )
 232 |                         self._generate_bug_report(
 233 |                             fuzzer, bug_path, repro_log, repro_cmd
 234 |                         )
 235 |                     except HarnessGeneratorError as err:
 236 |                         print(
 237 |                             f"[!] Failed to reproduce or analyse crash: {err}"
 238 |                         )
 239 | 
 240 |             except HarnessGeneratorError as err:
 241 |                 print(f"[!] {fuzzer} failed: {err}")
 242 | 
 243 |     # ───────────────────────── INTERNAL HELPERS ─────────────────────── #
 244 | 
 245 |     # ---- Git helpers -------------------------------------------------- #
 246 |     def _copy_repo(self, src: Path) -> Path:
 247 |         dst = Path(
 248 |             tempfile.mkdtemp(
 249 |                 prefix="oss-fuzz-harness-", dir=str(self.scratch_space)
 250 |             )
 251 |         )
 252 |         shutil.copytree(src, dst, dirs_exist_ok=True)
 253 |         return dst
 254 | 
 255 |     def _ensure_git_repo(self, path: Path) -> Repo:
 256 |         try:
 257 |             repo = Repo(path)
 258 |         except git_exc.InvalidGitRepositoryError:
 259 |             repo = Repo.init(path)
 260 |         repo.git.add(A=True)
 261 |         try:
 262 |             repo.git.commit(m="Initial commit (baseline)", allow_empty=True)
 263 |         except git_exc.GitCommandError:
 264 |             pass
 265 |         return repo
 266 | 
 267 |     # ---- Build helpers ------------------------------------------------- #
 268 |     def _build_image_and_fuzzers(self, *, clean: bool) -> None:
 269 |         helper = self.repo_path / "infra" / "helper.py"
 270 |         if not helper.is_file():
 271 |             raise HarnessGeneratorError(
 272 |                 "infra/helper.py not found - invalid checkout?"
 273 |             )
 274 | 
 275 |         env = os.environ.copy()
 276 |         env.setdefault("OSSFUZZ_SKIP_UNSHALLOW", "1")
 277 | 
 278 |         # Build image (auto-confirm y/n prompt)
 279 |         self._run_cmd(
 280 |             ["python3", str(helper), "build_image", self.project],
 281 |             cwd=self.repo_path,
 282 |             env=env,
 283 |             input="y\n",
 284 |         )
 285 | 
 286 |         # Build fuzzers
 287 |         cmd = [
 288 |             "python3",
 289 |             str(helper),
 290 |             "build_fuzzers",
 291 |             self.project,
 292 |             "--sanitizer",
 293 |             self.sanitizer,
 294 |         ]
 295 |         if clean:
 296 |             cmd.append("--clean")
 297 |         self._run_cmd(cmd, cwd=self.repo_path, env=env)
 298 | 
 299 |     # ---- Fuzzer discovery -------------------------------------------- #
 300 |     def _list_fuzzer_binaries(self) -> set[str]:
 301 |         """Return the names of all executable fuzzer binaries for this project."""
 302 |         out_dir = self.repo_path / "build" / "out" / self.project
 303 |         if not out_dir.is_dir():
 304 |             return set()
 305 |         return {
 306 |             p.name
 307 |             for p in out_dir.iterdir()
 308 |             if p.is_file()
 309 |             and os.access(p, os.X_OK)
 310 |             and not p.name.endswith(".dict")
 311 |         }
 312 | 
 313 |     # ---- Build with retries (Codex-assisted) ------------------------- #
 314 |     def _build_with_retries(
 315 |         self,
 316 |         *,
 317 |         clean: bool,
 318 |         max_iterations: int = MAX_BUILD_RETRIES,
 319 |     ) -> None:
 320 |         """Attempt to build image & fuzzers, asking Codex to fix failures.
 321 | 
 322 |         This consolidates the repeated logic used for both the initial
 323 |         baseline build **and** the post-harness rebuild.  On every failure we
 324 |         forward the compiler diagnostics to Codex, let it apply minimal
 325 |         patches, optionally re-package any modified archives, and then retry
 326 |         the build until it succeeds or *max_iterations* is reached.
 327 |         """
 328 | 
 329 |         for attempt in range(1, max_iterations + 1):
 330 |             try:
 331 |                 # Only pass the --clean flag on the *first* attempt – subsequent
 332 |                 # iterations should reuse the prior build cache to save time.
 333 |                 self._build_image_and_fuzzers(clean=clean and attempt == 1)
 334 |                 print(f"[*] Fuzzer build succeeded on attempt {attempt}!")
 335 |                 return
 336 |             except HarnessGeneratorError as err:
 337 |                 if attempt == max_iterations:
 338 |                     raise
 339 | 
 340 |                 print(
 341 |                     f"[!] Build failed (attempt {attempt}/{max_iterations}). "
 342 |                     "Sending compiler stderr back to Codex..."
 343 |                 )
 344 | 
 345 |                 # Ask Codex for a minimal patch based on the compiler output.
 346 |                 self._invoke_codex_to_fix_build(str(err))
 347 | 
 348 |                 # If the project uses bundled source archives we may have to
 349 |                 # regenerate them after Codex edits.
 350 |                 self._repack_archives()
 351 | 
 352 |     # ---- Archive extraction / repack ---------------------------------- #
 353 |     ARCHIVE_REGEX = re.compile(r"\.(?:tar\.gz|tgz|tar|zip)$", re.IGNORECASE)
 354 | 
 355 |     def _extract_archives(self) -> None:
 356 |         proj_dir = self.repo_path / "projects" / self.project
 357 |         if not proj_dir.is_dir():
 358 |             return
 359 | 
 360 |         for arch in proj_dir.rglob("*"):
 361 |             if arch.is_file() and self.ARCHIVE_REGEX.search(arch.name):
 362 |                 if arch.name.endswith(".tar.gz"):
 363 |                     extract_root = arch.with_name(
 364 |                         arch.stem[:-4]
 365 |                     )  # Remove .tar from .tar.gz
 366 |                 elif arch.name.endswith(".tgz"):
 367 |                     extract_root = arch.with_name(arch.stem)
 368 |                 else:
 369 |                     extract_root = arch.with_suffix("")
 370 | 
 371 |                 if extract_root.exists():
 372 |                     continue
 373 | 
 374 |                 print(f"[*] Extracting {arch.relative_to(self.repo_path)}")
 375 | 
 376 |                 tmp_dir = tempfile.mkdtemp(dir=self.scratch_space)
 377 |                 tmp_path = Path(tmp_dir)
 378 | 
 379 |                 # Extract to temp location
 380 |                 if arch.name.endswith(".zip"):
 381 |                     shutil.unpack_archive(str(arch), str(tmp_path))
 382 |                 else:
 383 |                     with tarfile.open(arch, mode="r:*") as tf:
 384 |                         tf.extractall(tmp_path)
 385 | 
 386 |                 # Move contents into extract_root (flatten, don't preserve temp dir)
 387 |                 extract_root.mkdir(parents=True, exist_ok=True)
 388 |                 for item in tmp_path.iterdir():
 389 |                     shutil.move(str(item), extract_root / item.name)
 390 | 
 391 |                 shutil.rmtree(tmp_path, ignore_errors=True)
 392 |                 self._archives[extract_root] = arch
 393 | 
 394 |     def _repack_archives(self) -> None:
 395 |         for src_dir, arch in self._archives.items():
 396 |             print(f"[*] Re-packing {arch.relative_to(self.repo_path)}")
 397 | 
 398 |             # Remove old archive
 399 |             arch.unlink(missing_ok=True)
 400 | 
 401 |             parent = arch.parent
 402 |             base_name = arch.name
 403 |             if base_name.endswith(".tar.gz"):
 404 |                 base = arch.with_suffix("").with_suffix(
 405 |                     ""
 406 |                 )  # Remove .gz then .tar
 407 |                 mode = "w:gz"
 408 |                 archive_path = parent / f"{base.name}.tar.gz"
 409 |             elif base_name.endswith(".tgz"):
 410 |                 base = arch.with_suffix("")  # Remove .tgz
 411 |                 mode = "w:gz"
 412 |                 archive_path = parent / f"{base.name}.tgz"
 413 |             elif base_name.endswith(".tar"):
 414 |                 base = arch.with_suffix("")
 415 |                 mode = "w"
 416 |                 archive_path = parent / f"{base.name}.tar"
 417 |             elif base_name.endswith(".zip"):
 418 |                 base = arch.with_suffix("")
 419 |                 archive_path = shutil.make_archive(
 420 |                     str(base), "zip", root_dir=src_dir
 421 |                 )
 422 |                 continue
 423 |             else:
 424 |                 raise HarnessGeneratorError(
 425 |                     f"Unsupported archive format: {arch}"
 426 |                 )
 427 | 
 428 |             with tarfile.open(archive_path, mode) as tf:
 429 |                 for item in sorted(src_dir.rglob("*")):
 430 |                     tf.add(item, arcname=item.relative_to(src_dir))
 431 | 
 432 |     # ---- Codex interaction -------------------------------------------- #
 433 |     def _invoke_codex_for_harness(self) -> None:
 434 |         patcher = CodexHelper(
 435 |             repo_path=self.repo_path,
 436 |             ai_key_path=str(self.ai_key_path),
 437 |             copy_repo=False,
 438 |             codex_cli=self.codex_cli,
 439 |             codex_model=CODEX_ANALYSIS_MODEL,
 440 |             approval_mode=CODEX_APPROVAL_MODE,
 441 |         )
 442 | 
 443 |         # High-level tasks for Codex
 444 |         # IMPROVEME: extend prompt for java support
 445 |         instructions = textwrap.dedent(
 446 |             f"""
 447 |             **Objective (high-value fuzz target)**  
 448 |             Create a **new libFuzzer harness** for the **{self.project}** OSS-Fuzz project that
 449 |             exercises a *public* or *documented* API reachable with **user-supplied input**
 450 |             (e.g. files, packets, strings) and therefore has real-world security impact.
 451 | 
 452 |             ────────────────────────────────────────
 453 |             **Target-selection rules**
 454 | 
 455 |             1. **Start at the top**: pick the *highest-level* function that
 456 |             *directly* consumes attacker-controlled data.  
 457 |             • Good examples: `exif_data_load()`, `freerdp_peer_context_new()`,  
 458 |                 `curl_url_set()`, `png_read_info()`.  
 459 |             • **Avoid** low-level helpers (`*_parse_int()`, `*_read_field()` etc.)
 460 |                 unless *no higher layer* validates input.
 461 | 
 462 |             2. **Document reachability**  
 463 |             Add a one-line comment in the harness explaining why the chosen API
 464 |             is reachable from untrusted input in real software (file upload,
 465 |             network packet, etc.).
 466 | 
 467 |             3. **Minimal realistic setup**  
 468 |             If the API needs a context/handle, initialise it exactly as a real
 469 |             app would (e.g. `exif_data = exif_data_new_from_file(data, size)`).
 470 |             Don't stub out internal structs—use official constructors.
 471 | 
 472 |             4. **One API per harness**  
 473 |             If multiple candidate APIs exist, pick the single best one that is
 474 |             *not already fuzzed* (check existing harnesses + binaries).
 475 | 
 476 |             5. **ENSURE HARNESS USES THE LIBRARY CORRECTLY**
 477 |             Many false positives are the result of the generated harness code failing
 478 |             to exercise the library properly (passing a ptr instead of an int, etc.)
 479 |             Ensure all calls performed by the harness match the library signatures
 480 |             and use the library in the way it was intended to be used. Our goal is to
 481 |             only uncover bugs that are true positives with real world implications.
 482 | 
 483 |             ────────────────────────────────────────
 484 |             **Implementation requirements**
 485 | 
 486 |             * Harness signature  
 487 |             ```c++
 488 |             extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
 489 |             ````
 490 | 
 491 |             * Keep everything in C/C++ (follow project style).
 492 |             * **Do not** remove or refactor existing code; just add the harness and
 493 |             tweak build scripts so it is compiled.
 494 |             * Place the harness source next to similar existing harnesses.
 495 | 
 496 |             Extracted archive directories (may be empty if none):
 497 |             {', '.join(str(p.relative_to(self.repo_path)) for p in self._archives) or 'None'}
 498 | 
 499 |             **NO** build/run commands—just write code + build recipe edits.
 500 |             When finished, write the path to the new harness into `./done`
 501 | 
 502 |             Notes: 
 503 |             - The oss-fuzz project typically contains a Dockerfile, build.sh, and project.yaml.
 504 |             - The repo source is typically not included, but can be cloned to assist in analysis.
 505 |               - It may be specified in project.yaml as `main_repo`. It may be cloned as part of the docker build.
 506 |               - When you clone the repo source, you must clone it within your working directory (don't use /tmp)
 507 |             - Carefully analyze the existing build structure to fully understand what is needed to successfully include your new harness in the build.
 508 | 
 509 |             VERY IMPORTANT: You must clone the repo so that you can validate the function signatures of every library function you put in the new harness.
 510 |                             You must ensure that the library is being used correctly to mitigate false-positive crashes caused by errors in the harness.
 511 | 
 512 |             This task is very important! Every bug we trigger will be responsibly disclosed to make the world a safer place.
 513 |             Have fun and do your very best!
 514 |             """
 515 |         ).strip()
 516 | 
 517 |         stdout = patcher.run_codex_command(instructions)
 518 |         if stdout is None:
 519 |             raise HarnessGeneratorError(
 520 |                 "Codex produced no edits when adding harness."
 521 |             )
 522 |         print(
 523 |             f"[*] Codex stdout (truncated):\n{stdout[:1200]}",
 524 |         )
 525 | 
 526 |     def _invoke_codex_to_fix_build(self, build_stderr: str) -> None:
 527 |         patcher = CodexHelper(
 528 |             repo_path=self.repo_path,
 529 |             ai_key_path=str(self.ai_key_path),
 530 |             copy_repo=False,
 531 |             codex_cli=self.codex_cli,
 532 |             codex_model=CODEX_ANALYSIS_MODEL,
 533 |             approval_mode=CODEX_APPROVAL_MODE,
 534 |         )
 535 |         instructions = [
 536 |             "Compilation failed.  Read the compiler output below and make only "
 537 |             "the minimal edits necessary to fix build-blocking errors.  "
 538 |             "Do not add features or refactor unrelated code."
 539 |             "Do not execute any commands to build or run any fuzzers, just correct the build statically."
 540 |         ]
 541 |         stdout = patcher.run_codex_command(
 542 |             instructions, additional_context=build_stderr
 543 |         )
 544 |         if stdout is None:
 545 |             raise HarnessGeneratorError("Codex failed to resolve build errors.")
 546 | 
 547 |     # ---- Run New Fuzzer ------------------------------------------------ #
 548 |     def _run_fuzzer(
 549 |         self,
 550 |         fuzzer_name: str,
 551 |         *,
 552 |         timeout_seconds: int = 600,
 553 |         engine: str = "libfuzzer",
 554 |         sanitizer: str | None = None,
 555 |         architecture: str = "x86_64",
 556 |         rss_limit_mb: int = 16_384,
 557 |         max_len: int = 1024,
 558 |     ) -> str:
 559 |         """
 560 |         Run a single fuzzer and return its combined stdout + stderr.
 561 | 
 562 |         • Captures raw bytes → decodes with errors='backslashreplace'
 563 |         • If stderr is not empty, appends it under "=== STDERR ===" in log.
 564 |         • Prints the last ≈200 lines and writes the full log to fuzzer_run_<uuid>.txt
 565 |         • Never raises on non-zero exit codes (crash/OOM/timeout are findings).
 566 |         """
 567 |         helper = self.repo_path / "infra" / "helper.py"
 568 |         if not helper.is_file():
 569 |             raise HarnessGeneratorError(
 570 |                 "infra/helper.py not found - invalid checkout?"
 571 |             )
 572 | 
 573 |         corpus_dir = (
 574 |             self.repo_path
 575 |             / "build"
 576 |             / "out"
 577 |             / self.project
 578 |             / "corpus"
 579 |             / fuzzer_name
 580 |         )
 581 |         corpus_dir.mkdir(parents=True, exist_ok=True)
 582 | 
 583 |         env = os.environ.copy()
 584 |         env.setdefault("RSS_LIMIT_MB", str(rss_limit_mb))
 585 |         env.setdefault("TIMEOUT", "45")
 586 | 
 587 |         cmd = [
 588 |             "python3",
 589 |             str(helper),
 590 |             "run_fuzzer",
 591 |             "--architecture",
 592 |             architecture,
 593 |             "--engine",
 594 |             engine,
 595 |             "--sanitizer",
 596 |             sanitizer or self.sanitizer,
 597 |             "--corpus-dir",
 598 |             str(corpus_dir),
 599 |             self.project,
 600 |             fuzzer_name,
 601 |             "--",
 602 |             f"-max_total_time={timeout_seconds}",
 603 |             f"-max_len={max_len}",
 604 |             "-print_final_stats=1",
 605 |         ]
 606 | 
 607 |         print(f"[*] ➜  {' '.join(cmd)}")
 608 |         proc = subprocess.Popen(
 609 |             cmd,
 610 |             cwd=self.repo_path,
 611 |             env=env,
 612 |             stdout=subprocess.PIPE,  # raw bytes
 613 |             stderr=subprocess.PIPE,  # keep stderr separate
 614 |             text=False,  # important: capture bytes
 615 |         )
 616 | 
 617 |         try:
 618 |             raw_stdout, raw_stderr = proc.communicate(
 619 |                 timeout=timeout_seconds + 30
 620 |             )
 621 |         except subprocess.TimeoutExpired:
 622 |             proc.kill()
 623 |             raw_stdout, raw_stderr = proc.communicate()
 624 |             print("[!] Fuzzer process exceeded hard timeout; killed.")
 625 |         except Exception:
 626 |             import traceback
 627 | 
 628 |             traceback.print_exc()
 629 |             raw_stdout = b""
 630 |             raw_stderr = traceback.format_exc().encode()
 631 | 
 632 |         # Decode safely
 633 |         stdout_dec = raw_stdout.decode("utf-8", errors="backslashreplace")
 634 |         stderr_dec = raw_stderr.decode("utf-8", errors="backslashreplace")
 635 | 
 636 |         # Combine, adding labelled section if needed
 637 |         if stderr_dec.strip():
 638 |             full_output = f"{stdout_dec}\n\n=== STDERR ===\n{stderr_dec}"
 639 |         else:
 640 |             full_output = stdout_dec
 641 | 
 642 |         # Normalise CRs
 643 |         full_output = full_output.replace("\r", "\n")
 644 | 
 645 |         # Persist full log
 646 |         log_path = self.repo_path / f"fuzzer_run_{uuid.uuid4().hex}.txt"
 647 |         with open(log_path, "w", encoding="utf-8") as fh:
 648 |             fh.write(full_output)
 649 | 
 650 |         # Pretty-print the last 200 lines
 651 |         tail_lines = full_output.splitlines()[-200:]
 652 |         print("\n".join(tail_lines))
 653 |         print(f"\n[*] Full fuzzer log saved to: {log_path}")
 654 | 
 655 |         if proc.returncode != 0:
 656 |             print(
 657 |                 f"[!] Fuzzer exited with rc={proc.returncode} "
 658 |                 "(non-zero is expected for crash/timeout/OOM)."
 659 |             )
 660 | 
 661 |         return full_output
 662 | 
 663 |     # ────────────────────── Crash-handling helpers ─────────────────────── #
 664 | 
 665 |     BUG_PREFIXES = ("crash", "oom", "timeout")
 666 | 
 667 |     def _find_bug_files(self) -> set[Path]:
 668 |         """Return a *set* of Paths matching crash/oom/timeout files for project."""
 669 |         root = self.repo_path / "build" / "out" / self.project
 670 |         if not root.is_dir():
 671 |             return set()
 672 |         return {
 673 |             p
 674 |             for p in root.rglob("*")
 675 |             if p.is_file()
 676 |             and any(p.name.startswith(pref) for pref in self.BUG_PREFIXES)
 677 |         }
 678 | 
 679 |     # ------------------------------------------------------------------ #
 680 |     def _reproduce_crash(
 681 |         self, fuzzer_name: str, crash_path: Path
 682 |     ) -> tuple[str, str]:
 683 |         """Run `helper.py reproduce` and persist output → crash_reproduction.log.
 684 | 
 685 |         Returns a tuple (full_log, command_line).
 686 |         """
 687 | 
 688 |         helper = self.repo_path / "infra" / "helper.py"
 689 |         if not helper.is_file():
 690 |             raise HarnessGeneratorError(
 691 |                 "infra/helper.py not found - cannot reproduce crash"
 692 |             )
 693 | 
 694 |         cmd_list = [
 695 |             "python3",
 696 |             str(helper),
 697 |             "reproduce",
 698 |             self.project,
 699 |             fuzzer_name,
 700 |             str(crash_path),
 701 |         ]
 702 | 
 703 |         cmd_str = " ".join(cmd_list)
 704 |         print(f"[*] ➜  {cmd_str} (reproducing crash)")
 705 | 
 706 |         proc = subprocess.run(
 707 |             cmd_list,
 708 |             cwd=self.repo_path,
 709 |             capture_output=True,
 710 |             text=True,
 711 |             env=os.environ.copy(),
 712 |         )
 713 | 
 714 |         repro_output = proc.stdout + (
 715 |             "\n=== STDERR ===\n" + proc.stderr if proc.stderr else ""
 716 |         )
 717 | 
 718 |         # ── Strip ANSI colour / control codes for readability ──────────
 719 |         repro_output = self._strip_ansi(repro_output)
 720 | 
 721 |         # Build comment line with relative paths for readability
 722 |         try:
 723 |             crash_rel = crash_path.relative_to(self.repo_path)
 724 |         except ValueError:
 725 |             crash_rel = Path(crash_path).name
 726 |         helper_rel = helper.relative_to(self.repo_path)
 727 |         command_line = f"python {helper_rel} reproduce {self.project} {fuzzer_name} {crash_rel}"
 728 | 
 729 |         comment_line = f"# {command_line}\n"
 730 | 
 731 |         log_path = self.repo_path / "crash_reproduction.log"
 732 |         with open(log_path, "w", encoding="utf-8", errors="replace") as fh:
 733 |             fh.write(comment_line)
 734 |             fh.write(repro_output)
 735 | 
 736 |         print(
 737 |             f"[*] Crash reproduction log written to {log_path.relative_to(self.repo_path)}"
 738 |         )
 739 | 
 740 |         full_log = comment_line + repro_output
 741 |         return full_log, command_line
 742 | 
 743 |     # ------------------------------------------------------------------ #
 744 |     def _hexdump(self, path: Path, limit_bytes: int = 512) -> str:
 745 |         """Return an xxd -g1 style hexdump (≤limit_bytes) of a file."""
 746 |         try:
 747 |             return subprocess.check_output(
 748 |                 [
 749 |                     "xxd",
 750 |                     "-g1",
 751 |                     "-l",
 752 |                     str(limit_bytes),
 753 |                     str(path),
 754 |                 ],
 755 |                 text=True,
 756 |             )
 757 |         except Exception:
 758 |             data = path.read_bytes()[:limit_bytes]
 759 |             lines = []
 760 |             for off in range(0, len(data), 16):
 761 |                 chunk = data[off : off + 16]
 762 |                 hex_bytes = " ".join(f"{b:02x}" for b in chunk)
 763 |                 ascii = "".join(chr(b) if 32 <= b < 127 else "." for b in chunk)
 764 |                 lines.append(f"{off:08x}: {hex_bytes:<47}  {ascii}")
 765 |             return "\n".join(lines)
 766 | 
 767 |     # ------------------------------------------------------------------ #
 768 |     _ANSI_ESCAPE_RE = re.compile(
 769 |         r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", re.MULTILINE
 770 |     )
 771 | 
 772 |     @classmethod
 773 |     def _strip_ansi(cls, text: str) -> str:
 774 |         """Return *text* with any ANSI colour/control sequences removed."""
 775 | 
 776 |         # A pre-compiled regex is used for efficiency as logs can be large.
 777 |         # The pattern aims to match the majority of common ANSI escape
 778 |         # sequences produced by oss-fuzz tooling (colour, cursor movement,
 779 |         # screen erasing etc.).  If a sequence slips through it will simply
 780 |         # render as an innocuous control code in the markdown, which is still
 781 |         # preferable to the unreadable colour gibberish.
 782 |         return cls._ANSI_ESCAPE_RE.sub("", text)
 783 | 
 784 |     # ------------------------------------------------------------------ #
 785 |     def _locate_harness_source(self, fuzzer_name: str) -> Path | None:
 786 |         """Locate the harness source file, primarily via the ./done marker."""
 787 | 
 788 |         done_file = self.repo_path / "done"
 789 |         if done_file.is_file():
 790 |             try:
 791 |                 rel_path = (
 792 |                     done_file.read_text(encoding="utf-8", errors="replace")
 793 |                     .splitlines()[0]
 794 |                     .strip()
 795 |                 )
 796 |                 if rel_path:
 797 |                     abs_path = (self.repo_path / rel_path).resolve()
 798 |                     if abs_path.is_file():
 799 |                         return abs_path
 800 |             except Exception:
 801 |                 pass
 802 |         # First: look for file name containing fuzzer_name with typical C/C++ suffix
 803 |         exts = {".c", ".cc", ".cpp", ".cxx", ".h", ".hpp"}
 804 |         for p in self.repo_path.rglob("*"):
 805 |             if p.suffix.lower() in exts and fuzzer_name in p.name:
 806 |                 return p
 807 | 
 808 |         # Fallback: any file containing LLVMFuzzerTestOneInput token
 809 |         for p in self.repo_path.rglob("*"):
 810 |             if p.suffix.lower() in exts:
 811 |                 try:
 812 |                     txt = p.read_text(encoding="utf-8", errors="ignore")
 813 |                 except Exception:
 814 |                     continue
 815 |                 if "LLVMFuzzerTestOneInput" in txt:
 816 |                     return p
 817 | 
 818 |         return None
 819 | 
 820 |     # ------------------------------------------------------------------ #
 821 |     def _generate_bug_report(
 822 |         self,
 823 |         fuzzer_name: str,
 824 |         crash_path: Path,
 825 |         reproducer_log: str,
 826 |         reproducer_cmd: str,
 827 |     ) -> None:
 828 |         """Invoke Codex to write crash_analysis.md at repo root."""
 829 | 
 830 |         harness_path = self._locate_harness_source(fuzzer_name)
 831 |         harness_source = (
 832 |             harness_path.read_text(encoding="utf-8", errors="replace")
 833 |             if harness_path and harness_path.is_file()
 834 |             else "*Harness source not found*"
 835 |         )
 836 | 
 837 |         hexdump_text = self._hexdump(crash_path)
 838 | 
 839 |         # Build context block (text, not markdown) for Codex
 840 |         context_parts = [
 841 |             "=== Reproducer Log ===\n",
 842 |             reproducer_log,
 843 |             "\n\n=== Harness Source ===\n",
 844 |             harness_source,
 845 |             "\n\n=== Crashing Input (hexdump) ===\n",
 846 |             hexdump_text,
 847 |             "\n",
 848 |         ]
 849 |         additional_context = "".join(context_parts)
 850 | 
 851 |         # ── Write crash_info.md ────────────────────────────────────────
 852 |         def _md_safe(text: str) -> str:
 853 |             return text.replace("```", "```​")  # no early fence close
 854 | 
 855 |         md_lines = [
 856 |             "# Crash Info",
 857 |             "",
 858 |             "## Reproducer command",
 859 |             "```bash",
 860 |             reproducer_cmd,
 861 |             "```",
 862 |             "",
 863 |             "## Reproducer log",
 864 |             "```text",
 865 |             _md_safe(reproducer_log),
 866 |             "```",
 867 |             "",
 868 |             "## Harness source",
 869 |             "```c",
 870 |             _md_safe(harness_source),
 871 |             "```",
 872 |             "",
 873 |             "## Crashing input (hexdump)",
 874 |             "```text",
 875 |             hexdump_text,
 876 |             "```",
 877 |             "",
 878 |         ]
 879 | 
 880 |         (self.repo_path / "crash_info.md").write_text(
 881 |             "\n".join(md_lines), encoding="utf-8"
 882 |         )
 883 |         print("[*] crash_info.md written")
 884 | 
 885 |         instructions = textwrap.dedent(
 886 |             """
 887 |             You are an experienced security researcher.
 888 | 
 889 |             Using the context provided, write a **new file** called `crash_analysis.md` in the repository root with the following top-level sections:
 890 | 
 891 |             1. Bug Type
 892 |             2. Bug Summary
 893 |             3. Bug Impact (real world reachability/exploitability/constraints)
 894 |             4. How to Patch
 895 | 
 896 |             Requirements:
 897 |               • Provide concise yet complete analysis (markdown).
 898 |               • If the bug could not be reproduced (the reproducer exited cleanly) then indicate this in your analysis. 
 899 |               • These harnesses were *just generated*. Carefully consider whether the crash is due to a genuine bug in the target project or a mistake in the harness.  
 900 |                 If it is harness-induced, explicitly state this in your analysis and use **severity: None** in the *bug impact* section.
 901 |                 Look out for harness mistakes like erroneous frees, misuse of the target library, incorrect function arguments / types, or anything else indicating this is not a genuine bug in the target library.
 902 |                 For these cases, you must also include the sentinel "HARNESS ERROR" somewhere in your analysis.
 903 |             """
 904 |         ).strip()
 905 | 
 906 |         print("[*] Calling Codex to generate crash_analysis.md …")
 907 | 
 908 |         patcher = CodexHelper(
 909 |             repo_path=self.repo_path,
 910 |             ai_key_path=str(self.ai_key_path),
 911 |             copy_repo=False,
 912 |             codex_cli=self.codex_cli,
 913 |             codex_model=CODEX_ANALYSIS_MODEL,
 914 |             approval_mode=CODEX_APPROVAL_MODE,
 915 |         )
 916 | 
 917 |         stdout = patcher.run_codex_command(
 918 |             instructions,
 919 |             additional_context=additional_context,
 920 |         )
 921 | 
 922 |         if stdout is None:
 923 |             print("[!] Codex did not produce crash_analysis.md")
 924 |         else:
 925 |             print(
 926 |                 "[*] Codex generated crash_analysis.md (truncated output below):"
 927 |             )
 928 |             print(stdout[:1000])
 929 | 
 930 |         # ── Reproducer script generation ─────────────────────────────
 931 |         try:
 932 |             self._generate_reproducer_script()
 933 |         except HarnessGeneratorError as err:
 934 |             print(f"[!] Failed to generate crash_reproducer.sh: {err}")
 935 | 
 936 |     # ---- Seed corpus generation ------------------------------------ #
 937 |     def _invoke_codex_to_generate_seeds(self, fuzzer_name: str) -> None:
 938 |         """Ask Codex to create initial corpus seeds for the new harness."""
 939 | 
 940 |         corpus_dir = (
 941 |             self.repo_path
 942 |             / "build"
 943 |             / "out"
 944 |             / self.project
 945 |             / "corpus"
 946 |             / fuzzer_name
 947 |         )
 948 |         corpus_dir.mkdir(parents=True, exist_ok=True)
 949 | 
 950 |         harness_path = self._locate_harness_source(fuzzer_name)
 951 |         if not harness_path or not harness_path.is_file():
 952 |             raise HarnessGeneratorError(
 953 |                 f"Unable to locate harness source for {fuzzer_name} when generating seeds"
 954 |             )
 955 | 
 956 |         harness_source = harness_path.read_text(
 957 |             encoding="utf-8", errors="replace"
 958 |         )
 959 | 
 960 |         instructions = textwrap.dedent(
 961 |             f"""
 962 |             The directory `{corpus_dir.relative_to(self.repo_path)}` is the **initial corpus** for the newly created libFuzzer harness `{fuzzer_name}`.
 963 | 
 964 |             You will receive the *full harness source code* as additional context.
 965 | 
 966 |             Task: create one or more **meaningful seed inputs** (at least one, up to five) and write them as **files** inside that corpus directory.
 967 | 
 968 |             Guidelines:
 969 |               • Inputs should be small yet exercise realistic code paths.  
 970 |               • Prefer simple human-readable examples when possible; otherwise use `.bin` files.  
 971 |               • Do **NOT** modify any existing source or build scripts.  
 972 |               • Use appropriate file extensions if the target expects a specific format.  
 973 |               • Binary content can be expressed via hex literals or base64 in the patch - whichever is most convenient.
 974 | 
 975 |             Write the files directly - no commentary - using the standard Codex patch instructions.
 976 |             """
 977 |         ).strip()
 978 | 
 979 |         patcher = CodexHelper(
 980 |             repo_path=self.repo_path,
 981 |             ai_key_path=str(self.ai_key_path),
 982 |             copy_repo=False,
 983 |             codex_cli=self.codex_cli,
 984 |             codex_model=CODEX_ANALYSIS_MODEL,
 985 |             approval_mode=CODEX_APPROVAL_MODE,
 986 |         )
 987 | 
 988 |         stdout = patcher.run_codex_command(
 989 |             instructions,
 990 |             additional_context=harness_source,
 991 |         )
 992 | 
 993 |         if stdout is None:
 994 |             raise HarnessGeneratorError("Codex did not generate any seed files")
 995 | 
 996 |         print("[*] Codex seed-generation output (truncated):")
 997 |         print(stdout[:800])
 998 | 
 999 |     # ------------------------------------------------------------------ #
1000 |     def _generate_reproducer_script(self) -> None:
1001 |         """Invoke Codex to create crash_reproducer.sh after crash analysis."""
1002 | 
1003 |         info_path = self.repo_path / "crash_info.md"
1004 |         analysis_path = self.repo_path / "crash_analysis.md"
1005 | 
1006 |         if not info_path.is_file() or not analysis_path.is_file():
1007 |             raise HarnessGeneratorError(
1008 |                 "Required markdown files not found for reproducer script generation."
1009 |             )
1010 | 
1011 |         context_blob = (
1012 |             "=== crash_info.md ===\n"
1013 |             + info_path.read_text(encoding="utf-8", errors="replace")
1014 |             + "\n\n=== crash_analysis.md ===\n"
1015 |             + analysis_path.read_text(encoding="utf-8", errors="replace")
1016 |         )
1017 | 
1018 |         instructions = textwrap.dedent(
1019 |             """
1020 |             Using the context provided, create a robust, idempotent Bash script named `crash_reproducer.sh` in the repository root that demonstrates the vulnerability described.
1021 | 
1022 |             The script must:
1023 |               • Install any required build/runtime dependencies non-interactively (e.g. apt-get -y, pip install) and skip if already present.
1024 |               • Build the vulnerable project with AddressSanitizer enabled (or another memory sanitizer that will surface the bug).
1025 |               • Fetch or construct the proof-of-concept input that triggers the crash - ideally the same data that appears in the fuzzing crash file, but adapted for a real-world invocation path (command-line tool, library API, etc.).
1026 |               • Construct the proof-of-concept script so that it reproduces the harness bug, but do not call the harness directly.
1027 |               • Apply reasonable execution limits (timeout, ulimit) so it never hangs.
1028 |               • Exit with non-zero status if the bug is reproduced; otherwise exit 0.
1029 |               • Contain clear comments for every major section.
1030 | 
1031 |             Notes:
1032 |               • You can run `git status --porcelain` to discover which harness source file was added or modified.  Use this knowledge to understand the target API.
1033 |               • The script should work when executed from the repository root on a clean Ubuntu container.
1034 |               • Only create `crash_reproducer.sh`. Do not modify existing files.
1035 |             """
1036 |         ).strip()
1037 | 
1038 |         patcher = CodexHelper(
1039 |             repo_path=self.repo_path,
1040 |             ai_key_path=str(self.ai_key_path),
1041 |             copy_repo=False,
1042 |             codex_cli=self.codex_cli,
1043 |             codex_model=CODEX_ANALYSIS_MODEL,
1044 |             approval_mode=CODEX_APPROVAL_MODE,
1045 |         )
1046 | 
1047 |         stdout = patcher.run_codex_command(
1048 |             instructions,
1049 |             additional_context=context_blob,
1050 |         )
1051 | 
1052 |         if stdout is None:
1053 |             raise HarnessGeneratorError(
1054 |                 "Codex did not create crash_reproducer.sh"
1055 |             )
1056 | 
1057 |         print("[*] Codex reproduce script output (truncated):")
1058 |         print(stdout[:800])
1059 | 
1060 |     # ---- Smoke test ---------------------------------------------------- #
1061 |     def _run_any_fuzzer_once(self, timeout: int = 60) -> None:
1062 |         out_dir = self.repo_path / "build" / "out" / self.project
1063 |         fuzzers = [
1064 |             p
1065 |             for p in out_dir.iterdir()
1066 |             if p.is_file()
1067 |             and os.access(p, os.X_OK)
1068 |             and not p.name.endswith(".dict")
1069 |         ]
1070 |         if not fuzzers:
1071 |             print("[*] No fuzzer binaries found.")
1072 |             return
1073 |         fuzzer = fuzzers[0].name
1074 |         print(
1075 |             f"[*] Smoke-testing fuzzer {fuzzer} …",
1076 |         )
1077 |         helper = self.repo_path / "infra" / "helper.py"
1078 |         corpus = out_dir / "corpus" / fuzzer
1079 |         corpus.mkdir(parents=True, exist_ok=True)
1080 |         self._run_cmd(
1081 |             [
1082 |                 "python3",
1083 |                 str(helper),
1084 |                 "run_fuzzer",
1085 |                 "--engine",
1086 |                 "libfuzzer",
1087 |                 "--sanitizer",
1088 |                 self.sanitizer,
1089 |                 "--corpus-dir",
1090 |                 str(corpus),
1091 |                 self.project,
1092 |                 fuzzer,
1093 |                 "--",
1094 |                 f"-max_total_time={timeout}",
1095 |                 "-timeout=120",
1096 |                 "-print_final_stats=1",
1097 |             ],
1098 |             cwd=self.repo_path,
1099 |             env=os.environ.copy(),
1100 |         )
1101 | 
1102 |     # ---- Shell helper -------------------------------------------------- #
1103 |     def _run_cmd(
1104 |         self,
1105 |         cmd: Sequence[str],
1106 |         *,
1107 |         cwd: Path,
1108 |         env: dict[str, str],
1109 |         input: str | None = None,
1110 |     ) -> None:
1111 |         """Run a subprocess and raise HarnessGeneratorError on failure."""
1112 |         cmd_str = " ".join(cmd)
1113 |         print(f"[*] ➜  {cmd_str}")
1114 |         proc = subprocess.Popen(
1115 |             cmd,
1116 |             cwd=cwd,
1117 |             env=env,
1118 |             stdin=subprocess.PIPE if input else None,
1119 |             stdout=subprocess.PIPE,
1120 |             stderr=subprocess.PIPE,
1121 |             text=True,
1122 |         )
1123 |         try:
1124 |             stdout, stderr = proc.communicate(input=input, timeout=7200)
1125 |         except subprocess.TimeoutExpired:
1126 |             proc.kill()
1127 |             raise HarnessGeneratorError("Command timed out: " + " ".join(cmd))
1128 | 
1129 |         if proc.returncode != 0:
1130 |             print(
1131 |                 f"[*] Command failed (rc={proc.returncode})\nSTDOUT:\n{stdout}\n---\nSTDERR:\n{stderr}"
1132 |             )
1133 |             raise HarnessGeneratorError(stderr)
1134 | 
1135 |         print(f"[*] Command succeeded. Truncated stdout:\n{stdout[:600]}")
1136 | 
1137 | 
1138 | # --------------------------------------------------------------------------- #
1139 | # CLI entry-point                                                             #
1140 | # --------------------------------------------------------------------------- #
1141 | if __name__ == "__main__":
1142 |     parser = argparse.ArgumentParser(
1143 |         description="Generate and integrate a new OSS-Fuzz harness with Codex."
1144 |     )
1145 |     parser.add_argument(
1146 |         "project_name", help="OSS-Fuzz project name (e.g. freerdp)"
1147 |     )
1148 |     parser.add_argument(
1149 |         "oss_fuzz_path",
1150 |         type=Path,
1151 |         default="./oss-fuzz",
1152 |         help="Path to local oss-fuzz checkout (root directory)",
1153 |     )
1154 |     parser.add_argument(
1155 |         "ai_key_path",
1156 |         type=Path,
1157 |         default="./.env",
1158 |         help="Path to file containing your OpenAI-compatible API key",
1159 |     )
1160 | 
1161 |     # Optional knobs
1162 |     parser.add_argument(
1163 |         "--sanitizer",
1164 |         default=DEFAULT_SANITIZER,
1165 |         help="Sanitizer to use when building fuzzers (default: address)",
1166 |     )
1167 |     parser.add_argument(
1168 |         "--codex-cli",
1169 |         default="codex",
1170 |         help="Executable name or path for the Codex CLI",
1171 |     )
1172 |     parser.add_argument(
1173 |         "--scratch-space",
1174 |         type=Path,
1175 |         help="Directory for temp working copies (defaults to /tmp)",
1176 |     )
1177 |     parser.add_argument(
1178 |         "--copy-repo",
1179 |         action="store_true",
1180 |         help="Work on a temporary copy of the oss-fuzz tree (safer, slower)",
1181 |     )
1182 |     parser.add_argument(
1183 |         "--no-build",
1184 |         action="store_true",
1185 |         help="Skip rebuilding image/fuzzers after adding the harness",
1186 |     )
1187 |     parser.add_argument(
1188 |         "--smoke",
1189 |         action="store_true",
1190 |         help="Run a 60-second smoke test with one fuzzer at the beginning",
1191 |     )
1192 |     parser.add_argument(
1193 |         "--max-retries",
1194 |         type=int,
1195 |         default=MAX_BUILD_RETRIES,
1196 |         help=f"Maximum build-retry attempts (default: {MAX_BUILD_RETRIES})",
1197 |     )
1198 | 
1199 |     args = parser.parse_args()
1200 |     load_dotenv(dotenv_path=os.path.expanduser(args.ai_key_path))
1201 | 
1202 |     try:
1203 |         hg = HarnessGenerator(
1204 |             project_name=args.project_name,
1205 |             oss_fuzz_path=args.oss_fuzz_path,
1206 |             ai_key_path=args.ai_key_path,
1207 |             sanitizer=args.sanitizer,
1208 |             codex_cli=args.codex_cli,
1209 |             scratch_space=args.scratch_space,
1210 |             copy_repo=args.copy_repo,
1211 |         )
1212 |         hg.generate_harness(
1213 |             build=not args.no_build,
1214 |             run_smoke=args.smoke,
1215 |             max_iterations=args.max_retries,
1216 |         )
1217 |     except HarnessGeneratorError as e:
1218 |         print(f"[harness_generator] ERROR: {e}", file=sys.stderr)
1219 |         sys.exit(1)
1220 | 


--------------------------------------------------------------------------------