├── afl_llvm_allowlist.txt
├── llvm-isel-afl
│   ├── .gitignore
│   ├── CMakeLists.txt
│   ├── llvm-isel-fuzzer.cpp
│   └── afl-driver.cpp
├── mutator
│   ├── scripts
│   │   ├── .gitignore
│   │   └── validate.sh
│   ├── include
│   │   └── mutator.h
│   ├── CMakeLists.txt
│   └── src
│       ├── fuzzmutate.cpp
│       ├── main.cpp
│       ├── mutator.cpp
│       └── afl-mutator.c
├── seeds
│   └── seed.bc
├── scripts
│   ├── requirements.txt
│   ├── lib
│   │   ├── time_parser.py
│   │   ├── fs.py
│   │   ├── matcher_table_sizes.py
│   │   ├── plot_data.py
│   │   ├── __init__.py
│   │   ├── arch.py
│   │   ├── process_concurrency.py
│   │   ├── experiment.py
│   │   ├── triple.py
│   │   ├── target_lists.py
│   │   ├── llc_command.py
│   │   ├── target.py
│   │   └── llc_test.py
│   ├── stat_experiments.py
│   ├── combine_crash_data.py
│   ├── summarize_crash_data.py
│   ├── classify_llc_tests.py
│   ├── collect_combined_mt_coverage.py
│   ├── collect_bad_inputs.py
│   ├── combine_fuzzing_results.py
│   ├── collect_matcher_table_size.py
│   ├── batch_classify.py
│   ├── collect_seeds.py
│   ├── batch_compile.py
│   ├── process_data.py
│   ├── compare_experiments.py
│   ├── classify.py
│   └── fuzz.py
├── .gitignore
├── .dockerignore
├── .gitmodules
├── Dockerfile
├── LICENSE
└── README.md

/afl_llvm_allowlist.txt:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/llvm-isel-afl/.gitignore:
--------------------------------------------------------------------------------
1 | afl-compiler-rt.o
2 | 
--------------------------------------------------------------------------------
/mutator/scripts/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !validate.sh
3 | !.gitignore
--------------------------------------------------------------------------------
/seeds/seed.bc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SecurityLab-UCD/IRFuzzer/HEAD/seeds/seed.bc
--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | bitarray==2.8.0
2 | docker_py==1.10.6
3 | matplotlib==3.5.1
4 | numpy==1.21.5
5 | pandas==2.0.3
6 | tqdm==4.65.0
7 | typed_argument_parser==1.8.1
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | build/
2 | llvm-aie/
3 | llvm-fix/
4 | llvm-project/
5 | .vscode/
6 | AFLplusplus/
7 | O
8 | fuzzing*
9 | *-w*
10 | tmp
11 | seed
12 | *.bc
13 | *.ll
14 | analysis
15 | __pycache__
16 | tmpfs
17 | docker*
18 | output
19 | archive*
20 | 
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | **/build/
2 | llvm-aie/
3 | llvm-fix/
4 | llvm-project/
5 | .vscode/
6 | AFLplusplus/
7 | O
8 | fuzzing*
9 | *-w*
10 | tmp
11 | seed
12 | *.bc
13 | *.ll
14 | analysis
15 | */__pycache__/
16 | tmpfs
17 | docker*
18 | output
19 | archive*
20 | 
--------------------------------------------------------------------------------
/scripts/lib/time_parser.py:
--------------------------------------------------------------------------------
1 | SECONDS_PER_UNIT: dict[str, int] = {
2 |     "s": 1,
3 |     "m": 60,
4 |     "h": 3600,
5 |     "d": 86400,
6 |     "w": 604800,
7 | }
8 | 
9 | 
10 | def get_time_in_seconds(s: str) -> int:
11 |     return int(s[:-1]) * SECONDS_PER_UNIT[s[-1]]
12 | 
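(Editorial addition: a minimal usage sketch for scripts/lib/time_parser.py above. The results follow directly from SECONDS_PER_UNIT; the snippet itself is not part of the repository.)

# Hypothetical usage of get_time_in_seconds, with duration strings in the same
# format the fuzzing scripts accept for their --time options:
from lib.time_parser import get_time_in_seconds

assert get_time_in_seconds("45s") == 45      # 45 * 1
assert get_time_in_seconds("30m") == 1800    # 30 * 60
assert get_time_in_seconds("2h") == 7200     # 2 * 3600
assert get_time_in_seconds("1d") == 86400    # 1 * 86400
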
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "llvm-project"]
2 | 	path = llvm-project
3 | 	url = https://github.com/SecurityLab-UCD/llvm-project.git
4 | 	branch = irfuzzer-0.2
5 | [submodule "AFLplusplus"]
6 | 	path = AFLplusplus
7 | 	url = https://github.com/SecurityLab-UCD/AFLplusplus.git
8 | 	branch = irfuzzer-0.2
--------------------------------------------------------------------------------
/mutator/include/mutator.h:
--------------------------------------------------------------------------------
1 | #include <stddef.h>
2 | #include <stdint.h>
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | void createISelMutator();
8 | size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize,
9 |                                unsigned int Seed);
10 | 
11 | #ifdef __cplusplus
12 | }
13 | #endif
--------------------------------------------------------------------------------
/scripts/lib/fs.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | from typing import Iterator
4 | 
5 | 
6 | def subdirs_of(dir: Path | str) -> Iterator[os.DirEntry]:
7 |     return (f for f in os.scandir(dir) if f.is_dir())
8 | 
9 | 
10 | def count_files(dir: Path) -> int:
11 |     """
12 |     count the number of files in the specified directory (not including sub-directories)
13 |     """
14 |     return len(next(os.walk(dir))[2])
15 | 
--------------------------------------------------------------------------------
/mutator/scripts/validate.sh:
--------------------------------------------------------------------------------
1 | INPUT=$1
2 | 
3 | if [[ -z $INPUT ]]
4 | then
5 |     INPUT=$FUZZING_HOME/seeds/seed.bc
6 | fi
7 | 
8 | export NPROC=`nproc --all`
9 | export NPROC=1
10 | for J in $(seq $NPROC)
11 | do
12 |     rm O$J; touch O$J
13 |     for I in $(seq 1000)
14 |     do
15 |         SEED=$(shuf -i 0-4294967295 -n 1)
16 |         $FUZZING_HOME/mutator/build/MutatorDriver $INPUT $SEED -v
17 |         if [[ $? -ne 0 ]]
18 |         then
19 |             echo $SEED &>> O$J
20 |         fi
21 |     done &
22 | done
23 | wait
24 | # Try to match O* that is not empty.
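# (Editorial note, not part of the original script: the `ls -la | grep` below
# matches rows of the form "<user> <user> <size> ... <HH:MM> O<j>" whose size
# field starts with a non-zero digit, i.e. it lists the O<j> files that are
# non-empty and therefore recorded at least one seed for which MutatorDriver
# exited with a non-zero status.)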
25 | ls -la | grep "$USER $USER [[:space:]]*[1-9].* [0-9]*:[0-9]* O[0-9]*" 26 | -------------------------------------------------------------------------------- /scripts/lib/matcher_table_sizes.py: -------------------------------------------------------------------------------- 1 | DAGISEL_MATCHER_TABLE_SIZES: dict[str, int] = { 2 | "AArch64": 486171, 3 | "AMDGPU": 493660, 4 | "ARC": 1998, 5 | "ARM": 201172, 6 | "AVR": 2973, 7 | "BPF": 3586, 8 | "CSKY": 19076, 9 | "Hexagon": 178301, 10 | "Lanai": 2337, 11 | "LoongArch": 28486, 12 | "M68k": 18850, 13 | "MSP430": 9103, 14 | "Mips": 54044, 15 | "NVPTX": 185247, 16 | "PowerPC": 190304, 17 | "RISCV": 2692926, 18 | "Sparc": 6607, 19 | "SystemZ": 53271, 20 | "VE": 71577, 21 | "WebAssembly": 25991, 22 | "X86": 685990, 23 | "XCore": 3854, 24 | } 25 | 26 | GISEL_MATCHER_TABLE_SIZES: dict[str, int] = { 27 | "AArch64": 277753, 28 | "AMDGPU": 338644, 29 | "ARM": 130029, 30 | "M68k": 2388, 31 | "Mips": 60449, 32 | "PowerPC": 83201, 33 | "RISCV": 183021, 34 | "X86": 62522, 35 | } 36 | -------------------------------------------------------------------------------- /scripts/lib/plot_data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import pandas as pd 3 | 4 | 5 | def __convert_percentage_to_float(s: str) -> float: 6 | return float(s.strip("%")) / 100 7 | 8 | 9 | def read_plot_data(file_path: Path) -> pd.DataFrame: 10 | # the table header is not consistent, so we don't want pandas to detect and process the header (1st row of csv) 11 | # but let it use the hard-coded column names below. 12 | return pd.read_csv( 13 | file_path, 14 | index_col=False, 15 | header=None, 16 | skiprows=1, 17 | names=[ 18 | "# relative_time", 19 | "cycles_done", 20 | "cur_item", 21 | "corpus_count", 22 | "pending_total", 23 | "pending_favs", 24 | "bit_cvg", 25 | "shw_cvg", 26 | "saved_crashes", 27 | "saved_hangs", 28 | "max_depth", 29 | "execs_per_sec", 30 | "total_execs", 31 | "edges_found", 32 | ], 33 | converters={ 34 | "bit_cvg": __convert_percentage_to_float, 35 | "shw_cvg": __convert_percentage_to_float, 36 | }, 37 | ) 38 | -------------------------------------------------------------------------------- /scripts/stat_experiments.py: -------------------------------------------------------------------------------- 1 | from tap import Tap 2 | from lib.experiment import get_all_experiments 3 | from process_data import iterate_over_all_experiments 4 | 5 | 6 | class Args(Tap): 7 | input: str 8 | """root directory containing fuzzing output""" 9 | 10 | def configure(self) -> None: 11 | self.add_argument("input") 12 | 13 | 14 | def print_experiment_statuses(root_dir: str) -> None: 15 | for expr in get_all_experiments(root_dir): 16 | print( 17 | expr.isel.ljust(8), 18 | str(expr.target).ljust(40), 19 | str(expr.replicate_id).ljust(2), 20 | end=" ", 21 | ) 22 | 23 | df = expr.read_plot_data() 24 | 25 | if df.shape[0] > 0: 26 | print( 27 | f"{df.iloc[-1]['# relative_time'] / 3600 :.1f}h".ljust(6), 28 | f"{df.iloc[0]['shw_cvg']:.3%}".ljust(7), 29 | "->", 30 | f"{df.iloc[-1]['shw_cvg']:.3%}".ljust(8), 31 | ) 32 | else: 33 | print() 34 | 35 | 36 | def main(): 37 | args = Args().parse_args() 38 | print_experiment_statuses(args.input) 39 | 40 | 41 | if __name__ == "__main__": 42 | main() 43 | -------------------------------------------------------------------------------- /llvm-isel-afl/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 
3.4) 2 | project(llvm-isel-fuzzer) 3 | set(AFLplusplus AFLplusplus) 4 | 5 | find_package(LLVM REQUIRED PATHS $ENV{FUZZING_HOME}/$ENV{LLVM}/build-afl/lib/cmake/llvm NO_DEFAULT_PATH) 6 | 7 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 8 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 9 | 10 | set(CMAKE_CXX_FLAGS "-std=c++17 -fno-rtti -Wall -pthread") 11 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") 12 | 13 | llvm_map_components_to_libnames(LLVM_LIBS 14 | AllTargetsAsmParsers 15 | AllTargetsCodeGens 16 | AllTargetsDescs 17 | AllTargetsInfos 18 | Analysis 19 | AsmPrinter 20 | BitReader 21 | BitWriter 22 | CodeGen 23 | Core 24 | FuzzMutate 25 | IRReader 26 | MC 27 | ScalarOpts 28 | SelectionDAG 29 | Support 30 | Target 31 | ) 32 | 33 | include_directories( 34 | ${LLVM_INCLUDE_DIRS} 35 | ) 36 | add_executable(isel-fuzzing 37 | afl-driver.cpp 38 | llvm-isel-fuzzer.cpp 39 | ) 40 | target_compile_options(isel-fuzzing PRIVATE -fno-rtti) 41 | 42 | target_link_libraries(isel-fuzzing 43 | $ENV{FUZZING_HOME}/${AFLplusplus}/afl-compiler-rt.o 44 | ${LLVM_LIBS} 45 | ) -------------------------------------------------------------------------------- /scripts/lib/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from pathlib import Path 4 | import subprocess 5 | 6 | 7 | FUZZING_HOME = os.getenv(key="FUZZING_HOME") 8 | LLVM = os.getenv(key="LLVM", default="llvm-project") 9 | 10 | LLVM_BIN_PATH = Path(LLVM, "build-release/bin") 11 | LLC = Path(LLVM_BIN_PATH, "llc") 12 | LLVM_AS = Path(LLVM_BIN_PATH, "llvm-as") 13 | LLVM_DIS = Path(LLVM_BIN_PATH, "llvm-dis") 14 | 15 | IRFUZZER_DATA_ENV = "IRFUZZER_DATA" 16 | 17 | 18 | def __verify_working_dir(): 19 | if FUZZING_HOME is None: 20 | logging.error( 21 | "$FUZZING_HOME not set, why am I running? Did you install correctly?" 22 | ) 23 | exit(1) 24 | 25 | if not os.path.samefile(os.getcwd(), FUZZING_HOME): 26 | logging.warning("I am not in $FUZZING_HOME now.") 27 | 28 | 29 | def __verify_llvm_version(): 30 | expected_commit = "bcb8a9450388" 31 | 32 | actual_commit = ( 33 | subprocess.check_output( 34 | ["git", "-C", LLVM, "rev-parse", "--short", "HEAD"] 35 | ) 36 | .decode("ascii") 37 | .strip() 38 | ) 39 | 40 | if actual_commit != expected_commit: 41 | logging.warn( 42 | f"Your LLVM version {actual_commit} is not {expected_commit}." 43 | " Matcher table sizes may be incorrect." 
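        # (Editorial note, not part of the original file: this check pins the
        # hard-coded tables in scripts/lib/matcher_table_sizes.py to a specific
        # llvm-project commit; if a different revision is checked out, the sizes
        # can be regenerated with scripts/collect_matcher_table_size.py.)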
44 | ) 45 | 46 | 47 | __verify_working_dir() 48 | __verify_llvm_version() 49 | -------------------------------------------------------------------------------- /scripts/lib/arch.py: -------------------------------------------------------------------------------- 1 | ARCH_TO_BACKEND_MAP: dict[str, str] = { 2 | "aarch64": "AArch64", 3 | "aarch64_32": "AArch64", 4 | "aarch64_be": "AArch64", 5 | "amdgcn": "AMDGPU", 6 | "arc": "ARC", 7 | "arm": "ARM", 8 | "armeb": "ARM", 9 | "avr": "AVR", 10 | "bpf": "BPF", 11 | "bpfeb": "BPF", 12 | "bpfel": "BPF", 13 | "csky": "CSKY", 14 | "hexagon": "Hexagon", 15 | "lanai": "Lanai", 16 | "loongarch32": "LoongArch", 17 | "loongarch64": "LoongArch", 18 | "m68k": "M68k", 19 | "mips": "Mips", 20 | "mips64": "Mips", 21 | "mips64el": "Mips", 22 | "mipsel": "Mips", 23 | "msp430": "MSP430", 24 | "nvptx": "NVPTX", 25 | "nvptx64": "NVPTX", 26 | "ppc": "PowerPC", 27 | "ppcle": "PowerPC", 28 | "ppc64": "PowerPC", 29 | "ppc64le": "PowerPC", 30 | "r600": "R600", 31 | "riscv32": "RISCV", 32 | "riscv64": "RISCV", 33 | "sparc": "Sparc", 34 | "sparcel": "Sparc", 35 | "sparcv9": "Sparc", 36 | "systemz": "SystemZ", 37 | "thumb": "ARM", 38 | "thumbeb": "ARM", 39 | "ve": "VE", 40 | "wasm32": "WebAssembly", 41 | "wasm64": "WebAssembly", 42 | "i686": "X86", 43 | "x86_64": "X86", 44 | "xcore": "XCore", 45 | } 46 | 47 | 48 | def normalize_arch(arch: str) -> str: 49 | match arch: 50 | case "aarch64" | "arm64": 51 | return "aarch64" 52 | case "aarch64_32" | "arm64_32": 53 | return "aarch64_32" 54 | case "powerpc" | "ppc" | "ppc32": 55 | return "ppc" 56 | case "powerpcle" | "ppcle" | "ppc32le": 57 | return "ppcle" 58 | case "powerpc64" | "ppc64": 59 | return "ppc64" 60 | case "powerpc64le" | "ppc64le": 61 | return "ppc64le" 62 | case "s390x" | "systemz": 63 | return "systemz" 64 | case _: 65 | return arch 66 | -------------------------------------------------------------------------------- /mutator/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.4) 2 | project(afl-ir-custom-mutator) 3 | 4 | 5 | if(NOT CMAKE_BUILD_TYPE) 6 | set(CMAKE_BUILD_TYPE "Release") 7 | message(STATUS "Build type not specified: Use Release by default") 8 | endif(NOT CMAKE_BUILD_TYPE) 9 | 10 | IF(CMAKE_BUILD_TYPE MATCHES Release) 11 | find_package(LLVM REQUIRED PATHS $ENV{FUZZING_HOME}/$ENV{LLVM}/build-release/lib/cmake/llvm NO_DEFAULT_PATH) 12 | ENDIF(CMAKE_BUILD_TYPE MATCHES Release) 13 | 14 | 15 | IF(CMAKE_BUILD_TYPE MATCHES Debug) 16 | find_package(LLVM REQUIRED PATHS $ENV{FUZZING_HOME}/$ENV{LLVM}/build-debug/lib/cmake/llvm NO_DEFAULT_PATH) 17 | ENDIF(CMAKE_BUILD_TYPE MATCHES Debug) 18 | 19 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 20 | message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") 21 | 22 | set(CMAKE_CXX_FLAGS "-std=c++17 -fno-rtti -Wall -pthread") 23 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") 24 | 25 | llvm_map_components_to_libnames(LLVM_LIBS 26 | AllTargetsAsmParsers 27 | AllTargetsCodeGens 28 | AllTargetsDescs 29 | AllTargetsInfos 30 | Analysis 31 | AsmPrinter 32 | BitReader 33 | BitWriter 34 | CodeGen 35 | Core 36 | FuzzMutate 37 | IRReader 38 | MC 39 | ScalarOpts 40 | SelectionDAG 41 | Support 42 | Target 43 | ) 44 | 45 | include_directories( 46 | $ENV{FUZZING_HOME}/$ENV{AFL}/include/ 47 | ${LLVM_INCLUDE_DIRS} 48 | ./include/ 49 | ) 50 | add_definitions(${LLVM_DEFINITIONS}) 51 | 52 | add_library(AFLCustomIRMutator SHARED 53 | src/afl-mutator.c 54 | src/mutator.cpp 55 | ) 56 | 
target_link_libraries(AFLCustomIRMutator
57 |     ${LLVM_LIBS}
58 | )
59 | 
60 | add_library(AFLFuzzMutate SHARED
61 |     src/afl-mutator.c
62 |     src/fuzzmutate.cpp
63 | )
64 | target_link_libraries(AFLFuzzMutate
65 |     ${LLVM_LIBS}
66 | )
67 | 
68 | 
69 | add_executable(MutatorDriver
70 |     src/main.cpp
71 | )
72 | target_link_libraries(MutatorDriver
73 |     AFLCustomIRMutator
74 |     ${LLVM_LIBS}
75 | )
76 | 
--------------------------------------------------------------------------------
/scripts/combine_crash_data.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from itertools import groupby
3 | from typing import Generator, Set, Tuple
4 | 
5 | import pandas as pd
6 | 
7 | from lib.experiment import Experiment, get_all_experiments
8 | from lib.fs import subdirs_of
9 | 
10 | 
11 | def iterate_over_all_experiments(
12 |     dir: str,
13 | ) -> Generator[Tuple[Experiment, Set[str]], None, None]:
14 |     for expr in get_all_experiments(dir):
15 |         crashes = set()
16 | 
17 |         for crash_type_dir in subdirs_of(expr.path):
18 |             for subdir in subdirs_of(crash_type_dir.path):
19 |                 if subdir.name.startswith("tracedepth_"):
20 |                     crashes.add(subdir.name)
21 |                 else:
22 |                     for subsubdir in subdirs_of(subdir.path):
23 |                         assert subsubdir.name.startswith("tracedepth_")
24 |                         crashes.add(subsubdir.name)
25 | 
26 |         yield (expr, crashes)
27 | 
28 | 
29 | def main() -> None:
30 |     parser = argparse.ArgumentParser(
31 |         description="Combine crash data from different experiments to count unique crashes"
32 |     )
33 | 
34 |     parser.add_argument(
35 |         "-i",
36 |         "--input",
37 |         type=str,
38 |         required=True,
39 |         help="The input directory (the output directory for batch classification script)",
40 |     )
41 | 
42 |     args = parser.parse_args()
43 | 
44 |     groups = groupby(
45 |         iterate_over_all_experiments(args.input),
46 |         key=lambda tuple: ([tuple[0].fuzzer, tuple[0].isel, str(tuple[0].target)]),
47 |     )
48 | 
49 |     df = pd.DataFrame(
50 |         columns=["fuzzer", "isel", "target", "n_unique_crashes"],
51 |         data=(
52 |             [
53 |                 *k,
54 |                 len(set((crash for (_, crashes) in v for crash in crashes))),
55 |             ]
56 |             for (k, v) in groups
57 |         ),
58 |     )
59 | 
60 |     df.to_csv("combined-crash-counts.csv")
61 | 
62 | 
63 | if __name__ == "__main__":
64 |     main()
65 | 
--------------------------------------------------------------------------------
/scripts/lib/process_concurrency.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import multiprocessing
3 | import os
4 | import subprocess
5 | from typing import Callable, Iterable, Optional, Tuple, TypeVar
6 | 
7 | from tqdm import tqdm
8 | 
9 | 
10 | MAX_SUBPROCESSES = max(multiprocessing.cpu_count() - 2, 1)
11 | 
12 | __T = TypeVar("__T")
13 | __R = TypeVar("__R")
14 | 
15 | 
16 | def run_concurrent_subprocesses(
17 |     iter: Iterable[__T],
18 |     subprocess_creator: Callable[[__T], subprocess.Popen],
19 |     on_exit: Optional[Callable[[__T, Optional[int], subprocess.Popen], __R]] = None,
20 |     max_jobs: int = MAX_SUBPROCESSES,
21 | ) -> dict[__T, __R]:
22 |     """
23 |     Creates up to `max_jobs` subprocesses that run concurrently.
24 |     `iter` contains the inputs that are used to start each subprocess.
25 |     `subprocess_creator` creates the subprocess and returns a `Popen`.
26 |     After each subprocess exits, `on_exit` is called to collect a user-defined result for that input.
27 |     The return value is a dictionary mapping each input to its `on_exit` result.
28 | 
29 |     The caller has to guarantee that the elements in `iter` are unique, or the output may be incorrect.
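    Illustrative example (editorial addition, not from the repository; the `llc`
    invocation and input directory are hypothetical):

        results = run_concurrent_subprocesses(
            iter=Path("inputs").glob("*.bc"),
            subprocess_creator=lambda p: subprocess.Popen(["llc", str(p)]),
            on_exit=lambda p, exit_code, popen: exit_code,
        )
        # `results` maps each input path to the exit code of its subprocess.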
30 | """ 31 | ret: dict[__T, __R] = {} 32 | processes: dict[int, Tuple[subprocess.Popen, __T]] = dict() 33 | 34 | def wait_next() -> None: 35 | pid, status = os.wait() 36 | p, i = processes.pop(pid) 37 | 38 | exit_code: Optional[int] = None 39 | 40 | if os.WIFEXITED(status): 41 | exit_code = os.WEXITSTATUS(status) 42 | logging.debug(f"Child process {pid} exited with code {exit_code}.") 43 | else: 44 | logging.debug(f"Child process {pid} exited abnormally.") 45 | 46 | if on_exit is not None: 47 | ret[i] = on_exit(i, exit_code, p) 48 | 49 | for input in tqdm(iter): 50 | p = subprocess_creator(input) 51 | processes[p.pid] = (p, input) 52 | 53 | if len(processes) >= max_jobs: 54 | wait_next() 55 | 56 | # wait for remaining processes to exit 57 | while len(processes) > 0: 58 | wait_next() 59 | 60 | return ret 61 | -------------------------------------------------------------------------------- /scripts/summarize_crash_data.py: -------------------------------------------------------------------------------- 1 | from typing import Generator, Tuple 2 | import pandas as pd 3 | import argparse 4 | 5 | from lib.experiment import Experiment, get_all_experiments 6 | 7 | 8 | def iterate_over_all_experiments( 9 | dir: str, 10 | ) -> Generator[Tuple[Experiment, int], None, None]: 11 | for expr_info in get_all_experiments(dir): 12 | with open( 13 | expr_info.path.joinpath("unique_crashes"), "r" 14 | ) as file: 15 | yield (expr_info, int(file.readline())) 16 | 17 | 18 | def collect_crash_data(dir: str) -> pd.DataFrame: 19 | return pd.DataFrame( 20 | columns=["fuzzer", "isel", "target", "replicate", "n_unique_crashes"], 21 | data=( 22 | [ 23 | exp.fuzzer, 24 | exp.isel, 25 | str(exp.target), 26 | exp.replicate_id, 27 | n_unique_crashes, 28 | ] 29 | for (exp, n_unique_crashes) in iterate_over_all_experiments(dir) 30 | ), 31 | ) 32 | 33 | 34 | def main() -> None: 35 | parser = argparse.ArgumentParser(description="Summerize crash data") 36 | 37 | parser.add_argument( 38 | "-i", 39 | "--input", 40 | type=str, 41 | required=True, 42 | help="The input directory (the output directory for batch classification script)", 43 | ) 44 | 45 | args = parser.parse_args() 46 | 47 | df = collect_crash_data(args.input) 48 | 49 | df_summary = ( 50 | df.drop(columns=["replicate"]) 51 | .groupby(["fuzzer", "isel", "target"]) 52 | .agg(["min", "max", "count", "mean", "std"]) 53 | ) 54 | 55 | df_summary.to_csv("crash-counts.csv") 56 | 57 | df_irfuzzer = df[df["fuzzer"] == "irfuzzer"].drop(columns=["fuzzer"]) 58 | df_libfuzzer = df[df["fuzzer"] == "libfuzzer"].drop(columns=["fuzzer"]) 59 | 60 | df_comparison = df_irfuzzer.merge( 61 | df_libfuzzer, 62 | on=["isel", "target", "replicate"], 63 | suffixes=("_irfuzzer", "_libfuzzer"), 64 | ) 65 | 66 | df_comparison.to_csv("crash-data-comparison.csv") 67 | 68 | 69 | if __name__ == "__main__": 70 | main() 71 | -------------------------------------------------------------------------------- /scripts/classify_llc_tests.py: -------------------------------------------------------------------------------- 1 | from itertools import groupby 2 | from pathlib import Path 3 | import pandas as pd 4 | from tap import Tap 5 | 6 | from lib.llc_test import LLCTest, parse_llc_tests 7 | 8 | 9 | class Args(Tap): 10 | output: str 11 | """directory for storing summary (will create if not exist)""" 12 | 13 | def configure(self): 14 | self.add_argument("-o", "--output") 15 | 16 | 17 | def classify( 18 | backend: str, 19 | tests: list[LLCTest], 20 | summary_out: Path, 21 | ) -> None: 22 | commands = (cmd for test 
in tests for cmd in test.runnable_llc_commands) 23 | 24 | df = pd.DataFrame( 25 | columns=["arch", "gisel", "triple", "cpu", "attrs"], 26 | data=( 27 | [ 28 | cmd.target.triple.arch, 29 | cmd.global_isel, 30 | str(cmd.target.triple), 31 | cmd.target.cpu, 32 | ",".join(sorted(cmd.target.attrs)), 33 | ] 34 | for cmd in commands 35 | ), 36 | ) 37 | 38 | df.to_csv(summary_out.joinpath(f"{backend}-raw.csv")) 39 | 40 | df.groupby(["arch", "gisel", "triple", "cpu", "attrs"], dropna=False).size().to_csv( 41 | summary_out.joinpath(f"{backend}-summary.csv") 42 | ) 43 | 44 | for arch in df["arch"].unique(): 45 | arch_df = df[df["arch"] == arch] 46 | 47 | pd.crosstab( 48 | index=arch_df["cpu"].fillna(""), 49 | columns=arch_df["attrs"], 50 | dropna=False, 51 | ).to_csv(summary_out.joinpath(f"{arch}-crosstab.csv")) 52 | 53 | 54 | def main() -> None: 55 | args = Args(underscores_to_dashes=True).parse_args() 56 | 57 | summary_out = Path(args.output) 58 | summary_out.mkdir(exist_ok=True) 59 | 60 | tests = parse_llc_tests() 61 | 62 | for key, group in groupby(tests, key=lambda test: test.backend): 63 | arch_summary_out = summary_out.joinpath(key) 64 | arch_summary_out.mkdir(exist_ok=True) 65 | 66 | classify( 67 | backend=key, 68 | tests=list(group), 69 | summary_out=arch_summary_out, 70 | ) 71 | 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /scripts/lib/experiment.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Iterable, NamedTuple, Optional 3 | 4 | import pandas as pd 5 | from lib.fs import subdirs_of 6 | from lib.plot_data import read_plot_data 7 | 8 | from lib.target import Target 9 | 10 | 11 | class Experiment(NamedTuple): 12 | path: Path 13 | fuzzer: str 14 | isel: str 15 | target: Target 16 | replicate_id: int 17 | 18 | @property 19 | def name(self) -> str: 20 | return f"{self.fuzzer}:{self.isel}:{self.target}:{self.replicate_id}" 21 | 22 | @property 23 | def plot_data_path(self) -> Path: 24 | return self.path.joinpath("default", "plot_data") 25 | 26 | @property 27 | def fuzzer_stats_path(self) -> Path: 28 | return self.path.joinpath("default", "fuzzer_stats") 29 | 30 | @property 31 | def cur_input_path(self) -> Path: 32 | return self.path.joinpath("default", ".cur_input") 33 | 34 | @property 35 | def run_time(self) -> int: 36 | s = self['run_time'] 37 | return -1 if s is None else int(s) 38 | 39 | def __getitem__(self, key: str) -> Optional[str]: 40 | if not self.fuzzer_stats_path.exists(): 41 | return None 42 | 43 | with open(self.fuzzer_stats_path) as f: 44 | for line in f: 45 | if line.startswith(key): 46 | return line.split(" : ")[1] 47 | 48 | return None 49 | 50 | def read_plot_data(self) -> pd.DataFrame: 51 | return read_plot_data(self.plot_data_path) 52 | 53 | 54 | def get_all_experiments(root_dir: Path | str) -> Iterable[Experiment]: 55 | for fuzzer_dir in subdirs_of(root_dir): 56 | for isel_dir in subdirs_of(fuzzer_dir.path): 57 | for target_dir in sorted( 58 | subdirs_of(isel_dir.path), key=lambda dir: dir.name 59 | ): 60 | for replicate_dir in subdirs_of(target_dir.path): 61 | yield Experiment( 62 | path=Path(replicate_dir.path), 63 | fuzzer=fuzzer_dir.name.split(".")[0], 64 | isel=isel_dir.name, 65 | target=Target.parse(target_dir.name), 66 | replicate_id=int(replicate_dir.name), 67 | ) 68 | -------------------------------------------------------------------------------- /Dockerfile: 
-------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | ENV DEBIAN_FRONTEND=noninteractive 4 | RUN apt-get update && \ 5 | apt-get -y upgrade && \ 6 | apt-get install -y -q git build-essential wget zlib1g-dev cmake python3 python3-pip ninja-build ccache && \ 7 | apt-get clean 8 | 9 | ENV FUZZING_HOME=/IRFuzzer 10 | WORKDIR $FUZZING_HOME 11 | COPY . $FUZZING_HOME 12 | 13 | ENV LLVM=llvm-project 14 | ENV AFL=AFLplusplus 15 | ENV PATH="${PATH}:/clang+llvm/bin" 16 | ENV AFL_LLVM_INSTRUMENT=CLASSIC 17 | 18 | RUN CLANG_LLVM=clang+llvm-14.0.0-x86_64-linux-gnu-ubuntu-18.04 && \ 19 | wget --no-verbose --show-progress --progress=dot:mega https://github.com/llvm/llvm-project/releases/download/llvmorg-14.0.0/$CLANG_LLVM.tar.xz && \ 20 | tar -xvf $CLANG_LLVM.tar.xz -C / && \ 21 | mv /$CLANG_LLVM /clang+llvm && \ 22 | rm $CLANG_LLVM.tar.xz 23 | 24 | RUN git clone https://github.com/SecurityLab-UCD/AFLplusplus.git --branch=irfuzzer-0.2 --depth=1 $AFL && \ 25 | cd $AFL && \ 26 | make -j 27 | 28 | RUN git clone https://github.com/SecurityLab-UCD/llvm-project.git --branch=irfuzzer-0.2 --depth=1 $LLVM 29 | 30 | RUN mkdir -p $LLVM/build-afl && \ 31 | cd $LLVM/build-afl && \ 32 | cmake \ 33 | -GNinja \ 34 | -DBUILD_SHARED_LIBS=OFF \ 35 | -DLLVM_BUILD_TOOLS=ON \ 36 | -DLLVM_CCACHE_BUILD=ON \ 37 | -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="ARC;CSKY;LoongArch;M68k" \ 38 | -DCMAKE_C_COMPILER=$FUZZING_HOME/$AFL/afl-clang-fast \ 39 | -DCMAKE_CXX_COMPILER=$FUZZING_HOME/$AFL/afl-clang-fast++ \ 40 | -DCMAKE_BUILD_TYPE=Release \ 41 | -DLLVM_APPEND_VC_REV=OFF \ 42 | -DLLVM_BUILD_EXAMPLES=OFF \ 43 | -DLLVM_BUILD_RUNTIME=OFF \ 44 | -DLLVM_INCLUDE_EXAMPLES=OFF \ 45 | -DLLVM_USE_SANITIZE_COVERAGE=OFF \ 46 | -DLLVM_USE_SANITIZER="" \ 47 | ../llvm && \ 48 | ninja -j $(nproc --all) 49 | 50 | RUN mkdir -p $LLVM/build-release && \ 51 | cd $LLVM/build-release && \ 52 | cmake \ 53 | -GNinja \ 54 | -DBUILD_SHARED_LIBS=ON \ 55 | -DLLVM_CCACHE_BUILD=ON \ 56 | -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="ARC;CSKY;LoongArch;M68k" \ 57 | -DCMAKE_C_COMPILER=clang \ 58 | -DCMAKE_CXX_COMPILER=clang++ \ 59 | -DCMAKE_BUILD_TYPE=Release \ 60 | ../llvm && \ 61 | ninja -j $(nproc --all) 62 | 63 | RUN mkdir -p llvm-isel-afl/build && \ 64 | cd llvm-isel-afl/build && \ 65 | cmake \ 66 | -GNinja \ 67 | -DCMAKE_C_COMPILER=$FUZZING_HOME/$AFL/afl-clang-fast \ 68 | -DCMAKE_CXX_COMPILER=$FUZZING_HOME/$AFL/afl-clang-fast++ \ 69 | .. && \ 70 | ninja -j $(nproc --all) 71 | 72 | RUN mkdir -p mutator/build && \ 73 | cd mutator/build && \ 74 | cmake -GNinja .. 
&& \ 75 | ninja -j $(nproc --all) 76 | -------------------------------------------------------------------------------- /scripts/lib/triple.py: -------------------------------------------------------------------------------- 1 | from ctypes import CDLL, c_char_p, cdll 2 | from typing import ClassVar, Optional 3 | from lib import LLVM 4 | 5 | from lib.arch import ARCH_TO_BACKEND_MAP, normalize_arch 6 | 7 | 8 | LIB_LLVM_TARGET_PATH = LLVM + "/build-release/lib/libLLVMTarget.so" 9 | 10 | class Triple: 11 | llvm_lib: ClassVar[Optional[CDLL]] = None 12 | 13 | arch: str 14 | vendor: Optional[str] 15 | os: Optional[str] 16 | abi: Optional[str] 17 | 18 | @property 19 | def backend(self) -> str: 20 | return ARCH_TO_BACKEND_MAP[self.arch] 21 | 22 | def __init__( 23 | self, 24 | arch: str, 25 | vendor: Optional[str] = None, 26 | os: Optional[str] = None, 27 | abi: Optional[str] = None, 28 | ) -> None: 29 | assert len(arch) > 0 30 | self.arch = normalize_arch(arch) 31 | self.vendor = self.normalize_component(vendor) 32 | self.os = self.normalize_component(os) 33 | self.abi = self.normalize_component(abi) 34 | 35 | def __eq__(self, __o: object) -> bool: 36 | if not isinstance(__o, Triple): 37 | return False 38 | 39 | return ( 40 | self.arch == __o.arch 41 | and self.vendor == __o.vendor 42 | and self.os == __o.os 43 | and self.abi == __o.abi 44 | ) 45 | 46 | def __hash__(self) -> int: 47 | return hash(str(self)) 48 | 49 | def __repr__(self) -> str: 50 | s = "-".join( 51 | (component if component else "") 52 | for component in [self.arch, self.vendor, self.os, self.abi] 53 | ) 54 | 55 | return s.rstrip("-") 56 | 57 | @classmethod 58 | def normalize_component(cls, s: Optional[str]) -> Optional[str]: 59 | return None if s in [None, "", "none", "unknown"] else s 60 | 61 | @classmethod 62 | def normalize(cls, s: str) -> str: 63 | if cls.llvm_lib is None: 64 | cls.llvm_lib = cdll.LoadLibrary(LIB_LLVM_TARGET_PATH) 65 | cls.llvm_lib.LLVMNormalizeTargetTriple.restype = c_char_p 66 | 67 | c_arg = c_char_p(s.encode("ascii")) 68 | c_ret = cls.llvm_lib.LLVMNormalizeTargetTriple(c_arg) 69 | return c_ret.decode("ascii") 70 | 71 | @classmethod 72 | def parse(cls, s: str) -> "Triple": 73 | assert len(s) > 0 74 | 75 | parts = cls.normalize(s).split("-") 76 | n = len(parts) 77 | 78 | assert n > 0 and n <= 4 79 | 80 | return Triple( 81 | arch=parts[0], 82 | vendor=parts[1] if n >= 2 else None, 83 | os=parts[2] if n >= 3 else None, 84 | abi=parts[3] if n == 4 else None, 85 | ) 86 | -------------------------------------------------------------------------------- /scripts/lib/target_lists.py: -------------------------------------------------------------------------------- 1 | from lib.target import Target 2 | 3 | TARGET_LISTS: dict[str, list[Target]] = { 4 | "1": [ 5 | Target("arm"), 6 | Target("aarch64"), 7 | Target("i686"), 8 | Target("x86_64"), 9 | Target("riscv32"), 10 | Target("riscv64"), 11 | Target("wasm32"), 12 | Target("wasm64"), 13 | ], 14 | "1a": [ 15 | Target("arm", None, "+neon"), 16 | Target("aarch64", None, "+neon"), 17 | Target("i686", None, "+avx512f"), 18 | Target("x86_64", None, "+avx512f"), 19 | Target("riscv32", None, "+v"), 20 | Target("riscv64", None, "+v"), 21 | Target("wasm32", None, "+simd128"), 22 | Target("wasm64", None, "+simd128"), 23 | ], 24 | "2": [ 25 | Target("mips"), 26 | Target("mips64"), 27 | Target("ppc"), 28 | Target("ppc64"), 29 | Target("amdgcn"), 30 | Target("nvptx64"), 31 | Target("hexagon"), 32 | ], 33 | "3": [ 34 | Target("aarch64_32"), 35 | Target("aarch64_be"), 36 | 
Target("armeb"), 37 | Target("avr"), 38 | Target("bpf"), 39 | Target("bpfeb"), 40 | Target("bpfel"), 41 | Target("lanai"), 42 | Target("mips64el"), 43 | Target("mipsel"), 44 | Target("msp430"), 45 | Target("nvptx"), 46 | Target("ppcle"), 47 | Target("ppc64le"), 48 | Target("r600"), 49 | Target("sparc"), 50 | Target("sparcel"), 51 | Target("sparcv9"), 52 | Target("systemz"), 53 | Target("thumb"), 54 | Target("thumbeb"), 55 | Target("ve"), 56 | Target("xcore"), 57 | ], 58 | "cpu": [ 59 | # Intel 60 | Target("x86_64", "alderlake"), 61 | Target("x86_64", "sapphirerapids"), 62 | # AMD 63 | Target("x86_64", "znver3"), 64 | # Apple 65 | Target("aarch64", "apple-a16"), 66 | Target("aarch64", "apple-m2"), 67 | # Samsung 68 | Target("aarch64", "exynos-m5"), 69 | # ARM 70 | Target("aarch64", "cortex-a710"), 71 | Target("aarch64", "cortex-x2"), 72 | Target("aarch64", "cortex-r82"), 73 | # Target("aarch64", "neoverse-v2"), 74 | # AMD 75 | Target("amdgcn", "gfx1100"), 76 | Target("amdgcn", "gfx1036"), 77 | Target("amdgcn", "gfx1010"), 78 | # Qualcomm 79 | Target("hexagon", "hexagonv69"), 80 | # Nvidia 81 | Target("nvptx64", "sm_90"), 82 | # SiFive 83 | Target("riscv64", "sifive-u74"), 84 | # WASM 85 | Target("wasm64", "bleeding-edge"), 86 | ], 87 | } 88 | -------------------------------------------------------------------------------- /scripts/lib/llc_command.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | from typing import Iterable, NamedTuple, Optional 4 | 5 | from lib import LLC 6 | from lib.triple import Triple 7 | from lib.target import Target 8 | 9 | 10 | class LLCCommand(NamedTuple): 11 | target: Target 12 | global_isel: bool 13 | 14 | def get_options(self, output: Optional[str | Path] = None) -> Iterable[str]: 15 | yield f"-mtriple={self.target.triple}" 16 | 17 | if self.target.cpu: 18 | yield f"-mcpu={self.target.cpu}" 19 | 20 | if len(self.target.attrs) > 0: 21 | yield f"-mattr={','.join(self.target.attrs)}" 22 | 23 | if self.global_isel: 24 | yield "-global-isel" 25 | 26 | if output: 27 | yield f"-o" 28 | yield str(output) 29 | 30 | def get_args( 31 | self, input: str | Path, output: Optional[str | Path] = None 32 | ) -> list[str]: 33 | return [ 34 | str(LLC), 35 | *self.get_options(output), 36 | str(input), 37 | ] 38 | 39 | @classmethod 40 | def parse( 41 | cls, command: str, default_triple: Optional[Triple] = None 42 | ) -> "LLCCommand": 43 | assert "llc" in command 44 | 45 | triple = cls.__get_triple_from_command(command) 46 | 47 | if triple is None: 48 | triple = default_triple 49 | 50 | assert triple is not None, f"Cannot determine triple" 51 | 52 | return LLCCommand( 53 | target=Target( 54 | triple=triple, 55 | cpu=cls.__get_cpu_from_command(command), 56 | attrs=cls.__get_attrs_from_command(command), 57 | ), 58 | global_isel=re.match(r".*-global-isel", command) is not None, 59 | ) 60 | 61 | @staticmethod 62 | def __get_triple_from_command(command: str) -> Optional[Triple]: 63 | if (match := re.match(r".*-mtriple[= ]\"?([a-z0-9_-]+)", command)) is not None: 64 | return Triple.parse(match.group(1)) 65 | 66 | if (match := re.match(r".*-march[= ]\"?([a-z0-9_-]+)", command)) is not None: 67 | return Triple(arch=match.group(1)) 68 | 69 | return None 70 | 71 | @staticmethod 72 | def __get_cpu_from_command(command: str) -> Optional[str]: 73 | if (match := re.match(r".*-mcpu[= ]\"?([a-z0-9-]+)", command)) is not None: 74 | return match.group(1) 75 | else: 76 | return None 77 | 78 | @staticmethod 79 | def 
__get_attrs_from_command(command: str) -> Iterable[str]: 80 | return ( 81 | attr 82 | for arg_val in re.findall(r"-mattr[= ]\"?([A-Za-z0-9,\+-]+)", command) 83 | for attr in arg_val.split(",") 84 | ) 85 | -------------------------------------------------------------------------------- /mutator/src/fuzzmutate.cpp: -------------------------------------------------------------------------------- 1 | #include "mutator.h" 2 | 3 | #include "llvm/ADT/StringRef.h" 4 | #include "llvm/Analysis/TargetLibraryInfo.h" 5 | #include "llvm/Bitcode/BitcodeReader.h" 6 | #include "llvm/Bitcode/BitcodeWriter.h" 7 | #include "llvm/CodeGen/CommandFlags.h" 8 | #include "llvm/FuzzMutate/FuzzerCLI.h" 9 | #include "llvm/FuzzMutate/IRMutator.h" 10 | #include "llvm/FuzzMutate/Operations.h" 11 | #include "llvm/IR/Constants.h" 12 | #include "llvm/IR/LLVMContext.h" 13 | #include "llvm/IR/LegacyPassManager.h" 14 | #include "llvm/IR/Module.h" 15 | #include "llvm/IR/Verifier.h" 16 | #include "llvm/IRReader/IRReader.h" 17 | #include "llvm/Support/CommandLine.h" 18 | #include "llvm/Support/DataTypes.h" 19 | #include "llvm/Support/Debug.h" 20 | #include "llvm/Support/SourceMgr.h" 21 | #include "llvm/Support/TargetSelect.h" 22 | #include "llvm/Target/TargetMachine.h" 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | using namespace llvm; 31 | 32 | static std::unique_ptr Mutator; 33 | 34 | extern "C" { 35 | 36 | // Original IRMutator setting. 37 | void createISelMutator() { 38 | std::vector Types{ 39 | Type::getInt1Ty, Type::getInt8Ty, Type::getInt16Ty, Type::getInt32Ty, 40 | Type::getInt64Ty, Type::getFloatTy, Type::getDoubleTy}; 41 | 42 | std::vector> Strategies; 43 | std::vector Ops; 44 | 45 | describeFuzzerIntOps(Ops); 46 | describeFuzzerFloatOps(Ops); 47 | describeFuzzerControlFlowOps(Ops); 48 | describeFuzzerPointerOps(Ops); 49 | describeFuzzerAggregateOps(Ops); 50 | describeFuzzerVectorOps(Ops); 51 | 52 | Strategies.emplace_back(new InjectorIRStrategy(std::move(Ops))); 53 | Strategies.emplace_back(new InstDeleterIRStrategy()); 54 | 55 | Mutator = 56 | std::make_unique(std::move(Types), std::move(Strategies)); 57 | } 58 | 59 | size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize, 60 | unsigned int Seed) { 61 | LLVMContext Context; 62 | std::unique_ptr M; 63 | if (Size <= 1) 64 | // We get bogus data given an empty corpus - just create a new module. 65 | M.reset(new Module("M", Context)); 66 | else 67 | M = parseModule(Data, Size, Context); 68 | if (!M) { 69 | errs() << "Parse module error. No mutation is done. Data size: " << Size 70 | << ". 
Given data wrote to err.bc\n"; 71 | std::ofstream outfile = 72 | std::ofstream("err.bc", std::ios::out | std::ios::binary); 73 | outfile.write((char *)Data, Size); 74 | outfile.close(); 75 | exit(1); 76 | } 77 | 78 | Mutator->mutateModule(*M, Seed, MaxSize); 79 | 80 | return writeModule(*M, Data, MaxSize); 81 | } 82 | } -------------------------------------------------------------------------------- /scripts/collect_combined_mt_coverage.py: -------------------------------------------------------------------------------- 1 | from itertools import groupby 2 | from pathlib import Path 3 | from typing import Iterable 4 | from bitarray import bitarray 5 | from tap import Tap 6 | from math import ceil 7 | 8 | from lib.arch import ARCH_TO_BACKEND_MAP 9 | from lib.experiment import Experiment, get_all_experiments 10 | from lib.matcher_table_sizes import ( 11 | DAGISEL_MATCHER_TABLE_SIZES, 12 | GISEL_MATCHER_TABLE_SIZES, 13 | ) 14 | 15 | 16 | class Args(Tap): 17 | input: str 18 | """root directory containing fuzzing output""" 19 | 20 | def configure(self) -> None: 21 | self.add_argument("input") 22 | 23 | 24 | def read_coverage_map(path: Path, matcher_table_size: int) -> bitarray: 25 | cvg_map = bitarray() 26 | 27 | with open(path, "rb") as file: 28 | cvg_map.fromfile(file) 29 | assert ceil(matcher_table_size / 64) * 64 == len(cvg_map) 30 | cvg_map = cvg_map[:matcher_table_size] 31 | 32 | return cvg_map 33 | 34 | 35 | def get_combined_coverage_map( 36 | experiments: Iterable[Experiment], map_size: int, map_rel_path: str 37 | ) -> bitarray: 38 | combined_cvg_map = bitarray(map_size) 39 | combined_cvg_map.setall(1) 40 | 41 | for expr in experiments: 42 | cvg_map_path = expr.path.joinpath(map_rel_path) 43 | 44 | if not cvg_map_path.exists(): 45 | print(f"WARNING: {cvg_map_path} does not exist. 
Skipped.") 46 | continue 47 | 48 | combined_cvg_map &= read_coverage_map(cvg_map_path, map_size) 49 | 50 | return combined_cvg_map 51 | 52 | 53 | def get_matcher_table_size(backend: str, isel: str) -> int: 54 | backend = ARCH_TO_BACKEND_MAP[backend] 55 | 56 | if isel == "dagisel": 57 | return DAGISEL_MATCHER_TABLE_SIZES[backend] 58 | elif isel == "gisel": 59 | return GISEL_MATCHER_TABLE_SIZES[backend] 60 | else: 61 | raise Exception("Invalid ISel") 62 | 63 | 64 | def main(): 65 | args = Args().parse_args() 66 | 67 | for (arch, isel), exprs in groupby( 68 | get_all_experiments(args.input), 69 | lambda expr: (expr.target.triple.arch, expr.isel), 70 | ): 71 | matcher_table_size = get_matcher_table_size(arch, isel) 72 | exprs = list(exprs) 73 | 74 | initial_cvg_map = get_combined_coverage_map( 75 | exprs, 76 | matcher_table_size, 77 | "default/fuzz_initial_shadowmap", 78 | ) 79 | 80 | current_cvg_map = get_combined_coverage_map( 81 | exprs, 82 | matcher_table_size, 83 | "default/fuzz_shadowmap", 84 | ) 85 | 86 | assert len(initial_cvg_map) == len(current_cvg_map) 87 | 88 | print( 89 | arch.ljust(10), 90 | isel.ljust(8), 91 | f"{matcher_table_size}".ljust(8), 92 | f"{initial_cvg_map.count(0) / matcher_table_size :.3%}".ljust(6), 93 | "->", 94 | f"{current_cvg_map.count(0) / matcher_table_size :.3%}".ljust(6), 95 | ) 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /mutator/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "mutator.h" 2 | 3 | #include "llvm/FuzzMutate/FuzzerCLI.h" 4 | #include "llvm/FuzzMutate/IRMutator.h" 5 | #include "llvm/FuzzMutate/Operations.h" 6 | #include "llvm/IR/LLVMContext.h" 7 | #include "llvm/IR/Module.h" 8 | #include "llvm/IR/Verifier.h" 9 | #include "llvm/Support/raw_ostream.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #define MAX_SIZE 1048576 20 | 21 | // https://stackoverflow.com/questions/322938/recommended-way-to-initialize-srand 22 | unsigned long mix(unsigned long a, unsigned long b, unsigned long c) { 23 | a = a - b; 24 | a = a - c; 25 | a = a ^ (c >> 13); 26 | b = b - c; 27 | b = b - a; 28 | b = b ^ (a << 8); 29 | c = c - a; 30 | c = c - b; 31 | c = c ^ (b >> 13); 32 | a = a - b; 33 | a = a - c; 34 | a = a ^ (c >> 12); 35 | b = b - c; 36 | b = b - a; 37 | b = b ^ (a << 16); 38 | c = c - a; 39 | c = c - b; 40 | c = c ^ (b >> 5); 41 | a = a - b; 42 | a = a - c; 43 | a = a ^ (c >> 3); 44 | b = b - c; 45 | b = b - a; 46 | b = b ^ (a << 10); 47 | c = c - a; 48 | c = c - b; 49 | c = c ^ (b >> 15); 50 | return c; 51 | } 52 | int main(int argc, char **argv) { 53 | if (argc < 2) { 54 | fprintf(stderr, "I need a file to mutate on"); 55 | exit(1); 56 | } 57 | std::ifstream infile(argv[1], std::ios::binary | std::ios::ate); 58 | std::streamsize size = infile.tellg(); 59 | infile.seekg(0, std::ios::beg); 60 | 61 | std::vector buffer(MAX_SIZE); 62 | if (infile.read(buffer.data(), size)) { 63 | srand(mix(clock(), time(NULL), getpid())); 64 | createISelMutator(); 65 | unsigned int Seed = rand(); 66 | if (argc > 2) { 67 | Seed = atoi(argv[2]); 68 | } 69 | llvm::errs() << Seed << "\n"; 70 | bool validateMode = false; 71 | if (argc > 3 && argv[3][1] == 'v') { 72 | validateMode = true; 73 | } 74 | size_t newSize = 75 | LLVMFuzzerCustomMutator((uint8_t *)buffer.data(), size, MAX_SIZE, Seed); 76 | if (!validateMode) { 77 | std::ofstream outbc = 78 | 
std::ofstream("out.bc", std::ios::out | std::ios::binary); 79 | outbc.write(buffer.data(), newSize); 80 | outbc.close(); 81 | } 82 | llvm::LLVMContext Context; 83 | std::unique_ptr M = 84 | llvm::parseModule((uint8_t *)buffer.data(), newSize, Context); 85 | #ifdef DEBUG 86 | if (!validateMode) 87 | M->dump(); 88 | #endif 89 | /* 90 | std::error_code EC; 91 | llvm::raw_fd_ostream outll("out.ll", EC); 92 | M->print(outll, nullptr); 93 | */ 94 | // llvm::errs() << "Verifing Module..."; 95 | if (verifyModule(*M, &llvm::errs(), nullptr)) { 96 | llvm::errs() << "Verifier failed. Seed: " << Seed << "\n"; 97 | // llvm::errs() << *M << "\n"; 98 | } else { 99 | // llvm::errs() << "Good.\n"; 100 | } 101 | } else { 102 | fprintf(stderr, "I can't read the file."); 103 | } 104 | infile.close(); 105 | return 0; 106 | } -------------------------------------------------------------------------------- /scripts/collect_bad_inputs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import random 3 | import shutil 4 | import subprocess 5 | from typing import Iterable 6 | from tap import Tap 7 | 8 | from lib.experiment import get_all_experiments 9 | from lib.time_parser import get_time_in_seconds 10 | 11 | 12 | class Args(Tap): 13 | input: str 14 | """Path to the fuzzing output directory""" 15 | 16 | output: str 17 | """Path to the directory to write bad inputs and seeds to""" 18 | 19 | time: str 20 | """ 21 | the threshold duration for an experiment to be considered failed. 22 | Current input that cause an experiment to fail in less than this time 23 | is considered a bad input. 24 | (e.g. '100s', '30m', '2h', '1d') 25 | """ 26 | 27 | n: int = 256 28 | """Number of random seeds to test for each bad input""" 29 | 30 | driver: str = "mutator/build/MutatorDriver" 31 | """Path to the mutator driver executable""" 32 | 33 | def configure(self) -> None: 34 | self.add_argument("input") 35 | self.add_argument("-o", "--output") 36 | self.add_argument("-t", "--time") 37 | 38 | def get_time_in_seconds(self) -> int: 39 | return get_time_in_seconds(self.time) 40 | 41 | 42 | def copy_bad_inputs( 43 | fuzzing_out_dir: Path, out_dir: Path, time_secs: int 44 | ) -> Iterable[Path]: 45 | for expr in get_all_experiments(fuzzing_out_dir): 46 | if expr.run_time < time_secs: 47 | dest_path = out_dir.joinpath(expr.name + ".bc") 48 | 49 | if not expr.cur_input_path.exists(): 50 | print(f"Warning: {expr.cur_input_path} does not exist!") 51 | continue 52 | 53 | shutil.copy(expr.cur_input_path, dest_path) 54 | yield dest_path 55 | 56 | 57 | def mutate(mutator_driver: Path, input_bc: Path, seed: int) -> int: 58 | return subprocess.run( 59 | [mutator_driver, input_bc, str(seed)], 60 | stdout=subprocess.DEVNULL, 61 | stderr=subprocess.DEVNULL, 62 | ).returncode 63 | 64 | 65 | def collect_bad_seeds(mutator_driver: Path, input_bc: Path, n: int) -> Iterable[int]: 66 | for _ in range(n): 67 | seed = random.randint(0, 4294967295) 68 | ret_code = mutate(mutator_driver, input_bc, seed) 69 | if ret_code != 0: 70 | yield seed 71 | 72 | 73 | def main() -> None: 74 | args = Args().parse_args() 75 | 76 | out_dir = Path(args.output) 77 | out_dir.mkdir(exist_ok=True) 78 | 79 | bad_input_paths = list( 80 | copy_bad_inputs( 81 | fuzzing_out_dir=Path(args.input), 82 | out_dir=out_dir, 83 | time_secs=args.get_time_in_seconds(), 84 | ) 85 | ) 86 | 87 | print(f"{len(bad_input_paths)} bad inputs written to dir {out_dir}.") 88 | 89 | for bad_input_path in bad_input_paths: 90 | print(f"Collecting 
bad seeds for {bad_input_path}...") 91 | 92 | bad_input_path.parent.joinpath( 93 | bad_input_path.name.removesuffix(".bc") + ".seeds.txt" 94 | ).write_text( 95 | "\n".join( 96 | str(seed) 97 | for seed in collect_bad_seeds( 98 | mutator_driver=Path(args.driver), 99 | input_bc=bad_input_path, 100 | n=args.n, 101 | ) 102 | ) 103 | ) 104 | 105 | print("Done.") 106 | 107 | 108 | if __name__ == "__main__": 109 | main() 110 | -------------------------------------------------------------------------------- /scripts/combine_fuzzing_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | from functools import reduce 5 | from typing import Callable 6 | 7 | from lib import IRFUZZER_DATA_ENV 8 | from lib.experiment import Experiment, get_all_experiments 9 | from lib.fs import subdirs_of 10 | 11 | 12 | class BlackList: 13 | name: str 14 | func: Callable[[Experiment, int], bool] 15 | 16 | def __init__(self, name, func): 17 | self.name = name 18 | self.func = func 19 | 20 | def ignore(self, expr: Experiment, mapped_id: int): 21 | do_ignore = self.func(expr, mapped_id) 22 | if do_ignore: 23 | print("\t", self.name, "Failed") 24 | return do_ignore 25 | 26 | 27 | use_xcore_makeup = BlackList( 28 | "use_xcore_makeup", 29 | lambda expr_info, _: "xcore" == expr_info.arch 30 | and "xcore-makeup" not in expr_info.expr_path, 31 | ) 32 | max_five_expr = BlackList("max_five_expr", lambda _, mapped_id: mapped_id > 4) 33 | fuzzed_long_enough = BlackList( 34 | "fuzzed_long_enough", lambda expr_info, _: expr_info.run_time < 259000 35 | ) 36 | ignore_arm64 = BlackList( 37 | "ignore_arm64", lambda expr_info, mapped_id: "arm64" in expr_info.arch 38 | ) 39 | 40 | blacklists = [use_xcore_makeup, max_five_expr, fuzzed_long_enough, ignore_arm64] 41 | 42 | 43 | def merge_subdirs_by_symlink(src: str, dest: str) -> None: 44 | for archive_dir in subdirs_of(src): 45 | for expr in get_all_experiments(archive_dir.path): 46 | symlink_dest_dir = os.path.join( 47 | dest, expr.fuzzer, expr.isel, str(expr.target) 48 | ) 49 | os.makedirs(symlink_dest_dir, exist_ok=True) 50 | mapped_id = 1 + max( 51 | [ 52 | -1, 53 | *( 54 | int(dir_entry.name) 55 | for dir_entry in subdirs_of(symlink_dest_dir) 56 | ), 57 | ] 58 | ) 59 | symlink_src = expr.path 60 | symlink_dest = os.path.join(symlink_dest_dir, str(mapped_id)) 61 | print( 62 | symlink_dest, 63 | " -> ", 64 | symlink_src, 65 | flush=True, 66 | ) 67 | 68 | if reduce( 69 | lambda a, b: a or b, 70 | [bl.ignore(expr, mapped_id) for bl in blacklists], 71 | ): 72 | print("NOT USED", flush=True) 73 | else: 74 | os.symlink(symlink_src, symlink_dest) 75 | print("DONE", flush=True) 76 | 77 | 78 | def main() -> None: 79 | parser = argparse.ArgumentParser(description="Combine experiments into one root.") 80 | parser.add_argument( 81 | "-i", 82 | "--input", 83 | type=str, 84 | default="", 85 | help=f"The directory containing all inputs. 
Default to ${IRFUZZER_DATA_ENV}", 86 | ) 87 | args = parser.parse_args() 88 | if args.input == "": 89 | args.input = os.getenv(IRFUZZER_DATA_ENV) 90 | if args.input == None: 91 | logging.error( 92 | f"Input directory not set, set --input or {IRFUZZER_DATA_ENV}" 93 | ) 94 | exit(1) 95 | # make sure current working directory is archive before running this 96 | merge_subdirs_by_symlink(args.input, os.path.join(args.input, "../combined")) 97 | 98 | 99 | if __name__ == "__main__": 100 | logging.basicConfig() 101 | logging.getLogger().setLevel(logging.INFO) 102 | main() 103 | -------------------------------------------------------------------------------- /scripts/collect_matcher_table_size.py: -------------------------------------------------------------------------------- 1 | from io import TextIOWrapper 2 | import multiprocessing 3 | import re 4 | import subprocess 5 | from pathlib import Path 6 | from typing import Optional 7 | 8 | from tap import Tap 9 | from lib import LLVM 10 | from lib.fs import subdirs_of 11 | 12 | 13 | LLVM_AFL_BUILD_PATH = Path(LLVM, "build-afl") 14 | 15 | 16 | class Args(Tap): 17 | jobs: int = multiprocessing.cpu_count() 18 | output: Optional[str] = "scripts/lib/matcher_table_sizes.py" 19 | 20 | def configure(self) -> None: 21 | self.add_argument("-o", "--output") 22 | 23 | 24 | def get_obj_file_suffix(global_isel: bool) -> str: 25 | return "InstructionSelector" if global_isel else "ISelDAGToDAG" 26 | 27 | 28 | def remove_matcher_table_build_files(global_isel: bool) -> None: 29 | suffix = get_obj_file_suffix(global_isel) 30 | 31 | for target_dir in subdirs_of(Path(LLVM_AFL_BUILD_PATH, "lib/Target")): 32 | if not target_dir.is_dir() or target_dir.name == "CMakeFiles": 33 | continue 34 | 35 | backend = target_dir.name 36 | paths = list( 37 | Path(target_dir).glob( 38 | f"CMakeFiles/LLVM{backend}CodeGen.dir/**/*{suffix}.cpp.o" 39 | ) 40 | ) 41 | 42 | for path in paths: 43 | print(f"Removing {path}...") 44 | path.unlink() 45 | 46 | 47 | def build_llvm_afl(jobs: int) -> str: 48 | args = ["ninja", "-j", str(jobs)] 49 | print(" ".join(args)) 50 | 51 | p = subprocess.run( 52 | args=args, 53 | cwd=LLVM_AFL_BUILD_PATH, 54 | stdout=subprocess.PIPE, 55 | stderr=subprocess.PIPE, 56 | ) 57 | 58 | stdout = p.stdout.decode("utf-8") 59 | stderr = p.stderr.decode("utf-8") 60 | 61 | if len(stderr) > 0: 62 | print(stderr) 63 | exit(1) 64 | 65 | p.check_returncode() 66 | 67 | ## Filter out compiler outputs, mostly warnings. 
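    # (Editorial note, not part of the original file: the filter below keeps only
    # lines starting with "[", i.e. ninja progress lines such as
    # "[123/456] Building CXX object ..." and the instrumentation's
    # "[+] MatcherTable size: N" lines, which is exactly the line pairing that
    # get_output_pattern() matches later on.)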
68 | stdout = "\n".join(filter(lambda l: len(l) > 1 and l[0] == "[", stdout.split("\n"))) 69 | 70 | print(stdout) 71 | 72 | return stdout 73 | 74 | 75 | def get_output_pattern(global_isel: bool) -> str: 76 | suffix = get_obj_file_suffix(global_isel) 77 | line1 = rf"\[(\d+)/\d+\] Building CXX object lib/Target/.+/CMakeFiles/.+/(.+){suffix}\.cpp\.o" 78 | line2 = r"\[\+\] MatcherTable size: (\d+)" 79 | return rf"{line1}\n{line2}" 80 | 81 | 82 | def extract_matcher_table_size(stdout: str, global_isel: bool) -> dict[str, int]: 83 | matches = re.findall(get_output_pattern(global_isel), stdout) 84 | 85 | table_sizes = {} 86 | 87 | for match in matches: 88 | backend = match[1] 89 | table_size = int(match[2]) 90 | table_sizes[backend] = table_size 91 | 92 | return table_sizes 93 | 94 | 95 | def dump_py( 96 | name: str, dict: dict[str, int], file: Optional[TextIOWrapper] = None 97 | ) -> None: 98 | print(name + ": dict[str, int] = {", file=file) 99 | for key in sorted(dict.keys()): 100 | print(f' "{key}": {dict[key]},', file=file) 101 | print("}", file=file) 102 | 103 | 104 | def main() -> None: 105 | args = Args().parse_args() 106 | 107 | remove_matcher_table_build_files(global_isel=False) 108 | remove_matcher_table_build_files(global_isel=True) 109 | 110 | stdout = build_llvm_afl(jobs=args.jobs) 111 | 112 | dag_isel_table_sizes = extract_matcher_table_size(stdout, global_isel=False) 113 | global_isel_table_sizes = extract_matcher_table_size(stdout, global_isel=True) 114 | 115 | f = open(args.output, "w") if args.output and args.output != "-" else None 116 | 117 | dump_py("DAGISEL_MATCHER_TABLE_SIZES", dag_isel_table_sizes, file=f) 118 | print(file=f) 119 | dump_py("GISEL_MATCHER_TABLE_SIZES", global_isel_table_sizes, file=f) 120 | 121 | if f: 122 | f.close() 123 | 124 | 125 | if __name__ == "__main__": 126 | main() 127 | -------------------------------------------------------------------------------- /scripts/batch_classify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import subprocess 4 | 5 | from classify import classify 6 | from pathlib import Path 7 | 8 | from lib import LLC, LLVM_DIS 9 | from lib.fs import subdirs_of 10 | from lib.llc_command import LLCCommand 11 | from lib.process_concurrency import run_concurrent_subprocesses 12 | from lib.target import Target, TargetFilter 13 | 14 | 15 | def classify_wrapper( 16 | input_dir: Path, 17 | output_dir: Path, 18 | target: Target, 19 | global_isel: bool = False, 20 | generate_ll_files: bool = True, 21 | ) -> None: 22 | llc_command = LLCCommand(target=target, global_isel=global_isel) 23 | args = [str(LLC), *llc_command.get_options(output="-")] 24 | 25 | print(f"Start classifying {input_dir} using '{(' '.join(args))}'...") 26 | 27 | classify( 28 | args, 29 | input_dir, 30 | output_dir, 31 | force=True, 32 | verbose=False, 33 | create_symlink_to_source=True, 34 | hash_stacktrace_only=True, 35 | hash_op_code_only_for_isel_crash=True, 36 | remove_addr_in_stacktrace=True, 37 | ignore_undefined_external_symbol=True, 38 | ) 39 | 40 | print(f"Done classifying {input_dir} using '{(' '.join(args))}'.") 41 | 42 | if generate_ll_files: 43 | print(f"Generating human-readable IR files for {output_dir}...") 44 | 45 | run_concurrent_subprocesses( 46 | Path(output_dir).rglob("*.bc"), 47 | lambda ir_bc_path: subprocess.Popen( 48 | args=[LLVM_DIS, ir_bc_path], 49 | stdout=subprocess.DEVNULL, 50 | stderr=subprocess.DEVNULL, 51 | ), 52 | ) 53 | 54 | print(f"Done generating 
human-readable IR files for {output_dir}.") 55 | 56 | 57 | def batch_classify( 58 | input_root_dir: Path, 59 | output_root_dir: Path, 60 | global_isel: bool = False, 61 | generate_ll_files: bool = True, 62 | target_filter: TargetFilter = lambda _: True, 63 | ) -> None: 64 | for target_dir in subdirs_of(input_root_dir): 65 | target = Target.parse(target_dir.name) 66 | 67 | if not target_filter(target): 68 | continue 69 | 70 | for replicate_dir in subdirs_of(target_dir.path): 71 | try: 72 | classify_wrapper( 73 | input_dir=Path(replicate_dir.path, "default", "crashes"), 74 | output_dir=output_root_dir.joinpath( 75 | target_dir.name, replicate_dir.name 76 | ), 77 | target=target, 78 | global_isel=global_isel, 79 | generate_ll_files=generate_ll_files, 80 | ) 81 | except Exception: 82 | logging.exception( 83 | f"Something went wrong when processing {target_dir.path}" 84 | ) 85 | 86 | 87 | def main() -> None: 88 | parser = argparse.ArgumentParser( 89 | description="Batch classify LLVM crashes", 90 | ) 91 | 92 | parser.add_argument( 93 | "-i", 94 | "--input", 95 | type=str, 96 | required=True, 97 | help="The input directory containing all fuzzer directories", 98 | ) 99 | 100 | parser.add_argument( 101 | "-o", 102 | "--output", 103 | type=str, 104 | required=True, 105 | help="The output directory", 106 | ) 107 | 108 | args = parser.parse_args() 109 | 110 | for fuzzer_dir in subdirs_of(args.input): 111 | for isel_dir in subdirs_of(fuzzer_dir.path): 112 | batch_classify( 113 | input_root_dir=Path(isel_dir.path), 114 | output_root_dir=Path(args.output, fuzzer_dir.name, isel_dir.name), 115 | global_isel=isel_dir.name == "gisel", 116 | ) 117 | 118 | 119 | if __name__ == "__main__": 120 | main() 121 | -------------------------------------------------------------------------------- /scripts/lib/target.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | import re 3 | from typing import Callable, Iterable, Literal, Optional 4 | 5 | from lib.triple import Triple 6 | 7 | 8 | class Target: 9 | triple: Triple 10 | cpu: Optional[str] 11 | attrs: set[str] 12 | 13 | @property 14 | def backend(self) -> str: 15 | return self.triple.backend 16 | 17 | def __init__( 18 | self, 19 | triple: Triple | str, 20 | cpu: Optional[str] = None, 21 | attrs: Iterable[str] | str | None = None, 22 | ) -> None: 23 | self.triple = triple if isinstance(triple, Triple) else Triple.parse(triple) 24 | self.cpu = None if cpu is None or cpu == "" else cpu 25 | 26 | if isinstance(attrs, str): 27 | attrs = attrs.split(",") 28 | 29 | self.attrs = ( 30 | set( 31 | (("+" + attr) if not attr.startswith(("+", "-")) else attr) 32 | for attr in attrs 33 | ) 34 | if attrs 35 | else set() 36 | ) 37 | 38 | def __repr__(self) -> str: 39 | def get_parts() -> Iterable[str]: 40 | yield str(self.triple) 41 | 42 | if self.cpu: 43 | yield self.cpu 44 | 45 | for attr in sorted(self.attrs): 46 | yield attr 47 | 48 | return ",".join(get_parts()) 49 | 50 | def __eq__(self, __o: object) -> bool: 51 | if not isinstance(__o, Target): 52 | return False 53 | 54 | return ( 55 | self.triple == __o.triple 56 | and self.cpu == __o.cpu 57 | and self.attrs == __o.attrs 58 | ) 59 | 60 | def __hash__(self) -> int: 61 | return hash(str(self)) 62 | 63 | @staticmethod 64 | def parse(s: str) -> "Target": 65 | """ 66 | Acceptable formats: 67 | " [] [ ...]", 68 | " [] [,,...]", or 69 | "[,][,,,...]". 70 | An attribute must start with '+' or '-' to avoid ambiguity. 
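        Illustrative examples (editorial addition, derived from the parsing logic
        below; the CPU and attribute names are taken from scripts/lib/target_lists.py):
            "x86_64"                     -> triple only
            "aarch64 cortex-a710"        -> triple + cpu
            "riscv64 +v"                 -> triple + attribute, no cpu
            "x86_64,alderlake,+avx512f"  -> triple + cpu + attribute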
71 | """ 72 | 73 | parts = [part for part in re.split(r" |,", s) if part != ""] 74 | n = len(parts) 75 | 76 | assert n > 0 77 | 78 | # triple only 79 | if n == 1: 80 | return Target(triple=parts[0]) 81 | 82 | # triple with attributes 83 | if parts[1].startswith(("+", "-")): 84 | return Target( 85 | triple=parts[0], 86 | cpu=None, 87 | attrs=parts[1:], 88 | ) 89 | 90 | # triple with cpu 91 | if n == 2: 92 | return Target( 93 | triple=parts[0], 94 | cpu=parts[1], 95 | ) 96 | 97 | # triple with cpu and attributes 98 | return Target(triple=parts[0], cpu=parts[1], attrs=parts[2:]) 99 | 100 | 101 | TargetFilter = Callable[[Target], bool] 102 | TargetProp = Literal["triple", "arch", "vendor", "os", "abi", "cpu", "attrs"] 103 | 104 | 105 | def get_target_prop_selector( 106 | prop: TargetProp, 107 | ) -> Callable[[Target], Triple | str | set[str] | None]: 108 | match prop: 109 | case "triple": 110 | return lambda target: target.triple 111 | case "arch": 112 | return lambda target: target.triple.arch 113 | case "vendor": 114 | return lambda target: target.triple.vendor 115 | case "os": 116 | return lambda target: target.triple.os 117 | case "abi": 118 | return lambda target: target.triple.abi 119 | case "cpu": 120 | return lambda target: target.cpu 121 | case "attrs": 122 | return lambda target: target.attrs 123 | 124 | 125 | def get_target_prop_equality_checker( 126 | target: Target, prop: TargetProp 127 | ) -> Callable[[Target], bool]: 128 | prop_selector = get_target_prop_selector(prop) 129 | return lambda candidate: prop_selector(candidate) == prop_selector(target) 130 | 131 | 132 | def create_target_filter( 133 | target: Target, props_to_match: Iterable[TargetProp] 134 | ) -> TargetFilter: 135 | return reduce( 136 | lambda curr_filter, prop: ( 137 | lambda candidate: ( 138 | curr_filter(candidate) 139 | and get_target_prop_equality_checker(target, prop)(candidate) 140 | ) 141 | ), 142 | props_to_match, 143 | lambda _: True, 144 | ) 145 | -------------------------------------------------------------------------------- /scripts/collect_seeds.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import subprocess 4 | from typing import Iterable, Literal, Optional 5 | 6 | from tap import Tap 7 | 8 | from lib.fs import count_files 9 | from lib.llc_command import LLCCommand 10 | from lib.llc_test import LLCTest, parse_llc_tests 11 | from lib.target import Target, TargetFilter, TargetProp, create_target_filter 12 | from lib.triple import Triple 13 | 14 | 15 | class Args(Tap): 16 | triple: str 17 | cpu: Optional[str] = None 18 | attrs: list[str] = [] 19 | global_isel: bool = False 20 | 21 | props_to_match: list[TargetProp] = ["triple", "cpu", "attrs"] 22 | """ 23 | the properties of a test target to match those of the fuzzing target, 24 | used to determine which tests should be included as seeds. 25 | """ 26 | 27 | seed_format: Literal["bc", "ll"] = "bc" 28 | """ 29 | whether to create symlinks to the tests, or assemble to bitcode (*.bc) files. 30 | """ 31 | 32 | timeout: Optional[float] = None 33 | """ 34 | only include test cases that can be compiled within the specified in seconds. 
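    In other words, a seed candidate is kept only if `llc` finishes compiling
    it within this many seconds; candidates that time out are dropped.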
35 | """ 36 | 37 | output: str 38 | """directory for storing seeds (will create if not exist)""" 39 | 40 | def configure(self) -> None: 41 | self.add_argument("-o", "--output") 42 | 43 | 44 | def get_runnable_llc_tests( 45 | backend: str, 46 | global_isel: bool, 47 | target_filter: TargetFilter = lambda _: True, 48 | ) -> Iterable[LLCTest]: 49 | return ( 50 | test 51 | for test in parse_llc_tests(backend_filter=lambda a: a == backend) 52 | if any( 53 | cmd.global_isel == global_isel and target_filter(cmd.target) 54 | for cmd in test.runnable_llc_commands 55 | ) 56 | ) 57 | 58 | 59 | def validate_seed( 60 | seed_path: Path, llc_command: LLCCommand, timeout_secs: Optional[float] = None 61 | ) -> bool: 62 | try: 63 | subprocess.run( 64 | llc_command.get_args(input=seed_path, output="-"), 65 | timeout=timeout_secs, 66 | check=True, 67 | stdout=subprocess.DEVNULL, 68 | stderr=subprocess.DEVNULL, 69 | ) 70 | 71 | return True 72 | except subprocess.CalledProcessError: 73 | logging.warning(f"Seed candidate {seed_path} does not compile.") 74 | except subprocess.TimeoutExpired: 75 | logging.warning(f"Seed candidate {seed_path} timed out when compiling.") 76 | 77 | return False 78 | 79 | 80 | def collect_seeds_from_tests( 81 | target: Target, 82 | global_isel: bool, 83 | out_dir_parent: Path, 84 | props_to_match: list[TargetProp] = ["triple", "cpu", "attrs"], 85 | dump_bc: bool = True, 86 | symlink_to_ll: bool = False, 87 | timeout_secs: Optional[float] = None, 88 | ) -> Path: 89 | print(f"Collecting seeds for target {target}...") 90 | 91 | out_dir = out_dir_parent.joinpath( 92 | "gisel" if global_isel else "dagisel", str(target) 93 | ) 94 | out_dir.mkdir(parents=True) 95 | 96 | llc_command = LLCCommand(target=target, global_isel=global_isel) 97 | 98 | for test in get_runnable_llc_tests( 99 | backend=target.backend, 100 | global_isel=global_isel, 101 | target_filter=create_target_filter(target, props_to_match), 102 | ): 103 | if symlink_to_ll and validate_seed(test.path, llc_command, timeout_secs): 104 | out_dir.joinpath(test.path.name).symlink_to(test.path.absolute()) 105 | 106 | if dump_bc: 107 | bc_path = test.dump_bc(out_dir) 108 | 109 | if not validate_seed(bc_path, llc_command, timeout_secs): 110 | bc_path.unlink(missing_ok=True) 111 | 112 | print(f"{count_files(out_dir)} seeds written to {out_dir}.") 113 | 114 | return out_dir 115 | 116 | 117 | def main() -> None: 118 | args = Args(underscores_to_dashes=True).parse_args() 119 | 120 | target = Target( 121 | triple=Triple.parse(args.triple), 122 | cpu=args.cpu, 123 | attrs=args.attrs[0] if len(args.attrs) == 1 else args.attrs, 124 | ) 125 | 126 | collect_seeds_from_tests( 127 | target=target, 128 | global_isel=args.global_isel, 129 | out_dir_parent=Path(args.output), 130 | props_to_match=args.props_to_match, 131 | dump_bc=args.seed_format == "bc", 132 | symlink_to_ll=args.seed_format == "ll", 133 | timeout_secs=args.timeout, 134 | ) 135 | 136 | 137 | if __name__ == "__main__": 138 | main() 139 | -------------------------------------------------------------------------------- /llvm-isel-afl/llvm-isel-fuzzer.cpp: -------------------------------------------------------------------------------- 1 | //===--- llvm-isel-fuzzer.cpp - Fuzzer for instruction selection ----------===// 2 | // 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 | // See https://llvm.org/LICENSE.txt for license information. 
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | // 7 | //===----------------------------------------------------------------------===// 8 | // 9 | // Tool to fuzz instruction selection using libFuzzer. 10 | // 11 | //===----------------------------------------------------------------------===// 12 | 13 | #include "llvm/ADT/StringRef.h" 14 | #include "llvm/Analysis/TargetLibraryInfo.h" 15 | #include "llvm/Bitcode/BitcodeReader.h" 16 | #include "llvm/Bitcode/BitcodeWriter.h" 17 | #include "llvm/CodeGen/CommandFlags.h" 18 | #include "llvm/FuzzMutate/FuzzerCLI.h" 19 | #include "llvm/FuzzMutate/IRMutator.h" 20 | #include "llvm/FuzzMutate/Operations.h" 21 | #include "llvm/IR/Constants.h" 22 | #include "llvm/IR/LLVMContext.h" 23 | #include "llvm/IR/LegacyPassManager.h" 24 | #include "llvm/IR/Module.h" 25 | #include "llvm/IR/Verifier.h" 26 | #include "llvm/IRReader/IRReader.h" 27 | #if LLVM_VERSION_MAJOR >= 14 28 | #include "llvm/MC/TargetRegistry.h" 29 | #else 30 | #include "llvm/Support/TargetRegistry.h" 31 | #endif 32 | #include "llvm/Support/CommandLine.h" 33 | #include "llvm/Support/DataTypes.h" 34 | #include "llvm/Support/Debug.h" 35 | #include "llvm/Support/SourceMgr.h" 36 | #include "llvm/Support/TargetSelect.h" 37 | #include "llvm/Target/TargetMachine.h" 38 | 39 | #define DEBUG_TYPE "isel-fuzzer" 40 | 41 | using namespace llvm; 42 | 43 | static codegen::RegisterCodeGenFlags CGF; 44 | 45 | static cl::opt 46 | OptLevel("O", 47 | cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] " 48 | "(default = '-O2')"), 49 | cl::Prefix, cl::ZeroOrMore, cl::init('2')); 50 | 51 | static cl::opt 52 | TargetTriple("mtriple", cl::desc("Override target triple for module")); 53 | 54 | static std::unique_ptr TM; 55 | 56 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) { 57 | if (Size <= 1) 58 | // We get bogus data given an empty corpus - ignore it. 59 | return 0; 60 | 61 | LLVMContext Context; 62 | auto M = parseAndVerify(Data, Size, Context); 63 | if (!M) { 64 | errs() << "error: input module is broken!\n"; 65 | return 0; 66 | } 67 | 68 | // Set up the module to build for our target. 69 | M->setTargetTriple(TM->getTargetTriple().normalize()); 70 | M->setDataLayout(TM->createDataLayout()); 71 | 72 | // Build up a PM to do instruction selection. 73 | legacy::PassManager PM; 74 | TargetLibraryInfoImpl TLII(TM->getTargetTriple()); 75 | PM.add(new TargetLibraryInfoWrapperPass(TLII)); 76 | raw_null_ostream OS; 77 | TM->addPassesToEmitFile(PM, OS, nullptr, CGFT_Null); 78 | PM.run(*M); 79 | 80 | return 0; 81 | } 82 | 83 | extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc, 84 | char ***argv) { 85 | EnableDebugBuffering = true; 86 | 87 | /// TODO: Only init the one we are fuzzing, would that meke it faster? 88 | InitializeAllTargets(); 89 | InitializeAllTargetMCs(); 90 | InitializeAllAsmPrinters(); 91 | InitializeAllAsmParsers(); 92 | 93 | // handleExecNameEncodedBEOpts(*argv[0]); 94 | cl::ParseCommandLineOptions(*argc, *argv); 95 | 96 | if (TargetTriple.empty()) { 97 | errs() << *argv[0] << ": -mtriple must be specified\n"; 98 | exit(1); 99 | } 100 | 101 | Triple TheTriple = Triple(Triple::normalize(TargetTriple)); 102 | 103 | // Get the target specific parser. 104 | std::string Error; 105 | const Target *TheTarget = 106 | TargetRegistry::lookupTarget(codegen::getMArch(), TheTriple, Error); 107 | if (!TheTarget) { 108 | errs() << argv[0] << ": " << Error; 109 | return 1; 110 | } 111 | 112 | // Set up the pipeline like llc does. 
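  // For context (assumed invocation; the AFL driver in afl-driver.cpp builds
  // the argument list from environment variables rather than the command line):
  //   TRIPLE=aarch64 CPU=cortex-a75 ATTR=+fullfp16 afl-fuzz ... ./fuzzer
  // ends up passing "-mtriple=aarch64 -mcpu=cortex-a75 -mattr=+fullfp16",
  // which is consumed by the codegen flags queried below.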
113 | std::string CPUStr = codegen::getCPUStr(), 114 | FeaturesStr = codegen::getFeaturesStr(); 115 | 116 | CodeGenOpt::Level OLvl = CodeGenOpt::Default; 117 | switch (OptLevel) { 118 | default: 119 | errs() << argv[0] << ": invalid optimization level.\n"; 120 | return 1; 121 | case ' ': 122 | break; 123 | case '0': 124 | OLvl = CodeGenOpt::None; 125 | break; 126 | case '1': 127 | OLvl = CodeGenOpt::Less; 128 | break; 129 | case '2': 130 | OLvl = CodeGenOpt::Default; 131 | break; 132 | case '3': 133 | OLvl = CodeGenOpt::Aggressive; 134 | break; 135 | } 136 | 137 | TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple); 138 | TM.reset(TheTarget->createTargetMachine( 139 | TheTriple.getTriple(), CPUStr, FeaturesStr, Options, 140 | codegen::getExplicitRelocModel(), codegen::getExplicitCodeModel(), OLvl)); 141 | assert(TM && "Could not allocate target machine!"); 142 | 143 | return 0; 144 | } 145 | -------------------------------------------------------------------------------- /scripts/lib/llc_test.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import re 3 | import subprocess 4 | from typing import Callable, Iterable, Optional 5 | from lib import LLVM, LLVM_AS 6 | 7 | from lib.llc_command import LLCCommand 8 | from lib.triple import Triple 9 | 10 | 11 | class LLCTest: 12 | path: Path 13 | 14 | backend: str 15 | 16 | test_commands: list[str] 17 | 18 | runnable_llc_commands: list[LLCCommand] 19 | """ 20 | llc commands that can be directly executed without crashing using the test case as an input 21 | without going through `opt`, `sed`, etc. first. 22 | """ 23 | 24 | code_lines: list[str] 25 | 26 | def __init__(self, backend: str, file_path: Path) -> None: 27 | assert file_path.name.endswith(".ll") 28 | 29 | self.backend = backend 30 | self.path = file_path 31 | self.test_commands = [] 32 | self.code_lines = [] 33 | 34 | with open(file_path) as file: 35 | multiline_command = False # whether last RUN header ends with '\' 36 | while line := file.readline(): 37 | if re.match(r".*;.+NOTE:(.+)", line): 38 | continue 39 | 40 | match = re.match(r".*;.*RUN:(.+)", line) 41 | 42 | if match is not None: 43 | command = match.group(1).strip() 44 | 45 | if multiline_command: 46 | last_command_prev_part = ( 47 | self.test_commands[-1].removesuffix("\\").strip() 48 | ) 49 | self.test_commands[-1] = f"{last_command_prev_part} {command}" 50 | else: 51 | self.test_commands.append(command) 52 | 53 | multiline_command = command.endswith("\\") 54 | else: 55 | assert ( 56 | not multiline_command 57 | ), f"ERROR: something unexpected happened when parsing commands for {file_path}" 58 | self.code_lines.append(line) 59 | 60 | assert ( 61 | len(self.test_commands) > 0 62 | ), f"WARNING: {file_path} does not contain any test command." 63 | 64 | assert ( 65 | len(self.code_lines) > 0 66 | ), f"WARNING: {file_path} does not contain any test code." 67 | 68 | llc_commands = [cmd for cmd in self.test_commands if "llc " in cmd] 69 | assert ( 70 | len(llc_commands) > 0 71 | ), f"WARNING: {file_path} does not contain any `llc` command." 
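        # For example, a RUN line such as
        #   "llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck %s"
        # counts as runnable: only the part before the first '|' is kept, and
        # it must start with "llc" (commands piped through `opt`, `sed`, etc.
        # first are filtered out below).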
72 | 73 | default_triple = self.get_default_triple() 74 | runnable_llc_commands_raw = filter( 75 | lambda cmd: cmd.startswith("llc"), 76 | (cmd.split("|")[0] for cmd in llc_commands), 77 | ) 78 | 79 | try: 80 | self.runnable_llc_commands = [ 81 | LLCCommand.parse(cmd, default_triple) 82 | for cmd in runnable_llc_commands_raw 83 | ] 84 | except Exception as e: 85 | raise Exception( 86 | f"ERROR: Failed to parse llc command(s) in {file_path}." 87 | ) from e 88 | 89 | assert ( 90 | len(self.runnable_llc_commands) > 0 91 | ), f"WARNING: {file_path} does not contain any runnable `llc` command." 92 | 93 | def get_default_triple(self) -> Optional[Triple]: 94 | lines_with_triple = [ 95 | line for line in self.code_lines if line.startswith("target triple") 96 | ] 97 | 98 | if len(lines_with_triple) == 0: 99 | return None 100 | 101 | assert ( 102 | len(lines_with_triple) == 1 103 | ), f"UNEXPECTED: {self.path} has more than one triple specified in code" 104 | 105 | match = re.match(r'target triple ?= ?"([a-z0-9_\.-]+)"', lines_with_triple[0]) 106 | assert ( 107 | match is not None 108 | ), f"UNEXPECTED: failed to extract triple from '{lines_with_triple[0]}'" 109 | 110 | return Triple.parse(match.group(1)) 111 | 112 | def dump_bc(self, out_dir: Path) -> Path: 113 | out_path = out_dir.joinpath(self.path.name.removesuffix(".ll") + ".bc") 114 | 115 | process = subprocess.run( 116 | [ 117 | LLVM_AS, 118 | self.path, 119 | "-o", 120 | out_path, 121 | ] 122 | ) 123 | 124 | if process.returncode != 0: 125 | print(f"WARNING: failed to convert {self.path} to {out_path}") 126 | 127 | return out_path 128 | 129 | 130 | def parse_llc_tests( 131 | backend_filter: Callable[[str], bool] = lambda _: True, 132 | verbose: bool = False, 133 | ) -> Iterable[LLCTest]: 134 | total = 0 135 | success = 0 136 | 137 | for backend_dir in Path(LLVM, "llvm/test/CodeGen").iterdir(): 138 | if not backend_dir.is_dir() or not backend_filter(backend_dir.name): 139 | continue 140 | 141 | for file_path in backend_dir.rglob("*.ll"): 142 | try: 143 | yield LLCTest(backend_dir.name, file_path) 144 | success += 1 145 | except Exception as e: 146 | if verbose: 147 | print(e) 148 | total += 1 149 | 150 | print(f"Successfully parsed {success}/{total} LLC tests.") 151 | -------------------------------------------------------------------------------- /scripts/batch_compile.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import subprocess 4 | from typing import Iterable, Optional 5 | 6 | from lib.process_concurrency import MAX_SUBPROCESSES, run_concurrent_subprocesses 7 | 8 | 9 | def build_clang_flags( 10 | target: str, 11 | sysroot: Optional[str] = None, 12 | include_paths: list[str] = [], 13 | opt_level: str = "0", 14 | ) -> Iterable[str]: 15 | yield f"--target={target}" 16 | yield "-O" + opt_level 17 | 18 | if sysroot is not None: 19 | yield f"--sysroot={sysroot}" 20 | 21 | for include_path in include_paths: 22 | yield f"-I{include_path}" 23 | 24 | yield "-emit-llvm" 25 | yield "-c" 26 | 27 | 28 | def batch_compile( 29 | src_dir: str, out_dir: str, clang_flags: list[str], n_jobs: Optional[int] = None 30 | ) -> None: 31 | print( 32 | f'Compiling source code in {src_dir} to {out_dir} using "clang {" ".join(clang_flags)}"...' 
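        # For a hypothetical 'foo.c' compiled for target 'aarch64', the command
        # run below looks roughly like:
        #   clang --target=aarch64 -O2 -I../csmith/runtime \
        #         -I/usr/aarch64-linux-gnu/include -emit-llvm -c foo.c -o foo.bc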
33 | ) 34 | 35 | os.makedirs(out_dir, exist_ok=True) 36 | 37 | run_concurrent_subprocesses( 38 | iter=[ 39 | file_name for file_name in os.listdir(src_dir) if file_name.endswith(".c") 40 | ], 41 | subprocess_creator=lambda file_name: subprocess.Popen( 42 | args=[ 43 | "clang", 44 | *clang_flags, 45 | os.path.join(src_dir, file_name), 46 | "-o", 47 | os.path.join(out_dir, file_name.replace(".c", ".bc")), 48 | ], 49 | stderr=subprocess.DEVNULL, 50 | stdout=subprocess.DEVNULL, 51 | ), 52 | max_jobs=MAX_SUBPROCESSES if n_jobs is None else n_jobs, 53 | ) 54 | 55 | 56 | def main() -> None: 57 | parser = argparse.ArgumentParser(description="Batch compiling C code to LLVM IR") 58 | 59 | parser.add_argument( 60 | "-i", 61 | "--input", 62 | type=str, 63 | required=True, 64 | help="The input directory containing C source code files", 65 | ) 66 | 67 | parser.add_argument( 68 | "-o", 69 | "--output", 70 | type=str, 71 | required=True, 72 | help="The output directory for LLVM IR bytecode files", 73 | ) 74 | 75 | parser.add_argument( 76 | "-j", 77 | "--jobs", 78 | type=int, 79 | help="The number of concurrent subprocesses", 80 | ) 81 | 82 | parser.add_argument( 83 | "--csmith-root", 84 | type=str, 85 | default="../csmith", 86 | help="The root directory for CSmith repo", 87 | ) 88 | 89 | args = parser.parse_args() 90 | 91 | def batch_compile_wrapper( 92 | target: str, 93 | sysroot: Optional[str] = None, 94 | include: Optional[str] = None, 95 | apt_package: Optional[str] = None, 96 | link: Optional[str] = None, 97 | ) -> None: 98 | if not os.path.exists(args.csmith_root): 99 | print(f"ERROR: missing CSmith in {args.csmith_root}.") 100 | print(f"Run `git clone https://github.com/csmith-project/csmith.git {args.csmith_root}`") 101 | return 102 | 103 | if (include is not None and not os.path.exists(include)) or ( 104 | sysroot is not None and not os.path.exists(sysroot) 105 | ): 106 | print(f"ERROR: missing headers for target {target}.") 107 | if apt_package is not None: 108 | print(f"Run `sudo apt install {apt_package}`.") 109 | if link is not None: 110 | print(f"See {link} for how to get the required headers.") 111 | return 112 | 113 | batch_compile( 114 | src_dir=args.input, 115 | out_dir=os.path.join(args.output, target), 116 | clang_flags=list( 117 | build_clang_flags( 118 | target=target, 119 | sysroot=sysroot, 120 | include_paths=[os.path.join(args.csmith_root, "runtime")] 121 | + ([] if include is None else [include]), 122 | opt_level="2", 123 | ) 124 | ), 125 | n_jobs=args.jobs, 126 | ) 127 | 128 | batch_compile_wrapper( 129 | "i686", 130 | include="/usr/i686-linux-gnu/include", 131 | apt_package="libc6-dev-i386-cross", 132 | ) 133 | batch_compile_wrapper( 134 | "x86_64", 135 | include="/usr/x86_64-linux-gnu/include", 136 | apt_package="libc6-dev-amd64-cross", 137 | ) 138 | batch_compile_wrapper( 139 | "arm", 140 | include="/usr/arm-linux-gnueabi/include", 141 | apt_package="libc6-dev-armel-cross", 142 | ) 143 | batch_compile_wrapper( 144 | "aarch64", 145 | include="/usr/aarch64-linux-gnu/include", 146 | apt_package="libc6-dev-arm64-cross", 147 | ) 148 | batch_compile_wrapper( 149 | "riscv32", 150 | include="./riscv32/sysroot/usr/include", 151 | link="https://github.com/riscv-collab/riscv-gnu-toolchain", 152 | ) 153 | batch_compile_wrapper( 154 | "riscv64", 155 | include="/usr/riscv64-linux-gnu/include", 156 | apt_package="libc6-dev-riscv64-cross", 157 | ) 158 | batch_compile_wrapper( 159 | "wasm32-wasi", 160 | sysroot="./wasi-sdk-14.0/share/wasi-sysroot", 161 | 
link="https://github.com/WebAssembly/wasi-sdk", 162 | ) 163 | 164 | 165 | if __name__ == "__main__": 166 | main() 167 | -------------------------------------------------------------------------------- /mutator/src/mutator.cpp: -------------------------------------------------------------------------------- 1 | #include "mutator.h" 2 | 3 | #include "llvm/ADT/StringRef.h" 4 | #include "llvm/Analysis/TargetLibraryInfo.h" 5 | #include "llvm/Bitcode/BitcodeReader.h" 6 | #include "llvm/Bitcode/BitcodeWriter.h" 7 | #include "llvm/CodeGen/CommandFlags.h" 8 | #include "llvm/FuzzMutate/FuzzerCLI.h" 9 | #include "llvm/FuzzMutate/IRMutator.h" 10 | #include "llvm/FuzzMutate/Operations.h" 11 | #include "llvm/IR/Constants.h" 12 | #include "llvm/IR/DerivedTypes.h" 13 | #include "llvm/IR/LLVMContext.h" 14 | #include "llvm/IR/LegacyPassManager.h" 15 | #include "llvm/IR/Module.h" 16 | #include "llvm/IR/Verifier.h" 17 | #include "llvm/IRReader/IRReader.h" 18 | #include "llvm/Support/CommandLine.h" 19 | #include "llvm/Support/DataTypes.h" 20 | #include "llvm/Support/Debug.h" 21 | #include "llvm/Support/SourceMgr.h" 22 | #include "llvm/Support/TargetSelect.h" 23 | #include "llvm/Target/TargetMachine.h" 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | 32 | #ifdef DEBUG 33 | #include "llvm/IR/Verifier.h" 34 | #include "llvm/Transforms/Utils/Cloning.h" 35 | #endif 36 | using namespace llvm; 37 | 38 | static std::unique_ptr Mutator; 39 | 40 | extern "C" { 41 | 42 | void dumpOnFailure(unsigned int Seed, uint8_t *Data, size_t Size, 43 | size_t MaxSize) { 44 | time_t seconds = time(NULL); 45 | errs() << "Mutation failed, seed: " << Seed << "\n"; 46 | char oldname[256]; 47 | memset(oldname, 0, 256); 48 | sprintf(oldname, "%u-%zu-%zu.old.bc", Seed, MaxSize, seconds); 49 | std::ofstream oldoutfile = 50 | std::ofstream(oldname, std::ios::out | std::ios::binary); 51 | oldoutfile.write((char *)Data, Size); 52 | oldoutfile.close(); 53 | } 54 | 55 | void addVectorTypeGetters(std::vector &Types) { 56 | int VectorLength[] = {1, 2, 4, 8, 16, 32}; 57 | std::vector BasicTypeGetters(Types); 58 | for (auto typeGetter : BasicTypeGetters) { 59 | for (int length : VectorLength) { 60 | Types.push_back([typeGetter, length](LLVMContext &C) { 61 | return VectorType::get(typeGetter(C), length, false); 62 | }); 63 | } 64 | } 65 | } 66 | 67 | /// TODO: 68 | /// Type* getStructType(Context& C); 69 | 70 | void createISelMutator() { 71 | std::vector Types{ 72 | Type::getInt1Ty, Type::getInt8Ty, Type::getInt16Ty, Type::getInt32Ty, 73 | Type::getInt64Ty, Type::getFloatTy, Type::getDoubleTy}; 74 | std::vector ScalarTypes = Types; 75 | 76 | addVectorTypeGetters(Types); 77 | 78 | TypeGetter OpaquePtrGetter = [](LLVMContext &C) { 79 | return PointerType::get(Type::getInt32Ty(C), 0); 80 | }; 81 | Types.push_back(OpaquePtrGetter); 82 | 83 | // Copy scalar types to change distribution. 
84 | for (int i = 0; i < 5; i++) 85 | Types.insert(Types.end(), ScalarTypes.begin(), ScalarTypes.end()); 86 | 87 | std::vector> Strategies; 88 | std::vector Ops = InjectorIRStrategy::getDefaultOps(); 89 | 90 | Strategies.push_back(std::make_unique( 91 | InjectorIRStrategy::getDefaultOps())); 92 | Strategies.push_back(std::make_unique()); 93 | Strategies.push_back(std::make_unique()); 94 | Strategies.push_back(std::make_unique()); 95 | Strategies.push_back(std::make_unique()); 96 | Strategies.push_back(std::make_unique()); 97 | Strategies.push_back(std::make_unique()); 98 | Strategies.push_back(std::make_unique()); 99 | 100 | Mutator = 101 | std::make_unique(std::move(Types), std::move(Strategies)); 102 | } 103 | 104 | size_t LLVMFuzzerCustomMutator(uint8_t *Data, size_t Size, size_t MaxSize, 105 | unsigned int Seed) { 106 | LLVMContext Context; 107 | std::unique_ptr M; 108 | if (Size <= 1) 109 | // We get bogus data given an empty corpus - just create a new module. 110 | M.reset(new Module("M", Context)); 111 | else 112 | M = parseModule(Data, Size, Context); 113 | if (!M) { 114 | errs() << "Parse module error. No mutation is done. Data size: " << Size 115 | << ". Given data wrote to err.bc\n"; 116 | std::ofstream outfile = 117 | std::ofstream("err.bc", std::ios::out | std::ios::binary); 118 | outfile.write((char *)Data, Size); 119 | outfile.close(); 120 | #ifdef DEBUG 121 | exit(1); 122 | #else 123 | // We don't do any change. 124 | return Size; 125 | #endif 126 | } 127 | #ifdef DEBUG 128 | std::unique_ptr OldM = CloneModule(*M); 129 | #endif 130 | 131 | #ifdef DEBUG 132 | try { 133 | #endif 134 | srand(Seed); 135 | Seed = rand(); 136 | // for (int i = 0; i < 4; i++) { 137 | Mutator->mutateModule(*M, Seed, MaxSize); 138 | // } 139 | #ifdef DEBUG 140 | } catch (...) 
{ 141 | dumpOnFailure(Seed, Data, Size, MaxSize); 142 | return Size; 143 | } 144 | #endif 145 | 146 | #ifdef DEBUG 147 | uint8_t NewData[MaxSize]; 148 | size_t NewSize = writeModule(*M, NewData, MaxSize); 149 | LLVMContext NewC; 150 | auto NewM = parseModule(NewData, NewSize, NewC); 151 | if (NewM == nullptr) { 152 | dumpOnFailure(Seed, Data, Size, MaxSize); 153 | return Size; 154 | } else { 155 | memset(Data, 0, MaxSize); 156 | memcpy(Data, NewData, NewSize); 157 | return NewSize; 158 | } 159 | #else 160 | return writeModule(*M, Data, MaxSize); 161 | #endif 162 | } 163 | } -------------------------------------------------------------------------------- /scripts/process_data.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Iterable, Tuple 3 | import pandas as pd 4 | from matplotlib import pyplot 5 | import os 6 | import argparse 7 | import logging 8 | import subprocess 9 | 10 | from lib import IRFUZZER_DATA_ENV 11 | from lib.experiment import Experiment, get_all_experiments 12 | 13 | 14 | def iterate_over_all_experiments( 15 | dir: Path | str, allow_missing_data: bool = False 16 | ) -> Iterable[Tuple[Experiment, pd.DataFrame]]: 17 | for expr in get_all_experiments(dir): 18 | try: 19 | yield (expr, expr.read_plot_data()) 20 | except FileNotFoundError: 21 | if not allow_missing_data: 22 | raise 23 | 24 | 25 | def combine_last_row_of_each_experiment_data( 26 | experiments: Iterable[Tuple[Experiment, pd.DataFrame]], columns: list[str] 27 | ) -> pd.DataFrame: 28 | return pd.DataFrame( 29 | columns=["fuzzer", "isel", "target", "replicate", *columns], 30 | data=( 31 | [ 32 | exp.fuzzer, 33 | exp.isel, 34 | str(exp.target), 35 | exp.replicate_id, 36 | *df.tail(1)[columns].values.flatten().tolist(), 37 | ] 38 | for (exp, df) in experiments 39 | ), 40 | ) 41 | 42 | 43 | def generate_plots( 44 | experiments: Iterable[Tuple[Experiment, pd.DataFrame]], dir_out: str 45 | ) -> None: 46 | pyplot.ioff() 47 | 48 | for (experiment, df) in experiments: 49 | figure_path = os.path.join( 50 | dir_out, 51 | experiment.fuzzer, 52 | experiment.isel, 53 | str(experiment.target), 54 | str(experiment.replicate_id), 55 | ) 56 | os.makedirs(figure_path, exist_ok=True) 57 | 58 | try: 59 | df.plot(x="total_execs", y="saved_crashes").figure.savefig( 60 | os.path.join(figure_path, "crashes-vs-execs.png") 61 | ) 62 | df.plot(x="total_execs", y="shw_cvg").figure.savefig( 63 | os.path.join(figure_path, "shwcvg-vs-execs.png") 64 | ) 65 | df.plot(x="# relative_time", y="saved_crashes").figure.savefig( 66 | os.path.join(figure_path, "crashes-vs-time.png") 67 | ) 68 | df.plot(x="# relative_time", y="shw_cvg").figure.savefig( 69 | os.path.join(figure_path, "shwcvg-vs-time.png") 70 | ) 71 | except: 72 | print( 73 | f"ERROR: Cannot plot {experiment.fuzzer}/{experiment.isel}/{experiment.target}/{experiment.replicate_id}" 74 | ) 75 | 76 | pyplot.close() 77 | 78 | 79 | def get_last_col(args): 80 | df = combine_last_row_of_each_experiment_data( 81 | iterate_over_all_experiments(args.input, allow_missing_data=True), 82 | columns=[ 83 | "# relative_time", 84 | "total_execs", 85 | "bit_cvg", 86 | "shw_cvg", 87 | "corpus_count", 88 | ], 89 | ) 90 | outpath = os.path.join(args.output, "last_row_of_each_experiment.csv") 91 | df.to_csv(outpath, index=False) 92 | 93 | 94 | def get_summary(args): 95 | df = combine_last_row_of_each_experiment_data( 96 | iterate_over_all_experiments(args.input, allow_missing_data=True), 97 | columns=[ 98 | "# relative_time", 99 
| "total_execs", 100 | "bit_cvg", 101 | "shw_cvg", 102 | "corpus_count", 103 | ], 104 | ) 105 | outpath = os.path.join(args.output, "summary.csv") 106 | df_summary = ( 107 | df.drop(columns=["replicate"]) 108 | .groupby(["fuzzer", "isel", "target"]) 109 | .agg(["min", "max", "count", "mean", "std"]) 110 | ) 111 | 112 | df_summary.to_csv(outpath) 113 | 114 | 115 | def main() -> None: 116 | 117 | parser = argparse.ArgumentParser(description="Process fuzzing output") 118 | parser.add_argument( 119 | "-i", 120 | "--input", 121 | type=str, 122 | default="", 123 | help=f"The directory containing all inputs. Default to ${IRFUZZER_DATA_ENV}", 124 | ) 125 | parser.add_argument( 126 | "-o", 127 | "--output", 128 | type=str, 129 | default="./output", 130 | help="The directory containing processed results, will force removal if it exists.", 131 | ) 132 | parser.add_argument( 133 | "-t", 134 | "--type", 135 | type=str, 136 | choices=["LastCol", "Summary", "Plot", "Mann", "Data"], 137 | required=True, 138 | help="Type of the job you want me to do.", 139 | ) 140 | args = parser.parse_args() 141 | if args.input == "": 142 | args.input = os.getenv(IRFUZZER_DATA_ENV) 143 | if args.input == None: 144 | logging.error( 145 | f"Input directory not set, set --input or {IRFUZZER_DATA_ENV}" 146 | ) 147 | exit(1) 148 | if args.type != "Data": 149 | if os.path.exists(args.output): 150 | logging.warning(f"{args.output} exists, removing.") 151 | subprocess.run(["rm", "-rf", args.output]) 152 | os.mkdir(args.output) 153 | 154 | if args.type == "LastCol": 155 | get_last_col(args) 156 | elif args.type == "Summary": 157 | # TODO: All data required by summary can be found in expr_info now 158 | # maybe stop reading the whole csv as it is slow. 159 | get_summary(args) 160 | elif args.type == "Plot": 161 | generate_plots( 162 | experiments=iterate_over_all_experiments( 163 | args.input, allow_missing_data=True 164 | ), 165 | dir_out=args.output, 166 | ) 167 | elif args.type == "Mann": 168 | # Mann Whitney U Test to tell if we are statically significant. 
169 | pass 170 | 171 | 172 | if __name__ == "__main__": 173 | logging.basicConfig() 174 | logging.getLogger().setLevel(logging.INFO) 175 | main() 176 | -------------------------------------------------------------------------------- /scripts/compare_experiments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from pathlib import Path 4 | from typing import Iterable, Iterator, Tuple 5 | from matplotlib import pyplot 6 | import numpy as np 7 | import pandas as pd 8 | 9 | from lib.plot_data import read_plot_data 10 | 11 | 12 | def interpolate_data( 13 | df: pd.DataFrame, x_col: str, y_col: str, desired_xs: Iterator[int] 14 | ) -> Iterable[Tuple[int, int]]: 15 | desired_x = next(desired_xs) 16 | prev_row = None 17 | 18 | for _, curr_row in df.iterrows(): 19 | curr_x, curr_y = curr_row[x_col], curr_row[y_col] 20 | 21 | if desired_x is None: 22 | return 23 | 24 | if curr_x == desired_x: 25 | # no need for interpolation 26 | yield (desired_x, curr_y) 27 | desired_x = next(desired_xs, None) 28 | elif curr_x > desired_x: 29 | # linear interpolation 30 | if prev_row is None: 31 | raise Exception("Not supported yet") 32 | 33 | prev_x, prev_y = prev_row[x_col], prev_row[y_col] 34 | slope = (curr_y - prev_y) / (curr_x - prev_x) 35 | yield (desired_x, prev_y + (desired_x - prev_x) * slope) 36 | desired_x = next(desired_xs, None) 37 | 38 | prev_row = curr_row 39 | 40 | 41 | def interpolate_data_multiple( 42 | dfs: Iterable[pd.DataFrame], x_col: str, y_col: str, desired_xs: range 43 | ): 44 | return pd.DataFrame( 45 | { 46 | x_col: desired_xs, 47 | **dict( 48 | ( 49 | f"{y_col}_{idx}", 50 | [ 51 | y 52 | for _, y in interpolate_data(df, x_col, y_col, iter(desired_xs)) 53 | ], 54 | ) 55 | for idx, df in enumerate(dfs) 56 | ), 57 | } 58 | ) 59 | 60 | 61 | def get_confidence_intervals( 62 | df: pd.DataFrame, x_col: str, summary_col_prefix: str, t: float 63 | ) -> pd.DataFrame: 64 | df_temp = df.drop(columns=[x_col]) 65 | 66 | n = df_temp.count(axis=1) 67 | mean = df_temp.mean(axis=1) 68 | std_dev = df_temp.std(axis=1) 69 | std_err = std_dev / np.sqrt(n) 70 | 71 | return pd.DataFrame( 72 | { 73 | x_col: df[x_col], 74 | f"{summary_col_prefix}_ci_lower": mean - t * std_err, 75 | f"{summary_col_prefix}_mean": mean, 76 | f"{summary_col_prefix}_ci_upper": mean + t * std_err, 77 | } 78 | ) 79 | 80 | 81 | def iterate_plot_data_for_replicates( 82 | dir: str, n_replicate: int 83 | ) -> Iterable[pd.DataFrame]: 84 | return ( 85 | read_plot_data( 86 | Path( 87 | dir, 88 | str(i), 89 | "default/plot_data", 90 | ) 91 | ) 92 | for i in range(n_replicate) 93 | ) 94 | 95 | 96 | def compare( 97 | dir_mt_off: str, 98 | dir_mt_on: str, 99 | n_replicate: int, 100 | x_col: str, 101 | y_col: str, 102 | desired_xs: range, 103 | t: float, 104 | ) -> pd.DataFrame: 105 | df_off = interpolate_data_multiple( 106 | dfs=iterate_plot_data_for_replicates(dir_mt_off, n_replicate), 107 | x_col=x_col, 108 | y_col=y_col, 109 | desired_xs=desired_xs, 110 | ) 111 | 112 | df_on = interpolate_data_multiple( 113 | dfs=iterate_plot_data_for_replicates(dir_mt_on, n_replicate), 114 | x_col=x_col, 115 | y_col=y_col, 116 | desired_xs=desired_xs, 117 | ) 118 | 119 | df_off_ci = get_confidence_intervals(df_off, x_col, y_col, t) 120 | df_on_ci = get_confidence_intervals(df_on, x_col, y_col, t) 121 | 122 | return pd.merge(left=df_off_ci, right=df_on_ci, on=x_col, suffixes=("_off", "_on")) 123 | 124 | 125 | def main(): 126 | parser = argparse.ArgumentParser( 127 | description="Compare 
matcher table coverage of experiments", 128 | ) 129 | 130 | parser.add_argument( 131 | "-off", 132 | "--dir-mt-off", 133 | type=str, 134 | required=True, 135 | help="The dir of fuzzing results with matcher table on", 136 | ) 137 | 138 | parser.add_argument( 139 | "-on", 140 | "--dir-mt-on", 141 | type=str, 142 | required=True, 143 | help="The dir of fuzzing results with matcher table on", 144 | ) 145 | 146 | parser.add_argument( 147 | "-o", 148 | "--out", 149 | type=str, 150 | default="compare-all.png", 151 | help="The path to the figure to be saved", 152 | ) 153 | 154 | args = parser.parse_args() 155 | 156 | x_col = "# relative_time" 157 | y_col = "shw_cvg" 158 | desired_xs = range(800, 80000 + 1, 200) 159 | t = 2.776 # t(df=4, two-tail alpha=0.05) 160 | dir_mt_off = os.path.join(args.dir_mt_off, "irfuzzer/dagisel") 161 | dir_mt_on = os.path.join(args.dir_mt_on, "irfuzzer/dagisel") 162 | archs = ["aarch64", "arm", "nvptx", "riscv64", "x86_64"] 163 | 164 | fig, axs = pyplot.subplots( 165 | nrows=1, ncols=len(archs), layout="constrained", figsize=(12, 2.4) 166 | ) 167 | 168 | for i, arch in enumerate(archs): 169 | df_ci = compare( 170 | dir_mt_off=os.path.join(dir_mt_off, arch), 171 | dir_mt_on=os.path.join(dir_mt_on, arch), 172 | n_replicate=5, 173 | x_col=x_col, 174 | y_col=y_col, 175 | desired_xs=desired_xs, 176 | t=t, 177 | ) 178 | 179 | axs[i].set_title(arch) 180 | 181 | axs[i].plot(x_col, f"{y_col}_mean_off", data=df_ci, color="#4899dc") 182 | axs[i].fill_between( 183 | x=x_col, 184 | y1=f"{y_col}_ci_lower_off", 185 | y2=f"{y_col}_ci_upper_off", 186 | data=df_ci, 187 | color="#a2ccee", 188 | alpha=0.5, 189 | ) 190 | 191 | axs[i].plot(x_col, f"{y_col}_mean_on", data=df_ci, color="#f89d49") 192 | axs[i].fill_between( 193 | x=x_col, 194 | y1=f"{y_col}_ci_lower_on", 195 | y2=f"{y_col}_ci_upper_on", 196 | data=df_ci, 197 | color="#fccea7", 198 | alpha=0.5, 199 | ) 200 | 201 | axs[0].set_ylabel("Matcher Table Coverage") 202 | axs[len(archs) // 2].set_xlabel("Time (sec)") 203 | axs[0].legend( 204 | [ 205 | "Matcher Table Off (Mean)", 206 | "Matcher Table Off (95% CI)", 207 | "Matcher Table On (Mean)", 208 | "Matcher Table On (95% CI)", 209 | ], 210 | bbox_to_anchor=(0, 1.25, 6, 0.2), 211 | loc="lower left", 212 | mode="expand", 213 | ncol=4, 214 | ) 215 | 216 | fig.savefig(args.out) 217 | 218 | 219 | if __name__ == "__main__": 220 | main() 221 | -------------------------------------------------------------------------------- /mutator/src/afl-mutator.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "afl-fuzz.h" 4 | #include "mutator.h" 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define DATA_SIZE (4096) 12 | typedef struct custom_ir_mutator { 13 | afl_state_t *afl; 14 | uint8_t *mutator_buf; 15 | } CustomIRMutator; 16 | 17 | /** 18 | * Initialize this custom mutator 19 | * 20 | * @param[in] afl a pointer to the internal state object. Can be ignored for 21 | * now. 22 | * @param[in] seed A seed for this mutator - the same seed should always mutate 23 | * in the same way. 24 | * @return Pointer to the data object this custom mutator instance should use. 25 | * There may be multiple instances of this mutator in one afl-fuzz run! 26 | * Return NULL on error. 
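 *
 * Typically this library is loaded by pointing AFL_CUSTOM_MUTATOR_LIBRARY at
 * the built shared object before launching afl-fuzz, e.g.
 *   AFL_CUSTOM_MUTATOR_LIBRARY=/path/to/libmutator.so afl-fuzz -i IN -o OUT ./target
 * (the path shown is illustrative).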
27 | */ 28 | CustomIRMutator *afl_custom_init(afl_state_t *afl, unsigned int seed) { 29 | 30 | CustomIRMutator *mutator = 31 | (CustomIRMutator *)calloc(1, sizeof(CustomIRMutator)); 32 | if (!mutator) { 33 | perror("afl_custom_init alloc"); 34 | return NULL; 35 | } 36 | 37 | mutator->afl = afl; 38 | // The mutator can be think of as a deterministic function where 39 | // new_M = Mutate(M, seed); 40 | srand(seed); 41 | 42 | if ((mutator->mutator_buf = (u8 *)malloc(MAX_FILE)) == NULL) { 43 | 44 | free(mutator); 45 | perror("mutator_buf alloc"); 46 | return NULL; 47 | } 48 | createISelMutator(); 49 | return mutator; 50 | } 51 | 52 | /** 53 | * Perform custom mutations on a given input 54 | * 55 | * (Optional for now. Required in the future) 56 | * 57 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 58 | * @param[in] buf Pointer to input data to be mutated 59 | * @param[in] buf_size Size of input data 60 | * @param[out] out_buf the buffer we will work on. we can reuse *buf. NULL on 61 | * error. 62 | * @param[in] add_buf Buffer containing the additional test case 63 | * @param[in] add_buf_size Size of the additional test case 64 | * @param[in] max_size Maximum size of the mutated output. The mutation must not 65 | * produce data larger than max_size. 66 | * @return Size of the mutated output. 67 | */ 68 | size_t afl_custom_fuzz(CustomIRMutator *mutator, uint8_t *buf, size_t buf_size, 69 | u8 **out_buf, uint8_t *add_buf, 70 | size_t add_buf_size, // add_buf can be NULL 71 | size_t max_size) { 72 | 73 | memcpy(mutator->mutator_buf, buf, buf_size); 74 | size_t out_size = 75 | LLVMFuzzerCustomMutator(mutator->mutator_buf, buf_size, max_size, rand()); 76 | 77 | /* return size of mutated data */ 78 | *out_buf = mutator->mutator_buf; 79 | return out_size; 80 | } 81 | 82 | /** 83 | * A post-processing function to use right before AFL writes the test case to 84 | * disk in order to execute the target. 85 | * 86 | * (Optional) If this functionality is not needed, simply don't define this 87 | * function. 88 | * 89 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 90 | * @param[in] buf Buffer containing the test case to be executed 91 | * @param[in] buf_size Size of the test case 92 | * @param[out] out_buf Pointer to the buffer containing the test case after 93 | * processing. External library should allocate memory for out_buf. 94 | * The buf pointer may be reused (up to the given buf_size); 95 | * @return Size of the output buffer after processing or the needed amount. 96 | * A return of 0 indicates an error. 97 | */ 98 | /* 99 | size_t afl_custom_post_process(CustomIRMutator *data, uint8_t *buf, 100 | size_t buf_size, uint8_t **out_buf) { 101 | 102 | uint8_t *post_process_buf = 103 | maybe_grow(BUF_PARAMS(data, post_process), buf_size + 5); 104 | if (!post_process_buf) { 105 | 106 | perror("custom mutator realloc failed."); 107 | *out_buf = NULL; 108 | return 0; 109 | } 110 | 111 | memcpy(post_process_buf + 5, buf, buf_size); 112 | post_process_buf[0] = 'A'; 113 | post_process_buf[1] = 'F'; 114 | post_process_buf[2] = 'L'; 115 | post_process_buf[3] = '+'; 116 | post_process_buf[4] = '+'; 117 | 118 | *out_buf = post_process_buf; 119 | 120 | return buf_size + 5; 121 | } 122 | */ 123 | 124 | /** 125 | * This method is called at the start of each trimming operation and receives 126 | * the initial buffer. It should return the amount of iteration steps possible 127 | * on this input (e.g. 
if your input has n elements and you want to remove 128 | * them one by one, return n, if you do a binary search, return log(n), 129 | * and so on...). 130 | * 131 | * If your trimming algorithm doesn't allow you to determine the amount of 132 | * (remaining) steps easily (esp. while running), then you can alternatively 133 | * return 1 here and always return 0 in post_trim until you are finished and 134 | * no steps remain. In that case, returning 1 in post_trim will end the 135 | * trimming routine. The whole current index/max iterations stuff is only used 136 | * to show progress. 137 | * 138 | * (Optional) 139 | * 140 | * @param data pointer returned in afl_custom_init for this fuzz case 141 | * @param buf Buffer containing the test case 142 | * @param buf_size Size of the test case 143 | * @return The amount of possible iteration steps to trim the input. 144 | * negative on error. 145 | */ 146 | /* 147 | int32_t afl_custom_init_trim(CustomIRMutator *data, uint8_t *buf, 148 | size_t buf_size) { 149 | 150 | // We simply trim once 151 | data->trimmming_steps = 1; 152 | 153 | data->cur_step = 0; 154 | 155 | if (!maybe_grow(BUF_PARAMS(data, trim), buf_size)) { 156 | 157 | perror("init_trim grow"); 158 | return -1; 159 | } 160 | 161 | memcpy(data->trim_buf, buf, buf_size); 162 | 163 | data->trim_size_current = buf_size; 164 | 165 | return data->trimmming_steps; 166 | } 167 | */ 168 | 169 | /** 170 | * This method is called for each trimming operation. It doesn't have any 171 | * arguments because we already have the initial buffer from init_trim and we 172 | * can memorize the current state in *data. This can also save 173 | * reparsing steps for each iteration. It should return the trimmed input 174 | * buffer, where the returned data must not exceed the initial input data in 175 | * length. Returning anything that is larger than the original data (passed 176 | * to init_trim) will result in a fatal abort of AFLFuzz. 177 | * 178 | * (Optional) 179 | * 180 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 181 | * @param[out] out_buf Pointer to the buffer containing the trimmed test case. 182 | * External library should allocate memory for out_buf. 183 | * AFL++ will not release the memory after saving the test case. 184 | * Keep a ref in *data. 185 | * *out_buf = NULL is treated as error. 186 | * @return Pointer to the size of the trimmed test case 187 | */ 188 | /* 189 | size_t afl_custom_trim(CustomIRMutator *data, uint8_t **out_buf) { 190 | 191 | *out_buf = data->trim_buf; 192 | 193 | // Remove the last byte of the trimming input 194 | return data->trim_size_current - 1; 195 | } 196 | */ 197 | 198 | /** 199 | * This method is called after each trim operation to inform you if your 200 | * trimming step was successful or not (in terms of coverage). If you receive 201 | * a failure here, you should reset your input to the last known good state. 202 | * 203 | * (Optional) 204 | * 205 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 206 | * @param success Indicates if the last trim operation was successful. 207 | * @return The next trim iteration index (from 0 to the maximum amount of 208 | * steps returned in init_trim). negative ret on failure. 
209 | */ 210 | /* 211 | int32_t afl_custom_post_trim(CustomIRMutator *data, int success) { 212 | 213 | if (success) { 214 | 215 | ++data->cur_step; 216 | return data->cur_step; 217 | } 218 | 219 | return data->trimmming_steps; 220 | } 221 | */ 222 | 223 | /** 224 | * Perform a single custom mutation on a given input. 225 | * This mutation is stacked with the other muatations in havoc. 226 | * 227 | * (Optional) 228 | * 229 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 230 | * @param[in] buf Pointer to the input data to be mutated and the mutated 231 | * output 232 | * @param[in] buf_size Size of input data 233 | * @param[out] out_buf The output buffer. buf can be reused, if the content 234 | * fits. *out_buf = NULL is treated as error. 235 | * @param[in] max_size Maximum size of the mutated output. The mutation must 236 | * not produce data larger than max_size. 237 | * @return Size of the mutated output. 238 | */ 239 | /* 240 | size_t afl_custom_havoc_mutation(CustomIRMutator *mutator, u8 *buf, 241 | size_t buf_size, u8 **out_buf, 242 | size_t max_size) { 243 | memcpy(mutator->mutator_buf, buf, buf_size); 244 | size_t out_size = LLVMFuzzerCustomMutator(mutator->mutator_buf, buf_size, 245 | max_size, mutator->seed); 246 | 247 | // return size of mutated data 248 | *out_buf = mutator->mutator_buf; 249 | return out_size; 250 | } 251 | */ 252 | 253 | /** 254 | * Return the probability (in percentage) that afl_custom_havoc_mutation 255 | * is called in havoc. By default it is 6 %. 256 | * 257 | * (Optional) 258 | * 259 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 260 | * @return The probability (0-100). 261 | */ 262 | /* 263 | uint8_t afl_custom_havoc_mutation_probability(CustomIRMutator *data) { 264 | return 100; // 100 % 265 | } 266 | */ 267 | 268 | /** 269 | * Determine whether the fuzzer should fuzz the queue entry or not. 270 | * 271 | * (Optional) 272 | * 273 | * @param[in] data pointer returned in afl_custom_init for this fuzz case 274 | * @param filename File name of the test case in the queue entry 275 | * @return Return True(1) if the fuzzer will fuzz the queue entry, and 276 | * False(0) otherwise. 277 | */ 278 | /* 279 | uint8_t afl_custom_queue_get(CustomIRMutator *data, const uint8_t *filename) { 280 | 281 | return 1; 282 | } 283 | */ 284 | 285 | /** 286 | * Allow for additional analysis (e.g. calling a different tool that does a 287 | * different kind of coverage and saves this for the custom mutator). 
288 | * 289 | * (Optional) 290 | * 291 | * @param data pointer returned in afl_custom_init for this fuzz case 292 | * @param filename_new_queue File name of the new queue entry 293 | * @param filename_orig_queue File name of the original queue entry 294 | * @return if the file contents was modified return 1 (True), 0 (False) 295 | * otherwise 296 | */ 297 | /* 298 | uint8_t afl_custom_queue_new_entry(CustomIRMutator *data, 299 | const uint8_t *filename_new_queue, 300 | const uint8_t *filename_orig_queue) { 301 | 302 | // Additional analysis on the original or new test case 303 | return 0; 304 | } 305 | */ 306 | 307 | /** 308 | * Deinitialize everything 309 | * 310 | * @param data The data ptr from afl_custom_init 311 | */ 312 | void afl_custom_deinit(CustomIRMutator *mutator) { 313 | free(mutator->mutator_buf); 314 | free(mutator); 315 | } 316 | -------------------------------------------------------------------------------- /llvm-isel-afl/afl-driver.cpp: -------------------------------------------------------------------------------- 1 | //===- afl_driver.cpp - a glue between AFL and libFuzzer --------*- C++ -* ===// 2 | // 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 | // See https://llvm.org/LICENSE.txt for license information. 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 | //===----------------------------------------------------------------------===// 7 | 8 | /* This file allows to fuzz libFuzzer-style target functions 9 | (LLVMFuzzerTestOneInput) with AFL using AFL's persistent (in-process) mode. 10 | 11 | Usage: 12 | ################################################################################ 13 | cat << EOF > test_fuzzer.cc 14 | #include 15 | #include 16 | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { 17 | if (size > 0 && data[0] == 'H') 18 | if (size > 1 && data[1] == 'I') 19 | if (size > 2 && data[2] == '!') 20 | __builtin_trap(); 21 | return 0; 22 | } 23 | EOF 24 | # Build your target with -fsanitize-coverage=trace-pc-guard using fresh clang. 25 | clang -g -fsanitize-coverage=trace-pc-guard test_fuzzer.cc -c 26 | # Build afl-llvm-rt.o.c from the AFL distribution. 27 | clang -c -w $AFL_HOME/llvm_mode/afl-llvm-rt.o.c 28 | # Build this file, link it with afl-llvm-rt.o.o and the target code. 29 | clang++ afl_driver.cpp test_fuzzer.o afl-llvm-rt.o.o 30 | # Run AFL: 31 | rm -rf IN OUT; mkdir IN OUT; echo z > IN/z; 32 | $AFL_HOME/afl-fuzz -i IN -o OUT ./a.out 33 | ################################################################################ 34 | AFL_DRIVER_STDERR_DUPLICATE_FILENAME: Setting this *appends* stderr to the file 35 | specified. If the file does not exist, it is created. This is useful for getting 36 | stack traces (when using ASAN for example) or original error messages on hard 37 | to reproduce bugs. Note that any content written to stderr will be written to 38 | this file instead of stderr's usual location. 39 | 40 | AFL_DRIVER_CLOSE_FD_MASK: Similar to libFuzzer's -close_fd_mask behavior option. 41 | If 1, close stdout at startup. If 2 close stderr; if 3 close both. 42 | 43 | */ 44 | #include 45 | #include 46 | #include 47 | #include 48 | #include 49 | #include 50 | #include 51 | #include 52 | 53 | #include 54 | #include 55 | #include 56 | 57 | // Platform detection. 
Copied from FuzzerInternal.h 58 | #ifdef __linux__ 59 | #define LIBFUZZER_LINUX 1 60 | #define LIBFUZZER_APPLE 0 61 | #define LIBFUZZER_NETBSD 0 62 | #define LIBFUZZER_FREEBSD 0 63 | #elif __APPLE__ 64 | #define LIBFUZZER_LINUX 0 65 | #define LIBFUZZER_APPLE 1 66 | #define LIBFUZZER_NETBSD 0 67 | #define LIBFUZZER_FREEBSD 0 68 | #elif __NetBSD__ 69 | #define LIBFUZZER_LINUX 0 70 | #define LIBFUZZER_APPLE 0 71 | #define LIBFUZZER_NETBSD 1 72 | #define LIBFUZZER_FREEBSD 0 73 | #elif __FreeBSD__ 74 | #define LIBFUZZER_LINUX 0 75 | #define LIBFUZZER_APPLE 0 76 | #define LIBFUZZER_NETBSD 0 77 | #define LIBFUZZER_FREEBSD 1 78 | #else 79 | #error "Support for your platform has not been implemented" 80 | #endif 81 | 82 | // libFuzzer interface is thin, so we don't include any libFuzzer headers. 83 | extern "C" { 84 | int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size); 85 | __attribute__((weak)) int LLVMFuzzerInitialize(int *argc, char ***argv); 86 | } 87 | 88 | // Notify AFL about persistent mode. 89 | static volatile char AFL_PERSISTENT[] = "##SIG_AFL_PERSISTENT##"; 90 | extern "C" int __afl_persistent_loop(unsigned int); 91 | static volatile char suppress_warning2 = AFL_PERSISTENT[0]; 92 | 93 | // Notify AFL about deferred forkserver. 94 | static volatile char AFL_DEFER_FORKSVR[] = "##SIG_AFL_DEFER_FORKSRV##"; 95 | extern "C" void __afl_manual_init(); 96 | static volatile char suppress_warning1 = AFL_DEFER_FORKSVR[0]; 97 | 98 | // Input buffer. 99 | static const size_t kMaxAflInputSize = 1 << 20; 100 | static uint8_t AflInputBuf[kMaxAflInputSize]; 101 | 102 | // Use this optionally defined function to output sanitizer messages even if 103 | // user asks to close stderr. 104 | extern "C" __attribute__((weak)) void __sanitizer_set_report_fd(void *); 105 | 106 | // Keep track of where stderr content is being written to, so that 107 | // dup_and_close_stderr can use the correct one. 108 | static FILE *output_file = stderr; 109 | 110 | // Experimental feature to use afl_driver without AFL's deferred mode. 111 | // Needs to run before __afl_auto_init. 112 | __attribute__((constructor(0))) static void __decide_deferred_forkserver(void) { 113 | if (getenv("AFL_DRIVER_DONT_DEFER")) { 114 | if (unsetenv("__AFL_DEFER_FORKSRV")) { 115 | perror("Failed to unset __AFL_DEFER_FORKSRV"); 116 | abort(); 117 | } 118 | } 119 | } 120 | 121 | // If the user asks us to duplicate stderr, then do it. 122 | static void maybe_duplicate_stderr() { 123 | char *stderr_duplicate_filename = 124 | getenv("AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); 125 | 126 | if (!stderr_duplicate_filename) 127 | return; 128 | 129 | FILE *stderr_duplicate_stream = 130 | freopen(stderr_duplicate_filename, "a+", stderr); 131 | 132 | if (!stderr_duplicate_stream) { 133 | fprintf( 134 | stderr, 135 | "Failed to duplicate stderr to AFL_DRIVER_STDERR_DUPLICATE_FILENAME"); 136 | abort(); 137 | } 138 | output_file = stderr_duplicate_stream; 139 | } 140 | 141 | // Most of these I/O functions were inspired by/copied from libFuzzer's code. 142 | static void discard_output(int fd) { 143 | FILE *temp = fopen("/dev/null", "w"); 144 | if (!temp) 145 | abort(); 146 | dup2(fileno(temp), fd); 147 | fclose(temp); 148 | } 149 | 150 | static void close_stdout() { discard_output(STDOUT_FILENO); } 151 | 152 | // Prevent the targeted code from writing to "stderr" but allow sanitizers and 153 | // this driver to do so. 
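// For example, with AFL_DRIVER_CLOSE_FD_MASK=2 the target's own stderr writes
// are discarded, while sanitizer reports are redirected to a duplicate of the
// original stderr (or of the AFL_DRIVER_STDERR_DUPLICATE_FILENAME file).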
154 | static void dup_and_close_stderr() { 155 | int output_fileno = fileno(output_file); 156 | int output_fd = dup(output_fileno); 157 | if (output_fd <= 0) 158 | abort(); 159 | FILE *new_output_file = fdopen(output_fd, "w"); 160 | if (!new_output_file) 161 | abort(); 162 | if (!__sanitizer_set_report_fd) 163 | return; 164 | __sanitizer_set_report_fd(reinterpret_cast(output_fd)); 165 | discard_output(output_fileno); 166 | } 167 | 168 | static void Printf(const char *Fmt, ...) { 169 | va_list ap; 170 | va_start(ap, Fmt); 171 | vfprintf(output_file, Fmt, ap); 172 | va_end(ap); 173 | fflush(output_file); 174 | } 175 | 176 | // Close stdout and/or stderr if user asks for it. 177 | static void maybe_close_fd_mask() { 178 | char *fd_mask_str = getenv("AFL_DRIVER_CLOSE_FD_MASK"); 179 | if (!fd_mask_str) 180 | return; 181 | int fd_mask = atoi(fd_mask_str); 182 | if (fd_mask & 2) 183 | dup_and_close_stderr(); 184 | if (fd_mask & 1) 185 | close_stdout(); 186 | } 187 | 188 | // Define LLVMFuzzerMutate to avoid link failures for targets that use it 189 | // with libFuzzer's LLVMFuzzerCustomMutator. 190 | extern "C" size_t LLVMFuzzerMutate(uint8_t *Data, size_t Size, size_t MaxSize) { 191 | assert(false && "LLVMFuzzerMutate should not be called from afl_driver"); 192 | return 0; 193 | } 194 | 195 | // Execute any files provided as parameters. 196 | static int ExecuteFilesOnyByOne(int argc, char **argv) { 197 | for (int i = 1; i < argc; i++) { 198 | std::ifstream in(argv[i], std::ios::binary); 199 | in.seekg(0, in.end); 200 | size_t length = in.tellg(); 201 | in.seekg(0, in.beg); 202 | std::cout << "Reading " << length << " bytes from " << argv[i] << std::endl; 203 | // Allocate exactly length bytes so that we reliably catch buffer overflows. 204 | std::vector bytes(length); 205 | in.read(bytes.data(), bytes.size()); 206 | assert(in); 207 | LLVMFuzzerTestOneInput(reinterpret_cast(bytes.data()), 208 | bytes.size()); 209 | std::cout << "Execution successful" << std::endl; 210 | } 211 | return 0; 212 | } 213 | 214 | #define GET_TARGET_INFO_FROM_ENV_OR_EXIT(ENV_NAME, ARG_NAME, ARGV) \ 215 | { \ 216 | char *ARG_NAME = getenv(#ENV_NAME); \ 217 | if (ARG_NAME) { \ 218 | static char arg_##ARG_NAME[256]; \ 219 | memset(arg_##ARG_NAME, 0, 256); \ 220 | sprintf(arg_##ARG_NAME, "-m%s=%s", #ARG_NAME, ARG_NAME); \ 221 | Printf("%s: %s\n", #ENV_NAME, arg_##ARG_NAME); \ 222 | ARGV.push_back(arg_##ARG_NAME); \ 223 | } else { \ 224 | Printf("%s not found, abort.\n", #ENV_NAME); \ 225 | exit(1); \ 226 | } \ 227 | } 228 | 229 | int main(int argc, char **argv) { 230 | Printf("======================= INFO =========================\n" 231 | "This binary is built for AFL-fuzz.\n" 232 | "To run the target function on individual input(s) execute this:\n" 233 | " %s < INPUT_FILE\n" 234 | "or\n" 235 | " %s INPUT_FILE1 [INPUT_FILE2 ... 
]\n" 236 | "To fuzz with afl-fuzz execute this:\n" 237 | " afl-fuzz [afl-flags] %s [-N]\n" 238 | "afl-fuzz will run N iterations before " 239 | "re-spawning the process (default: 1000)\n" 240 | "======================================================\n", 241 | argv[0], argv[0], argv[0]); 242 | 243 | maybe_duplicate_stderr(); 244 | maybe_close_fd_mask(); 245 | if (LLVMFuzzerInitialize) { 246 | std::vector Argv({argv[0]}); 247 | char *g = getenv("GLOBAL_ISEL"); 248 | if (g && g[0] == '1') { 249 | Printf("Fuzzing GlobalISel\n"); 250 | Argv.push_back((char *)"-global-isel"); 251 | } else { 252 | Printf("Fuzzing DAGISel\n"); 253 | } 254 | 255 | GET_TARGET_INFO_FROM_ENV_OR_EXIT(TRIPLE, triple, Argv); 256 | GET_TARGET_INFO_FROM_ENV_OR_EXIT(CPU, cpu, Argv); 257 | GET_TARGET_INFO_FROM_ENV_OR_EXIT(ATTR, attr, Argv); 258 | 259 | char *tbl_size = getenv("MATCHER_TABLE_SIZE"); 260 | if (tbl_size) { 261 | Printf("MATCHER_TABLE_SIZE set to %s", tbl_size); 262 | } else { 263 | Printf("MATCHER_TABLE_SIZE not found, abort.\n"); 264 | exit(1); 265 | } 266 | char **AArgv = Argv.data(); 267 | int AArgc = Argv.size(); 268 | LLVMFuzzerInitialize(&AArgc, &AArgv); 269 | } 270 | // Do any other expensive one-time initialization here. 271 | 272 | if (!getenv("AFL_DRIVER_DONT_DEFER")) 273 | __afl_manual_init(); 274 | 275 | int N = 1000; 276 | if (argc == 2 && argv[1][0] == '-') 277 | N = atoi(argv[1] + 1); 278 | else if (argc == 2 && (N = atoi(argv[1])) > 0) 279 | Printf("WARNING: using the deprecated call style `%s %d`\n", argv[0], N); 280 | else if (argc > 1) 281 | return ExecuteFilesOnyByOne(argc, argv); 282 | 283 | assert(N > 0); 284 | 285 | // Call LLVMFuzzerTestOneInput here so that coverage caused by initialization 286 | // on the first execution of LLVMFuzzerTestOneInput is ignored. 287 | uint8_t dummy_input[1] = {0}; 288 | LLVMFuzzerTestOneInput(dummy_input, 1); 289 | 290 | int num_runs = 0; 291 | while (__afl_persistent_loop(N)) { 292 | ssize_t n_read = read(0, AflInputBuf, kMaxAflInputSize); 293 | if (n_read > 0) { 294 | // Copy AflInputBuf into a separate buffer to let asan find buffer 295 | // overflows. Don't use unique_ptr/etc to avoid extra dependencies. 296 | uint8_t *copy = new uint8_t[n_read]; 297 | memcpy(copy, AflInputBuf, n_read); 298 | num_runs++; 299 | LLVMFuzzerTestOneInput(copy, n_read); 300 | delete[] copy; 301 | } 302 | } 303 | Printf("%s: successfully executed %d input(s)\n", argv[0], num_runs); 304 | } 305 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2023 Yuyang (Peter) Rong 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /scripts/classify.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import subprocess 3 | import os 4 | import re 5 | from typing import Iterable, Iterator, List, Optional, Set, Tuple 6 | import shutil 7 | from pathlib import Path 8 | import tempfile 9 | 10 | from lib.process_concurrency import run_concurrent_subprocesses 11 | 12 | 13 | class StackTrace: 14 | # using tuple instead of list for easier equality check 15 | stack_frames: Tuple[Tuple[str, str], ...] 16 | 17 | def __init__(self, stacktrace: Iterable[str], remove_addr: bool = False): 18 | stack_frames: List[Tuple[str, str]] = [] 19 | 20 | for line in stacktrace: 21 | words = line.strip().split(" ") 22 | assert words[0].startswith("#") 23 | function = " ".join(words[2:-1]) 24 | location = words[-1] 25 | if remove_addr: 26 | location = re.sub(r"0x[0-9a-f]+", "0x_", location) 27 | stack_frames.append((function, location)) 28 | 29 | self.stack_frames = tuple(stack_frames) 30 | 31 | def __str__(self) -> str: 32 | ret = "" 33 | for (f, l) in self.stack_frames: 34 | ret += f"\t{f} {l}\n" 35 | return ret 36 | 37 | def __len__(self) -> int: 38 | return len(self.stack_frames) 39 | 40 | def __eq__(self, other) -> bool: 41 | return self.stack_frames == other.stack_frames 42 | 43 | def __hash__(self) -> int: 44 | return hash(self.stack_frames) 45 | 46 | 47 | class CrashError: 48 | return_code: int 49 | failed_pass: Optional[str] 50 | message_raw: str 51 | message_minimized: str 52 | type: str 53 | subtype: Optional[str] 54 | undefined_external_symbol: bool 55 | stack_trace: StackTrace 56 | hash_stacktrace_only: bool 57 | hash_op_code_only_for_isel_crash: bool 58 | 59 | def __init__( 60 | self, 61 | args: List[str], 62 | return_code: int, 63 | stderr_iter: Iterator[str], 64 | hash_stacktrace_only: bool = False, 65 | hash_op_code_only_for_isel_crash: bool = False, 66 | remove_addr_in_stacktrace: bool = False, 67 | ): 68 | self.return_code = return_code 69 | self.hash_stacktrace_only = hash_stacktrace_only 70 | self.hash_op_code_only_for_isel_crash = hash_op_code_only_for_isel_crash 71 | self.undefined_external_symbol = False 72 | 73 | # extract and minimize error message 74 | message_lines = [] 75 | while ( 76 | (curr_line := next(stderr_iter, None)) 77 | and curr_line 78 | != "PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.\n" 79 | ): 80 | # do not include the entire DAG in the error message 81 | if curr_line == "\n" or re.match(r"^ +0x[0-9a-f]+: .+ = .+\n$", curr_line): 82 | continue 83 | 84 | if re.match(r'LLVM ERROR: Undefined external symbol ".+"\n', curr_line): 85 | self.undefined_external_symbol = True 86 | 87 | message_lines.append(curr_line) 88 | 89 | self.message_raw = "".join(message_lines) 90 | 91 | self.message_minimized = ( 92 | re.sub(r"%[0-9]+", "%_", self.message_raw) 93 | .replace(args[0], os.path.basename(args[0])) 94 | .replace(args[-1], "ir.bc") 95 | ) 96 | 97 | self.message_minimized = 
re.sub(r"0x[0-9a-f]+", "0x_", self.message_minimized) 98 | self.message_minimized = re.sub( 99 | r"(unable to allocate function argument #)[0-9]+", 100 | r"\1_", 101 | self.message_minimized, 102 | ) 103 | self.message_minimized = re.sub( 104 | r"(Error while trying to spill )(.+)( from class )(.+)(: Cannot scavenge register without an emergency spill slot!)", 105 | r"\1_\3\4\5", 106 | self.message_minimized, 107 | ) 108 | 109 | # extract failed pass and stack trace 110 | self.failed_pass = None 111 | if (curr_line := next(stderr_iter, None)) and curr_line == "Stack dump:\n": 112 | # extract failed pass 113 | while ( 114 | curr_line := next(stderr_iter, None) 115 | ) and "llvm::sys::PrintStackTrace" not in curr_line: 116 | if ( 117 | match := re.match( 118 | r" *[0-9]+\.\tRunning pass \'([A-Za-z0-9 ]+)\'", curr_line 119 | ) 120 | ) is not None: 121 | self.failed_pass = match.group(1) 122 | 123 | # extract stack trace 124 | try: 125 | self.stack_trace = StackTrace(stderr_iter, remove_addr_in_stacktrace) 126 | except: 127 | print(f"WARNING: Unable to parse stack trace for {args[-1]}") 128 | self.stack_trace = StackTrace([]) 129 | else: 130 | self.stack_trace = StackTrace([]) 131 | 132 | # determine error type 133 | if self.message_raw.startswith("LLVM ERROR: unable to legalize instruction:"): 134 | self.type = "instruction-legalization" 135 | matches = re.findall(r"G_[A-Z_]+", message_lines[0]) 136 | assert len(matches) == 1 137 | self.subtype = matches[0] 138 | elif self.message_raw.startswith("LLVM ERROR: cannot select:"): 139 | self.type = "global-instruction-selection" 140 | matches = re.findall(r"G_[A-Z_]+", message_lines[0]) 141 | assert len(matches) == 1 142 | self.subtype = matches[0] 143 | elif self.message_raw.startswith("LLVM ERROR: Cannot select:"): 144 | self.type = "dag-instruction-selection" 145 | match = re.match( 146 | r"LLVM ERROR: Cannot select:.+ = ([a-zA-Z0-9_:]+(<.+>)?)", 147 | message_lines[0], 148 | ) 149 | if match is None: 150 | print(f'ERROR: failed to extract instruction from "{message_lines[0]}"') 151 | self.subtype = "Unknown" 152 | else: 153 | self.subtype = match.group(1).split("<")[0] 154 | else: 155 | if self.failed_pass is None: 156 | self.type = "other" 157 | else: 158 | self.type = self.failed_pass.lower().replace(" ", "-") 159 | self.subtype = None 160 | 161 | def __str__(self) -> str: 162 | return "\n".join( 163 | [ 164 | f"Return Code: {self.return_code}", 165 | f"Error Type: {self.type}", 166 | f"Failed Pass: {self.failed_pass}", 167 | "Minimized Message:", 168 | self.message_minimized, 169 | "Stack Trace:", 170 | str(self.stack_trace), 171 | ] 172 | ) 173 | 174 | def get_folder_name(self) -> str: 175 | return os.path.join( 176 | self.type, 177 | self.subtype if self.subtype is not None else "", 178 | f"tracedepth_{len(self.stack_trace)}__hash_0x{hash(self):08x}", 179 | ) 180 | 181 | def __hash__(self): 182 | if self.hash_op_code_only_for_isel_crash and ( 183 | self.type == "dag-instruction-selection" 184 | or self.type == "global-instruction-selection" 185 | ): 186 | return hash(self.subtype) 187 | 188 | if self.hash_stacktrace_only: 189 | return hash(self.stack_trace) 190 | 191 | return hash(self.stack_trace) ^ hash(self.message_minimized) 192 | 193 | 194 | def classify( 195 | cmd: List[str], 196 | input_dir: str | Path, 197 | output_dir: str | Path, 198 | force: bool, 199 | verbose: bool = False, 200 | create_symlink_to_source: bool = True, 201 | hash_stacktrace_only: bool = False, 202 | hash_op_code_only_for_isel_crash: bool = False, 203 | 
remove_addr_in_stacktrace: bool = False, 204 | ignore_undefined_external_symbol: bool = False, 205 | ) -> None: 206 | output_dir = os.path.abspath(output_dir) 207 | input_dir = os.path.abspath(input_dir) 208 | temp_dir = tempfile.gettempdir() 209 | 210 | if os.path.exists(output_dir): 211 | if force: 212 | shutil.rmtree(output_dir) 213 | else: 214 | print(f"{output_dir} already exists, use -f to remove it. Abort.") 215 | exit(1) 216 | 217 | Path(output_dir).mkdir(parents=True) 218 | 219 | crash_hashes: Set[int] = set() 220 | false_alarms: List[str] = [] 221 | 222 | def on_process_exit(file_name: str, exit_code: Optional[int], p: subprocess.Popen) -> None: 223 | ir_bc_path: str = p.args[-1] # type: ignore 224 | stderr_dump_path = os.path.join(temp_dir, file_name + ".stderr") 225 | stderr_dump_file = open(stderr_dump_path) 226 | 227 | if os.stat(stderr_dump_path).st_size == 0: 228 | false_alarms.append(ir_bc_path) 229 | return 230 | 231 | crash = CrashError( 232 | p.args, # type: ignore 233 | p.returncode, 234 | stderr_dump_file, 235 | hash_stacktrace_only, 236 | hash_op_code_only_for_isel_crash, 237 | remove_addr_in_stacktrace, 238 | ) 239 | 240 | stderr_dump_file.close() 241 | os.remove(stderr_dump_path) 242 | 243 | if ignore_undefined_external_symbol and crash.undefined_external_symbol: 244 | return 245 | 246 | folder_name = crash.get_folder_name() 247 | folder_path = os.path.join(output_dir, folder_name) 248 | Path(folder_path).mkdir(parents=True, exist_ok=True) 249 | 250 | if hash(crash) not in crash_hashes: 251 | crash_hashes.add(hash(crash)) 252 | with open( 253 | os.path.join(output_dir, folder_name + ".log"), "w+" 254 | ) as report_path: 255 | print(crash, file=report_path) 256 | 257 | if verbose: 258 | print("New crash type:", folder_name) 259 | 260 | if create_symlink_to_source: 261 | os.symlink( 262 | ir_bc_path, 263 | os.path.join(folder_path, os.path.basename(ir_bc_path) + ".bc"), 264 | ) 265 | 266 | run_concurrent_subprocesses( 267 | iter=list( 268 | filter( 269 | lambda file_name: file_name.split(".")[-1] not in ["md", "txt", "s"], 270 | os.listdir(input_dir), 271 | ) 272 | ), 273 | subprocess_creator=lambda file_name: subprocess.Popen( 274 | cmd + [os.path.join(input_dir, file_name)], 275 | stdout=subprocess.DEVNULL, 276 | stderr=open(os.path.join(temp_dir, file_name + ".stderr"), "w"), 277 | ), 278 | on_exit=on_process_exit, 279 | ) 280 | 281 | print(f"{len(false_alarms)} false positives, {len(crash_hashes)} unique crashes") 282 | with open(os.path.join(output_dir, "false_positives.txt"), "a+") as file: 283 | file.writelines(line + "\n" for line in false_alarms) 284 | 285 | with open(os.path.join(output_dir, "unique_crashes"), "w+") as file: 286 | file.write(str(len(crash_hashes))) 287 | 288 | 289 | def main() -> None: 290 | parser = argparse.ArgumentParser( 291 | description="Run all crashed cases and classify them" 292 | ) 293 | parser.add_argument( 294 | "--cmd", 295 | type=str, 296 | required=True, 297 | help="The command to run on all files in the input dir", 298 | ) 299 | parser.add_argument( 300 | "--input", type=str, required=True, help="The directory containing input files" 301 | ) 302 | parser.add_argument( 303 | "--output", 304 | type=str, 305 | required=False, 306 | default="output", 307 | help="The directory to store all organized output", 308 | ) 309 | parser.add_argument( 310 | "-f", 311 | "--force", 312 | action="store_true", 313 | help="force delete the output directory if it already exists.", 314 | ) 315 | args = parser.parse_args() 316 | 
classify(args.cmd.split(" "), args.input, args.output, args.force, verbose=True) 317 | 318 | 319 | if __name__ == "__main__": 320 | main() 321 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IR Fuzzer 2 | 3 | You can find the camera-ready paper [here](https://github.com/user-attachments/files/19461085/IRFuzzer.pdf). 4 | 5 | This repo hasn't been actively maintained, but I will review and accept any PR if that helps. 6 | 7 | # Quick start 8 | 9 | ## Compile 10 | 11 | You should be able to prepare everything by running `./build.sh`. 12 | It should compile everything for you. 13 | If it fails for any reason, please file an issue on this repo. 14 | 15 | The script will set some environment variables. 16 | You may want to leave these in your `.bashrc` for further fuzzing: 17 | 18 | ```sh 19 | # Path to this directory 20 | export FUZZING_HOME=$(pwd) 21 | # The LLVM you want to fuzz 22 | export LLVM= 23 | export AFL=AFLplusplus 24 | export PATH=$PATH:$HOME/clang+llvm/bin 25 | # Tell AFL++ to only use our mutator 26 | export AFL_CUSTOM_MUTATOR_ONLY=1 27 | # Tell AFL++ where our mutator is 28 | export AFL_CUSTOM_MUTATOR_LIBRARY=$FUZZING_HOME/mutator/build/libAFLCustomIRMutator.so 29 | # AFL instrumentation method 30 | export AFL_LLVM_INSTRUMENT=CLASSIC 31 | ``` 32 | 33 | If you want to use a dockerized environment, you can also run 34 | 35 | ```sh 36 | docker build . -t irfuzzer 37 | ``` 38 | 39 | ## Seed selection 40 | 41 | Seeds are the initial inputs we give the fuzzer; they have a direct impact on fuzzing performance. 42 | `seeds` provides a default seed to start fuzzing; it is an empty module with some function signatures. 43 | For better fuzzing performance, you are more than welcome to move modules from `$LLVM/llvm/test/CodeGen/` into `seeds`. 44 | Notice that `seeds` only accepts bitcode, not textual LLVM IR. 45 | 46 | ## Run 47 | 48 | ### Env vars 49 | 50 | You can specify different arguments for the driver using environment variables. 51 | 52 | **Required** 53 | 54 | ```sh 55 | export TRIPLE= 56 | export CPU= 57 | export ATTR= 58 | ``` 59 | You can specify triples like `x86_64`, `aarch64`, `aie`, etc. 60 | If you don't know what triples you have, try `llc --version`; it will list all the targets you have. 61 | `CPU` and `ATTR` can be left empty, but they must still be exported. 62 | They are equivalent to the `-mcpu` and `-mattr` options you would normally pass to `llc`. 63 | 64 | ```sh 65 | export MATCHER_TABLE_SIZE=13780 66 | ``` 67 | Matcher table size refers to the size of the matcher table generated by TableGen. 68 | The table is automatically generated as a static variable in `SelectCode(SDNode *N)` in `GenDAGISel.inc` (for SelectionDAG) and in `InstructionSelector::getMatchTable()` in `GenGlobalISel.inc` (for GlobalISel). You have three ways to find its length: 69 | 70 | 1. Every time AFL's compiler compiles the project, it counts the table size and prints a line like `[+] MatcherTable size: 22660`. You can look out for that. 71 | 2. If you missed it, you can delete the object file (`ISelDAGToDAG.cpp.o` or `InstructionSelector.cpp.o`) and force a re-compilation. 72 | ```sh 73 | $ cd build-afl 74 | $ rm lib/Target/AIE/CMakeFiles/LLVMAIECodeGen.dir/AIEISelDAGToDAG.cpp.o 75 | $ ninja 76 | 77 | [6/27] Building CXX object lib/Target/AIE/CMakeFiles/LLVMAIECodeGen.dir/AIEISelDAGToDAG.cpp.o 78 | [+] MatcherTable size: 22660 79 | ``` 80 | 3. You can also find this data in [`scripts/common.py`](./scripts/common.py). 
81 | It may not be 100% accurate as the code gets updated. 82 | 83 | 84 | **Optional** 85 | 86 | ```sh 87 | export GLOBAL_ISEL=1; 88 | ``` 89 | By default, we are fuzzing SelectionDAG. If you want to fuzz GlobalISel, set this environment variable. Please make sure `MATCHER_TABLE_SIZE` matches GlobalISel's table size. 90 | 91 | ### Command line 92 | 93 | **Once the environment variables are set**, the easiest way to start fuzzing is to run 94 | ```sh 95 | ./AFLplusplus/afl-fuzz -i seeds -o fuzzing llvm-isel-afl/build/isel-fuzzing 96 | ``` 97 | This starts a fuzzing instance to fuzz SelectionDAG. 98 | Some useful arguments you might give `afl-fuzz` include: 99 | - `-E <n>`: execute/mutate the input `n` times and quit 100 | - `-V <t>`: run the fuzzer for `t` seconds and quit 101 | 102 | Fuzzing can take days, if not weeks. 103 | I recommend using [`screen`](https://www.gnu.org/software/screen/) to run the fuzzing in the background. 104 | 105 | AFL++ will give you a fancy UI describing what's happening. 106 | You may check [this](https://github.com/mirrorer/afl/blob/master/docs/status_screen.txt) page to help you understand the stats. 107 | 108 | ### Archs and table size 109 | 110 | Check `./scripts/common.py`. 111 | 112 | ## Scripts 113 | 114 | ### Dependencies 115 | 116 | We prepared many scripts to automate the fuzzing process. 117 | These scripts run on Python 3.10+, as it supports the type hints that keep the code less messy. 118 | Use `python3.10` explicitly to avoid conflicts with `python3.6`... supposing you are still using Ubuntu 18.04 or older. 119 | To install the dependencies you may want to run: 120 | ```sh 121 | # If your ubuntu is so old you don't have python3.10 in your apt I can't help you... 122 | # `apt install -y python3.10 python3-pip wget` 123 | wget https://bootstrap.pypa.io/get-pip.py 124 | python3.10 get-pip.py 125 | 126 | # You can install all the dependencies of the scripts with: 127 | pip3.10 install -r scripts/requirements.txt 128 | ``` 129 | 130 | ### Description and usage 131 | 132 | - `common.py`: this is not intended to be called directly, but it has a lot of metadata inside; you are welcome to take a look. 133 | - `fuzz.py`: this fuzzes a lot of triples using `docker` or `screen`. 134 | - `batch_classify.py`: this script runs all the crashed inputs and clusters the same ones together using the stack trace. You may want to run this after a fuzzing run. 135 | - `combine_fuzzing_results.py`: this script combines multiple fuzzing directories into one. Unless you are writing a paper and need massive amounts of data, you probably don't need it. 136 | - `process_data.py`: summarizes the fuzzing results. 137 | 138 | Using `fuzz.py` doesn't require you to set any environment variables; the script will take care of them. 
139 | You would most likely use `fuzz.py` like this: 140 | 141 | ```sh 142 | python3.10 scripts/fuzz.py -i seeds -o fuzzing -r 5 --set=" aie" --type=screen --isel=dagisel --fuzzer=irfuzzer --time=1w -j 80 --on_exist=force 143 | ``` 144 | 145 | It means: start fuzzing using input from `seeds` (`-i seeds`), put the result in `fuzzing` (`-o fuzzing`), repeat the experiment five times (`-r 5`), test aie without attribute and cpu settings (`--set=" aie"`), use screen to monitor the fuzzing (`--type=screen`), test SelectionDAG (`--isel=dagisel`), use our fuzzer (`--fuzzer=irfuzzer`), test for a week (`--time=1w`), start at most 80 jobs in parallel (`-j 80`), and if the output directory already exists, force remove it (`--on_exist=force`). 146 | 147 | # How do we fuzz 148 | 149 | See the details in our paper. 150 | 151 | # Trophies & Findings 152 | 153 | (I think I will attach more links to keep track of these later) 154 | 155 | ## AI Engine 156 | - AIE1 GlobalISel lacks floating point support 157 | - G_FCONSTANT [fixed.](https://gitenterprise.xilinx.com/XRLabs/llvm-aie/pull/194) 158 | - AIE1 GlobalISel lacks vector support. 159 | - AIE1 SelectionDAG has bugs in the memory store. 160 | - AIE1 SelectionDAG has truncation errors. [Fixed.](https://gitenterprise.xilinx.com/XRLabs/llvm-aie/pull/161/) 161 | - AIE1 `vst.spil` generates two stores to the same address. [PoC.](https://gitenterprise.xilinx.com/XRLabs/peano_usage/pull/15) [Fixed.](https://gitenterprise.xilinx.com/XRLabs/llvm-aie/pull/203) 162 | 163 | ## Open sourced architecture 164 | 165 | See our [trophies repo](https://github.com/DataCorrupted/LLVM-fuzzing-trophies). 166 | 167 | # FAQ 168 | 169 | __Why build two versions of LLVM?__ 170 | 171 | One version is built by AFL's compiler, and another is built by LLVM 14 and contains a new mutator we designed. 172 | AFL needs to inject some code into the AIE compiler to keep track of runtime info (edge coverage, MatcherTable coverage, etc.). 173 | Besides, the driver also depends on it. 174 | The other version is the dependency for the mutator. You __can__ use an AFL-instrumented mutator, but it would slow down mutation and is thus not recommended. 175 | 176 | __Why fuzz a fork of AIE that is not up-to-date?__ 177 | 178 | Mainly because the mutator also needs to understand the architecture we are fuzzing, although it only generates mid-end IR. 179 | Therefore, until we merge the mutator's code into AIE, all you can do is keep merging the code you want to test into the mutator branch and compile everything. 180 | 181 | __Are we fuzzing AIE2?__ 182 | 183 | Currently we are only fuzzing AIE1 since it is more complete than AIE2. 184 | But you can fuzz AIE2 if you want to; in principle fuzzing AIE2 is no different from fuzzing AIE1. 185 | All you need to do is set `TRIPLE=aie2` and set `MATCHER_TABLE_SIZE` correctly. 186 | 187 | __AIE compilation hangs__ 188 | 189 | It's a known issue that `Target/AIE/MCTargetDesc/AIEMCFormats.cpp` will take a long time (~10 minutes) to compile. A function in it, `__cxx_global_var_init()`, causes the optimizer to run for a really long time. It is an interesting bug, but we haven't had time to fix it. 190 | 191 | __What is a seed and what should I use?__ 192 | 193 | A seed is the initial file you give the fuzzer to work on. 194 | Unfortunately, this is required for AFL (libFuzzer can cold-start without a seed). 195 | In this repo, we included a minimal seed in `seeds/` so you can start fuzzing without really worrying about it. 
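If you would rather craft a seed of your own, a minimal sketch looks like this (hypothetical file name; any small module that exercises the code you care about will do — see the notes on `llvm-as`/`llvm-dis` below):

```sh
# Hand-write a tiny module that stresses floating point, then assemble it
# into bitcode, since `seeds` only accepts bitcode.
cat > fadd_seed.ll << 'EOF'
define float @fadd_seed(float %a, float %b) {
  %sum = fadd float %a, %b
  ret float %sum
}
EOF
llvm-as fadd_seed.ll -o seeds/fadd_seed.bc
```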
196 | 197 | However, academic research and industry practice have shown that a better seed can lead to better results. You may reach the same results faster or find previously unseen behavior with different seeds. 198 | So it helps to manually craft seeds that cover the code you want to test; for example, if you want to focus on floating point, you can create seeds with floating-point calculations in them. 199 | 200 | To create a seed, you can write LLVM IR manually and convert it to bitcode using `llvm-as`. Or you can cast bitcode to IR using `llvm-dis` and change some of the instructions. 201 | 202 | __Matcher table coverage is 0.0%__ 203 | 204 | Table coverage may be low but never 0.0% in any case. Please make sure the matcher table is correctly instrumented. 205 | 206 | 1. Make sure your binary is linked against the library compiled by AFL. 207 | 2. Make sure AFL instrumented it. During compilation, there should be a line telling you `[+] Instrumenting matcher table.` 208 | 209 | __What do the stats in AFL's UI mean?__ 210 | 211 | You may check [this](https://github.com/mirrorer/afl/blob/master/docs/status_screen.txt) page to help you understand the stats. 212 | 213 | We introduced a new coverage metric, so `map density` shows two stats. The first one is edge coverage, which should reach 70~80% in a day or two, meaning that (almost) all control flow has been tested. 214 | The second stat is matcher table coverage. It shows how much of the table has been referenced. The higher, the better. 215 | 216 | __My fuzzer is running slow__ 217 | 218 | There are two reasons this could happen. 219 | AFL interacts heavily with the file system, so make sure your directory is not on NFS or any remotely mounted hard drive. If you want even faster speed, you can mount a tmpfs and do the fuzzing in memory. 220 | 221 | Another reason is that your seeds are taking a long time to execute. You may either choose smaller initial seeds or use shorter timeouts by adding `-t <timeout>` to AFL's arguments. 222 | 223 | __Where are the crashes located?__ 224 | 225 | `$FUZZING_HOME/fuzzing_output/default/crashes` 226 | 227 | __How to reproduce errors?__ 228 | 229 | One upside of fuzzing is that it always gives you a reproducible PoC. 230 | You can run `build-release/bin/llc <crashing-input>`. 231 | 232 | We have also found cases where `llc` won't reproduce the crash. 233 | In that case, try 234 | ```sh 235 | export CPU= 236 | export ATTR= 237 | export TRIPLE= 238 | export MATCHER_TABLE_SIZE= 239 | ./llvm-isel-afl/build/isel-fuzzing < <crashing-input> 240 | ``` 241 | 242 | We have noticed some settings differ between `llc` and our driver `isel-fuzzing`. 243 | We haven't had time to deal with it. Will update this later. 244 | 245 | If there is any input that can't be reproduced even using `isel-fuzzing`, there are two possibilities: 246 | - Your matcher table size is set wrong. 247 | - It may be a bug; please send us an issue. 248 | 249 | __What if `MatcherTable` is not set or set incorrectly?__ 250 | 251 | To pass compilation and AFL's self-testing, `MATCHER_TABLE_SIZE` defaults to a small value. You would most likely see `Shadow table size: 32 too small. Did you set it properly?`, which means it is not set. 252 | If `MATCHER_TABLE_SIZE` is not set correctly, you will have false positives where the seed is stored in `crashes` (indicating the fuzzer found the seed crashing), but you can't reproduce it with `llc`. 253 | That means the runtime code we injected is crashing, not LLVM itself. 
Most likely, it's because `MATCHER_TABLE_SIZE` is set too small, and an OOB write happened. 254 | 255 | __My mutator aborted during fuzzing?__ 256 | 257 | This is a common issue; it's not a bug in the mutator. 258 | Most likely you didn't set the types correctly. 259 | If the mutator can't find a typed value to complete an instruction generation, it aborts. 260 | Therefore, it is important to provide all the types when creating the mutator. 261 | 262 | The mutator is non-deterministic, so debugging is hard. 263 | But here's a trick: the mutator is deterministic if the seed is the same. 264 | If your fuzzer crashed, go find the `.cur_input` in your repo; this is the last input the mutator worked on before it crashed. 265 | Use `./mutator/scripts/validate.sh .cur_input` to verify the mutator with this input. 266 | The script will (hopefully) give you the seed that crashed the mutator. 267 | You can then debug the mutator by providing it with the deterministic seed that the validator just popped out: `./mutator/build/MutatorDriver .cur_input <seed>`. 268 | If you can confirm that the last frame in the stack trace is `SourcePred.generate`, that's it: you didn't provide all the types required. 269 | If you see any other reasons for crashing, contact me. 270 | 271 | Also, when the mutator dies, the fuzzer becomes a zombie process; don't forget to clean it up :) -------------------------------------------------------------------------------- /scripts/fuzz.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from pathlib import Path 3 | import subprocess 4 | from typing import Iterable, Literal, NamedTuple, Optional 5 | import typing 6 | import os 7 | from tap import Tap 8 | import docker 9 | from time import sleep 10 | 11 | from collect_seeds import TargetProp, collect_seeds_from_tests 12 | from lib.process_concurrency import MAX_SUBPROCESSES, run_concurrent_subprocesses 13 | from lib.target import Target 14 | from lib.matcher_table_sizes import ( 15 | DAGISEL_MATCHER_TABLE_SIZES, 16 | GISEL_MATCHER_TABLE_SIZES, 17 | ) 18 | from lib.target_lists import TARGET_LISTS 19 | from lib.time_parser import get_time_in_seconds 20 | 21 | 22 | class FuzzerConfig(NamedTuple): 23 | extra_env: dict[str, str] 24 | extra_cmd: list[str] = [] 25 | 26 | def getIRFuzzer(other_env: dict[str, str] = {}, other_cmd: list[str] = []): 27 | extra_env = { 28 | "AFL_CUSTOM_MUTATOR_ONLY": "1", 29 | "AFL_CUSTOM_MUTATOR_LIBRARY": "mutator/build/libAFLCustomIRMutator.so", 30 | } 31 | extra_env.update(other_env) 32 | return FuzzerConfig( 33 | extra_env=extra_env, 34 | extra_cmd=other_cmd, 35 | ) 36 | 37 | 38 | Fuzzer = Literal["aflplusplus", "libfuzzer", "irfuzzer"] 39 | ISel = Literal["dagisel", "gisel"] 40 | ClutserType = Literal["screen", "docker", "stdout"] 41 | 42 | 43 | DOCKER_IMAGE = "irfuzzer" 44 | FUZZERS: dict[str, FuzzerConfig] = { 45 | "aflplusplus": FuzzerConfig(extra_env={"AFL_CUSTOM_MUTATOR_ONLY": "0"}), 46 | "libfuzzer": FuzzerConfig( 47 | extra_env={ 48 | "AFL_CUSTOM_MUTATOR_ONLY": "1", 49 | "AFL_CUSTOM_MUTATOR_LIBRARY": "mutator/build/libAFLFuzzMutate.so", 50 | }, 51 | ), 52 | "irfuzzer": FuzzerConfig.getIRFuzzer(other_cmd=[" -w"]), 53 | } 54 | # Check Fuzzer and FUZZERS match. 
55 | assert list(FUZZERS.keys()) == list( 56 | typing.get_args(Fuzzer) 57 | ), "FUZZERS and Fuzzer don't match" 58 | 59 | 60 | class ExperimentConfig(NamedTuple): 61 | fuzzer: Fuzzer 62 | target: Target 63 | isel: ISel 64 | seed_dir: Path 65 | expr_root: Path 66 | time: int 67 | replicate_id: int 68 | 69 | @property 70 | def name(self) -> str: 71 | return f"{self.fuzzer}:{self.isel}:{self.target}:{self.replicate_id}" 72 | 73 | @property 74 | def matcher_table_size(self) -> Optional[int]: 75 | matcher_table_sizes = ( 76 | GISEL_MATCHER_TABLE_SIZES 77 | if self.isel == "gisel" 78 | else DAGISEL_MATCHER_TABLE_SIZES 79 | ) 80 | 81 | backend = self.target.backend 82 | 83 | if backend not in matcher_table_sizes: 84 | return None 85 | 86 | return matcher_table_sizes[backend] 87 | 88 | def get_fuzzing_env(self) -> dict[str, str]: 89 | envs = { 90 | "TRIPLE": str(self.target.triple), 91 | "CPU": self.target.cpu if self.target.cpu else "", 92 | "ATTR": ",".join(self.target.attrs), 93 | "GLOBAL_ISEL": "1" if self.isel == "gisel" else "0", 94 | "MATCHER_TABLE_SIZE": str(self.matcher_table_size), 95 | } 96 | envs.update(FUZZERS[self.fuzzer].extra_env) 97 | return envs 98 | 99 | def get_fuzzing_command(self, output_dir: str | Path) -> str: 100 | cmd = [ 101 | "$AFL/afl-fuzz", 102 | "-V", 103 | str(self.time), 104 | "-i", 105 | str(self.seed_dir), 106 | "-o", 107 | str(output_dir), 108 | ] 109 | 110 | cmd += FUZZERS[self.fuzzer].extra_cmd 111 | 112 | cmd.append("llvm-isel-afl/build/isel-fuzzing") 113 | 114 | return " ".join(cmd) 115 | 116 | def get_output_dir(self) -> Path: 117 | return self.expr_root.joinpath( 118 | self.fuzzer, 119 | self.isel, 120 | str(self.target), 121 | str(self.replicate_id), 122 | ) 123 | 124 | 125 | class Args(Tap): 126 | """ 127 | Command-line Arguments 128 | (Reference: https://github.com/swansonk14/typed-argument-parser) 129 | """ 130 | 131 | fuzzers: list[Fuzzer] = ["irfuzzer"] 132 | """the fuzzer used for fuzzing""" 133 | 134 | seeds: str 135 | """ 136 | the directory containing input seeds for fuzzing (if 'seeding-from-tests' flag is not set) 137 | or the directory to store the seeds collected from tests (if 'seeding-from-tests' flag is set) 138 | """ 139 | 140 | seeding_from_tests: bool = False 141 | """whether to use tests as seeds for fuzzing""" 142 | 143 | props_to_match: list[TargetProp] = ["triple", "cpu", "attrs"] 144 | """ 145 | the properties of a test target to match those of the fuzzing target, 146 | used to determine which tests should be included as seeds. 147 | (if 'seeding_from_tests' flag is not set, this option as no effect) 148 | """ 149 | 150 | timeout: Optional[float] = 0.1 151 | """ 152 | only include test cases that can be compiled within the specified in seconds. 153 | (if 'seeding_from_tests' flag is not set, this option has no effect) 154 | """ 155 | 156 | output: str = "./fuzzing" 157 | """the output directory""" 158 | 159 | on_exist: Literal["abort", "force", "ignore"] = "abort" 160 | """the action to take if the output directory already exists""" 161 | 162 | isel: ISel = "dagisel" 163 | """the LLVM instruction selection method to fuzz""" 164 | 165 | target_lists: Optional[list[str]] = None 166 | """ 167 | the name(s) of pre-defined list(s) of targets 168 | (see 'lib/target_lists.py' for details) 169 | (can be overriden by `--targets`) 170 | """ 171 | 172 | targets: Optional[list[str]] = None 173 | """ 174 | manually specify targets to fuzz ('tier' will be ignored). 
175 | Format for each target can be 176 | " [] [ ...]", 177 | " [] [,,...]", or 178 | "[,][,,,...]". 179 | (An attribute must start with '+' or '-' to avoid ambiguity.) 180 | """ 181 | 182 | time: str = "5m" 183 | """duration for each experiment (e.g. '100s', '30m', '2h', '1d')""" 184 | 185 | repeat: int = 1 186 | """how many times each experiemt should run""" 187 | 188 | offset: int = 0 189 | """the offset to start counting experiments""" 190 | 191 | jobs: int = MAX_SUBPROCESSES 192 | """the max number of concurrent subprocesses""" 193 | 194 | type: Optional[ClutserType] = None 195 | """the method to start fuzzing cluster""" 196 | 197 | def configure(self): 198 | self.add_argument("-j", "--jobs") 199 | self.add_argument("-o", "--output") 200 | self.add_argument("-r", "--repeat") 201 | self.add_argument("-t", "--time") 202 | 203 | def get_fuzzing_targets(self) -> list[Target]: 204 | if self.target_lists is not None: 205 | return [target for key in self.target_lists for target in TARGET_LISTS[key]] 206 | elif self.targets is not None: 207 | return [Target.parse(s) for s in self.targets] 208 | else: 209 | logging.error("Either '--tier' or '--set' has to be specified.") 210 | exit(1) 211 | 212 | def get_time_in_seconds(self) -> int: 213 | return get_time_in_seconds(self.time) 214 | 215 | 216 | def get_experiment_configs( 217 | fuzzers: list[Fuzzer], 218 | isel: ISel, 219 | targets: list[Target], 220 | time: int, 221 | repeat: int, 222 | offset: int, 223 | seed_dir: Path, 224 | expr_root: Path, 225 | seeding_from_tests: bool, 226 | props_to_match: list[TargetProp], 227 | compilation_timout_secs: Optional[float], 228 | ) -> Iterable[ExperimentConfig]: 229 | for fuzzer in fuzzers: 230 | for target in targets: 231 | expr_seed_dir = seed_dir 232 | 233 | if seeding_from_tests: 234 | expr_seed_dir = collect_seeds_from_tests( 235 | target=target, 236 | global_isel=isel == "gisel", 237 | out_dir_parent=seed_dir, 238 | props_to_match=props_to_match, 239 | dump_bc=True, 240 | symlink_to_ll=False, 241 | timeout_secs=compilation_timout_secs, 242 | ) 243 | 244 | for r in range(repeat): 245 | expr_config = ExperimentConfig( 246 | fuzzer=fuzzer, 247 | target=target, 248 | isel=isel, 249 | seed_dir=expr_seed_dir, 250 | expr_root=expr_root, 251 | time=time, 252 | replicate_id=r + offset, 253 | ) 254 | 255 | if expr_config.matcher_table_size is None: 256 | logging.warn( 257 | f"Can't find matcher table size for target '{expr_config.target}', not fuzzing" 258 | ) 259 | continue 260 | 261 | yield expr_config 262 | 263 | 264 | def combine_commands(*commands: str) -> str: 265 | return " && ".join(commands) 266 | 267 | 268 | def batch_fuzz_using_docker( 269 | experiment_configs: list[ExperimentConfig], 270 | jobs: int, 271 | ) -> None: 272 | """ 273 | Run each experiment inside a dedicated Docker container. 
274 | (Docker Python SDK Reference: https://docker-py.readthedocs.io/en/stable/) 275 | """ 276 | 277 | client = docker.client.from_env() 278 | container_queue = [] 279 | 280 | def dequeue_and_wait(): 281 | dequeued_container = container_queue.pop(0) # FIFO 282 | if dequeued_container.status != "exited": 283 | dequeued_container.wait() 284 | 285 | for i, experiment in enumerate(experiment_configs): 286 | if len(container_queue) == jobs: 287 | dequeue_and_wait() 288 | 289 | logging.info(f"Starting experiment {experiment.name}...") 290 | 291 | seed_dir = experiment.seed_dir 292 | out_dir = experiment.get_output_dir() 293 | out_dir.mkdir(parents=True) 294 | 295 | container = client.containers.run( 296 | image=DOCKER_IMAGE, 297 | command=[ 298 | "bash", 299 | "-c", 300 | combine_commands( 301 | # Docker is responsible for core binding, 302 | # if AFL_NO_AFFINITY is not set, fuzzer will fail to start 303 | "export AFL_NO_AFFINITY=1", 304 | experiment.get_fuzzing_command("/fuzzing"), 305 | f"chown -R {os.getuid()} /fuzzing/default", 306 | "mv /fuzzing/default /output/default", 307 | ), 308 | ], 309 | remove=True, 310 | detach=True, 311 | name=experiment.name.replace("+", "").replace(",", "-").replace(":", "-"), 312 | environment=experiment.get_fuzzing_env(), 313 | cpuset_cpus=str(i % jobs), # core binding 314 | tmpfs={"/fuzzing": "size=1G"}, 315 | volumes=[ 316 | f"{seed_dir.absolute()}:{seed_dir.absolute()}", 317 | f"{out_dir.absolute()}:/output", 318 | ], 319 | ) 320 | 321 | container_queue.append(container) 322 | 323 | # wait for all running containers to exit 324 | while len(container_queue) > 0: 325 | dequeue_and_wait() 326 | 327 | 328 | def batch_fuzz( 329 | experiment_configs: list[ExperimentConfig], 330 | type: ClutserType, 331 | jobs: int, 332 | ) -> None: 333 | if type == "docker": 334 | batch_fuzz_using_docker(experiment_configs, jobs) 335 | return 336 | 337 | def start_subprocess(experiment: ExperimentConfig) -> subprocess.Popen: 338 | logging.info(f"Starting experiment {experiment.name}...") 339 | 340 | out_dir = experiment.get_output_dir() 341 | out_dir.mkdir(parents=True) 342 | 343 | env = experiment.get_fuzzing_env() 344 | 345 | if type == "stdout": 346 | env["AFL_NO_UI"] = "1" 347 | 348 | fuzzing_command = experiment.get_fuzzing_command(out_dir) 349 | 350 | if type == "screen": 351 | # If using screen, this script will not be able to detect whether the fuzzing process fails early or did not 352 | # complete within the estimated time. 353 | fuzzing_command = f'screen -S "{experiment.name}" -dm bash -c "{fuzzing_command}" && sleep {experiment.time + 180}' 354 | 355 | process = subprocess.Popen( 356 | fuzzing_command, 357 | env={**os.environ, **env}, 358 | shell=True, 359 | stdin=subprocess.PIPE, 360 | stdout=subprocess.DEVNULL, 361 | ) 362 | 363 | # Sleep for 1s so aflplusplus has time to bind core. Otherwise two fuzzers may bind to the same core. 
364 | sleep(1) 365 | 366 | return process 367 | 368 | run_concurrent_subprocesses( 369 | iter=experiment_configs, 370 | subprocess_creator=start_subprocess, 371 | on_exit=lambda expr_cfg, exit_code, p: print( 372 | f"Experiment {expr_cfg.name} exited with code {exit_code}" 373 | ), 374 | max_jobs=jobs, 375 | ) 376 | 377 | 378 | def fuzz(expr_config: ExperimentConfig) -> int: 379 | out_dir = expr_config.get_output_dir() 380 | out_dir.mkdir(parents=True) 381 | 382 | process = subprocess.run( 383 | expr_config.get_fuzzing_command(out_dir), 384 | env={**os.environ, **expr_config.get_fuzzing_env()}, 385 | shell=True, 386 | ) 387 | 388 | print(f"Fuzzing process exited with code {process.returncode}.") 389 | return process.returncode 390 | 391 | 392 | def main() -> None: 393 | args = Args(underscores_to_dashes=True).parse_args() 394 | 395 | out_root = Path(args.output) 396 | if out_root.exists(): 397 | logging.info(f"{args.output} already exists.") 398 | if args.on_exist == "force": 399 | logging.info(f"'on-exist' set to {args.on_exist}, will force remove") 400 | subprocess.run(["rm", "-rf", out_root]) 401 | elif args.on_exist == "abort": 402 | logging.error(f"'on-exist' set to {args.on_exist}, won't work on it.") 403 | exit(1) 404 | 405 | expr_configs = list( 406 | get_experiment_configs( 407 | fuzzers=args.fuzzers, 408 | isel=args.isel, 409 | targets=args.get_fuzzing_targets(), 410 | time=args.get_time_in_seconds(), 411 | repeat=args.repeat, 412 | offset=args.offset, 413 | seed_dir=Path(args.seeds), 414 | expr_root=out_root, 415 | seeding_from_tests=args.seeding_from_tests, 416 | props_to_match=args.props_to_match, 417 | compilation_timout_secs=args.timeout, 418 | ) 419 | ) 420 | 421 | # Pause for some seconds before starting. 422 | start_pause = 5 423 | print( 424 | f"\nThe following {len(expr_configs)} experiment(s) will start in {start_pause} seconds:\n" 425 | ) 426 | for expr in expr_configs: 427 | print(f" - {expr.name}") 428 | print() 429 | 430 | sleep(start_pause) 431 | 432 | if len(expr_configs) == 1 and args.type is None: 433 | exit(fuzz(expr_config=expr_configs[0])) 434 | elif args.type is None: 435 | logging.error( 436 | "'--type' must be specified when running multiple fuzzing experiments" 437 | ) 438 | else: 439 | batch_fuzz( 440 | experiment_configs=expr_configs, 441 | type=args.type, 442 | jobs=args.jobs, 443 | ) 444 | 445 | 446 | if __name__ == "__main__": 447 | logging.basicConfig() 448 | logging.getLogger().setLevel(logging.INFO) 449 | main() 450 | --------------------------------------------------------------------------------