├── mypy.ini ├── requirements.txt ├── VERSIONS.py ├── .dockerignore ├── .gitignore ├── test_preprocessed.py ├── callchain_checker ├── src │ ├── CMakeLists.txt │ ├── CallChainChecker.hpp │ ├── CallChainCheckerTool.cpp │ └── CallChainChecker.cpp └── CMakeLists.txt ├── LICENSE ├── .gitlab-ci.yml ├── patches ├── gcc-libsanitizer.sh ├── gcc-simple-object-declaration.patch ├── llvm-MicrosoftDemangleNodes-missing-includes.patch ├── llvm-MicrosoftDemangleNodes.h-fix-includes.patch ├── gcc-fix-simple-object-decl-and-use-in-gcc-lto.patch ├── llvm-GCOpenMPRuntime.cpp-lambda-issues.patch └── gcc-ustat.patch ├── git-hooks └── pre-commit ├── .github └── workflows │ └── lint-python.yaml ├── run_parallel.sh ├── Dockerfile ├── dockerconfig.json ├── preprocessing.py ├── init.py ├── bugs.md ├── reducer.py ├── README.md ├── generator.py ├── bisector.py ├── checker.py ├── database.py └── parsers.py /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | strict = True 3 | 4 | files = 5 | *.py 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dead-instrumenter==0.1.0 2 | ccbuilder==0.0.9 3 | requests>=2.27.1 4 | -------------------------------------------------------------------------------- /VERSIONS.py: -------------------------------------------------------------------------------- 1 | instrumenter_version = 0 2 | generator_version = 0 3 | bisector_version = 0 4 | reducer_version = 0 5 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ./gcc 2 | ./llvm-project 3 | ./logs 4 | ./compiler_cache 5 | ./docker_storage 6 | 7 | ./dce_instrumenter/build/ 8 | ./callchain_checker/build/ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.tar 3 | *.c 4 | *.s 5 | *.o 6 | *.log 7 | *.sqlite3 8 | *.bak 9 | platform.info 10 | callchain_checker/build/ 11 | dce_instrumenter/build/ 12 | gcc/ 13 | llvm-project/ 14 | compiler_cache/ 15 | logs/ 16 | docker_storage/ 17 | -------------------------------------------------------------------------------- /test_preprocessed.py: -------------------------------------------------------------------------------- 1 | import preprocessing 2 | 3 | 4 | def test_extern_removal() -> None: 5 | with open("./gcc_preprocessed_code.c", "r") as f: 6 | lines = f.read().split("\n") 7 | 8 | with open("./preprocessed_oracle.c", "r") as f: 9 | oracle = f.read() 10 | assert oracle == preprocessing.preprocess_lines(lines).strip() 11 | -------------------------------------------------------------------------------- /callchain_checker/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(ccc-lib CallChainChecker.cpp) 2 | target_include_directories(ccc-lib PUBLIC ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) 3 | target_link_libraries(ccc-lib PUBLIC clang-cpp Boost::headers) 4 | 5 | add_executable(ccc CallChainCheckerTool.cpp) 6 | target_include_directories(ccc PRIVATE ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) 7 | target_link_libraries(ccc PUBLIC ccc-lib LLVM) 8 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright 2022 Yann Girsberger, Theodoros Theodoridis 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: "python:3.9" 2 | 3 | before_script: 4 | - python --version 5 | - pip3 install -r requirements_hook.txt 6 | 7 | stages: 8 | - Printing Environment 9 | - Format Checking 10 | - Static Type Checking 11 | - Static Analysis 12 | 13 | print_env: 14 | stage: Printing Environment 15 | script: 16 | - pwd 17 | - ls -l 18 | 19 | black: 20 | stage: Format Checking 21 | script: 22 | - python3 -m black --check *.py 23 | 24 | mypy: 25 | stage: Static Type Checking 26 | script: 27 | - python3 -m mypy --strict *.py 28 | 29 | pytype: 30 | stage: Static Analysis 31 | script: 32 | - python3 -m pytype *.py 33 | -------------------------------------------------------------------------------- /patches/gcc-libsanitizer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | OLD=libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 3 | NEW=libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp 4 | 5 | INPLACE="-i" 6 | if [ "$1" = "--check" ]; then 7 | INPLACE="" 8 | fi 9 | 10 | 11 | if [ -f "$OLD" ]; then 12 | #https://stackoverflow.com/a/15966279 13 | sed '/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/{s//\/\/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/;h};${x;/./{x;q0};x;q1}' \ 14 | $INPLACE $OLD > /dev/null 15 | elif [ -f "$NEW" ]; then 16 | sed '/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/{s//\/\/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/;h};${x;/./{x;q0};x;q1}' \ 17 | $INPLACE $NEW > /dev/null 18 | else 19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /git-hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | CHANGED_FILES=$(git diff --cached --name-only --diff-filter=ACM -- '*.py') 4 | REF_SHEBANG="#!/usr/bin/env python3" 5 | 6 | if [[ -n "$CHANGED_FILES" ]]; then 7 | for f in $CHANGED_FILES; do 8 | SHEBANG=$(head -n 1 "$f" | awk '{ gsub(/^[ \t\n]+|[ \t\n]+$/, ""); print}') 9 | if [[ ( -x $f ) && ( "$SHEBANG" != "$REF_SHEBANG" ) ]]; then 10 | echo "Shebang of $f doesn't match $REF_SHEBANG" 11 | exit 1 12 | fi 13 | done 14 | 15 | isort --profile black $CHANGED_FILES 16 | black --target-version py39 --safe $CHANGED_FILES 17 | git add $CHANGED_FILES 18 | 19 | mypy --strict *.py 20 | pytype -j 10 *.py 21 | 22 | cat $CHANGED_FILES | awk '/pdb.set_trace/ || /import pdb/ || /reveal_type/ { print $0; f=1 } END { exit f }' 23 | 24 | fi 25 | -------------------------------------------------------------------------------- /patches/gcc-simple-object-declaration.patch: -------------------------------------------------------------------------------- 1 | From
9d1ebb15d6cbabe767ae28a86c15f63a1ba2851f Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Wed, 2 Feb 2022 11:19:26 +0100 4 | Subject: [PATCH] simple-object declaration 5 | 6 | --- 7 | include/simple-object.h | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/include/simple-object.h b/include/simple-object.h 11 | index db72f86de17..82f4722782a 100644 12 | --- a/include/simple-object.h 13 | +++ b/include/simple-object.h 14 | @@ -203,7 +203,7 @@ simple_object_release_write (simple_object_write *); 15 | extern const char * 16 | simple_object_copy_lto_debug_sections (simple_object_read *src_object, 17 | const char *dest, 18 | - int *err); 19 | + int *err, int rename); 20 | 21 | #ifdef __cplusplus 22 | } 23 | -- 24 | 2.34.1 25 | 26 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainChecker.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace ccc { 6 | 7 | struct CallPair { 8 | const std::string Caller; 9 | const std::string Callee; 10 | 11 | CallPair(const std::string &Caller, const std::string &Callee) 12 | : Caller{Caller}, Callee{Callee} {} 13 | }; 14 | 15 | bool callChainExists(const std::vector<CallPair> &Calls, std::string From, 16 | std::string To); 17 | 18 | class CallChainCollector 19 | : public clang::ast_matchers::MatchFinder::MatchCallback { 20 | public: 21 | CallChainCollector(std::vector<CallPair> &Calls) : Calls{Calls} {} 22 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 23 | void 24 | run(const clang::ast_matchers::MatchFinder::MatchResult &Result) override; 25 | 26 | private: 27 | std::vector<CallPair> &Calls; 28 | }; 29 | 30 | } // namespace ccc 31 | -------------------------------------------------------------------------------- /patches/llvm-MicrosoftDemangleNodes-missing-includes.patch: -------------------------------------------------------------------------------- 1 | From 0ca677077d621fa1d3ca1f0334b71a154ca1e35c Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 13:28:30 +0100 4 | Subject: [PATCH] MicrosoftDemangleNodes missing includes 5 | 6 | --- 7 | llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 2 ++ 8 | 1 file changed, 2 insertions(+) 9 | 10 | diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 11 | index da9d9d5bfdc0..3d47471f0ef0 100644 12 | --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 13 | +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 14 | @@ -16,6 +16,8 @@ 15 | #include "llvm/Demangle/DemangleConfig.h" 16 | #include "llvm/Demangle/StringView.h" 17 | #include 18 | +#include 19 | +#include 20 | 21 | namespace llvm { 22 | namespace itanium_demangle { 23 | -- 24 | 2.34.1 25 | 26 | -------------------------------------------------------------------------------- /.github/workflows/lint-python.yaml: -------------------------------------------------------------------------------- 1 | on: [ push, pull_request ] 2 | name: Lint Python and Format 3 | jobs: 4 | mypy: 5 | runs-on: ubuntu-latest 6 | container: 7 | image: python:3.10 8 | steps: 9 | - uses: actions/checkout@v2 10 | - run: pip3 install -r requirements.txt mypy 11 | - run: mypy --install-types --non-interactive 12 | - name: run mypy 13 | run: python3 -m mypy --strict *.py 14 | black: 15 | runs-on: ubuntu-latest 16 | container: 17 | image: python:3.10 18 | steps: 19 | - uses: actions/checkout@v2 20 | - run:
pip3 install -r requirements.txt 'black<=22.12.0' 21 | - name: Check formatting with black 22 | run: python3 -m black --check *.py 23 | pytest: 24 | runs-on: ubuntu-latest 25 | container: 26 | image: python:3.10 27 | steps: 28 | - uses: actions/checkout@v2 29 | - run: pip3 install -r requirements.txt pytest 30 | - name: run pytest 31 | run: python3 -m pytest 32 | -------------------------------------------------------------------------------- /patches/llvm-MicrosoftDemangleNodes.h-fix-includes.patch: -------------------------------------------------------------------------------- 1 | From 902e0e717fcb1796f540d4fea95b010ee821caa8 Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 14:13:13 +0100 4 | Subject: [PATCH] MicrosoftDemangleNodes.h fix includes 5 | 6 | --- 7 | llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 4 +++- 8 | 1 file changed, 3 insertions(+), 1 deletion(-) 9 | 10 | diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 11 | index 9e3478e9fd29..f54e8d161e04 100644 12 | --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 13 | +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 14 | @@ -4,6 +4,8 @@ 15 | #include "llvm/Demangle/Compiler.h" 16 | #include "llvm/Demangle/StringView.h" 17 | #include 18 | +#include 19 | +#include 20 | 21 | class OutputStream; 22 | 23 | @@ -602,4 +604,4 @@ struct FunctionSymbolNode : public SymbolNode { 24 | } // namespace ms_demangle 25 | } // namespace llvm 26 | 27 | -#endif 28 | \ No newline at end of file 29 | +#endif 30 | -- 31 | 2.34.1 32 | 33 | -------------------------------------------------------------------------------- /callchain_checker/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | 3 | project(callchain_checker C CXX) 4 | 5 | find_package(Boost REQUIRED) 6 | message(STATUS "Found Boost ${Boost_VERSION_STRING}") 7 | 8 | find_package(LLVM REQUIRED CONFIG) 9 | 10 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 11 | message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}") 12 | 13 | find_package(Clang REQUIRED CONFIG) 14 | message(STATUS "Using ClangConfig.cmake in ${Clang_DIR}") 15 | 16 | if (NOT CMAKE_BUILD_TYPE) 17 | set(CMAKE_BUILD_TYPE Release) 18 | endif () 19 | 20 | set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wpedantic -march=native") 21 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 22 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 23 | set(CMAKE_CXX_STANDARD 17) 24 | 25 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 26 | add_compile_options (-fdiagnostics-color=always) 27 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 28 | add_compile_options (-fcolor-diagnostics) 29 | endif () 30 | 31 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 32 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 33 | 34 | add_subdirectory(src) 35 | -------------------------------------------------------------------------------- /patches/gcc-fix-simple-object-decl-and-use-in-gcc-lto.patch: -------------------------------------------------------------------------------- 1 | From b44637d9aac854de180e9125fcacb504a1c38aef Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 4 Feb 2022 11:15:44 +0100 4 | Subject: [PATCH] Fix simple-object decl and use in gcc-lto 5 | 6 | --- 7 | gcc/lto-wrapper.c | 2 +- 8 | include/simple-object.h | 2 +- 9 | 2 files changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/gcc/lto-wrapper.c
b/gcc/lto-wrapper.c 12 | index 7de58d47781..6759597a849 100644 13 | --- a/gcc/lto-wrapper.c 14 | +++ b/gcc/lto-wrapper.c 15 | @@ -1008,7 +1008,7 @@ debug_objcopy (const char *infile) 16 | } 17 | 18 | outfile = make_temp_file ("debugobjtem"); 19 | - errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err); 20 | + errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, 0); 21 | if (errmsg) 22 | { 23 | unlink_if_ordinary (outfile); 24 | diff --git a/include/simple-object.h b/include/simple-object.h 25 | index db72f86de17..82f4722782a 100644 26 | --- a/include/simple-object.h 27 | +++ b/include/simple-object.h 28 | @@ -203,7 +203,7 @@ simple_object_release_write (simple_object_write *); 29 | extern const char * 30 | simple_object_copy_lto_debug_sections (simple_object_read *src_object, 31 | const char *dest, 32 | - int *err); 33 | + int *err, int rename); 34 | 35 | #ifdef __cplusplus 36 | } 37 | -- 38 | 2.34.1 39 | 40 | -------------------------------------------------------------------------------- /run_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function run_gcc(){ 4 | ./main.py -ll info --cores $CORES_PER_JOB run \ 5 | --no-parallel-generation\ 6 | -t gcc trunk 1 2 3 s\ 7 | -ac gcc releases/gcc-11.2.0\ 8 | gcc releases/gcc-10.3.0\ 9 | gcc releases/gcc-9.4.0\ 10 | gcc releases/gcc-8.5.0\ 11 | gcc releases/gcc-7.5.0\ 12 | -acdol 1 2 3 s\ 13 | --no-reducer &>> split_$1.log 14 | } 15 | 16 | function run_llvm(){ 17 | 18 | # Don't compile LLVM with less than 8 cores. 19 | if [[ $CORES_PER_JOB -lt 8 ]]; then 20 | CORES_PER_JOB=8 21 | fi 22 | ./main.py -ll info --cores $CORES_PER_JOB run \ 23 | --no-parallel-generation\ 24 | -t llvm trunk 1 2 3 s z\ 25 | -ac llvm llvmorg-13.0.1\ 26 | llvm llvmorg-12.0.1\ 27 | llvm llvmorg-11.1.0\ 28 | llvm llvmorg-10.0.1\ 29 | llvm llvmorg-7.1.0\ 30 | llvm llvmorg-6.0.1\ 31 | llvm llvmorg-5.0.2\ 32 | llvm llvmorg-4.0.1\ 33 | -acdol 1 2 3 s z\ 34 | --no-reducer &>> split_$1.log 35 | } 36 | 37 | export -f run_llvm 38 | export -f run_gcc 39 | 40 | PROJECT=$1 41 | TOTAL_CORES=$2 42 | JOBS=$3 43 | export CORES_PER_JOB=$(expr $TOTAL_CORES / $JOBS) 44 | 45 | if [ $PROJECT = "llvm" ]; then 46 | RUN_CMD='run_llvm "{}"' 47 | elif [ $PROJECT = "clang" ]; then 48 | RUN_CMD='run_llvm "{}"' 49 | else 50 | RUN_CMD='run_gcc "{}"' 51 | fi 52 | 53 | seq $JOBS | xargs --max-procs=$JOBS -I {} bash -c "$RUN_CMD" 54 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainCheckerTool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "CallChainChecker.hpp" 8 | 9 | using namespace llvm; 10 | using namespace clang; 11 | using namespace clang::tooling; 12 | 13 | using namespace ccc; 14 | 15 | namespace { 16 | cl::OptionCategory CCCOptions("call-chain-checker options"); 17 | cl::opt<std::string> From("from", cl::desc("Beginning of call chain."), 18 | cl::value_desc("function name"), cl::cat(CCCOptions)); 19 | cl::opt<std::string> To("to", cl::desc("End of call chain."), 20 | cl::value_desc("function name"), cl::cat(CCCOptions)); 21 | 22 | } // namespace 23 | 24 | int main(int argc, const char **argv) { 25 | auto ExpectedParser = 26 | CommonOptionsParser::create(argc, argv, CCCOptions); 27 | if (!ExpectedParser) { 28 | llvm::errs() << ExpectedParser.takeError(); 29 | return 1; 30 | } 31 | CommonOptionsParser &OptionsParser = ExpectedParser.get(); 32 |
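// A hypothetical invocation sketch (the file and function names below are
// illustrative, not fixed values):
//   ./callchain_checker/build/bin/ccc --from main --to foo code.c --
// prints whether a static call chain from `main` to `foo` exists in code.c.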
33 | ClangTool Tool(OptionsParser.getCompilations(), 34 | OptionsParser.getSourcePathList()); 35 | 36 | std::vector<CallPair> Calls; 37 | CallChainCollector CCC{Calls}; 38 | ast_matchers::MatchFinder Finder; 39 | CCC.registerMatchers(Finder); 40 | auto ret = Tool.run(newFrontendActionFactory(&Finder).get()); 41 | if (ret != 0) 42 | return ret; 43 | if (callChainExists(Calls, From, To)) 44 | outs() << "call chain exists between " << From << " -> " << To << '\n'; 45 | else 46 | outs() << "no call chain between " << From << " -> " << To << '\n'; 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /patches/llvm-GCOpenMPRuntime.cpp-lambda-issues.patch: -------------------------------------------------------------------------------- 1 | From 6cce61ba38772cf4e4a9c0a389032b947e5ab71b Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 15:19:38 +0100 4 | Subject: [PATCH] GCOpenMPRuntime.cpp lambda issues 5 | 6 | --- 7 | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++--- 8 | 1 file changed, 3 insertions(+), 3 deletions(-) 9 | 10 | diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp 11 | index 40252171368b..40a73ef7429e 100644 12 | --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 13 | +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp 14 | @@ -6271,7 +6271,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( 15 | // Generate the code for the opening of the data environment. Capture all the 16 | // arguments of the runtime call by reference because they are used in the 17 | // closing of the region. 18 | - auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( 19 | + auto &&BeginThenGen = [&D, Device, &Info, &CodeGen, &NoPrivAction]( 20 | CodeGenFunction &CGF, PrePostActionTy &) { 21 | // Fill up the arrays with all the mapped variables. 22 | MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 23 | @@ -6318,7 +6318,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( 24 | }; 25 | 26 | // Generate code for the closing of the data region. 27 | - auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, 28 | + auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, 29 | PrePostActionTy &) { 30 | assert(Info.isValid() && "Invalid data environment closing arguments."); 31 | 32 | @@ -6397,7 +6397,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( 33 | "Expecting either target enter, exit data, or update directives."); 34 | 35 | // Generate the code for the opening of the data environment. 36 | - auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { 37 | + auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 38 | // Fill up the arrays with all the mapped variables.
39 | MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 40 | MappableExprsHandler::MapValuesArrayTy Pointers; 41 | -- 42 | 2.34.1 43 | 44 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM archlinux:latest 2 | 3 | RUN pacman -Syyu --noconfirm --noprogressbar &&\ 4 | pacman -S --noconfirm --needed --noprogressbar base-devel 5 | 6 | # Adding user 7 | RUN /usr/sbin/groupadd --system sudo && \ 8 | /usr/sbin/useradd --create-home \ 9 | --groups sudo \ 10 | --uid 1337 --user-group \ 11 | dead && \ 12 | /usr/sbin/sed -i -e "s/Defaults requiretty.*/ #Defaults requiretty/g" /etc/sudoers && \ 13 | /usr/sbin/echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 14 | 15 | USER dead 16 | WORKDIR /home/dead 17 | 18 | # Installing yay 11.1.1 19 | RUN sudo pacman -S --noconfirm --noprogressbar git 20 | RUN git clone https://aur.archlinux.org/yay.git &&\ 21 | cd yay &&\ 22 | git checkout cdf06b6781263e24d98754a99d70857aa959f691 &&\ 23 | makepkg -si --noconfirm --noprogressbar 24 | RUN rm -r yay/ 25 | 26 | # Installing dependencies 27 | # These need compilation 28 | RUN yay -S --noconfirm --noprogressbar csmith\ 29 | creduce-git\ 30 | compcert 31 | 32 | # These don't 33 | RUN yay -S --noconfirm --noprogressbar python\ 34 | python-pip\ 35 | gcc\ 36 | clang\ 37 | llvm\ 38 | compiler-rt\ 39 | cmake\ 40 | boost\ 41 | ninja\ 42 | entr 43 | 44 | COPY --chown=dead callchain_checker/ ./callchain_checker/ 45 | 46 | RUN mkdir /home/dead/callchain_checker/build/ &&\ 47 | cd /home/dead/callchain_checker/build/ &&\ 48 | cmake .. &&\ 49 | make -j 50 | 51 | COPY requirements.txt . 52 | RUN python3 -m pip install -r requirements.txt 53 | 54 | RUN mkdir /home/dead/.config/dead/ 55 | 56 | RUN python3 -c 'from pathlib import Path; from dead_instrumenter import utils; utils.make_config(Path.home() / ".config/dead/instrumenter.json", True)' 57 | 58 | COPY dockerconfig.json /home/dead/.config/dead/config.json 59 | 60 | COPY --chown=dead *.py /home/dead/ 61 | COPY --chown=dead patches/ /home/dead/patches/ 62 | 63 | COPY --chown=dead ./run_parallel.sh /home/dead/run_parallel.sh 64 | 65 | ENV PATH="/home/dead/.local/bin:${PATH}" 66 | -------------------------------------------------------------------------------- /dockerconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "gcc": { 3 | "name": "gcc", 4 | "main_branch": "master", 5 | "repo": "/persistent/gcc", 6 | "sane_version": "gcc", 7 | "releases": [ 8 | "trunk", 9 | "releases/gcc-11.2.0", 10 | "releases/gcc-11.1.0", 11 | "releases/gcc-10.3.0", 12 | "releases/gcc-10.2.0", 13 | "releases/gcc-10.1.0", 14 | "releases/gcc-9.4.0", 15 | "releases/gcc-9.3.0", 16 | "releases/gcc-9.2.0", 17 | "releases/gcc-9.1.0", 18 | "releases/gcc-8.5.0", 19 | "releases/gcc-8.4.0", 20 | "releases/gcc-8.3.0", 21 | "releases/gcc-8.2.0", 22 | "releases/gcc-8.1.0", 23 | "releases/gcc-7.5.0", 24 | "releases/gcc-7.4.0", 25 | "releases/gcc-7.3.0", 26 | "releases/gcc-7.2.0" 27 | ] 28 | }, 29 | "llvm": { 30 | "name": "clang", 31 | "main_branch": "main", 32 | "repo": "/persistent/llvm-project", 33 | "sane_version": "clang", 34 | "releases": [ 35 | "trunk", 36 | "llvmorg-13.0.1", 37 | "llvmorg-13.0.0", 38 | "llvmorg-12.0.1", 39 | "llvmorg-12.0.0", 40 | "llvmorg-11.1.0", 41 | "llvmorg-11.0.1", 42 | "llvmorg-11.0.0", 43 | "llvmorg-10.0.1", 44 | "llvmorg-10.0.0", 45 | "llvmorg-9.0.1", 46 | "llvmorg-9.0.0", 47 | "llvmorg-8.0.1", 48 
| "llvmorg-8.0.0", 49 | "llvmorg-7.1.0", 50 | "llvmorg-7.0.1", 51 | "llvmorg-7.0.0", 52 | "llvmorg-6.0.1", 53 | "llvmorg-6.0.0", 54 | "llvmorg-5.0.2", 55 | "llvmorg-5.0.1", 56 | "llvmorg-5.0.0", 57 | "llvmorg-4.0.1", 58 | "llvmorg-4.0.0" 59 | ] 60 | }, 61 | "repodir": "/persistent", 62 | "csmith": { 63 | "max_size": 50000, 64 | "min_size": 10000, 65 | "executable": "csmith", 66 | "include_path": "/usr/include/csmith-2.3.0" 67 | }, 68 | "dcei": "dead-instrument", 69 | "ccc": "./callchain_checker/build/bin/ccc", 70 | "patchdb": "/persistent/patchdb.json", 71 | "logdir": "/persistent/logs", 72 | "cachedir": "/persistent/compiler_cache", 73 | "creduce": "creduce", 74 | "ccomp": "ccomp", 75 | "casedb": "/persistent/casedb.sqlite3" 76 | } 77 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainChecker.cpp: -------------------------------------------------------------------------------- 1 | #include "CallChainChecker.hpp" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace clang; 14 | using namespace clang::ast_matchers; 15 | 16 | using namespace boost; 17 | 18 | using StaticCallGraph = adjacency_list; 19 | 20 | namespace ccc { 21 | 22 | bool callChainExists(const std::vector &Calls, std::string From, 23 | std::string To) { 24 | std::unordered_map FunctionToIdx; 25 | size_t idx = 0; 26 | StaticCallGraph SCG; 27 | for (const auto &[Caller, Callee] : Calls) { 28 | if (not FunctionToIdx.count(Caller)) { 29 | SCG.added_vertex(idx); 30 | FunctionToIdx[Caller] = idx++; 31 | } 32 | if (not FunctionToIdx.count(Callee)) { 33 | SCG.added_vertex(idx); 34 | FunctionToIdx[Callee] = idx++; 35 | } 36 | boost::add_edge(FunctionToIdx[Caller], FunctionToIdx[Callee], SCG); 37 | } 38 | if (not FunctionToIdx.count(From)) { 39 | llvm::errs() << From << " is not part of the call graph\n"; 40 | return false; 41 | } 42 | if (not FunctionToIdx.count(To)) { 43 | llvm::errs() << To << " is not part of the call graph\n"; 44 | return false; 45 | } 46 | 47 | std::vector Colors(num_vertices(SCG)); 48 | iterator_property_map ColorMap(Colors.begin(), 49 | boost::get(boost::vertex_index, SCG)); 50 | breadth_first_search(SCG, FunctionToIdx.at(From), color_map(ColorMap)); 51 | return Colors[FunctionToIdx.at(To)] == default_color_type::black_color; 52 | } 53 | 54 | void CallChainCollector::registerMatchers( 55 | clang::ast_matchers::MatchFinder &Finder) { 56 | Finder.addMatcher(callExpr(clang::ast_matchers::isExpansionInMainFile(), 57 | callee(functionDecl().bind("callee")), 58 | hasAncestor(functionDecl().bind("caller"))), 59 | this); 60 | } 61 | 62 | void CallChainCollector::run( 63 | const clang::ast_matchers::MatchFinder::MatchResult &Result) { 64 | if (const auto *Callee = Result.Nodes.getNodeAs("callee")) 65 | if (const auto *Caller = Result.Nodes.getNodeAs("caller")) 66 | Calls.emplace_back(Caller->getNameAsString(), 67 | Callee->getNameAsString()); 68 | } 69 | 70 | } // namespace ccc 71 | -------------------------------------------------------------------------------- /patches/gcc-ustat.patch: -------------------------------------------------------------------------------- 1 | From 61f38c64c01a15560026115a157b7021ec67bd3b Mon Sep 17 00:00:00 2001 2 | From: hjl 3 | Date: Thu, 24 May 2018 20:21:54 +0000 4 | Subject: [PATCH] libsanitizer: Use pre-computed size of struct ustat for Linux 5 | 6 | Cherry-pick compiler-rt revision 333213: 7 | 8 | has been removed from glibc 2.28 by: 9 | 10 | 
commit cf2478d53ad7071e84c724a986b56fe17f4f4ca7 11 | Author: Adhemerval Zanella 12 | Date: Sun Mar 18 11:28:59 2018 +0800 13 | 14 | Deprecate ustat syscall interface 15 | 16 | This patch uses pre-computed size of struct ustat for Linux. 17 | 18 | PR sanitizer/85835 19 | * sanitizer_common/sanitizer_platform_limits_posix.cc: Don't 20 | include <sys/ustat.h> for Linux. 21 | (SIZEOF_STRUCT_USTAT): New. 22 | (struct_ustat_sz): Use SIZEOF_STRUCT_USTAT for Linux. 23 | 24 | 25 | 26 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@260688 138bc75d-0d04-0410-961f-82ee72b054a4 27 | 28 | Downloaded from upstream commit 29 | https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=61f38c64c01a15560026115a157b7021ec67bd3b 30 | 31 | Signed-off-by: Bernd Kuhls 32 | --- 33 | libsanitizer/ChangeLog | 8 ++++++++ 34 | .../sanitizer_common/sanitizer_platform_limits_posix.cc | 15 +++++++++++++-- 35 | 2 files changed, 21 insertions(+), 2 deletions(-) 36 | 37 | diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 38 | index 31a5e69..8017afd 100644 39 | --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 40 | +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 41 | @@ -154,7 +154,6 @@ typedef struct user_fpregs elf_fpregset_t; 42 | # include 43 | #endif 44 | #include 45 | -#include <sys/ustat.h> 46 | #include 47 | #include 48 | #include 49 | @@ -247,7 +246,19 @@ namespace __sanitizer { 50 | #endif // SANITIZER_LINUX || SANITIZER_FREEBSD 51 | 52 | #if SANITIZER_LINUX && !SANITIZER_ANDROID 53 | - unsigned struct_ustat_sz = sizeof(struct ustat); 54 | + // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which 55 | + // has been removed from glibc 2.28. 56 | +#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ 57 | + || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ 58 | + || defined(__x86_64__) 59 | +#define SIZEOF_STRUCT_USTAT 32 60 | +#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ 61 | + || defined(__powerpc__) || defined(__s390__) 62 | +#define SIZEOF_STRUCT_USTAT 20 63 | +#else 64 | +#error Unknown size of struct ustat 65 | +#endif 66 | + unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT; 67 | unsigned struct_rlimit64_sz = sizeof(struct rlimit64); 68 | unsigned struct_statvfs64_sz = sizeof(struct statvfs64); 69 | #endif // SANITIZER_LINUX && !SANITIZER_ANDROID 70 | -- 71 | 2.9.3 72 | 73 | -------------------------------------------------------------------------------- /preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shutil 4 | import tempfile 5 | from pathlib import Path 6 | from typing import Iterable, Optional 7 | 8 | from ccbuilder import Builder 9 | 10 | import utils 11 | 12 | """ 13 | Functions to preprocess code for creduce. 14 | See creduce --help to see what it wants.
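A hedged usage sketch (the marker prefix, compiler setting, and builder
below are illustrative placeholders, not fixed values):

    pp_code = preprocess_csmith_code(code, "DCEMarker", compiler_setting, bldr)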
15 | """ 16 | 17 | 18 | class PreprocessError(Exception): 19 | pass 20 | 21 | 22 | def find_marker_decl_range(lines: list[str], marker_prefix: str) -> tuple[int, int]: 23 | p = re.compile(rf"void {marker_prefix}(.*)\(void\);") 24 | first = 0 25 | for i, line in enumerate(lines): 26 | if p.match(line): 27 | first = i 28 | break 29 | last = first + 1 30 | for i, line in enumerate(lines[first + 1 :], start=first + 1): 31 | if p.match(line): 32 | continue 33 | else: 34 | last = i 35 | break 36 | return first, last 37 | 38 | 39 | def find_platform_main_end(lines: Iterable[str]) -> Optional[int]: 40 | p = re.compile(r".*platform_main_end.*") 41 | for i, line in enumerate(lines): 42 | if p.match(line): 43 | return i 44 | return None 45 | 46 | 47 | def remove_platform_main_begin(lines: Iterable[str]) -> list[str]: 48 | p = re.compile(r".*platform_main_begin.*") 49 | return [line for line in lines if not p.match(line)] 50 | 51 | 52 | def remove_print_hash_value(lines: Iterable[str]) -> list[str]: 53 | p = re.compile(r".*print_hash_value = 1.*") 54 | return [line for line in lines if not p.match(line)] 55 | 56 | 57 | def preprocess_lines(lines: list[str]) -> str: 58 | start_patterns = [ 59 | re.compile(r"^extern.*"), 60 | re.compile(r"^typedef.*"), 61 | re.compile(r"^struct.*"), 62 | # The following patterns are to catch if the last of the previous 63 | # patterns in the file was tainted and we'd otherwise mark the rest 64 | # of the file as tainted, as we'll find no end in this case. 65 | re.compile(r"^static.*"), 66 | re.compile(r"^void.*"), 67 | ] 68 | taint_patterns = [ 69 | re.compile(r".*__access__.*"), # LLVM doesn't know about this 70 | re.compile(r".*__malloc__.*"), 71 | re.compile( 72 | r".*_[F|f]loat[0-9]{1,3}x{0,1}.*" 73 | ), # https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html#Floating-Types 74 | re.compile(r".*__asm__.*"), # CompCert has problems 75 | ] 76 | 77 | def is_start(l: str) -> bool: 78 | return any([p_start.match(l) for p_start in start_patterns]) 79 | 80 | lines_to_skip: list[int] = [] 81 | for i, line in enumerate(lines): 82 | for p in taint_patterns: 83 | if p.match(line): 84 | # Searching for start of tainted region 85 | up_i = i 86 | up_line = lines[up_i] 87 | while up_i > 0 and not is_start(up_line): 88 | up_i -= 1 89 | up_line = lines[up_i] 90 | 91 | # Searching for end of tainted region 92 | down_i = i + 1 93 | down_line = lines[down_i] 94 | while down_i < len(lines) and not is_start(down_line): 95 | down_i += 1 96 | down_line = lines[down_i] 97 | 98 | lines_to_skip.extend(list(range(up_i, down_i))) 99 | 100 | return "\n".join([line for i, line in enumerate(lines) if i not in lines_to_skip]) 101 | 102 | 103 | def preprocess_csmith_file( 104 | path: os.PathLike[str], 105 | marker_prefix: str, 106 | compiler_setting: utils.CompilerSetting, 107 | bldr: Builder, 108 | ) -> str: 109 | 110 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 111 | shutil.copy(path, tf.name) 112 | 113 | additional_flags = ( 114 | [] 115 | if compiler_setting.additional_flags is None 116 | else compiler_setting.additional_flags 117 | ) 118 | cmd = [ 119 | str(utils.get_compiler_executable(compiler_setting, bldr)), 120 | tf.name, 121 | "-P", 122 | "-E", 123 | ] + additional_flags 124 | lines = utils.run_cmd(cmd).split("\n") 125 | 126 | return preprocess_lines(lines) 127 | 128 | 129 | def preprocess_csmith_code( 130 | code: str, 131 | marker_prefix: str, 132 | compiler_setting: utils.CompilerSetting, 133 | bldr: Builder, 134 | ) -> Optional[str]: 135 | """Will *try* to preprocess code as 
if it comes from csmith. 136 | 137 | Args: 138 | code (str): code to preprocess 139 | marker_prefix (str): Marker prefix 140 | compiler_setting (utils.CompilerSetting): Setting to preprocess with 141 | bldr (builder.Builder): 142 | 143 | Returns: 144 | Optional[str]: preprocessed code if it was able to preprocess it. 145 | """ 146 | tf = utils.save_to_tmp_file(code) 147 | try: 148 | res = preprocess_csmith_file( 149 | Path(tf.name), marker_prefix, compiler_setting, bldr 150 | ) 151 | return res 152 | except PreprocessError: 153 | return None 154 | -------------------------------------------------------------------------------- /init.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import grp 4 | import json 5 | import os 6 | import shutil 7 | import stat 8 | from pathlib import Path 9 | from typing import Any 10 | 11 | from dead_instrumenter.utils import Binary, find_binary 12 | 13 | import utils 14 | 15 | 16 | def main() -> None: 17 | print( 18 | "Have you installed the following programs/projects: llvm, clang, compiler-rt, gcc, cmake, ccomp, csmith and creduce?" 19 | ) 20 | print("Press enter to continue if you believe you have") 21 | input() 22 | 23 | not_found = [] 24 | for p in ["clang", "gcc", "cmake", "ccomp", "csmith", "creduce"]: 25 | if not shutil.which(p): 26 | not_found.append(p) 27 | 28 | if not_found: 29 | print("Can't find", " ".join(not_found), " in $PATH.") 30 | 31 | if not Path("/usr/include/llvm/").exists(): 32 | print("Can't find /usr/include/llvm/") 33 | not_found.append("kill") 34 | 35 | if not_found: 36 | exit(1) 37 | 38 | print("Creating default ~/.config/dead/config.json...") 39 | 40 | path = Path.home() / ".config/dead/config.json" 41 | if path.exists(): 42 | print(f"{path} already exists! Aborting to prevent overriding data...") 43 | exit(1) 44 | 45 | config: dict[Any, Any] = {} 46 | # ====== GCC ====== 47 | gcc: dict[str, Any] = {} 48 | gcc["name"] = "gcc" 49 | gcc["main_branch"] = "master" 50 | 51 | # Git clone repo 52 | print("Cloning gcc to ./gcc ...") 53 | if not Path("./gcc").exists(): 54 | utils.run_cmd("git clone git://gcc.gnu.org/git/gcc.git") 55 | gcc["repo"] = "./gcc" 56 | 57 | if shutil.which("gcc"): 58 | gcc["sane_version"] = "gcc" 59 | else: 60 | gcc["sane_version"] = "???" 61 | print( 62 | "gcc is not in $PATH, you have to specify the executable yourself in gcc.sane_version" 63 | ) 64 | 65 | gcc["releases"] = [ 66 | "trunk", 67 | "releases/gcc-12.1.0", 68 | "releases/gcc-11.3.0", 69 | "releases/gcc-11.2.0", 70 | "releases/gcc-11.1.0", 71 | "releases/gcc-10.3.0", 72 | "releases/gcc-10.2.0", 73 | "releases/gcc-10.1.0", 74 | "releases/gcc-9.4.0", 75 | "releases/gcc-9.3.0", 76 | "releases/gcc-9.2.0", 77 | "releases/gcc-9.1.0", 78 | "releases/gcc-8.5.0", 79 | "releases/gcc-8.4.0", 80 | "releases/gcc-8.3.0", 81 | "releases/gcc-8.2.0", 82 | "releases/gcc-8.1.0", 83 | "releases/gcc-7.5.0", 84 | "releases/gcc-7.4.0", 85 | "releases/gcc-7.3.0", 86 | "releases/gcc-7.2.0", 87 | ] 88 | config["gcc"] = gcc 89 | 90 | # ====== LLVM ====== 91 | llvm: dict[str, Any] = {} 92 | llvm["name"] = "clang" 93 | llvm["main_branch"] = "main" 94 | 95 | # Git clone repo 96 | print("Cloning llvm to ./llvm-project ...") 97 | if not Path("./llvm-project").exists(): 98 | utils.run_cmd("git clone https://github.com/llvm/llvm-project") 99 | llvm["repo"] = "./llvm-project" 100 | 101 | if shutil.which("clang"): 102 | llvm["sane_version"] = "clang" 103 | else: 104 | llvm["sane_version"] = "???" 
105 | print( 106 | "clang is not in $PATH, you have to specify the executable yourself in llvm.sane_version" 107 | ) 108 | 109 | llvm["releases"] = [ 110 | "trunk", 111 | "llvmorg-14.0.3", 112 | "llvmorg-14.0.2", 113 | "llvmorg-14.0.1", 114 | "llvmorg-14.0.0", 115 | "llvmorg-13.0.1", 116 | "llvmorg-13.0.0", 117 | "llvmorg-12.0.1", 118 | "llvmorg-12.0.0", 119 | "llvmorg-11.1.0", 120 | "llvmorg-11.0.1", 121 | "llvmorg-11.0.0", 122 | "llvmorg-10.0.1", 123 | "llvmorg-10.0.0", 124 | "llvmorg-9.0.1", 125 | "llvmorg-9.0.0", 126 | "llvmorg-8.0.1", 127 | "llvmorg-8.0.0", 128 | "llvmorg-7.1.0", 129 | "llvmorg-7.0.1", 130 | "llvmorg-7.0.0", 131 | "llvmorg-6.0.1", 132 | "llvmorg-6.0.0", 133 | "llvmorg-5.0.2", 134 | "llvmorg-5.0.1", 135 | "llvmorg-5.0.0", 136 | "llvmorg-4.0.1", 137 | "llvmorg-4.0.0", 138 | ] 139 | 140 | config["llvm"] = llvm 141 | 142 | config["repodir"] = str(Path(os.getcwd()).absolute()) 143 | # ====== CSmith ====== 144 | csmith: dict[str, Any] = {} 145 | csmith["max_size"] = 50000 146 | csmith["min_size"] = 10000 147 | if shutil.which("csmith"): 148 | csmith["executable"] = "csmith" 149 | res = utils.run_cmd("csmith --version") 150 | # $ csmith --version csmith 2.3.0 151 | # Git version: 30dccd7 152 | version = res.split("\n")[0].split()[1] 153 | if Path("/usr/include/csmith").exists(): 154 | csmith["include_path"] = "/usr/include/csmith" 155 | else: 156 | csmith["include_path"] = "/usr/include/csmith-" + version 157 | else: 158 | print( 159 | "Can't find csmith in $PATH. You have to specify the executable and the include path yourself" 160 | ) 161 | csmith["executable"] = "???" 162 | csmith["include_path"] = "???" 163 | config["csmith"] = csmith 164 | 165 | # ====== Cpp programs ====== 166 | 167 | print("Building instrumenter...") 168 | find_binary(Binary.INSTRUMENTER, no_questions=True) 169 | config["dcei"] = "dead-instrument" 170 | 171 | print("Compiling callchain checker (ccc)...") 172 | os.makedirs("./callchain_checker/build", exist_ok=True) 173 | utils.run_cmd("cmake ..", working_dir=Path("./callchain_checker/build/")) 174 | utils.run_cmd("make -j", working_dir=Path("./callchain_checker/build/")) 175 | config["ccc"] = "./callchain_checker/build/bin/ccc" 176 | 177 | # ====== Rest ====== 178 | config["patchdb"] = "./patches/patchdb.json" 179 | 180 | os.makedirs("logs", exist_ok=True) 181 | config["logdir"] = "./logs" 182 | 183 | os.makedirs("compiler_cache", exist_ok=True) 184 | os.chmod("compiler_cache", 0o770 | stat.S_ISGID) 185 | config["cachedir"] = "./compiler_cache" 186 | 187 | config["creduce"] = "creduce" 188 | if not shutil.which("creduce"): 189 | print( 190 | "creduce was not found in $PATH. You have to specify the executable yourself" 191 | ) 192 | config["creduce"] = "???" 193 | 194 | config["ccomp"] = "ccomp" 195 | if not shutil.which("ccomp"): 196 | print( 197 | "ccomp was not found in $PATH. You have to specify the executable yourself" 198 | ) 199 | config["ccomp"] = "???" 
200 | 201 | config["casedb"] = "./casedb.sqlite3" 202 | 203 | Path(config["casedb"]).touch() 204 | os.chmod(config["casedb"], 0o660) 205 | 206 | print("Saving config...") 207 | os.makedirs(path.parent, exist_ok=True) 208 | with open(path, "w") as f: 209 | json.dump(config, f, indent=4) 210 | 211 | print("Done!") 212 | 213 | 214 | if __name__ == "__main__": 215 | main() 216 | -------------------------------------------------------------------------------- /bugs.md: -------------------------------------------------------------------------------- 1 | ### GCC 2 | - [99357](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99357) 3 | - [99373](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99373) 4 | - [99419](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99419) 5 | - [99428](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99428) 6 | - ~~[99776](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99776)~~ Fixed 7 | - [99788](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99788) 8 | - ~~[99793](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99793)~~ Fixed 9 | - [99834](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99834) 10 | - [99835](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99835) 11 | - ~~[99986](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99986)~~ Duplicate 12 | - [99987](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99987) 13 | - [99991](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99991) 14 | - [99993](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99993) 15 | - [100033](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100033) 16 | - [100034](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100034) 17 | - [100036](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100036) 18 | - ~~[100050](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100050)~~ Duplicate 19 | - ~~[100051](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100051)~~ Fixed 20 | - [100080](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100080) 21 | - [100082](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100082) 22 | - [100095](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100095) 23 | - [100100](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100100) 24 | - ~~[100112](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100112)~~ Fixed 25 | - [100113](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100113) 26 | - [100145](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100145) 27 | - [100162](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100162) 28 | - [100188](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100188) 29 | - [100191](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100191) 30 | - [100220](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100220) 31 | - ~~[100221](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100221)~~ Fixed 32 | - [100314](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100314) 33 | - ~~[100315](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100315)~~ Invalid 34 | - ~~[100359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100359)~~ Fixed 35 | - ~~[102540](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102540)~~ Fixed 36 | - ~~[102546](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102546)~~ Fixed 37 | - ~~[102648](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102648)~~ Fixed 38 | - ~~[102650](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102650)~~ Fixed 39 | - ~~[102703](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102703)~~ Fixed 40 | - [102705](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102705) 41 | - [102879](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102879) 42 | - ~~[102880](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102880)~~ Fixed 43 | - 
~~[102892](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102892)~~ ~~Fixed~~ Reopened 44 | - ~~[102895](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102895)~~ Duplicate 45 | - ~~[103280](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103280)~~ Duplicate 46 | - [103281](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103281) 47 | - ~~[102950](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102950)~~ Fixed 48 | - [102981](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102981) 49 | - [102982](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102982) 50 | - ~~[102983](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102983)~~ Fixed 51 | - ~~[103257](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103257)~~ Fixed 52 | - [103388](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103388) 53 | - ~~[103359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103359)~~ Fixed 54 | - ~~[104526](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104526)~~ Fixed 55 | - ~~[104530](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104530)~~ Fixed 56 | - [105086](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105086) 57 | - [105832](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105832) 58 | - [105833](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105833) 59 | - [105834](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105834) 60 | - ~~[105835](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105835)~~ Fixed 61 | - [107822](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107822) 62 | - [107823](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107823) 63 | - [108351](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108351) 64 | - ~~[108352](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108352)~~ Fixed 65 | - ~~[108353](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108353)~~ Fixed 66 | - [108354](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108354) 67 | - [108355](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108355) 68 | - [108356](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108356) 69 | - [108357](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108357) 70 | - [108358](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108358) 71 | - [108359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108359) 72 | - [108360](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108360) 73 | - [108368](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108368) 74 | 75 | 76 | ### LLVM 77 | - [49434](https://bugs.llvm.org/show_bug.cgi?id=49434) 78 | - [49436](https://bugs.llvm.org/show_bug.cgi?id=49436) 79 | - [49457](https://bugs.llvm.org/show_bug.cgi?id=49457) 80 | - ~~[49731](https://bugs.llvm.org/show_bug.cgi?id=49731)~~ Fixed 81 | - [49773](https://bugs.llvm.org/show_bug.cgi?id=49773) 82 | - [49775](https://bugs.llvm.org/show_bug.cgi?id=49775) 83 | - [49776](https://bugs.llvm.org/show_bug.cgi?id=49776) 84 | - [51090](https://bugs.llvm.org/show_bug.cgi?id=51090) 85 | - [51136](https://bugs.llvm.org/show_bug.cgi?id=51136) 86 | - [51137](https://bugs.llvm.org/show_bug.cgi?id=51137) 87 | - [51138](https://bugs.llvm.org/show_bug.cgi?id=51138) 88 | - ~~[52535](https://github.com/llvm/llvm-project/issues/51877)~~ Fixed 89 | - [51139](https://bugs.llvm.org/show_bug.cgi?id=51139) 90 | - [51140](https://bugs.llvm.org/show_bug.cgi?id=51140) 91 | - ~~[51141](https://bugs.llvm.org/show_bug.cgi?id=51141)~~ Fixed 92 | - ~~[52078](https://bugs.llvm.org/show_bug.cgi?id=52078)~~ Fixed 93 | - ~~[52102](https://bugs.llvm.org/show_bug.cgi?id=52102)~~ Fixed 94 | - ~~[52253](https://bugs.llvm.org/show_bug.cgi?id=52253)~~ Fixed 95 | - [52255](https://bugs.llvm.org/show_bug.cgi?id=52255) 96 | - 
~~[52260](https://bugs.llvm.org/show_bug.cgi?id=52260)~~ Fixed 97 | - ~~[52261](https://bugs.llvm.org/show_bug.cgi?id=52261)~~ Fixed 98 | - ~~[52289](https://bugs.llvm.org/show_bug.cgi?id=52289)~~ Fixed 99 | - [52347](https://bugs.llvm.org/show_bug.cgi?id=52347) 100 | - ~~[52543](https://bugs.llvm.org/show_bug.cgi?id=52543)~~ Fixed 101 | - [52580](https://bugs.llvm.org/show_bug.cgi?id=52580) 102 | - ~~[52592](https://bugs.llvm.org/show_bug.cgi?id=52592)~~ Fixed 103 | - ~~[51444](https://github.com/llvm/llvm-project/issues/51444)~~ Fixed 104 | - [51688](https://github.com/llvm/llvm-project/issues/51688) 105 | - ~~[52525](https://github.com/llvm/llvm-project/issues/52525)~~ Fixed 106 | - ~~[52965](https://github.com/llvm/llvm-project/issues/52965)~~ Fixed 107 | - ~~[53130](https://github.com/llvm/llvm-project/issues/53130)~~ Fixed 108 | - ~~[53131](https://github.com/llvm/llvm-project/issues/53131)~~ Fixed 109 | - ~~[53316](https://github.com/llvm/llvm-project/issues/53316)~~ Fixed 110 | - [53320](https://github.com/llvm/llvm-project/issues/53320) 111 | - ~~[53317](https://github.com/llvm/llvm-project/issues/53317)~~ Fixed 112 | - [53318](https://github.com/llvm/llvm-project/issues/53318) 113 | - [53322](https://github.com/llvm/llvm-project/issues/53322) 114 | - [53384](https://github.com/llvm/llvm-project/issues/53384) 115 | - [53385](https://github.com/llvm/llvm-project/issues/53385) 116 | - [53321](https://github.com/llvm/llvm-project/issues/53321) 117 | - ~~[53319](https://github.com/llvm/llvm-project/issues/53319)~~ Fixed 118 | - ~~[54980](https://github.com/llvm/llvm-project/issues/54980)~~ Fixed 119 | - ~~[56046](https://github.com/llvm/llvm-project/issues/56046)~~ Fixed 120 | - ~~[56048](https://github.com/llvm/llvm-project/issues/56048)~~ Fixed 121 | - [56049](https://github.com/llvm/llvm-project/issues/56049) 122 | - [56118](https://github.com/llvm/llvm-project/issues/56118) 123 | - ~~[56119](https://github.com/llvm/llvm-project/issues/56119)~~ Fixed 124 | - [56120](https://github.com/llvm/llvm-project/issues/56120) 125 | - [56761](https://github.com/llvm/llvm-project/issues/56761) 126 | - [56762](https://github.com/llvm/llvm-project/issues/56762) 127 | -------------------------------------------------------------------------------- /reducer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import logging 5 | import os 6 | import random 7 | import shutil 8 | import subprocess 9 | import tarfile 10 | import tempfile 11 | import time 12 | from copy import copy 13 | from dataclasses import dataclass 14 | from pathlib import Path 15 | from types import TracebackType 16 | from typing import Any, Optional 17 | 18 | import ccbuilder 19 | 20 | from ccbuilder.utils.utils import select_repo 21 | from ccbuilder import ( 22 | Builder, 23 | BuildException, 24 | CompilerProject, 25 | PatchDB, 26 | Repo, 27 | get_compiler_info, 28 | ) 29 | 30 | import generator 31 | import parsers 32 | import preprocessing 33 | import utils 34 | 35 | 36 | # ==================== Reducer ==================== 37 | class TempDirEnv: 38 | def __init__(self) -> None: 39 | self.td: tempfile.TemporaryDirectory[str] 40 | 41 | def __enter__(self) -> Path: 42 | self.td = tempfile.TemporaryDirectory() 43 | tempfile.tempdir = self.td.name 44 | return Path(self.td.name) 45 | 46 | def
__exit__( 47 | self, 48 | exc_type: Optional[type[BaseException]], 49 | exc_value: Optional[BaseException], 50 | exc_traceback: Optional[TracebackType], 51 | ) -> None: 52 | tempfile.tempdir = None 53 | 54 | 55 | @dataclass 56 | class Reducer: 57 | config: utils.NestedNamespace 58 | bldr: Builder 59 | 60 | def reduce_file(self, file: Path, force: bool = False) -> bool: 61 | """Reduce a case given in the .tar format. 62 | Interface for `reduced_code`. 63 | 64 | Args: 65 | file (Path): Path to .tar case. 66 | force (bool): Force a reduction (even if the case is already reduced). 67 | Returns: 68 | bool: If the reduction was successful. 69 | """ 70 | case = utils.Case.from_file(self.config, file) 71 | 72 | if self.reduce_case(case, force=force): 73 | case.to_file(file) 74 | return True 75 | return False 76 | 77 | def reduce_case(self, case: utils.Case, force: bool = False) -> bool: 78 | """Reduce a case. 79 | 80 | Args: 81 | case (utils.Case): Case to reduce. 82 | force (bool): Force a reduction (even if the case is already reduced). 83 | 84 | Returns: 85 | bool: If the reduction was successful. 86 | """ 87 | if not force and case.reduced_code: 88 | 89 | return True 90 | 91 | case.reduced_code = self.reduce_code( 92 | case.code, case.marker, case.bad_setting, case.good_settings, case.bisection 93 | ) 94 | return bool(case.reduced_code) 95 | 96 | def reduce_code( 97 | self, 98 | code: str, 99 | marker: str, 100 | bad_setting: utils.CompilerSetting, 101 | good_settings: list[utils.CompilerSetting], 102 | bisection: Optional[str] = None, 103 | preprocess: bool = True, 104 | ) -> Optional[str]: 105 | """Reduce given code w.r.t. `marker` 106 | 107 | Args: 108 | code (str): 109 | marker (str): Marker which exhibits the interesting behaviour. 110 | bad_setting (utils.CompilerSetting): Setting which can not eliminate the marker. 111 | good_settings (list[utils.CompilerSetting]): Settings which can eliminate the marker. 112 | bisection (Optional[str]): if present the reducer will also check for the bisection 113 | preprocess (bool): Whether or not to run the code through preprocessing. 114 | 115 | Returns: 116 | Optional[str]: Reduced code, if successful. 117 | """ 118 | 119 | bad_settings = [bad_setting] 120 | if bisection: 121 | bad_settings.append(copy(bad_setting)) 122 | bad_settings[-1].rev = bisection 123 | repo = select_repo( 124 | bad_setting.compiler_project, 125 | llvm_repo=self.bldr.llvm_repo, 126 | gcc_repo=self.bldr.gcc_repo, 127 | ) 128 | good_settings = good_settings + [copy(bad_setting)] 129 | good_settings[-1].rev = repo.rev_to_commit(f"{bisection}~") 130 | 131 | # creduce likes to kill unfinished processes with SIGKILL 132 | # so they can't clean up after themselves. 
133 | # Temporarily set a dedicated temporary directory so creduce is able to 134 | # clean up everything 135 | with TempDirEnv() as tmpdir: 136 | 137 | # preprocess file 138 | if preprocess: 139 | tmp = preprocessing.preprocess_csmith_code( 140 | code, 141 | utils.get_marker_prefix(marker), 142 | bad_setting, 143 | self.bldr, 144 | ) 145 | # Preprocessing may fail 146 | pp_code = tmp if tmp else code 147 | 148 | else: 149 | pp_code = code 150 | 151 | pp_code_path = tmpdir / "code_pp.c" 152 | with open(pp_code_path, "w") as f: 153 | f.write(pp_code) 154 | 155 | # save interesting_settings 156 | settings_path = tmpdir / "interesting_settings.json" 157 | 158 | int_settings: dict[str, Any] = {} 159 | int_settings["bad_settings"] = [ 160 | bs.to_jsonable_dict() for bs in bad_settings 161 | ] 162 | int_settings["good_settings"] = [ 163 | gs.to_jsonable_dict() for gs in good_settings 164 | ] 165 | with open(settings_path, "w") as f: 166 | json.dump(int_settings, f) 167 | 168 | # create script for creduce 169 | script_path = tmpdir / "check.sh" 170 | with open(script_path, "w") as f: 171 | print("#!/bin/sh", file=f) 172 | print("TMPD=$(mktemp -d)", file=f) 173 | print("trap '{ rm -rf \"$TMPD\"; }' INT TERM EXIT", file=f) 174 | print( 175 | "timeout 15 " 176 | f"{Path(__file__).parent.resolve()}/checker.py" 177 | f" --dont-preprocess" 178 | f" --config {self.config.config_path}" 179 | f" --marker {marker}" 180 | f" --interesting-settings {str(settings_path)}" 181 | f" --file code_pp.c", 182 | # f' --file {str(pp_code_path)}', 183 | file=f, 184 | ) 185 | 186 | os.chmod(script_path, 0o777) 187 | # run creduce 188 | creduce_cmd = [ 189 | self.config.creduce, 190 | "--n", 191 | f"{self.bldr.jobs}", 192 | str(script_path.name), 193 | str(pp_code_path.name), 194 | ] 195 | 196 | try: 197 | current_time = time.strftime("%Y%m%d-%H%M%S") 198 | build_log_path = ( 199 | Path(self.config.logdir) 200 | / f"{current_time}-creduce-{random.randint(0,1000)}.log" 201 | ) 202 | build_log_path.touch() 203 | # Set permissions of logfile 204 | os.chmod(build_log_path, 0o660) 205 | logging.info(f"creduce logfile at {build_log_path}") 206 | with open(build_log_path, "a") as build_log: 207 | utils.run_cmd_to_logfile( 208 | creduce_cmd, log_file=build_log, working_dir=Path(tmpdir) 209 | ) 210 | except subprocess.CalledProcessError as e: 211 | logging.info(f"Failed to process code.
Exception: {e}") 212 | return None 213 | 214 | # save result in tar 215 | with open(pp_code_path, "r") as f: 216 | reduced_code = f.read() 217 | 218 | return reduced_code 219 | 220 | 221 | if __name__ == "__main__": 222 | config, args = utils.get_config_and_parser(parsers.reducer_parser()) 223 | 224 | patchdb = PatchDB(Path(config.patchdb)) 225 | _, llvm_repo = get_compiler_info("llvm", Path(config.repodir)) 226 | _, gcc_repo = get_compiler_info("gcc", Path(config.repodir)) 227 | bldr = Builder( 228 | Path(config.cachedir), 229 | gcc_repo, 230 | llvm_repo, 231 | patchdb, 232 | args.cores, 233 | logdir=Path(config.logdir), 234 | ) 235 | gnrtr = generator.CSmithCaseGenerator(config, patchdb) 236 | rdcr = Reducer(config, bldr) 237 | 238 | if args.work_through: 239 | if args.output_directory is None: 240 | print("Missing output/work-through directory!") 241 | exit(1) 242 | else: 243 | output_dir = Path(os.path.abspath(args.output_directory)) 244 | os.makedirs(output_dir, exist_ok=True) 245 | 246 | tars = [ 247 | output_dir / d 248 | for d in os.listdir(output_dir) 249 | if tarfile.is_tarfile(output_dir / d) 250 | ] 251 | 252 | print(f"Processing {len(tars)} tars") 253 | for tf in tars: 254 | print(f"Processing {tf}") 255 | try: 256 | rdcr.reduce_file(tf, args.force) 257 | except BuildException as e: 258 | print(f"{e}") 259 | 260 | # if (We want to generate something and not only reduce a file) 261 | if args.generate: 262 | if args.output_directory is None: 263 | print("Missing output directory!") 264 | exit(1) 265 | else: 266 | output_dir = os.path.abspath(args.output_directory) 267 | os.makedirs(output_dir, exist_ok=True) 268 | 269 | scenario = utils.Scenario([], []) 270 | # When file is specified, use scenario of file as base 271 | if args.file: 272 | file = Path(args.file).absolute() 273 | scenario = utils.Case.from_file(config, file).scenario 274 | 275 | tmp = utils.get_scenario(config, args) 276 | if tmp.target_settings: 277 | scenario.target_settings = tmp.target_settings 278 | if tmp.attacker_settings: 279 | scenario.attacker_settings = tmp.attacker_settings 280 | 281 | gen = gnrtr.parallel_interesting_case_file( 282 | config, scenario, bldr.jobs, output_dir, start_stop=True 283 | ) 284 | if args.amount == 0: 285 | while True: 286 | path = next(gen) 287 | try: 288 | rdcr.reduce_file(path) 289 | except BuildException as e: 290 | print(f"{e}") 291 | else: 292 | for i in range(args.amount): 293 | path = next(gen) 294 | try: 295 | rdcr.reduce_file(path) 296 | except BuildException as e: 297 | print(f"{e}") 298 | 299 | elif not args.work_through: 300 | if not args.file: 301 | print( 302 | "--file is needed when just checking a single file. Have you forgotten to set --generate?"
303 |             )
304 |             exit(1)
305 |         file = Path(args.file).absolute()
306 |         if args.re_reduce:
307 |             case = utils.Case.from_file(config, file)
308 |             if not case.reduced_code:
309 |                 print("No reduced code available...")
310 |                 exit(1)
311 |             print(f"BEFORE\n{case.reduced_code}")
312 |             if reduce_code := rdcr.reduce_code(
313 |                 case.reduced_code,
314 |                 case.marker,
315 |                 case.bad_setting,
316 |                 case.good_settings,
317 |                 case.bisection,
318 |                 preprocess=False,
319 |             ):
320 |                 case.reduced_code = reduce_code
321 |                 print(f"AFTER\n{case.reduced_code}")
322 |                 case.to_file(file)
323 |         else:
324 |             if rdcr.reduce_file(file, args.force):
325 |                 print(file)
326 | 
327 |     gnrtr.terminate_processes()
328 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DEAD: Dead Code Elimination based Automatic Differential Testing
2 | 
3 | DEAD is a tool that automatically finds compiler regressions and other missed optimizations and processes them into reports.
4 | 
5 | It is based on the paper [Finding missed optimizations through the lens of dead code elimination](https://dl.acm.org/doi/10.1145/3503222.3507764) and was first written during a [master thesis](https://doi.org/10.3929/ethz-b-000547786) at the [AST Lab](https://ast.ethz.ch/).
6 | 
7 | For a list of reported bugs, see [bugs.md](./bugs.md).
8 | 
9 | ## Setup
10 | Clone the latest release with, for example, `git clone -b v0.0.2 https://github.com/DeadCodeProductions/dead`.
11 | 
12 | After navigating into the cloned repository, choose whether you want to run DEAD [locally](#local-setup) or in a [Docker container](#setup-with-docker).
13 | ### Setup with Docker
14 | ```
15 | ./build_docker.sh
16 | 
17 | # Enter the container
18 | docker run -it -v $(realpath ./docker_storage):/persistent deaddocker
19 | ```
20 | Continue by reading the [Run Section](#run).
21 | 
22 | ### Local Setup
23 | The following programs or libraries must be installed:
24 | - `python >= 3.10`
25 | - `gcc`
26 | - `clang`
27 | - `csmith`
28 | - `creduce`
29 | - `cmake`
30 | - `ccomp` (CompCert)
31 | - `llvm 13.0.0` or `llvm 14.0.0` (for the include files)
32 | - `compiler-rt` (for the sanitizer libraries; also part of LLVM)
33 | - `boost`
34 | - `ninja`
35 | 
36 | Optional programs:
37 | - `entr`
38 | 
39 | We are running on Arch Linux and have not (yet) tested any other distribution.
40 | 
41 | To achieve this in Arch with `yay` as AUR overlay helper, you can run:
42 | ```
43 | yay -Sy --noconfirm python\
44 |     python-pip\
45 |     gcc\
46 |     clang\
47 |     llvm\
48 |     compiler-rt\
49 |     cmake\
50 |     boost\
51 |     ninja\
52 |     csmith\
53 |     creduce-git\
54 |     compcert-git
55 | ```
56 | 
57 | Then run:
58 | ```
59 | # Create python environment
60 | python3 -m venv ./deadenv
61 | source ./deadenv/bin/activate
62 | pip install -r requirements.txt
63 | 
64 | # Initialize DEAD
65 | ./init.py
66 | ```
67 | `init.py` will:
68 | - Create a config file located at `~/.config/dead/config.json`
69 | - Compile the callchain-checker: `ccc`
70 | - Clone repositories of `gcc` and `llvm` into the local directory
71 | - Create the `compiler_cache` and `logs` directory
72 | - Check if it can find the programs and paths required in the prerequisites section and complain if not.
73 | 
74 | 
75 | ## Run
76 | As DEAD is based on differential testing, it requires two pieces of information to run:
77 | - Which compilers to find missed optimizations in. These are called *target* compilers. This is typically the current `trunk`.
78 | - Which compilers to use as a comparison to find missed optimizations in the target compilers. These are called *additional* or *attacking* compilers.
79 | 
80 | A compiler on the CLI is specified by writing `PROJECT REVISION [OPT_LEVEL ...]`. For example, to get `gcc 11.2.0` with all optimizations, write `gcc releases/gcc-11.2.0 1 2 3 s`. This can be repeated to specify more compilers.
81 | 
82 | ```sh
83 | # Don't run it yet
84 | ./main.py run --targets gcc trunk 1 2 3 s\
85 |     --additional-compilers\
86 |         gcc releases/gcc-11.2.0 1 2 3 s\
87 |         gcc releases/gcc-10.3.0 1 2 3 s
88 | ```
89 | To avoid repeating oneself, it is possible to specify default optimization levels.
90 | The optimization levels used for a given compiler are the union of the default levels and the explicitly specified ones.
91 | The flags are `--additional_compilers_default_opt_levels` and `--targets_default_opt_levels` or `-acdol` and `-tdol` respectively.
92 | 
93 | 
94 | ```sh
95 | # Don't run it yet
96 | ./main.py run --targets gcc trunk 1 2 3 s\
97 |     --additional-compilers\
98 |         gcc releases/gcc-11.2.0 \ # Opt levels: 3,s
99 |         gcc releases/gcc-10.3.0 1\ # Opt levels: 1,3,s
100 |     -acdol 3 s # Additional compilers
101 | ```
102 | 
103 | DEAD consists of three parts:
104 | - Generator, which finds missed optimizations from the given target and attacking compilers. We call such a missed optimization and any additional information related to it a *case*.
105 | - Bisector, which finds the introducing commit of the found case.
106 | - Reducer, which extracts a small part of the code that still exhibits the missed optimization.
107 | 
108 | By default, the Reducer is only enabled for cases which have a new bisection commit, as reducing takes a long time and is often not necessary.
109 | It can be enabled for all cases with `--reducer` and completely disabled with `--no-reducer`.
110 | 
111 | The last two important options are `--cores POSITIVE_INT` and `--log-level debug|info|warning|error|critical`.
112 | When not specified, `--cores` defaults to the number of logical cores on the machine.
113 | The default verbosity level is `warning`. However, to have a sense of progress, we suggest setting it to `info`.
114 | 
115 | Finally, to find missed optimizations in `trunk`, run
116 | ```sh
117 | # For GCC
118 | ./main.py -ll info\
119 |     --cores $CORES\
120 |     run --targets gcc trunk 1 2 3 s\
121 |     --additional-compilers\
122 |         gcc releases/gcc-11.2.0\
123 |         gcc releases/gcc-10.3.0\
124 |         gcc releases/gcc-9.4.0\
125 |         gcc releases/gcc-8.5.0\
126 |         gcc releases/gcc-7.5.0\
127 |     -acdol 1 2 3 s
128 | #--amount N # Terminate after finding N cases
129 | 
130 | # For LLVM
131 | ./main.py -ll info\
132 |     --cores $CORES\
133 |     run --targets llvm trunk 1 2 3 s z\
134 |     --additional-compilers\
135 |         llvm llvmorg-13.0.1\
136 |         llvm llvmorg-12.0.1\
137 |         llvm llvmorg-11.1.0\
138 |         llvm llvmorg-10.0.1\
139 |         llvm llvmorg-9.0.1\
140 |         llvm llvmorg-8.0.1\
141 |         llvm llvmorg-7.1.0\
142 |         llvm llvmorg-6.0.1\
143 |         llvm llvmorg-5.0.2\
144 |         llvm llvmorg-4.0.1\
145 |     -acdol 1 2 3 s z
146 | #--amount N # Terminate after finding N cases
147 | ```
148 | 
149 | Please run `./main.py run -h` and `./main.py -h` to see more options.
150 | 
151 | ### Performance considerations
152 | Assigning all cores of the machine to just one instance of DEAD can lead to less than optimal machine utilization. Some parts of the pipeline cannot always use all cores.
153 | 
154 | - The Bisector is written in a single-threaded way and only requires multiple cores when building a new compiler.
155 |   As the cache grows and many regressions have already been found, the cache hit rate increases drastically, making the Bisector an essentially single-threaded part.
156 | - GCC compilation includes several single-threaded parts. Compiling with sufficiently many cores will make it look like a mostly single-threaded task due to Amdahl's law. LLVM compilation also includes some single-threaded parts, but these are far less noticeable.
157 | - The Reducer uses `creduce` to shrink the case. `creduce` also does not always utilize the machine perfectly when using many threads.
158 | 
159 | Just oversubscribing the machine is not an option, as some checks are time-dependent. Failing these checks will especially impact the throughput of the Reducer.
160 | 
161 | One fairly good solution is to run multiple smaller instances in parallel.
162 | 
163 | For the Reducer, 8 logical cores per pipeline yielded good results.
164 | 
165 | Finding new cases in parallel has the big caveat that the instances wait on each other when one is building a compiler that the other needs. This dependence is very common when the cache is not populated enough. Running multiple instances in parallel too early is detrimental to machine utilization!
166 | 
167 | Pinpointing when the switch to multiple instances is beneficial is difficult.
168 | For this reason we provide `run_parallel.sh`, which spawns multiple instances with the appropriate number of cores assigned.
169 | ```sh
170 | ./run_parallel.sh llvm|gcc TOTAL_CORES AMOUNT_JOBS
171 | ```
172 | 
173 | ## Generating a report
174 | 
175 | Imagine DEAD ran for some time and it is now time to create a bug report.
176 | 
177 | Not-yet-reported cases can be explored with the `unreported` sub-command.
178 | 
179 | ```sh
180 | $ ./main.py unreported
181 | ID    Bisection                                   Count
182 | ----------------------------------------------------------------
183 | 2     0b92cf305dcf34387a8e2564e55ca8948df3b47a    45
184 | ...
185 | 39    008e7397dad971c03c08fc1b0a4a98fddccaaed8    1
186 | ----------------------------------------------------------------
187 | ID    Bisection                                   Count
188 | ```
189 | On the left you see an ID for a case that has the bisection commit shown in the bisection column.
190 | Oftentimes, many cases bisect to the same commit. The 'Count' column displays how many cases bisected to this particular commit.
191 | Note that a fix for a reported case may not fix all cases of the bisection!
192 | 
193 | Select one of the IDs and check if there is already a bug report which includes its bisection commit.
194 | 
195 | If this is not the case, run
196 | ```sh
197 | ./main.py report $ID > report.txt
198 | ```
199 | 
200 | It will pull the compiler project of the case, build `trunk` and test if the missed optimization can still be observed.
201 | You can disable pulling with `--no-pull`.
202 | If the optimization is still missed, it will output a copy-and-pasteable report into `report.txt` (don't forget to remove the title if there is one) and `case.txt`[^1], a copy of the reported code.
203 | [^1]: It is `.txt` instead of `.c` because GitHub does not allow `.c` files to be attached to issues.
204 | 
205 | When you have submitted the bug report, you can save the link to the report via
206 | ```
207 | ./main.py set link $ID $LINK
208 | ```
209 | so that the bisection isn't displayed anymore.
210 | 
211 | Hopefully, the missed optimization gets fixed. When this is the case, you can extract the case ID from the bug report and note down the fixing commit. Then save it with
212 | ```
213 | ./main.py set fixed $ID $COMMIT
214 | ```
215 | 
216 | Inspecting reported cases can be done via
217 | ```
218 | ./main.py reported
219 | ```
220 | 
221 | ### Massaging workflow
222 | Sometimes it is possible to further reduce the automatically reduced code manually. We call this step *massaging*, the product of which is *massaged code*.
223 | 
224 | Instead of directly generating the report after having selected an ID and checked if the bisection commit was already reported, get the reduced code and try to make it smaller.
225 | ```sh
226 | ./main.py get rcode $ID > rcode.c
227 | ```
228 | To continuously check if the changes still exhibit the missed optimization, open a separate terminal in the same directory and run
229 | ```sh
230 | echo rcode.c | entr -c ./main.py checkreduced $ID ./rcode.c
231 | ```
232 | This will rerun some checks whenever `rcode.c` is saved.
233 | 
234 | When the massaging is done, save it into DEAD with
235 | ```sh
236 | ./main.py set mcode $ID ./rcode.c
237 | ```
238 | DEAD will check if the massaged code still bisects to the same commit as before and will reject the change if not.
239 | Empirically, changes to cases whose bisection is rarely found often don't allow any further massaging.
240 | 
241 | ## Subcommand overview of `main.py`
242 | 
243 | - `run`: Find new regressions/missed optimizations.
244 | - `tofile ID`: Save a case into a tar-file.
245 | - `absorb PATH`: Read tar-files into the database of DEAD.
246 | - `report ID`: Generate a report for a given case.
247 | - `rereduce ID FILE`: Reduce a file (again) w.r.t. a case.
248 | - `diagnose`: Run a set of tests when something seems odd with a case.
249 | - `checkreduced ID FILE`: Run some lightweight tests based on a case on a piece of code.
250 | - `cache`: Cache-related functionality.
251 | - `asm ID`: Generate assembly for all code of a case.
252 | - `set | get {link,fixed,mcode,rcode,ocode,bisection}`: Set or get the specified field of a case.
253 | - `build PROJECT REV`: Build `REV` of compiler project `PROJECT`.
254 | - `reduce ID`: Reduce case `ID`.
255 | - `edit`: Open DEAD's configuration in `$EDITOR`.
256 | - `unreported`: List unreported cases grouped by bisection commit.
257 | - `reported`: List reported cases.
258 | - `findby`: Find a case ID given some part of the case.
259 | 
260 | ## Overview of important files
261 | - `bisector.py`: Bisects a given interesting case.
262 | - `builder.py`: Builds the compiler.
263 | - `checker.py`: Checks if a given case is interesting.
264 | - `generator.py`: Finds new interesting cases.
265 | - `patcher.py`: Automatically finds the region in the history where a patch needs to be applied.
266 | - `reducer.py`: Reduces the code of a given case.
267 | 
268 | ## Q&A for potential issues
269 | ### I set flag X which I found in the help, but DEAD says the option does not exist!
270 | Sadly, flags are position-dependent. You have to put it after the command whose help you found the flag in and before any other subcommand.
271 | ### I want to do XYZ. How?
272 | Maybe there's already an option for it. Consult the program with `--help` for all the options.
273 | 
274 | ### Why don't I see anything?
275 | Are you running with `-ll info`?
276 | 
277 | ### DEAD wants to work with a commit that doesn't exist!
278 | If you are checking things manually: Are you sure you are looking in the right repository?
279 | 280 | If you are processing a case and `git` throws an exception, try pulling `llvm-project` and `gcc` so you are sure to have all the commits. 281 | 282 | ### Why does this case fail? 283 | Maybe `./main.py diagnose -ci $ID` can illuminate the situation. 284 | 285 | ### This case does not reduce but `diagnose` says everything is fine! 286 | Try throwing your whole machine at it (`./main.py reduce ID`). 287 | 288 | ### The compilers should already be built and the logs just say `INFO:root: [...] is currently building; need to wait`. 289 | Stop DEAD, run `./main.py cache clean`, restart. 290 | What happened? The most likely scenario is that DEAD was interrupted while building a compiler and unable to run the clean-up procedure, confusing DEAD the next time the compiler has to be built. 291 | Do *not* run `cache clean` while DEAD is running. 292 | 293 | ### A compiler I want to build has a build issue; where do I find the build-logs? 294 | The logs can be found in the path specified by the `logdir` entry of the `$HOME/.config/dead/config.json`. 295 | For an installation with `init.py`, this is `$PROJECTDIR/logs`. 296 | For a docker installation this is `/persistent/logs` in the container. 297 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import logging 7 | import os 8 | import signal 9 | import subprocess 10 | from multiprocessing import Process, Queue 11 | from os.path import join as pjoin 12 | from pathlib import Path 13 | from random import randint 14 | from tempfile import NamedTemporaryFile 15 | from typing import TYPE_CHECKING, Generator, Optional, Union 16 | 17 | from ccbuilder import Builder, PatchDB, get_compiler_info 18 | from dead_instrumenter.instrumenter import instrument_program 19 | 20 | import checker 21 | import parsers 22 | import utils 23 | 24 | 25 | def run_csmith(csmith: str) -> str: 26 | """Generate random code with csmith. 27 | 28 | Args: 29 | csmith (str): Path to executable or name in $PATH to csmith. 30 | 31 | Returns: 32 | str: csmith generated program. 33 | """ 34 | tries = 0 35 | while True: 36 | options = [ 37 | "arrays", 38 | "bitfields", 39 | "checksum", 40 | "comma-operators", 41 | "compound-assignment", 42 | "consts", 43 | "divs", 44 | "embedded-assigns", 45 | "jumps", 46 | "longlong", 47 | "force-non-uniform-arrays", 48 | "math64", 49 | "muls", 50 | "packed-struct", 51 | "paranoid", 52 | "pointers", 53 | "structs", 54 | "inline-function", 55 | "return-structs", 56 | "arg-structs", 57 | "dangling-global-pointers", 58 | ] 59 | 60 | cmd = [ 61 | csmith, 62 | "--no-unions", 63 | "--safe-math", 64 | "--no-argc", 65 | "--no-volatiles", 66 | "--no-volatile-pointers", 67 | ] 68 | for option in options: 69 | if randint(0, 1): 70 | cmd.append(f"--{option}") 71 | else: 72 | cmd.append(f"--no-{option}") 73 | result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 74 | if result.returncode == 0: 75 | return result.stdout.decode("utf-8") 76 | else: 77 | tries += 1 78 | if tries > 10: 79 | raise Exception("CSmith failed 10 times in a row!") 80 | 81 | 82 | def generate_file( 83 | config: utils.NestedNamespace, additional_flags: str 84 | ) -> tuple[str, str]: 85 | """Generate an instrumented csmith program. 
86 | 
87 |     Args:
88 |         config (utils.NestedNamespace): THE config
89 |         additional_flags (str): Additional flags to use when
90 |             compiling the program when checking.
91 | 
92 |     Returns:
93 |         tuple[str, str]: Marker prefix and instrumented code.
94 |     """
95 |     additional_flags += f" -I {config.csmith.include_path}"
96 |     while True:
97 |         try:
98 |             logging.debug("Generating new candidate...")
99 |             candidate = run_csmith(config.csmith.executable)
100 |             if len(candidate) > config.csmith.max_size:
101 |                 continue
102 |             if len(candidate) < config.csmith.min_size:
103 |                 continue
104 |             with NamedTemporaryFile(suffix=".c") as ntf:
105 |                 with open(ntf.name, "w") as f:
106 |                     print(candidate, file=f)
107 |                 logging.debug("Checking if program is sane...")
108 |                 if not checker.sanitize(
109 |                     config.gcc.sane_version,
110 |                     config.llvm.sane_version,
111 |                     config.ccomp,
112 |                     Path(ntf.name),
113 |                     additional_flags,
114 |                 ):
115 |                     continue
116 |                 logging.debug("Instrumenting candidate...")
117 |                 marker_prefix = instrument_program(
118 |                     Path(ntf.name), [f"-I{config.csmith.include_path}"]
119 |                 )
120 |                 with open(ntf.name, "r") as f:
121 |                     return marker_prefix, f.read()
122 | 
123 | 
124 |         except subprocess.TimeoutExpired:
125 |             pass
126 | 
127 | 
128 | class CSmithCaseGenerator:
129 |     def __init__(
130 |         self,
131 |         config: utils.NestedNamespace,
132 |         patchdb: PatchDB,
133 |         cores: Optional[int] = None,
134 |     ):
135 |         self.config: utils.NestedNamespace = config
136 | 
137 |         _, llvm_repo = get_compiler_info("llvm", Path(config.repodir))
138 |         _, gcc_repo = get_compiler_info("gcc", Path(config.repodir))
139 |         self.builder: Builder = Builder(
140 |             Path(config.cachedir),
141 |             gcc_repo,
142 |             llvm_repo,
143 |             patchdb,
144 |             cores,
145 |             logdir=Path(config.logdir),
146 |         )
147 |         self.chkr: checker.Checker = checker.Checker(config, self.builder)
148 |         self.procs: list[Process] = []
149 |         self.try_counter: int = 0
150 | 
151 |     def generate_interesting_case(self, scenario: utils.Scenario) -> utils.Case:
152 |         """Generate a case which is interesting, i.e. there is one compiler
153 |         (from the target settings) which does not eliminate a marker and at
154 |         least one (from the attacker settings) which does.
155 | 
156 |         Args:
157 |             scenario (utils.Scenario): Which compilers to compare.
158 | 
159 |         Returns:
160 |             utils.Case: Interesting case.
161 | """ 162 | # Because the resulting code will be of csmith origin, we have to add 163 | # the csmith include path to all settings 164 | csmith_include_flag = f"-I{self.config.csmith.include_path}" 165 | scenario.add_flags([csmith_include_flag]) 166 | 167 | self.try_counter = 0 168 | while True: 169 | self.try_counter += 1 170 | logging.debug("Generating new candidate...") 171 | marker_prefix, candidate_code = generate_file(self.config, "") 172 | 173 | # Find alive markers 174 | logging.debug("Getting alive markers...") 175 | try: 176 | target_alive_marker_list = [ 177 | ( 178 | tt, 179 | utils.find_alive_markers( 180 | candidate_code, tt, marker_prefix, self.builder 181 | ), 182 | ) 183 | for tt in scenario.target_settings 184 | ] 185 | 186 | tester_alive_marker_list = [ 187 | ( 188 | tt, 189 | utils.find_alive_markers( 190 | candidate_code, tt, marker_prefix, self.builder 191 | ), 192 | ) 193 | for tt in scenario.attacker_settings 194 | ] 195 | except utils.CompileError: 196 | continue 197 | 198 | target_alive_markers = set() 199 | for _, marker_set in target_alive_marker_list: 200 | target_alive_markers.update(marker_set) 201 | 202 | # Extract reduce cases 203 | logging.debug("Extracting reduce cases...") 204 | for marker in target_alive_markers: 205 | good: list[utils.CompilerSetting] = [] 206 | for good_setting, good_alive_markers in tester_alive_marker_list: 207 | if ( 208 | marker not in good_alive_markers 209 | ): # i.e. the setting eliminated the call 210 | good.append(good_setting) 211 | 212 | # Find bad cases 213 | if len(good) > 0: 214 | good_opt_levels = [gs.opt_level for gs in good] 215 | for bad_setting, bad_alive_markers in target_alive_marker_list: 216 | # XXX: Here you can enable inter-opt_level comparison! 217 | if ( 218 | marker in bad_alive_markers 219 | and bad_setting.opt_level in good_opt_levels 220 | ): # i.e. the setting didn't eliminate the call 221 | # Create reduce case 222 | case = utils.Case( 223 | code=candidate_code, 224 | marker=marker, 225 | bad_setting=bad_setting, 226 | good_settings=good, 227 | scenario=scenario, 228 | reduced_code=None, 229 | bisection=None, 230 | path=None, 231 | ) 232 | # TODO: Optimize interestingness test and document behaviour 233 | try: 234 | if self.chkr.is_interesting(case): 235 | logging.info( 236 | f"Try {self.try_counter}: Found case! LENGTH: {len(candidate_code)}" 237 | ) 238 | return case 239 | except utils.CompileError: 240 | continue 241 | else: 242 | logging.debug( 243 | f"Try {self.try_counter}: Found no case. Onto the next one!" 244 | ) 245 | 246 | def _wrapper_interesting(self, queue: Queue[str], scenario: utils.Scenario) -> None: 247 | """Wrapper for generate_interesting_case for easier use 248 | with python multiprocessing. 249 | 250 | Args: 251 | queue (Queue): The multiprocessing queue to do IPC with. 252 | scenario (utils.Scenario): Scenario 253 | """ 254 | logging.info("Starting worker...") 255 | while True: 256 | case = self.generate_interesting_case(scenario) 257 | queue.put(json.dumps(case.to_jsonable_dict())) 258 | 259 | def parallel_interesting_case_file( 260 | self, 261 | config: utils.NestedNamespace, 262 | scenario: utils.Scenario, 263 | processes: int, 264 | output_dir: os.PathLike[str], 265 | start_stop: Optional[bool] = False, 266 | ) -> Generator[Path, None, None]: 267 | """Generate interesting cases in parallel 268 | WARNING: If you use this method, you have to call `terminate_processes` 269 | 270 | Args: 271 | config (utils.NestedNamespace): THE config. 
272 |             scenario (utils.Scenario): Scenario.
273 |             processes (int): Number of jobs.
274 |             output_dir (os.PathLike): Directory where to output the found cases.
275 |             start_stop (Optional[bool]): Whether or not to stop the processes when
276 |                 finding a case. This is useful when running a pipeline and thus
277 |                 the processing power is needed somewhere else.
278 | 
279 |         Returns:
280 |             Generator[Path, None, None]: Interesting case generator giving paths.
281 |         """
282 |         gen = self.parallel_interesting_case(config, scenario, processes, start_stop)
283 | 
284 |         counter = 0
285 |         while True:
286 |             case = next(gen)
287 |             h = hash(str(case))
288 |             h = max(h, -h)  # i.e. abs(h): keep the hash in the filename non-negative
289 |             path = Path(pjoin(output_dir, f"case_{counter:08}-{h:019}.tar"))
290 |             logging.debug(f"Writing case to {path}...")
291 |             case.to_file(path)
292 |             yield path
293 |             counter += 1
294 | 
295 |     def parallel_interesting_case(
296 |         self,
297 |         config: utils.NestedNamespace,
298 |         scenario: utils.Scenario,
299 |         processes: int,
300 |         start_stop: Optional[bool] = False,
301 |     ) -> Generator[utils.Case, None, None]:
302 |         """Generate interesting cases in parallel
303 |         WARNING: If you use this method, you have to call `terminate_processes`
304 | 
305 |         Args:
306 |             config (utils.NestedNamespace): THE config.
307 |             scenario (utils.Scenario): Scenario.
308 |             processes (int): Number of jobs.
309 | 
310 |             start_stop (Optional[bool]): Whether or not to stop the processes when
311 |                 finding a case. This is useful when running a pipeline and thus
312 |                 the processing power is needed somewhere else.
313 | 
314 |         Returns:
315 |             Generator[utils.Case, None, None]: Interesting case generator giving Cases.
316 |         """
317 | 
318 |         queue: Queue[str] = Queue()
319 | 
320 |         # Create processes
321 |         self.procs = [
322 |             Process(
323 |                 target=self._wrapper_interesting,
324 |                 args=(queue, scenario),
325 |             )
326 |             for _ in range(processes)
327 |         ]
328 | 
329 |         # Start processes
330 |         for p in self.procs:
331 |             p.daemon = True
332 |             p.start()
333 | 
334 |         # read queue
335 |         while True:
336 |             # TODO: handle process failure
337 |             case_str: str = queue.get()
338 | 
339 |             case = utils.Case.from_jsonable_dict(config, json.loads(case_str))
340 | 
341 |             if start_stop:
342 |                 # Send processes to "sleep"
343 |                 logging.debug("Stopping workers...")
344 |                 for p in self.procs:
345 |                     if p.pid is None:
346 |                         continue
347 |                     os.kill(p.pid, signal.SIGSTOP)
348 |             yield case
349 |             if start_stop:
350 |                 logging.debug("Restarting workers...")
351 |                 # Wake the processes again for further search
352 |                 for p in self.procs:
353 |                     if p.pid is None:
354 |                         continue
355 |                     os.kill(p.pid, signal.SIGCONT)
356 | 
357 |     def terminate_processes(self) -> None:
358 |         for p in self.procs:
359 |             if p.pid is None:
360 |                 continue
361 |             # This is so cruel
362 |             os.kill(p.pid, signal.SIGCONT)
363 |             p.terminate()
364 | 
365 | 
366 | if __name__ == "__main__":
367 |     config, args = utils.get_config_and_parser(parsers.generator_parser())
368 | 
369 |     cores = args.cores
370 | 
371 |     patchdb = PatchDB(Path(config.patchdb))
372 |     case_generator = CSmithCaseGenerator(config, patchdb, cores)
373 | 
374 |     if args.interesting:
375 |         scenario = utils.Scenario([], [])
376 |         if args.scenario:
377 |             scenario = utils.Scenario.from_file(config, Path(args.scenario))
378 | 
379 |         if not args.scenario and args.targets is None:
380 |             print(
381 |                 "--targets is required for --interesting if you don't specify a scenario"
382 |             )
383 |             exit(1)
384 |         elif args.targets:
385 |             target_settings =
utils.get_compiler_settings( 386 | config, args.targets, default_opt_levels=args.targets_default_opt_levels 387 | ) 388 | scenario.target_settings = target_settings 389 | 390 | if not args.scenario and args.additional_compilers is None: 391 | print( 392 | "--additional-compilers is required for --interesting if you don't specify a scenario" 393 | ) 394 | exit(1) 395 | elif args.additional_compilers: 396 | additional_compilers = utils.get_compiler_settings( 397 | config, 398 | args.additional_compilers, 399 | default_opt_levels=args.additional_compilers_default_opt_levels, 400 | ) 401 | 402 | scenario.attacker_settings = additional_compilers 403 | 404 | if args.output_directory is None: 405 | print("Missing output directory!") 406 | exit(1) 407 | else: 408 | output_dir = os.path.abspath(args.output_directory) 409 | os.makedirs(output_dir, exist_ok=True) 410 | 411 | if args.parallel is not None: 412 | amount_cases = args.amount if args.amount is not None else 0 413 | amount_processes = max(1, args.parallel) 414 | gen = case_generator.parallel_interesting_case_file( 415 | config=config, 416 | scenario=scenario, 417 | processes=amount_processes, 418 | output_dir=output_dir, 419 | start_stop=False, 420 | ) 421 | if amount_cases == 0: 422 | while True: 423 | print(next(gen)) 424 | else: 425 | for i in range(amount_cases): 426 | print(next(gen)) 427 | 428 | else: 429 | print(case_generator.generate_interesting_case(scenario)) 430 | else: 431 | # TODO 432 | print("Not implemented yet") 433 | 434 | # This is not needed here but I don't know why. 435 | case_generator.terminate_processes() 436 | -------------------------------------------------------------------------------- /bisector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import copy 4 | import functools 5 | import logging 6 | import math 7 | import os 8 | import subprocess 9 | import tarfile 10 | from pathlib import Path 11 | from typing import Optional 12 | 13 | import ccbuilder 14 | from ccbuilder import ( 15 | Builder, 16 | BuildException, 17 | CompilerProject, 18 | PatchDB, 19 | Repo, 20 | get_compiler_info, 21 | ) 22 | from ccbuilder.utils.utils import select_repo 23 | 24 | import checker 25 | import generator 26 | import parsers 27 | import reducer 28 | import utils 29 | 30 | 31 | class BisectionException(Exception): 32 | pass 33 | 34 | 35 | def find_cached_revisions( 36 | compiler_name: str, config: utils.NestedNamespace 37 | ) -> list[str]: 38 | if compiler_name == "llvm": 39 | compiler_name = "clang" 40 | compilers = [] 41 | for entry in Path(config.cachedir).iterdir(): 42 | if entry.is_symlink() or not entry.stem.startswith(compiler_name): 43 | continue 44 | if not (entry / "bin" / compiler_name).exists(): 45 | continue 46 | rev = str(entry).split("-")[-1] 47 | compilers.append(rev) 48 | return compilers 49 | 50 | 51 | class Bisector: 52 | """Class to bisect a given case.""" 53 | 54 | def __init__( 55 | self, 56 | config: utils.NestedNamespace, 57 | bldr: Builder, 58 | chkr: checker.Checker, 59 | ) -> None: 60 | self.config = config 61 | self.bldr = bldr 62 | self.chkr = chkr 63 | self.steps = 0 64 | 65 | def _is_interesting(self, case: utils.Case, rev: str) -> bool: 66 | """_is_interesting. 67 | 68 | Args: 69 | case (utils.Case): Case to check 70 | rev (str): What revision to check the case against. 71 | 72 | Returns: 73 | bool: True if the case is interesting wrt `rev`. 
74 | 75 | Raises: 76 | builder.CompileError: 77 | """ 78 | case_cpy = copy.deepcopy(case) 79 | case_cpy.bad_setting.rev = rev 80 | try: 81 | if case_cpy.reduced_code: 82 | case_cpy.code = case_cpy.reduced_code 83 | return self.chkr.is_interesting(case_cpy, preprocess=False) 84 | else: 85 | return self.chkr.is_interesting(case_cpy, preprocess=True) 86 | except subprocess.CalledProcessError as e: 87 | raise utils.CompileError(e) 88 | 89 | def bisect_file(self, file: Path, force: bool = False) -> bool: 90 | """Bisect case found in `file`. 91 | 92 | Args: 93 | file (Path): Path to case file to bisect. 94 | force (bool): Whether or not to force a bisection 95 | if there's already one. 96 | 97 | Returns: 98 | bool: True if the bisection of the case in `file` succeeded. 99 | """ 100 | case = utils.Case.from_file(self.config, file) 101 | if self.bisect_case(case, force): 102 | case.to_file(file) 103 | return True 104 | return False 105 | 106 | def bisect_case(self, case: utils.Case, force: bool = False) -> bool: 107 | """Bisect a given case. 108 | 109 | Args: 110 | case (utils.Case): Case to bisect. 111 | force (bool): Whether or not to force a bisection 112 | if there's already one. 113 | 114 | Returns: 115 | bool: True if the bisection succeeded. 116 | """ 117 | if not force and case.bisection: 118 | logging.info(f"Ignoring case: Already bisected") 119 | return True 120 | try: 121 | if res := self.bisect_code( 122 | case.code, case.marker, case.bad_setting, case.good_settings 123 | ): 124 | case.bisection = res 125 | return True 126 | except BisectionException: 127 | return False 128 | return False 129 | 130 | def bisect_code( 131 | self, 132 | code: str, 133 | marker: str, 134 | bad_setting: utils.CompilerSetting, 135 | good_settings: list[utils.CompilerSetting], 136 | ) -> Optional[str]: 137 | """Bisect a given code wrt. marker, the bad setting and the good settings. 138 | 139 | Args: 140 | self: 141 | code (str): code 142 | marker (str): marker 143 | bad_setting (utils.CompilerSetting): bad_setting 144 | good_settings (list[utils.CompilerSetting]): good_settings 145 | 146 | Returns: 147 | Optional[str]: Revision the code bisects to, if it is successful. 148 | None otherwise. 149 | 150 | Raises: 151 | BisectionException: Raised if the bisection failed somehow. 152 | """ 153 | case = utils.Case( 154 | code, 155 | marker, 156 | bad_setting, 157 | good_settings, 158 | utils.Scenario([bad_setting], good_settings), 159 | None, 160 | None, 161 | None, 162 | ) 163 | 164 | bad_compiler_config = case.bad_setting.compiler_project 165 | repo = select_repo( 166 | bad_setting.compiler_project, 167 | gcc_repo=self.bldr.gcc_repo, 168 | llvm_repo=self.bldr.llvm_repo, 169 | ) 170 | 171 | # ===== Get good and bad commits 172 | bad_commit = case.bad_setting.rev 173 | # Only the ones which are on the same opt_level and have the same compiler can be bisected 174 | possible_good_commits = [ 175 | gs.rev 176 | for gs in case.good_settings 177 | if gs.opt_level == case.bad_setting.opt_level 178 | and gs.compiler_project.to_string() == bad_compiler_config.to_string() 179 | ] 180 | 181 | if len(possible_good_commits) == 0: 182 | logging.info(f"No matching optimization level found. Aborting...") 183 | return None 184 | # Sort commits based on branch point wrt to the bad commit 185 | # Why? Look at the following commit graph 186 | # Bad 187 | # | Good_1 188 | # | / 189 | # A Good_2 190 | # | / 191 | # | / 192 | # B 193 | # | 194 | # We want to bisect between Bad and Good_1 because it's less bisection work. 
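        # In the graph above, Good_1's common ancestor with Bad is A and
        # Good_2's is B; since B is an ancestor of A, picking Good_1 leaves
        # only the shorter range A..Bad to bisect.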
195 |         possible_good_commits_t = [
196 |             (rev, repo.get_best_common_ancestor(bad_commit, rev))
197 |             for rev in possible_good_commits
198 |         ]
199 | 
200 |         good_commit: str
201 |         common_ancestor: str
202 | 
203 |         def cmp_func(x: tuple[str, str], y: tuple[str, str]) -> int:
204 |             return 1 if repo.is_ancestor(x[1], y[1]) else -1  # cmp_to_key needs an int comparator; "smaller" = more recent common ancestor
205 | 
206 |         good_commit, common_ancestor = min(
207 |             possible_good_commits_t,
208 |             key=functools.cmp_to_key(cmp_func),
209 |         )
210 | 
211 |         # ====== Figure out in which part the introducer or fixer lies
212 |         #
213 |         # Bad             Bad
214 |         #  |               |
215 |         #  |               |   Good
216 |         #  | or            |b1 /
217 |         #  |b0             |  / b2
218 |         #  |               | /
219 |         # Good             CA
220 |         #
221 |         # if good is_ancestor of bad:
222 |         #     case b0
223 |         #     searching regression
224 |         # else:
225 |         #     if CA is not interesting:
226 |         #         case b1
227 |         #         searching regression
228 |         #     else:
229 |         #         case b2
230 |         #         searching fixer
231 | 
232 |         try:
233 |             if repo.is_ancestor(good_commit, bad_commit):
234 |                 res = self._bisection(good_commit, bad_commit, case, repo)
235 |                 print(f"{res}")
236 |             else:
237 |                 if not self._is_interesting(case, common_ancestor):
238 |                     # b1 case
239 |                     logging.info("B1 Case")
240 |                     res = self._bisection(
241 |                         common_ancestor, bad_commit, case, repo, interesting_is_bad=True
242 |                     )
243 |                     print(f"{res}")
244 |                     self._check(case, res, repo)
245 |                 else:
246 |                     # b2 case
247 |                     logging.info("B2 Case")
248 |                     # TODO: Figure out how to save and handle b2
249 |                     logging.critical(f"Currently ignoring b2, sorry")
250 |                     raise BisectionException("Currently ignoring Case type B2, sorry")
251 | 
252 |                     # res = self._bisection(
253 |                     #     common_ancestor, good_commit, case, repo, interesting_is_bad=False
254 |                     # )
255 |                     # self._check(case, res, repo, interesting_is_bad=False)
256 |                     # print(f"First good commit {res}")
257 |         except utils.CompileError:
258 |             return None
259 | 
260 |         return res
261 | 
262 |     def _check(
263 |         self,
264 |         case: utils.Case,
265 |         rev: str,
266 |         repo: Repo,
267 |         interesting_is_bad: bool = True,
268 |     ) -> None:
269 |         """Sanity check that the bisected commit is actually
270 |         correct.
271 | 
272 |         Args:
273 |             case (utils.Case): Case to check.
274 |             rev (str): Revision believed to be the bisection commit.
275 |             repo (repository.Repo): Repository to get the previous commit from.
276 |             interesting_is_bad (bool): Whether or not to switch the expected result
277 |                 of the interestingness-test.
278 |         Raises:
279 |             AssertionError: Raised when the check fails.
280 |         """
281 |         # TODO(Yann): Don't use assertion errors.
282 | 
283 |         prev_commit = repo.rev_to_commit(f"{rev}~")
284 |         if interesting_is_bad:
285 |             assert self._is_interesting(case, rev) and not self._is_interesting(
286 |                 case, prev_commit
287 |             )
288 |         else:
289 |             assert not self._is_interesting(case, rev) and self._is_interesting(
290 |                 case, prev_commit
291 |             )
292 | 
293 |     def _bisection(
294 |         self,
295 |         good_rev: str,
296 |         bad_rev: str,
297 |         case: utils.Case,
298 |         repo: Repo,
299 |         interesting_is_bad: bool = True,
300 |         max_build_fail: int = 2,
301 |     ) -> str:
302 |         """Actual bisection part.
303 |         First bisects within the cache, then continues with a normal bisection.
304 | 
305 |         Args:
306 |             good_rev (str): Revision that is an ancestor of bad_rev.
307 |             bad_rev (str): Rev that comes later in the tree.
308 |             case (utils.Case): Case to bisect.
309 |             repo (repository.Repo): Repo to get the revisions from.
310 |             interesting_is_bad (bool): Whether or not to switch how to interpret
311 |                 the outcome of the interestingness-test.
312 | max_build_fail (int): How many times the builder can fail to build w/o 313 | aborting the bisection. 314 | """ 315 | 316 | self.steps = 0 317 | # check cache 318 | possible_revs = repo.direct_first_parent_path(good_rev, bad_rev) 319 | cached_revs = find_cached_revisions( 320 | case.bad_setting.compiler_project.to_string(), self.config 321 | ) 322 | cached_revs = [r for r in cached_revs if r in possible_revs] 323 | 324 | # Create enumeration dict to sort cached_revs with 325 | sort_dict = dict((r, v) for v, r in enumerate(possible_revs)) 326 | cached_revs = sorted(cached_revs, key=lambda x: sort_dict[x]) 327 | 328 | # bisect in cache 329 | len_region = len(repo.direct_first_parent_path(good_rev, bad_rev)) 330 | logging.info(f"Bisecting in cache...") 331 | midpoint = "" 332 | old_midpoint = "" 333 | failed_to_compile = False 334 | while True: 335 | if failed_to_compile: 336 | failed_to_compile = False 337 | cached_revs.remove(midpoint) 338 | 339 | logging.info(f"{len(cached_revs): 4}, bad: {bad_rev}, good: {good_rev}") 340 | if len(cached_revs) == 0: 341 | break 342 | midpoint_idx = len(cached_revs) // 2 343 | old_midpoint = midpoint 344 | midpoint = cached_revs[midpoint_idx] 345 | if old_midpoint == midpoint: 346 | break 347 | 348 | # There should be no build failure here, as we are working on cached builds 349 | # But there could be a CompileError 350 | self.steps += 1 351 | try: 352 | test: bool = self._is_interesting(case, midpoint) 353 | except utils.CompileError: 354 | logging.warning( 355 | f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}" 356 | ) 357 | failed_to_compile = True 358 | continue 359 | 360 | if test: 361 | # bad is always "on top" in the history tree 362 | # git rev-list returns commits in order of the parent relation 363 | # cached_revs is also sorted in that order 364 | # Thus when finding something bad i.e interesting, we have to cut the head 365 | # and when finding something good, we have to cut the tail 366 | if interesting_is_bad: 367 | bad_rev = midpoint 368 | cached_revs = cached_revs[midpoint_idx + 1 :] 369 | else: 370 | good_rev = midpoint 371 | cached_revs = cached_revs[:midpoint_idx] 372 | else: 373 | if interesting_is_bad: 374 | good_rev = midpoint 375 | cached_revs = cached_revs[:midpoint_idx] 376 | else: 377 | bad_rev = midpoint 378 | cached_revs = cached_revs[midpoint_idx + 1 :] 379 | 380 | len_region2 = len(repo.direct_first_parent_path(good_rev, bad_rev)) 381 | logging.info(f"Cache bisection: range size {len_region} -> {len_region2}") 382 | 383 | # bisect 384 | len_region = len(repo.direct_first_parent_path(good_rev, bad_rev)) 385 | logging.info(f"Bisecting for approx. {math.ceil(math.log2(len_region))} steps") 386 | midpoint = "" 387 | old_midpoint = "" 388 | failed_to_build_or_compile = False 389 | failed_to_build_counter = 0 390 | 391 | guaranteed_termination_counter = 0 392 | while True: 393 | if not failed_to_build_or_compile: 394 | old_midpoint = midpoint 395 | midpoint = repo.next_bisection_commit(good_rev, bad_rev) 396 | failed_to_build_counter = 0 397 | if midpoint == "" or midpoint == old_midpoint: 398 | break 399 | else: 400 | if failed_to_build_counter >= max_build_fail: 401 | raise BisectionException( 402 | "Failed too many times in a row while bisecting. Aborting bisection..." 
403 |                     )
404 |                 if failed_to_build_counter % 2 == 0:
405 |                     # Get size of range
406 |                     range_size = len(repo.direct_first_parent_path(midpoint, bad_rev))
407 | 
408 |                     # Move 10% towards the last bad
409 |                     step = max(int(0.9 * range_size), 1)
410 |                     midpoint = repo.rev_to_commit(f"{bad_rev}~{step}")
411 |                 else:
412 |                     # Symmetric to the case above but jumping 10% in the other direction, i.e. 20% from our position.
413 |                     range_size = len(repo.direct_first_parent_path(good_rev, midpoint))
414 |                     step = max(int(0.2 * range_size), 1)
415 |                     midpoint = repo.rev_to_commit(f"{midpoint}~{step}")
416 | 
417 |                 failed_to_build_counter += 1
418 |                 failed_to_build_or_compile = False
419 | 
420 |                 if guaranteed_termination_counter >= 20:
421 |                     raise BisectionException(
422 |                         "Failed too many times in a row while bisecting. Aborting bisection..."
423 |                     )
424 |                 guaranteed_termination_counter += 1
425 | 
426 |             logging.info(f"Midpoint: {midpoint}")
427 | 
428 |             try:
429 |                 test = self._is_interesting(case, midpoint)
430 |             except BuildException:
431 |                 logging.warning(
432 |                     f"Could not build {case.bad_setting.compiler_project.to_string()} {midpoint}!"
433 |                 )
434 |                 failed_to_build_or_compile = True
435 |                 continue
436 |             except utils.CompileError:
437 |                 logging.warning(
438 |                     f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}"
439 |                 )
440 |                 failed_to_build_or_compile = True
441 |                 continue
442 | 
443 |             if test:
444 |                 if interesting_is_bad:
445 |                     # "As if not_interesting_is_good does not exist"-case
446 |                     bad_rev = midpoint
447 |                 else:
448 |                     good_rev = midpoint
449 |             else:
450 |                 if interesting_is_bad:
451 |                     # "As if not_interesting_is_good does not exist"-case
452 |                     good_rev = midpoint
453 |                 else:
454 |                     bad_rev = midpoint
455 | 
456 |         return bad_rev
457 | 
458 | 
459 | if __name__ == "__main__":
460 |     config, args = utils.get_config_and_parser(parsers.bisector_parser())
461 | 
462 |     patchdb = PatchDB(Path(config.patchdb))
463 |     _, llvm_repo = get_compiler_info("llvm", Path(config.repodir))
464 |     _, gcc_repo = get_compiler_info("gcc", Path(config.repodir))
465 |     bldr = Builder(
466 |         Path(config.cachedir),
467 |         gcc_repo,
468 |         llvm_repo,
469 |         patchdb,
470 |         args.cores,
471 |         logdir=Path(config.logdir),
472 |     )
473 |     chkr = checker.Checker(config, bldr)
474 |     gnrtr = generator.CSmithCaseGenerator(config, patchdb, args.cores)
475 |     rdcr = reducer.Reducer(config, bldr)
476 |     bsctr = Bisector(config, bldr, chkr)
477 | 
478 |     # TODO: This is duplicate code
479 |     if args.work_through:
480 |         if args.output_directory is None:
481 |             print("Missing output/work-through directory!")
482 |             exit(1)
483 |         else:
484 |             output_dir = Path(os.path.abspath(args.output_directory))
485 |             os.makedirs(output_dir, exist_ok=True)
486 | 
487 |             tars = [
488 |                 output_dir / d
489 |                 for d in os.listdir(output_dir)
490 |                 if tarfile.is_tarfile(output_dir / d)
491 |             ]
492 | 
493 |             print(f"Processing {len(tars)} tars")
494 |             for tf in tars:
495 |                 print(f"Processing {tf}")
496 |                 try:
497 |                     bsctr.bisect_file(tf, force=args.force)
498 |                 except BisectionException as e:
499 |                     print(f"BisectionException in {tf}: '{e}'")
500 |                     continue
501 |                 except AssertionError as e:
502 |                     print(f"AssertionError in {tf}: '{e}'")
503 |                     continue
504 |                 except BuildException as e:
505 |                     print(f"BuildException in {tf}: '{e}'")
506 |                     continue
507 | 
508 |     if args.generate:
509 |         if args.output_directory is None:
510 |             print("Missing output directory!")
511 |             exit(1)
512 |         else:
513 |             output_dir = os.path.abspath(args.output_directory)
514 |             os.makedirs(output_dir,
exist_ok=True) 515 | 516 | scenario = utils.Scenario([], []) 517 | # When file is specified, use scenario of file as base 518 | if args.file: 519 | file = Path(args.file).absolute() 520 | scenario = utils.Case.from_file(config, file).scenario 521 | 522 | tmp = utils.get_scenario(config, args) 523 | if len(tmp.target_settings) > 0: 524 | scenario.target_settings = tmp.target_settings 525 | if len(tmp.attacker_settings) > 0: 526 | scenario.attacker_settings = tmp.attacker_settings 527 | 528 | gen = gnrtr.parallel_interesting_case_file( 529 | config, scenario, bldr.jobs, output_dir, start_stop=True 530 | ) 531 | 532 | if args.amount == 0: 533 | while True: 534 | path = next(gen) 535 | worked = False 536 | if args.reducer: 537 | try: 538 | worked = rdcr.reduce_file(path) 539 | except BuildException as e: 540 | print(f"BuildException in {path}: {e}") 541 | continue 542 | 543 | if not args.reducer or worked: 544 | try: 545 | bsctr.bisect_file(path, force=args.force) 546 | except BisectionException as e: 547 | print(f"BisectionException in {path}: '{e}'") 548 | continue 549 | except AssertionError as e: 550 | print(f"AssertionError in {path}: '{e}'") 551 | continue 552 | except BuildException as e: 553 | print(f"BuildException in {path}: '{e}'") 554 | continue 555 | else: 556 | for i in range(args.amount): 557 | path = next(gen) 558 | worked = False 559 | if args.reducer: 560 | try: 561 | worked = rdcr.reduce_file(path) 562 | except BuildException as e: 563 | print(f"BuildException in {path}: {e}") 564 | continue 565 | if not args.reducer or worked: 566 | try: 567 | bsctr.bisect_file(path, force=args.force) 568 | except BisectionException as e: 569 | print(f"BisectionException in {path}: '{e}'") 570 | continue 571 | except AssertionError as e: 572 | print(f"AssertionError in {path}: '{e}'") 573 | continue 574 | except BuildException as e: 575 | print(f"BuildException in {path}: '{e}'") 576 | continue 577 | 578 | elif args.file: 579 | file = Path(args.file) 580 | bsctr.bisect_file(file, force=args.force) 581 | 582 | gnrtr.terminate_processes() 583 | -------------------------------------------------------------------------------- /checker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import copy 4 | import logging 5 | import os 6 | import re 7 | import subprocess 8 | import sys 9 | import tarfile 10 | import tempfile 11 | from pathlib import Path 12 | from types import TracebackType 13 | from typing import Optional 14 | 15 | import ccbuilder 16 | from ccbuilder import ( 17 | Builder, 18 | BuildException, 19 | CompilerProject, 20 | PatchDB, 21 | get_compiler_info, 22 | Repo, 23 | ) 24 | from dead_instrumenter.instrumenter import annotate_with_static 25 | 26 | import parsers 27 | import preprocessing 28 | import utils 29 | 30 | 31 | # ==================== Sanitize ==================== 32 | def get_cc_output(cc: str, file: Path, flags: str, cc_timeout: int) -> tuple[int, str]: 33 | cmd = [ 34 | cc, 35 | str(file), 36 | "-c", 37 | "-o/dev/null", 38 | "-Wall", 39 | "-Wextra", 40 | "-Wpedantic", 41 | "-O3", 42 | "-Wno-builtin-declaration-mismatch", 43 | ] 44 | if flags: 45 | cmd.extend(flags.split()) 46 | try: 47 | # Not using utils.run_cmd because of redirects 48 | result = subprocess.run( 49 | cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=cc_timeout 50 | ) 51 | except subprocess.TimeoutExpired: 52 | return 1, "" 53 | except subprocess.CalledProcessError: 54 | # Possibly a compilation failure 55 | return 1, "" 56 
| return result.returncode, result.stdout.decode("utf-8") 57 | 58 | 59 | def check_compiler_warnings( 60 | clang: str, gcc: str, file: Path, flags: str, cc_timeout: int 61 | ) -> bool: 62 | """ 63 | Check if the compiler outputs any warnings that indicate 64 | undefined behaviour. 65 | 66 | Args: 67 | clang (str): Normal executable of clang. 68 | gcc (str): Normal executable of gcc. 69 | file (Path): File to compile. 70 | flags (str): (additional) flags to be used when compiling. 71 | cc_timeout (int): Timeout for the compilation in seconds. 72 | 73 | Returns: 74 | bool: True if no warnings were found. 75 | """ 76 | clang_rc, clang_output = get_cc_output(clang, file, flags, cc_timeout) 77 | gcc_rc, gcc_output = get_cc_output(gcc, file, flags, cc_timeout) 78 | 79 | if clang_rc != 0 or gcc_rc != 0: 80 | return False 81 | 82 | warnings = [ 83 | "conversions than data arguments", 84 | "incompatible redeclaration", 85 | "ordered comparison between pointer", 86 | "eliding middle term", 87 | "end of non-void function", 88 | "invalid in C99", 89 | "specifies type", 90 | "should return a value", 91 | "uninitialized", 92 | "incompatible pointer to", 93 | "incompatible integer to", 94 | "comparison of distinct pointer types", 95 | "type specifier missing", 96 | "uninitialized", 97 | "Wimplicit-int", 98 | "division by zero", 99 | "without a cast", 100 | "control reaches end", 101 | "return type defaults", 102 | "cast from pointer to integer", 103 | "useless type name in empty declaration", 104 | "no semicolon at end", 105 | "type defaults to", 106 | "too few arguments for format", 107 | "incompatible pointer", 108 | "ordered comparison of pointer with integer", 109 | "declaration does not declare anything", 110 | "expects type", 111 | "comparison of distinct pointer types", 112 | "pointer from integer", 113 | "incompatible implicit", 114 | "excess elements in struct initializer", 115 | "comparison between pointer and integer", 116 | "return type of ‘main’ is not ‘int’", 117 | "past the end of the array", 118 | "no return statement in function returning non-void", 119 | "undefined behavior", 120 | ] 121 | 122 | ws = [w for w in warnings if w in clang_output or w in gcc_output] 123 | if len(ws) > 0: 124 | logging.debug(f"Compiler warnings found: {ws}") 125 | return False 126 | 127 | return True 128 | 129 | 130 | class CCompEnv: 131 | def __init__(self) -> None: 132 | self.td: tempfile.TemporaryDirectory[str] 133 | 134 | def __enter__(self) -> Path: 135 | self.td = tempfile.TemporaryDirectory() 136 | tempfile.tempdir = self.td.name 137 | return Path(self.td.name) 138 | 139 | def __exit__( 140 | self, 141 | exc_type: Optional[type[BaseException]], 142 | exc_value: Optional[BaseException], 143 | exc_traceback: Optional[TracebackType], 144 | ) -> None: 145 | tempfile.tempdir = None 146 | 147 | 148 | def verify_with_ccomp( 149 | ccomp: str, file: Path, flags: str, compcert_timeout: int 150 | ) -> bool: 151 | """Check if CompCert is unhappy about something. 152 | 153 | Args: 154 | ccomp (str): Path to ccomp executable or name in $PATH. 155 | file (Path): File to compile. 156 | flags (str): Additional flags to use. 157 | compcert_timeout (int): Timeout in seconds. 158 | 159 | Returns: 160 | bool: True if CompCert does not complain. 
161 |     """
162 |     with CCompEnv() as tmpdir:
163 |         cmd = [
164 |             ccomp,
165 |             str(file),
166 |             "-interp",
167 |             "-fall",
168 |         ]
169 |         if flags:
170 |             cmd.extend(flags.split())
171 |         res = True
172 |         try:
173 |             utils.run_cmd(
174 |                 cmd,
175 |                 additional_env={"TMPDIR": str(tmpdir)},
176 |                 timeout=compcert_timeout,
177 |             )
178 |             res = True
179 |         except subprocess.CalledProcessError:
180 |             res = False
181 |         except subprocess.TimeoutExpired:
182 |             res = False
183 | 
184 |         logging.debug(f"CComp verification result: {res}")
185 |         return res
186 | 
187 | 
188 | def use_ub_sanitizers(
189 |     clang: str, file: Path, flags: str, cc_timeout: int, exe_timeout: int
190 | ) -> bool:
191 |     """Run clang undefined-behaviour tests.
192 | 
193 |     Args:
194 |         clang (str): Path to clang executable or name in $PATH.
195 |         file (Path): File to test.
196 |         flags (str): Additional flags to use.
197 |         cc_timeout (int): Timeout for compiling in seconds.
198 |         exe_timeout (int): Timeout for running the resulting exe in seconds.
199 | 
200 |     Returns:
201 |         bool: True if no undefined behaviour was found.
202 |     """
203 |     cmd = [clang, str(file), "-O0", "-fsanitize=undefined,address"]
204 |     if flags:
205 |         cmd.extend(flags.split())
206 | 
207 |     with CCompEnv():
208 |         with tempfile.NamedTemporaryFile(suffix=".exe", delete=False) as exe:
209 |             exe.close()
210 |             os.chmod(exe.name, 0o777)
211 |             cmd.append(f"-o{exe.name}")
212 |             result = subprocess.run(
213 |                 cmd,
214 |                 stdout=subprocess.DEVNULL,
215 |                 stderr=subprocess.DEVNULL,
216 |                 timeout=cc_timeout,
217 |             )
218 |             if result.returncode != 0:
219 |                 logging.debug(f"UB Sanitizer returncode {result.returncode}")
220 |                 if os.path.exists(exe.name):
221 |                     os.remove(exe.name)
222 |                 return False
223 |             result = subprocess.run(
224 |                 exe.name,
225 |                 stdout=subprocess.DEVNULL,
226 |                 stderr=subprocess.DEVNULL,
227 |                 timeout=exe_timeout,
228 |             )
229 |             os.remove(exe.name)
230 |             logging.debug(f"UB Sanitizer returncode {result.returncode}")
231 |             return result.returncode == 0
232 | 
233 | 
234 | def sanitize(
235 |     gcc: str,
236 |     clang: str,
237 |     ccomp: str,
238 |     file: Path,
239 |     flags: str,
240 |     cc_timeout: int = 8,
241 |     exe_timeout: int = 2,
242 |     compcert_timeout: int = 16,
243 | ) -> bool:
244 |     """Check if there is anything that could indicate undefined behaviour.
245 | 
246 |     Args:
247 |         gcc (str): Path to gcc executable or name in $PATH.
248 |         clang (str): Path to clang executable or name in $PATH.
249 |         ccomp (str): Path to ccomp executable or name in $PATH.
250 |         file (Path): File to check.
251 |         flags (str): Additional flags to use.
252 |         cc_timeout (int): Compiler timeout in seconds.
253 |         exe_timeout (int): Undefined-behaviour runtime timeout in seconds.
254 |         compcert_timeout (int): CompCert timeout in seconds.
255 | 
256 |     Returns:
257 |         bool: True if nothing indicative of undefined behaviour is found.
258 |     """
259 |     try:
260 |         return (
261 |             check_compiler_warnings(clang, gcc, file, flags, cc_timeout)
262 |             and use_ub_sanitizers(clang, file, flags, cc_timeout, exe_timeout)
263 |             and verify_with_ccomp(ccomp, file, flags, compcert_timeout)
264 |         )
265 |     except subprocess.TimeoutExpired:
266 |         return False
267 | 
268 | 
269 | # ==================== Checker ====================
270 | 
271 | 
272 | class Checker:
273 |     def __init__(self, config: utils.NestedNamespace, bldr: Builder):
274 |         self.config = config
275 |         self.builder = bldr
276 |         return
277 | 
278 |     def is_interesting_wrt_marker(self, case: utils.Case) -> bool:
279 |         """Checks if the marker is eliminated by all good compilers/settings
280 |         and not eliminated by the bad compiler/setting.
281 | 
282 |         Args:
283 |             case (utils.Case): Case to check.
284 | 
285 |         Returns:
286 |             bool: True if the marker is not eliminated by the bad setting and
287 |                 eliminated by all good settings.
288 | 
289 |         Raises:
290 |             builder.CompileError: Finding alive markers may fail.
291 |         """
292 |         # Checks if the bad_setting does include the marker and
293 |         # all the good settings do not.
294 | 
295 |         marker_prefix = utils.get_marker_prefix(case.marker)
296 |         found_in_bad = utils.find_alive_markers(
297 |             case.code, case.bad_setting, marker_prefix, self.builder
298 |         )
299 |         uninteresting = False
300 |         if case.marker not in found_in_bad:
301 |             return False
302 |         for good_setting in case.good_settings:
303 |             found_in_good = utils.find_alive_markers(
304 |                 case.code, good_setting, marker_prefix, self.builder
305 |             )
306 |             if case.marker in found_in_good:
307 |                 uninteresting = True
308 |                 break
309 |         return not uninteresting
310 | 
311 |     def is_interesting_wrt_ccc(self, case: utils.Case) -> bool:
312 |         """Check if there is a call chain between main and the marker.
313 | 
314 |         Args:
315 |             case (utils.Case): Case to check.
316 | 
317 |         Returns:
318 |             bool: True if there is a call chain between main and the marker.
319 |         """
320 |         with tempfile.NamedTemporaryFile(suffix=".c") as tf:
321 |             with open(tf.name, "w") as f:
322 |                 f.write(case.code)
323 | 
324 |             # TODO: Handle include_paths better
325 |             include_paths = utils.find_include_paths(
326 |                 self.config.llvm.sane_version, tf.name, case.bad_setting.get_flag_str()
327 |             )
328 |             cmd = [self.config.ccc, tf.name, "--from=main", f"--to={case.marker}"]
329 | 
330 |             for path in include_paths:
331 |                 cmd.append(f"--extra-arg=-isystem{path}")
332 |             try:
333 |                 result = utils.run_cmd(cmd, timeout=8)
334 |                 return (
335 |                     f"call chain exists between main -> {case.marker}".strip()
336 |                     == result.strip()
337 |                 )
338 |             except subprocess.CalledProcessError:
339 |                 logging.debug("CCC failed")
340 |                 return False
341 |             except subprocess.TimeoutExpired:
342 |                 logging.debug("CCC timed out")
343 |                 return False
344 | 
345 |     def is_interesting_with_static_globals(self, case: utils.Case) -> bool:
346 |         """Checks if the given case is still interesting, even when making all
347 |         variables and functions static.
348 | 
349 |         Args:
350 |             case (utils.Case): The case to check
351 | 
352 |         Returns:
353 |             bool: If the case is interesting when using static globals
354 | 
355 |         Raises:
356 |             builder.CompileError: Getting the assembly may fail.
357 | """ 358 | 359 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 360 | with open(tf.name, "w") as new_cfile: 361 | print(case.code, file=new_cfile) 362 | 363 | # TODO: Handle include_paths better 364 | annotate_with_static(Path(tf.name), case.bad_setting.get_flag_cmd()) 365 | 366 | with open(tf.name, "r") as annotated_file: 367 | static_code = annotated_file.read() 368 | 369 | asm_bad = utils.get_asm_str(static_code, case.bad_setting, self.builder) 370 | uninteresting = False 371 | if case.marker not in asm_bad: 372 | uninteresting = True 373 | for good_setting in case.good_settings: 374 | asm_good = utils.get_asm_str(static_code, good_setting, self.builder) 375 | if case.marker in asm_good: 376 | uninteresting = True 377 | break 378 | return not uninteresting 379 | 380 | def _empty_marker_code_str(self, case: utils.Case) -> str: 381 | marker_prefix = utils.get_marker_prefix(case.marker) 382 | p = re.compile(rf"void {marker_prefix}(.*)\((void|)\);(.*)") 383 | empty_body_code = "" 384 | for line in case.code.split("\n"): 385 | m = p.match(line) 386 | if m: 387 | empty_body_code += ( 388 | "\n" 389 | + rf"void {marker_prefix}{m.group(1)}({m.group(2)}){{}}" 390 | + "\n" 391 | + rf"{m.group(3)}" 392 | ) 393 | else: 394 | empty_body_code += f"\n{line}" 395 | 396 | return empty_body_code 397 | 398 | def is_interesting_with_empty_marker_bodies(self, case: utils.Case) -> bool: 399 | """Check if `case.code` does not exhibit undefined behaviour, 400 | compile errors or makes CompCert unhappy. 401 | To compile, all markers need to get an empty body, thus the name. 402 | 403 | Args: 404 | case (utils.Case): Case to check 405 | 406 | Returns: 407 | bool: True if the code passes the 'sanity-check' 408 | """ 409 | 410 | empty_body_code = self._empty_marker_code_str(case) 411 | 412 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 413 | with open(tf.name, "w") as f: 414 | f.write(empty_body_code) 415 | 416 | return sanitize( 417 | self.config.gcc.sane_version, 418 | self.config.llvm.sane_version, 419 | self.config.ccomp, 420 | Path(tf.name), 421 | case.bad_setting.get_flag_str(), 422 | ) 423 | 424 | def is_interesting(self, case: utils.Case, preprocess: bool = True) -> bool: 425 | """Check if a code passes all the 'interestingness'-checks. 426 | Preprocesses code by default to prevent surprises when preprocessing 427 | later. 428 | 429 | Args: 430 | self: 431 | case (utils.Case): Case to check. 432 | preprocess (bool): Whether or not to preprocess the code 433 | 434 | Returns: 435 | bool: True if the case passes all 'interestingness'-checks 436 | 437 | Raises: 438 | builder.CompileError 439 | """ 440 | # TODO: Optimization potential. Less calls to clang etc. 441 | # when tests are combined. 
442 | 
443 |         if preprocess:
444 |             code_pp = preprocessing.preprocess_csmith_code(
445 |                 case.code,
446 |                 utils.get_marker_prefix(case.marker),
447 |                 case.bad_setting,
448 |                 self.builder,
449 |             )
450 |             case_cpy = copy.deepcopy(case)
451 |             if code_pp:
452 |                 case_cpy.code = code_pp
453 |             case = case_cpy
454 |         # Taking advantage of short-circuit logic
455 |         return (
456 |             self.is_interesting_wrt_marker(case)
457 |             and self.is_interesting_wrt_ccc(case)
458 |             and self.is_interesting_with_static_globals(case)
459 |             and self.is_interesting_with_empty_marker_bodies(case)
460 |         )
461 | 
462 | 
463 | def copy_flag(
464 |     frm: utils.CompilerSetting, to: list[utils.CompilerSetting]
465 | ) -> list[utils.CompilerSetting]:
466 |     res: list[utils.CompilerSetting] = []
467 |     for setting in to:
468 |         cpy = copy.deepcopy(setting)
469 |         cpy.additional_flags = frm.additional_flags
470 |         res.append(cpy)
471 |     return res
472 | 
473 | 
474 | def override_bad(
475 |     case: utils.Case, override_settings: list[utils.CompilerSetting]
476 | ) -> list[utils.Case]:
477 |     res = []
478 |     bsettings = copy_flag(case.bad_setting, override_settings)
479 |     for s in bsettings:
480 |         cpy = copy.deepcopy(case)
481 |         cpy.bad_setting = s
482 |         res.append(cpy)
483 |     return res
484 | 
485 | 
486 | def override_good(
487 |     case: utils.Case, override_settings: list[utils.CompilerSetting]
488 | ) -> utils.Case:
489 |     gsettings = copy_flag(case.good_settings[0], override_settings)
490 |     cpy = copy.deepcopy(case)
491 |     cpy.good_settings = gsettings
492 |     return cpy
493 | 
494 | 
495 | if __name__ == "__main__":
496 |     config, args = utils.get_config_and_parser(parsers.checker_parser())
497 | 
498 |     patchdb = PatchDB(Path(config.patchdb))
499 |     _, llvm_repo = ccbuilder.get_compiler_info("llvm", Path(config.repodir))
500 |     _, gcc_repo = ccbuilder.get_compiler_info("gcc", Path(config.repodir))
501 |     bldr = Builder(
502 |         Path(config.cachedir),
503 |         gcc_repo,
504 |         llvm_repo,
505 |         patchdb,
506 |         args.cores,
507 |         logdir=Path(config.logdir),
508 |     )
509 |     chkr = Checker(config, bldr)
510 | 
511 |     file = Path(args.file)
512 | 
513 |     bad_settings = []
514 |     good_settings = []
515 | 
516 |     if args.check_pp:
517 |         file = Path(args.file).absolute()
518 |         case = utils.Case.from_file(config, file)
519 |         # preprocess file
520 |         pp_code = preprocessing.preprocess_csmith_code(
521 |             case.code,
522 |             utils.get_marker_prefix(case.marker),
523 |             case.bad_setting,
524 |             bldr,
525 |         )
526 | 
527 |         if pp_code:
528 |             case.code = pp_code
529 |         else:
530 |             print("Could not preprocess code. Exiting")
Exiting") 531 | exit(1) 532 | # Taking advantage of shortciruit logic 533 | a = chkr.is_interesting_wrt_marker(case) 534 | b = chkr.is_interesting_wrt_ccc(case) 535 | c = chkr.is_interesting_with_static_globals(case) 536 | d = chkr.is_interesting_with_empty_marker_bodies(case) 537 | print(f"Marker:\t{a}") 538 | print(f"CCC:\t{b}") 539 | print(f"Static:\t{c}") 540 | print(f"Empty:\t{d}") 541 | if not all((a, b, c, d)): 542 | exit(1) 543 | exit(0) 544 | 545 | if args.scenario: 546 | scenario = utils.Scenario.from_file(config, Path(args.scenario)) 547 | bad_settings = scenario.target_settings 548 | good_settings = scenario.attacker_settings 549 | elif args.interesting_settings: 550 | bad_settings, good_settings = utils.get_interesting_settings( 551 | config, args.interesting_settings 552 | ) 553 | 554 | if args.bad_settings: 555 | bad_settings = utils.get_compiler_settings( 556 | config, args.bad_settings, args.bad_settings_default_opt_levels 557 | ) 558 | 559 | if args.good_settings: 560 | good_settings = utils.get_compiler_settings( 561 | config, args.good_settings, args.good_settings_default_opt_levels 562 | ) 563 | 564 | cases_to_test: list[utils.Case] = [] 565 | check_marker: bool = False 566 | if args.bad_settings and args.good_settings or args.interesting_settings: 567 | # Override all options defined in the case 568 | scenario = utils.Scenario(bad_settings, good_settings) 569 | if tarfile.is_tarfile(file): 570 | case = utils.Case.from_file(config, file) 571 | code = case.code 572 | args.marker = case.marker 573 | if not bad_settings: 574 | bad_settings = copy_flag(case.scenario.target_settings[0], bad_settings) 575 | if not good_settings: 576 | good_settings = copy_flag( 577 | case.scenario.attacker_settings[0], good_settings 578 | ) 579 | else: 580 | with open(file, "r") as f: 581 | code = f.read() 582 | check_marker = True 583 | 584 | cases_to_test = [ 585 | utils.Case(code, args.marker, bs, good_settings, scenario, None, None, None) 586 | for bs in bad_settings 587 | ] 588 | 589 | elif args.bad_settings and not args.good_settings: 590 | # TODO: Get flags from somewhere. For now, 591 | # take the ones from the first config. 592 | case = utils.Case.from_file(config, file) 593 | 594 | cases_to_test = override_bad(case, bad_settings) 595 | 596 | elif not args.bad_settings and args.good_settings: 597 | case = utils.Case.from_file(config, file) 598 | 599 | cases_to_test = [override_good(case, good_settings)] 600 | 601 | else: 602 | cases_to_test = [utils.Case.from_file(config, file)] 603 | 604 | if args.marker is not None: 605 | for cs in cases_to_test: 606 | cs.marker = args.marker 607 | elif check_marker: 608 | raise Exception("You need to specify a marker") 609 | 610 | if not cases_to_test: 611 | print("No cases arrived. 
612 |         exit(2)
613 | 
614 |     if args.check_reduced:
615 |         for cs in cases_to_test:
616 |             if not cs.reduced_code:
617 |                 raise Exception("Case does not include reduced code!")
618 |             cs.code = cs.reduced_code
619 | 
620 |     if all(
621 |         chkr.is_interesting(
622 |             c, preprocess=(not (args.dont_preprocess or args.check_reduced))
623 |         )
624 |         for c in cases_to_test
625 |     ):
626 |         sys.exit(0)
627 |     else:
628 |         sys.exit(1)
629 | 
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import os
3 | import sqlite3
4 | import sys
5 | import zlib
6 | from dataclasses import dataclass
7 | from functools import cache, reduce
8 | from itertools import chain
9 | from pathlib import Path
10 | from typing import ClassVar, Optional
11 | 
12 | from ccbuilder import get_compiler_project
13 | 
14 | import utils
15 | from utils import Case, CompilerSetting, NestedNamespace, Scenario
16 | 
17 | 
18 | class DatabaseError(Exception):
19 |     pass
20 | 
21 | 
22 | @dataclass
23 | class ColumnInfo:
24 |     name: str
25 |     typename: str
26 |     constraints: str = ""
27 | 
28 |     def __str__(self) -> str:
29 |         return f"{self.name} {self.typename} {self.constraints}"
30 | 
31 | 
32 | RowID = int
33 | 
34 | 
35 | class CaseDatabase:
36 |     config: NestedNamespace
37 |     con: sqlite3.Connection
38 |     tables: ClassVar[dict[str, list[ColumnInfo]]] = {
39 |         "cases": [
40 |             ColumnInfo("case_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
41 |             ColumnInfo("code_sha1", "", "REFERENCES code(code_sha1) NOT NULL"),
42 |             ColumnInfo("marker", "TEXT", "NOT NULL"),
43 |             ColumnInfo("bad_setting_id", "INTEGER", "NOT NULL"),
44 |             ColumnInfo("scenario_id", "INTEGER", "NOT NULL"),
45 |             ColumnInfo("bisection", "CHAR(40)"),
46 |             ColumnInfo("reduced_code_sha1", "CHAR(40)"),
47 |             ColumnInfo("timestamp", "FLOAT", "NOT NULL"),
48 |             ColumnInfo(
49 |                 "UNIQUE(code_sha1, marker, bad_setting_id, scenario_id, bisection, reduced_code_sha1) "
50 |                 "ON CONFLICT REPLACE",
51 |                 "",
52 |             ),
53 |         ],
54 |         "code": [
55 |             ColumnInfo("code_sha1", "CHAR(40)", "PRIMARY KEY"),
56 |             ColumnInfo("compressed_code", "BLOB"),
57 |         ],
58 |         "reported_cases": [
59 |             ColumnInfo("case_id", "", "REFERENCES cases(case_id) PRIMARY KEY"),
60 |             ColumnInfo("massaged_code_sha1", "", "REFERENCES code(code_sha1)"),
61 |             ColumnInfo("bug_report_link", "TEXT"),
62 |             ColumnInfo("fixed_by", "CHAR(40)"),
63 |         ],
64 |         "compiler_setting": [
65 |             ColumnInfo("compiler_setting_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
66 |             ColumnInfo("compiler", "TEXT", "NOT NULL"),
67 |             ColumnInfo("rev", "CHAR(40)", "NOT NULL"),
68 |             ColumnInfo("opt_level", "TEXT", "NOT NULL"),
69 |             ColumnInfo("additional_flags", "TEXT"),
70 |         ],
71 |         "good_settings": [
72 |             ColumnInfo("case_id", "", "REFERENCES cases(case_id) NOT NULL"),
73 |             ColumnInfo(
74 |                 "compiler_setting_id",
75 |                 "",
76 |                 "REFERENCES compiler_setting(compiler_setting_id) NOT NULL",
77 |             ),
78 |         ],
79 |         "scenario_ids": [
80 |             ColumnInfo("scenario_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
81 |         ],
82 |         "scenario": [
83 |             ColumnInfo(
84 |                 "scenario_id", "", "REFERENCES scenario_ids(scenario_id) PRIMARY KEY"
85 |             ),
86 |             ColumnInfo("generator_version", "INTEGER", "NOT NULL"),
87 |             ColumnInfo("bisector_version", "INTEGER", "NOT NULL"),
88 |             ColumnInfo("reducer_version", "INTEGER", "NOT NULL"),
89 |             ColumnInfo("instrumenter_version", "INTEGER", "NOT NULL"),
90 |             ColumnInfo("csmith_min",
"INTEGER", "NOT NULL"), 91 | ColumnInfo("csmith_max", "INTEGER", "NOT NULL"), 92 | ColumnInfo("reduce_program", "TEXT", "NOT NULL"), 93 | ], 94 | "scenario_attacker": [ 95 | ColumnInfo( 96 | "scenario_id", "", "REFERENCES scenario_ids(scenario_id) NOT NULL" 97 | ), 98 | ColumnInfo( 99 | "compiler_setting_id", 100 | "", 101 | "REFERENCES compiler_setting(compiler_setting_id) NOT NULL", 102 | ), 103 | ], 104 | "scenario_target": [ 105 | ColumnInfo( 106 | "scenario_id", "", "REFERENCES scenario_ids(scenario_id) NOT NULL" 107 | ), 108 | ColumnInfo( 109 | "compiler_setting_id", 110 | "", 111 | "REFERENCES compiler_setting(compiler_setting_id) NOT NULL", 112 | ), 113 | ], 114 | "timing": [ 115 | ColumnInfo("case_id", "", "REFERENCES cases(case_id) PRIMARY KEY"), 116 | ColumnInfo("generator_time", "FLOAT"), 117 | ColumnInfo("generator_try_count", "INTEGER"), 118 | ColumnInfo("bisector_time", "FLOAT"), 119 | ColumnInfo("bisector_steps", "INTEGER"), 120 | ColumnInfo("reducer_time", "FLOAT"), 121 | ], 122 | } 123 | 124 | def __init__(self, config: NestedNamespace, db_path: Path) -> None: 125 | self.config = config 126 | self.con = sqlite3.connect(db_path, timeout=60) 127 | self.create_tables() 128 | 129 | def create_tables(self) -> None: 130 | def make_query(table: str, columns: list[ColumnInfo]) -> str: 131 | column_decl = ",".join(str(column) for column in columns) 132 | return f"CREATE TABLE IF NOT EXISTS {table} (" + column_decl + ")" 133 | 134 | for table, columns in CaseDatabase.tables.items(): 135 | self.con.execute(make_query(table, columns)) 136 | 137 | def record_code(self, code: str) -> str: 138 | """Inserts `code` into the database's `code`-table and returns its 139 | sha1-hash which serves as a key. 140 | 141 | Args: 142 | code (str): code to be inserted 143 | 144 | Returns: 145 | str: SHA1 of code which serves as the key. 146 | """ 147 | # Take the hash before the compression to handle changes 148 | # in the compression library. 149 | code_sha1 = hashlib.sha1(code.encode("utf-8")).hexdigest() 150 | compressed_code = zlib.compress(code.encode("utf-8"), level=9) 151 | 152 | self.con.execute( 153 | "INSERT OR IGNORE INTO code VALUES (?, ?)", (code_sha1, compressed_code) 154 | ) 155 | return code_sha1 156 | 157 | def get_code_from_id(self, code_id: str) -> Optional[str]: 158 | """Get code from the database if it exists. 159 | 160 | Args: 161 | code_id (str): SHA1 of code 162 | 163 | Returns: 164 | Optional[str]: Saved code if it exists, else None 165 | """ 166 | 167 | res = self.con.execute( 168 | "SELECT compressed_code FROM code WHERE code_sha1 == ?", (code_id,) 169 | ).fetchone() 170 | if res: 171 | code = zlib.decompress(res[0]).decode("utf-8") 172 | return code 173 | else: 174 | return None 175 | 176 | def record_reported_case( 177 | self, 178 | case_id: RowID, 179 | massaged_code: Optional[str], 180 | bug_report_link: Optional[str], 181 | fixed_by: Optional[str], 182 | ) -> None: 183 | """Save additional information for an already saved case. 184 | 185 | Args: 186 | case_id (RowID): case_id 187 | massaged_code (Optional[str]): adapted reduced code for better reduction. 188 | bug_report_link (Optional[str]): Link to the bug report. 189 | fixed_by (Optional[str]): If the case is already fixed. 
190 | 
191 |         Returns:
192 |             None:
193 |         """
194 |         code_sha1 = None
195 |         if massaged_code:
196 |             code_sha1 = self.record_code(massaged_code)
197 | 
198 |         with self.con:
199 |             self.con.execute(
200 |                 "INSERT OR REPLACE INTO reported_cases VALUES (?,?,?,?)",
201 |                 (
202 |                     case_id,
203 |                     code_sha1,
204 |                     bug_report_link,
205 |                     fixed_by,
206 |                 ),
207 |             )
208 | 
209 |     def record_case(self, case: Case) -> RowID:
210 |         """Save a case to the DB and get its ID.
211 | 
212 |         Args:
213 |             case (Case): Case to save.
214 | 
215 |         Returns:
216 |             RowID: ID of case.
217 |         """
218 | 
219 |         bad_setting_id = self.record_compiler_setting(case.bad_setting)
220 |         with self.con:
221 |             good_setting_ids = [
222 |                 self.record_compiler_setting(good_setting)
223 |                 for good_setting in case.good_settings
224 |             ]
225 |         scenario_id = self.record_scenario(case.scenario)
226 | 
227 |         with self.con:
228 |             cur = self.con.cursor()
229 |             bisection = case.bisection
230 |             reduced_code_sha1 = (
231 |                 self.record_code(case.reduced_code) if case.reduced_code else None
232 |             )
233 | 
234 |             code_sha1 = self.record_code(case.code)
235 | 
236 |             cur.execute(
237 |                 "INSERT INTO cases VALUES (NULL,?,?,?,?,?,?,?)",
238 |                 (
239 |                     code_sha1,
240 |                     case.marker,
241 |                     bad_setting_id,
242 |                     scenario_id,
243 |                     bisection,
244 |                     reduced_code_sha1,
245 |                     case.timestamp,
246 |                 ),
247 |             )
248 |             if not cur.lastrowid:
249 |                 raise DatabaseError("No last row id was returned")
250 |             case_id = RowID(cur.lastrowid)
251 |             cur.executemany(
252 |                 "INSERT INTO good_settings VALUES (?,?)",
253 |                 ((case_id, gs_id) for gs_id in good_setting_ids),
254 |             )
255 | 
256 |         return case_id
257 | 
258 |     def record_compiler_setting(self, compiler_setting: CompilerSetting) -> RowID:
259 |         """Save a compiler setting to the DB and get its ID.
260 | 
261 |         Args:
262 |             self:
263 |             compiler_setting (CompilerSetting): compiler setting to save.
264 | 
265 |         Returns:
266 |             RowID: ID of saved compiler setting.
267 |         """
268 |         if s_id := self.get_compiler_setting_id(compiler_setting):
269 |             return s_id
270 |         with self.con:
271 |             cur = self.con.cursor()
272 |             cur.execute(
273 |                 "INSERT INTO compiler_setting VALUES (NULL,?,?,?,?)",
274 |                 (
275 |                     compiler_setting.compiler_project.to_string(),
276 |                     compiler_setting.rev,
277 |                     compiler_setting.opt_level,
278 |                     "|".join(compiler_setting.get_flag_cmd()),  # "|"-joined to match get_compiler_setting_id and get_compiler_setting_from_id
279 |                 ),
280 |             )
281 |             if not cur.lastrowid:
282 |                 raise DatabaseError("No last row id was returned")
283 |             ns_id = RowID(cur.lastrowid)
284 | 
285 |         return ns_id
286 | 
287 |     def record_scenario(self, scenario: Scenario) -> RowID:
288 |         """Save a scenario to the DB and get its ID.
289 | 
290 |         Args:
291 |             scenario (Scenario): Scenario to save.
292 | 
293 |         Returns:
294 |             RowID: ID of `scenario`
295 |         """
296 |         if s_id := self.get_scenario_id(scenario):
297 |             return s_id
298 |         target_ids = [
299 |             self.record_compiler_setting(target_setting)
300 |             for target_setting in scenario.target_settings
301 |         ]
302 |         attacker_ids = [
303 |             self.record_compiler_setting(attacker_setting)
304 |             for attacker_setting in scenario.attacker_settings
305 |         ]
306 |         with self.con:
307 |             ns_id = self.get_new_scenario_id(no_commit=True)
308 | 
309 |             def insert_settings(table: str, settings: list[RowID]) -> None:
310 |                 self.con.executemany(
311 |                     f"INSERT INTO {table} VALUES (?,?)",
312 |                     ((ns_id, s) for s in settings),
313 |                 )
314 | 
315 |             insert_settings("scenario_target", target_ids)
316 |             insert_settings("scenario_attacker", attacker_ids)
317 | 
318 |             self.con.execute(
319 |                 "INSERT INTO scenario VALUES (?,?,?,?,?,?,?,?)",
320 |                 (
321 |                     ns_id,
322 |                     scenario.generator_version,
323 |                     scenario.bisector_version,
324 |                     scenario.reducer_version,
325 |                     scenario.instrumenter_version,
326 |                     self.config.csmith.min_size,
327 |                     self.config.csmith.max_size,
328 |                     os.path.basename(self.config.creduce),
329 |                 ),
330 |             )
331 |         return ns_id
332 | 
333 |     def get_new_scenario_id(self, no_commit: bool) -> RowID:
334 |         """Get a new scenario ID.
335 | 
336 |         Args:
337 |             no_commit (bool): Don't commit the change.
338 | 
339 |         Returns:
340 |             RowID: New scenario id
341 |         """
342 |         cur = self.con.cursor()
343 |         cur.execute("INSERT INTO scenario_ids VALUES (NULL)")
344 |         if not no_commit:
345 |             self.con.commit()
346 |         if not cur.lastrowid:
347 |             raise DatabaseError("No row id was returned")
348 |         return RowID(cur.lastrowid)
349 | 
350 |     def get_scenario_id(self, scenario: Scenario) -> Optional[RowID]:
351 |         """See if there is already an ID for `scenario` in the database
352 |         and return it if so.
353 | 
354 |         Args:
355 |             scenario (Scenario): scenario to get an ID for
356 | 
357 |         Returns:
358 |             Optional[RowID]: RowID if the scenario exists
359 |         """
360 | 
361 |         def get_scenario_ids(id_: RowID, table: str, id_str: str) -> set[int]:
362 |             cursor = self.con.cursor()
363 |             return set(
364 |                 s_id[0]
365 |                 for s_id in cursor.execute(
366 |                     f"SELECT scenario_id FROM {table} WHERE {id_str}== ? ",
367 |                     (id_,),
368 |                 ).fetchall()
369 |             )
370 | 
371 |         # Get all scenarios which have the same versions
372 |         candidate_ids: set[RowID] = set(
373 |             [
374 |                 r[0]
375 |                 for r in self.con.execute(
376 |                     "SELECT scenario_id FROM scenario"
377 |                     " WHERE generator_version == ?"
378 |                     " AND bisector_version == ?"
379 |                     " AND reducer_version == ?"
380 |                     " AND instrumenter_version == ?"
381 |                     " AND csmith_min == ?"
382 |                     " AND csmith_max == ?"
383 |                     " AND reduce_program == ?",
384 |                     (
385 |                         scenario.generator_version,
386 |                         scenario.bisector_version,
387 |                         scenario.reducer_version,
388 |                         scenario.instrumenter_version,
389 |                         self.config.csmith.min_size,
390 |                         self.config.csmith.max_size,
391 |                         os.path.basename(self.config.creduce),  # record_scenario stores only the basename
392 |                     ),
393 |                 ).fetchall()
394 |             ]
395 |         )
396 | 
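        # Worked example: if the first target setting occurs in scenarios
        # {1, 3} and the second in {3, 4}, only scenario 3 can match both,
        # since reduce(lambda x, y: x & y, [{1, 3}, {3, 4}]) == {3}. The
        # candidate set below is narrowed the same way, once for the target
        # settings and once more for the attacker settings.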
383 | " AND reduce_program == ?", 384 | ( 385 | scenario.generator_version, 386 | scenario.bisector_version, 387 | scenario.reducer_version, 388 | scenario.instrumenter_version, 389 | self.config.csmith.min_size, 390 | self.config.csmith.max_size, 391 | self.config.creduce, 392 | ), 393 | ).fetchall() 394 | ] 395 | ) 396 | 397 | # Get compiler setting ids of scenario 398 | target_ids: list[RowID] = [] 399 | for setting in scenario.target_settings: 400 | if not (s_id := self.get_compiler_setting_id(setting)): 401 | return None 402 | target_ids.append(s_id) 403 | 404 | attacker_ids: list[RowID] = [] 405 | for setting in scenario.attacker_settings: 406 | if not (s_id := self.get_compiler_setting_id(setting)): 407 | return None 408 | attacker_ids.append(s_id) 409 | 410 | # Compare compiler setting IDs 411 | candidate_ids = candidate_ids & reduce( 412 | lambda x, y: x & y, 413 | ( 414 | get_scenario_ids(target_id, "scenario_target", "compiler_setting_id") 415 | for target_id in target_ids 416 | ), 417 | ) 418 | if not candidate_ids: 419 | return None 420 | 421 | candidate_ids = reduce( 422 | lambda x, y: x & y, 423 | chain( 424 | ( 425 | get_scenario_ids( 426 | attacker_id, "scenario_attacker", "compiler_setting_id" 427 | ) 428 | for attacker_id in attacker_ids 429 | ), 430 | (candidate_ids,), 431 | ), 432 | ) 433 | 434 | if not candidate_ids: 435 | return None 436 | return RowID(next(candidate_ids.__iter__())) 437 | 438 | def get_compiler_setting_id( 439 | self, compiler_setting: CompilerSetting 440 | ) -> Optional[RowID]: 441 | """Get the ID of a given CompilerSetting, if it is in the DB. 442 | 443 | Args: 444 | compiler_setting (CompilerSetting): CompilerSetting to get the id of. 445 | 446 | Returns: 447 | Optional[RowID]: The ID, if found. 448 | """ 449 | result = self.con.execute( 450 | "SELECT compiler_setting_id " 451 | "FROM compiler_setting " 452 | "WHERE compiler == ? AND rev == ? AND opt_level == ? AND additional_flags == ?", 453 | ( 454 | compiler_setting.compiler_project.to_string(), 455 | compiler_setting.rev, 456 | compiler_setting.opt_level, 457 | "|".join(compiler_setting.get_flag_cmd()), 458 | ), 459 | ).fetchone() 460 | 461 | if not result: 462 | return None 463 | s_id = RowID(result[0]) 464 | 465 | return s_id 466 | 467 | @cache 468 | def get_compiler_setting_from_id( 469 | self, compiler_setting_id: int 470 | ) -> Optional[CompilerSetting]: 471 | """Get a compiler setting from a compiler_setting_id, if the ID exists. 472 | 473 | Args: 474 | self: 475 | compiler_setting_id (int): Compiler setting ID to get the compiler setting of 476 | 477 | Returns: 478 | Optional[CompilerSetting]: Compiler setting with ID `compiler_setting_id` 479 | """ 480 | 481 | res = self.con.execute( 482 | "SELECT compiler, rev, opt_level, additional_flags" 483 | " FROM compiler_setting" 484 | " WHERE compiler_setting_id == ?", 485 | (compiler_setting_id,), 486 | ).fetchone() 487 | 488 | if not res: 489 | return None 490 | 491 | compiler, rev, opt_level, flags = res 492 | return CompilerSetting( 493 | get_compiler_project(compiler), 494 | rev, 495 | opt_level, 496 | flags.split("|"), 497 | ) 498 | 499 | @cache 500 | def get_scenario_from_id(self, scenario_id: RowID) -> Optional[Scenario]: 501 | """Get a scenario from a specified ID. 
502 | 
503 |         Args:
504 |             scenario_id (RowID): ID of scenario to get
505 | 
506 |         Returns:
507 |             Optional[Scenario]: Scenario corresponding to RowID
508 |         """
509 | 
510 |         def get_settings(
511 |             self: CaseDatabase, table: str, s_id: int
512 |         ) -> list[CompilerSetting]:
513 | 
514 |             ids = self.con.execute(
515 |                 f"SELECT compiler_setting_id FROM {table} WHERE scenario_id == ?",
516 |                 (s_id,),
517 |             ).fetchall()
518 |             pre = [self.get_compiler_setting_from_id(row[0]) for row in ids]
519 | 
520 |             # For the type checker. It can't possibly know about the constraints
521 |             # in the DB.
522 |             settings = [c for c in pre if c]
523 | 
524 |             return settings
525 | 
526 |         target_settings = get_settings(self, "scenario_target", scenario_id)
527 |         attacker_settings = get_settings(self, "scenario_attacker", scenario_id)
528 |         scenario = Scenario(target_settings, attacker_settings)
529 | 
530 |         res = self.con.execute(
531 |             "SELECT generator_version, bisector_version, reducer_version, instrumenter_version FROM scenario WHERE scenario_id == ?",
532 |             (scenario_id,),
533 |         ).fetchone()
534 | 
535 |         if not res:
536 |             return None
537 | 
538 |         generator_version, bisector_version, reducer_version, instrumenter_version = res
539 | 
540 |         scenario.generator_version = generator_version
541 |         scenario.bisector_version = bisector_version
542 |         scenario.reducer_version = reducer_version
543 |         scenario.instrumenter_version = instrumenter_version
544 | 
545 |         return scenario
546 | 
547 |     def get_case_from_id(self, case_id: RowID) -> Optional[Case]:
548 |         """Get a case from the database based on its ID.
549 |         Note: the case will *NOT* replace reduced code with
550 |         massaged code.
551 | 
552 |         Args:
553 |             case_id (RowID): ID of wanted case
554 | 
555 |         Returns:
556 |             Optional[Case]: Returns case if it exists
557 |         """
558 |         if not (
559 |             res := self.con.execute(
560 |                 "SELECT * FROM cases WHERE case_id == ?", (case_id,)
561 |             ).fetchone()
562 |         ):
563 |             return None
564 | 
565 |         (
566 |             _,
567 |             code_sha1,
568 |             marker,
569 |             bad_setting_id,
570 |             scenario_id,
571 |             bisection,
572 |             reduced_code_sha1,
573 |             timestamp,
574 |         ) = res
575 | 
576 |         good_settings_ids = self.con.execute(
577 |             "SELECT compiler_setting_id FROM good_settings WHERE case_id == ?",
578 |             (case_id,),
579 |         ).fetchall()
580 | 
581 |         code = self.get_code_from_id(code_sha1)
582 |         if not code:
583 |             raise DatabaseError("Missing original code")
584 | 
585 |         reduced_code = self.get_code_from_id(reduced_code_sha1)
586 | 
587 |         scenario = self.get_scenario_from_id(scenario_id)
588 | 
589 |         # Get Settings
590 |         bad_setting = self.get_compiler_setting_from_id(bad_setting_id)
591 |         pre_good_settings = [
592 |             self.get_compiler_setting_from_id(row[0]) for row in good_settings_ids
593 |         ]
594 | 
595 |         # There should never be a problem here (TM) because of the DB
596 |         # FOREIGN KEY constraints.
597 | good_settings = [gs for gs in pre_good_settings if gs] 598 | if not bad_setting: 599 | raise DatabaseError("Bad setting id was not found") 600 | if not scenario: 601 | raise DatabaseError("Scenario id was not found") 602 | 603 | case = Case( 604 | code, 605 | marker, 606 | bad_setting, 607 | good_settings, 608 | scenario, 609 | reduced_code=reduced_code, 610 | bisection=bisection, 611 | path=None, 612 | timestamp=timestamp, 613 | ) 614 | 615 | return case 616 | 617 | def get_case_from_id_or_die(self, case_id: RowID) -> Case: 618 | pre_check_case = self.get_case_from_id(case_id) 619 | if not pre_check_case: 620 | print("No case with this ID.", file=sys.stderr) 621 | exit(1) 622 | else: 623 | case = pre_check_case 624 | return case 625 | 626 | def update_case(self, case_id: RowID, case: Case) -> None: 627 | """Update case with ID `case_id` with the values of `case` 628 | 629 | Args: 630 | case_id (str): ID of case to update 631 | case (Case): Case to get the info from 632 | 633 | Returns: 634 | None: 635 | """ 636 | code_sha1 = self.record_code(case.code) 637 | 638 | if case.reduced_code: 639 | reduced_code_sha1: Optional[str] = self.record_code(case.reduced_code) 640 | else: 641 | reduced_code_sha1 = None 642 | 643 | bad_setting_id = self.record_compiler_setting(case.bad_setting) 644 | scenario_id = self.record_scenario(case.scenario) 645 | 646 | with self.con: 647 | # REPLACE is just an alias for INSERT OR REPLACE 648 | self.con.execute( 649 | "INSERT OR REPLACE INTO cases VALUES (?,?,?,?,?,?,?,?)", 650 | ( 651 | case_id, 652 | code_sha1, 653 | case.marker, 654 | bad_setting_id, 655 | scenario_id, 656 | case.bisection, 657 | reduced_code_sha1, 658 | case.timestamp, 659 | ), 660 | ) 661 | 662 | def record_timing( 663 | self, 664 | case_id: RowID, 665 | generator_time: Optional[float] = None, 666 | generator_try_count: Optional[int] = None, 667 | bisector_time: Optional[float] = None, 668 | bisector_steps: Optional[int] = None, 669 | reducer_time: Optional[float] = None, 670 | ) -> None: 671 | """Record timing metric for `case_id` 672 | 673 | Args: 674 | case_id (RowID): 675 | generator_time (Optional[float]): Time the generator took 676 | generator_try_count (Optional[int]): How often the generator tried 677 | bisector_time (Optional[float]): How long the bisector took 678 | bisector_steps (Optional[int]): How many steps the bisector made 679 | reducer_time (Optional[float]): How long the reducer took 680 | 681 | Returns: 682 | None: 683 | """ 684 | 685 | with self.con: 686 | self.con.execute( 687 | "INSERT OR REPLACE INTO timing VALUES(?,?,?,?,?,?)", 688 | ( 689 | case_id, 690 | generator_time, 691 | generator_try_count, 692 | bisector_time, 693 | bisector_steps, 694 | reducer_time, 695 | ), 696 | ) 697 | 698 | def get_timing_from_id( 699 | self, case_id: RowID 700 | ) -> tuple[ 701 | Optional[float], Optional[int], Optional[float], Optional[int], Optional[float] 702 | ]: 703 | """Get the timing entries for a case. 
704 | 705 | Args: 706 | self: 707 | case_id (RowID): case_id 708 | 709 | Returns: 710 | tuple[ 711 | Optional[float], Optional[int], Optional[float], Optional[int], Optional[float] 712 | ]: Generator time, generator try count, bisector time, bisector steps, reducer time 713 | """ 714 | 715 | res = self.con.execute( 716 | "SELECT * FROM timing WHERE case_id == ?", (case_id,) 717 | ).fetchone() 718 | if not res: 719 | return (None, None, None, None, None) 720 | _, g_time, gtc, b_time, b_steps, r_time = res 721 | return g_time, gtc, b_time, b_steps, r_time 722 | 723 | def get_report_info_from_id( 724 | self, case_id: RowID 725 | ) -> tuple[Optional[str], Optional[str], Optional[str]]: 726 | """Get report infos for case_id. 727 | The order is massaged_code, link, fixed_by commit. 728 | 729 | Args: 730 | self: 731 | case_id (RowID): case_id 732 | 733 | Returns: 734 | tuple[Optional[str], Optional[str], Optional[str]]: 735 | """ 736 | 737 | res = self.con.execute( 738 | "SELECT * FROM reported_cases WHERE case_id == ?", (case_id,) 739 | ).fetchone() 740 | if not res: 741 | return (None, None, None) 742 | 743 | _, massaged_code_sha1, link, fixed_by = res 744 | 745 | massaged_code = self.get_code_from_id(massaged_code_sha1) 746 | return massaged_code, link, fixed_by 747 | -------------------------------------------------------------------------------- /parsers.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import multiprocessing as mp 3 | from typing import Any, Sequence 4 | 5 | 6 | def config_parser( 7 | expected_entries: Sequence[tuple[Any, ...]] 8 | ) -> argparse.ArgumentParser: 9 | parser = argparse.ArgumentParser(add_help=False) 10 | 11 | for _, path, desc in expected_entries: 12 | parser.add_argument("--" + ".".join(path), help=desc) 13 | parser.add_argument("--config", type=str, help="Path to config.json") 14 | 15 | parser.add_argument( 16 | "-ll", 17 | "--log-level", 18 | type=str, 19 | choices=("debug", "info", "warning", "error", "critical"), 20 | help="Log level", 21 | ) 22 | 23 | parser.add_argument( 24 | "--cores", 25 | help="Amount of build cores to use. Defaults to all.", 26 | type=int, 27 | default=mp.cpu_count(), 28 | ) 29 | 30 | return parser 31 | 32 | 33 | def builder_parser() -> argparse.ArgumentParser: 34 | parser = argparse.ArgumentParser(add_help=False) 35 | 36 | parser.add_argument( 37 | "-c", "--compiler", help="Which compiler project to use", nargs=1, type=str 38 | ) 39 | 40 | parser.add_argument( 41 | "-r", 42 | "--revision", 43 | help="Which revision of the compiler project to use. 
Use 'trunk' to use the latest commit",
44 |         nargs="+",
45 |         type=str,
46 |     )
47 | 
48 |     parser.add_argument(
49 |         "--build-releases", help="Build release versions", action="store_true"
50 |     )
51 | 
52 |     parser.add_argument(
53 |         "--add-patches",
54 |         help="Which patches to apply in addition to the ones found in patchDB",
55 |         nargs="+",
56 |         type=str,
57 |     )
58 | 
59 |     parser.add_argument(
60 |         "-f",
61 |         "--force",
62 |         help="Force build even if patch combo is known to be bad",
63 |         action="store_true",
64 |     )
65 |     return parser
66 | 
67 | 
68 | def patcher_parser() -> argparse.ArgumentParser:
69 |     parser = argparse.ArgumentParser(add_help=False)
70 | 
71 |     mut_excl_group = parser.add_mutually_exclusive_group(required=True)
72 | 
73 |     # ====================
74 |     mut_excl_group.add_argument(
75 |         "--find-range",
76 |         help="Try to find the range where a patch is required",
77 |         action="store_true",
78 |     )
79 | 
80 |     parser.add_argument(
81 |         "-c",
82 |         "--compiler",
83 |         help="Which compiler project to use",
84 |         nargs=1,
85 |         type=str,
86 |         required=True,
87 |     )
88 | 
89 |     parser.add_argument(
90 |         "-pr",
91 |         "--patchable-revision",
92 |         help="Which revision is patchable with the commit specified in --patches",
93 |         type=str,
94 |     )
95 | 
96 |     parser.add_argument(
97 |         "--patches",
98 |         nargs="*",
99 |         help="Which patch(es) to apply.",
100 |         type=str,
101 |     )
102 |     # ====================
103 |     mut_excl_group.add_argument(
104 |         "--find-introducer",
105 |         help="Try to find the introducer commit of a build failure.",
106 |         action="store_true",
107 |     )
108 | 
109 |     parser.add_argument(
110 |         "-br", "--broken-revision", help="Which revision is broken", type=str
111 |     )
112 |     # ====================
113 | 
114 |     return parser
115 | 
116 | 
117 | def generator_parser() -> argparse.ArgumentParser:
118 |     parser = argparse.ArgumentParser(add_help=False)
119 | 
120 |     parser.add_argument(
121 |         "-a", "--amount", help="Amount of cases to generate.", type=int, default=0
122 |     )
123 | 
124 |     parser.add_argument(
125 |         "--interesting",
126 |         help="If the generated case should be an interesting one.",
127 |         action=argparse.BooleanOptionalAction,
128 |         default=True,
129 |     )
130 | 
131 |     parser.add_argument(
132 |         "-t",
133 |         "--targets",
134 |         help="Project name and revision of compiler to use.",
135 |         nargs="+",
136 |         type=str,
137 |     )
138 | 
139 |     parser.add_argument(
140 |         "-tdol",
141 |         "--targets-default-opt-levels",
142 |         help="Default optimization levels for the target to be checked against.",
143 |         nargs="+",
144 |         default=[],
145 |         type=str,
146 |     )
147 | 
148 |     parser.add_argument(
149 |         "-ac",
150 |         "--additional-compilers",
151 |         help="Additional compiler to compare the target against.",
152 |         nargs="*",
153 |         type=str,
154 |     )
155 | 
156 |     parser.add_argument(
157 |         "-acdol",
158 |         "--additional-compilers-default-opt-levels",
159 |         help="Default optimization levels for the additional compilers to be checked against.",
160 |         nargs="+",
161 |         default=[],
162 |         type=str,
163 |     )
164 | 
165 |     parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str)
166 | 
167 |     parser.add_argument(
168 |         "-p",
169 |         "--parallel",
170 |         help="Run the search in parallel for --parallel processes. Works only in combination with --interesting.",
Works only in combination with --interesting.", 171 | type=int, 172 | ) 173 | 174 | parser.add_argument( 175 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 176 | ) 177 | 178 | return parser 179 | 180 | 181 | def checker_parser() -> argparse.ArgumentParser: 182 | parser = argparse.ArgumentParser(add_help=False) 183 | 184 | group = parser.add_mutually_exclusive_group() 185 | 186 | parser.add_argument( 187 | "-f", "--file", help="Which file to work on.", type=str, required=True 188 | ) 189 | 190 | parser.add_argument("-m", "--marker", help="Marker to check for.", type=str) 191 | 192 | group.add_argument( 193 | "-s", 194 | "--scenario", 195 | help="Which scenario to use as testing replacement.", 196 | type=str, 197 | ) 198 | 199 | group.add_argument( 200 | "-is", 201 | "--interesting-settings", 202 | help="Which interesting settings to use.", 203 | type=str, 204 | ) 205 | 206 | parser.add_argument( 207 | "-bad", 208 | "--bad-settings", 209 | help="Settings which are supposed to *not* eliminate the marker", 210 | nargs="+", 211 | type=str, 212 | ) 213 | 214 | parser.add_argument( 215 | "-bsdol", 216 | "--bad-settings-default-opt-levels", 217 | help="Default optimization levels for the bad-settings to be checked against.", 218 | nargs="+", 219 | default=[], 220 | type=str, 221 | ) 222 | 223 | parser.add_argument( 224 | "-good", 225 | "--good-settings", 226 | help="Settings which are supposed to eliminate the marker", 227 | nargs="+", 228 | type=str, 229 | ) 230 | 231 | parser.add_argument( 232 | "-gsdol", 233 | "--good-settings-default-opt-levels", 234 | help="Default optimization levels for the good-settings to be checked against.", 235 | nargs="+", 236 | default=[], 237 | type=str, 238 | ) 239 | 240 | parser.add_argument( 241 | "-cr", 242 | "--check-reduced", 243 | help="Instead of checking the original file, check the latest reduced code.", 244 | action="store_true", 245 | ) 246 | 247 | parser.add_argument( 248 | "--check-pp", 249 | help="Run the preprocessed version through the checker.", 250 | action="store_true", 251 | ) 252 | 253 | parser.add_argument( 254 | "--dont-preprocess", 255 | help="Force no preprocessing", 256 | action="store_true", 257 | ) 258 | 259 | return parser 260 | 261 | 262 | def reducer_parser() -> argparse.ArgumentParser: 263 | parser = argparse.ArgumentParser(add_help=False) 264 | 265 | parser.add_argument("-f", "--file", help="Which file to work on.", type=str) 266 | 267 | parser.add_argument( 268 | "-g", 269 | "--generate", 270 | help="Whether or not to generate and reduce cases", 271 | action="store_true", 272 | ) 273 | 274 | parser.add_argument( 275 | "--work-through", 276 | help="Look at all cases found in directory specified in --output-directory and reduce them when they are not.", 277 | action="store_true", 278 | ) 279 | 280 | parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str) 281 | 282 | parser.add_argument( 283 | "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0 284 | ) 285 | 286 | parser.add_argument( 287 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 288 | ) 289 | 290 | parser.add_argument( 291 | "-t", 292 | "--targets", 293 | help="Project name and revision of compiler to use.", 294 | nargs="+", 295 | type=str, 296 | ) 297 | 298 | parser.add_argument( 299 | "-tdol", 300 | "--targets-default-opt-levels", 301 | help="Default optimization levels for the target to be checked against.", 302 | nargs="+", 303 | default=[], 304 | 
305 |     )
306 | 
307 |     parser.add_argument(
308 |         "-ac",
309 |         "--additional-compilers",
310 |         help="Additional compiler to compare the target against.",
311 |         nargs="*",
312 |         type=str,
313 |     )
314 | 
315 |     parser.add_argument(
316 |         "-acdol",
317 |         "--additional-compilers-default-opt-levels",
318 |         help="Default optimization levels for the additional compilers to be checked against.",
319 |         nargs="+",
320 |         default=[],
321 |         type=str,
322 |     )
323 | 
324 |     parser.add_argument(
325 |         "--force",
326 |         help="Force another reduction even if one already exists.",
327 |         action="store_true",
328 |     )
329 | 
330 |     parser.add_argument(
331 |         "-rr",
332 |         "--re-reduce",
333 |         help="Re-reduce the last reduced code",
334 |         action="store_true",
335 |     )
336 | 
337 |     return parser
338 | 
339 | 
340 | def bisector_parser() -> argparse.ArgumentParser:
341 |     parser = argparse.ArgumentParser(add_help=False)
342 | 
343 |     parser.add_argument("-f", "--file", help="Which file to work on.", type=str)
344 | 
345 |     parser.add_argument(
346 |         "-d", "--output-directory", help="Where the cases should be saved to.", type=str
347 |     )
348 | 
349 |     parser.add_argument(
350 |         "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0
351 |     )
352 | 
353 |     parser.add_argument(
354 |         "-g",
355 |         "--generate",
356 |         help="Whether or not to generate, reduce and bisect cases",
357 |         action="store_true",
358 |     )
359 | 
360 |     parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str)
361 | 
362 |     parser.add_argument(
363 |         "-t",
364 |         "--targets",
365 |         help="Project name and revision of compiler to use.",
366 |         nargs="+",
367 |         type=str,
368 |     )
369 | 
370 |     parser.add_argument(
371 |         "-tdol",
372 |         "--targets-default-opt-levels",
373 |         help="Default optimization levels for the target to be checked against.",
374 |         nargs="+",
375 |         default=[],
376 |         type=str,
377 |     )
378 | 
379 |     parser.add_argument(
380 |         "-ac",
381 |         "--additional-compilers",
382 |         help="Additional compiler to compare the target against.",
383 |         nargs="*",
384 |         type=str,
385 |     )
386 | 
387 |     parser.add_argument(
388 |         "-acdol",
389 |         "--additional-compilers-default-opt-levels",
390 |         help="Default optimization levels for the additional compilers to be checked against.",
391 |         nargs="+",
392 |         default=[],
393 |         type=str,
394 |     )
395 | 
396 |     parser.add_argument(
397 |         "--work-through",
398 |         help="Look at all cases found in the directory specified in --output-directory and bisect those that have not been bisected yet.",
399 |         action="store_true",
400 |     )
401 | 
402 |     parser.add_argument(
403 |         "--force",
404 |         help="Force another bisection even if one already exists",
405 |         action="store_true",
406 |     )
407 | 
408 |     parser.add_argument(
409 |         "--reducer",
410 |         help="If the generated case should be reduced or not.",
411 |         action=argparse.BooleanOptionalAction,
412 |         default=True,
413 |     )
414 | 
415 |     return parser
416 | 
417 | 
418 | def debugtool_parser() -> argparse.ArgumentParser:
419 |     parser = argparse.ArgumentParser(add_help=False)
420 | 
421 |     parser.add_argument("-f", "--file", help="Which file to work on.", type=str)
422 | 
423 |     parser.add_argument(
424 |         "-crb",
425 |         "--clean-reduced-bisections",
426 |         help="Delete all files related to reduction and bisection",
427 |         action="store_true",
428 |     )
429 | 
430 |     parser.add_argument(
431 |         "--reduced",
432 |         help="Work on reduced files. (where applicable)",
433 |         action="store_true",
434 |     )
435 | 
436 |     parser.add_argument(
437 |         "--preprocessed",
438 |         help="Work on preprocessed files. 
(where applicable)", 439 | action="store_true", 440 | ) 441 | 442 | parser.add_argument( 443 | "--asm", 444 | help="Get assembly for a case asmgood.s and asmbad.s", 445 | action="store_true", 446 | ) 447 | 448 | parser.add_argument( 449 | "--static", 450 | help="Get code where functions and global variables are static in static.c", 451 | action="store_true", 452 | ) 453 | 454 | # TODO: help information for --viz 455 | parser.add_argument("--viz", help="", action="store_true") 456 | 457 | parser.add_argument("--preprocess-code", help="", action="store_true") 458 | 459 | parser.add_argument( 460 | "-di", "--diagnose", help="Run general tests.", action="store_true" 461 | ) 462 | 463 | parser.add_argument( 464 | "--empty-marker-code", 465 | help="Get empty marker body code in empty_body.c", 466 | action="store_true", 467 | ) 468 | 469 | return parser 470 | 471 | 472 | def main_parser() -> argparse.ArgumentParser: 473 | parser = argparse.ArgumentParser(add_help=False) 474 | 475 | subparser = parser.add_subparsers(title="sub", dest="sub") 476 | run_parser = subparser.add_parser("run", help="Let DEAD search for cases.") 477 | 478 | run_parser.add_argument( 479 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 480 | ) 481 | 482 | run_parser.add_argument( 483 | "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0 484 | ) 485 | 486 | run_parser.add_argument( 487 | "-s", "--scenario", help="Which scenario to work on.", type=str 488 | ) 489 | run_parser.add_argument( 490 | "-t", 491 | "--targets", 492 | help="Project name and revision of compiler to use.", 493 | nargs="+", 494 | type=str, 495 | ) 496 | 497 | run_parser.add_argument( 498 | "-tdol", 499 | "--targets-default-opt-levels", 500 | help="Default optimization levels for the target to be checked against.", 501 | nargs="+", 502 | default=[], 503 | type=str, 504 | ) 505 | 506 | run_parser.add_argument( 507 | "-ac", 508 | "--additional-compilers", 509 | help="Additional compiler to compare the target against.", 510 | nargs="*", 511 | type=str, 512 | ) 513 | 514 | run_parser.add_argument( 515 | "-acdol", 516 | "--additional-compilers-default-opt-levels", 517 | help="Default optimization levels for the additional compilers to be checked against.", 518 | nargs="+", 519 | default=[], 520 | type=str, 521 | ) 522 | 523 | run_parser.add_argument( 524 | "--reducer", 525 | help="If the generated case should be reduced or not.", 526 | action=argparse.BooleanOptionalAction, 527 | default=None, 528 | ) 529 | 530 | run_parser.add_argument( 531 | "--bisector", 532 | help="If the generated case should be bisected or not.", 533 | action=argparse.BooleanOptionalAction, 534 | default=True, 535 | ) 536 | 537 | run_parser.add_argument( 538 | "-pg", 539 | "--parallel-generation", 540 | action=argparse.BooleanOptionalAction, 541 | default=True, 542 | help="Run the case generation part in parallel. This will disable timing for the generation part.", 543 | ) 544 | 545 | run_parser.add_argument( 546 | "--update-trunk-after-X-hours", 547 | help="Enable automatic updating target compilers which are at the current trunk after X hours of running.", 548 | metavar="X", 549 | type=int, 550 | ) 551 | 552 | absorb_parser = subparser.add_parser( 553 | "absorb", help="Read cases outside of the database into the database." 
554 |     )
555 | 
556 |     absorb_parser.add_argument(
557 |         "absorb_object",
558 |         metavar="DIR|FILE",
559 |         help="Directory or file to read .tar cases from into the database.",
560 |     )
561 | 
562 |     report_parser = subparser.add_parser("report", help="Generate a report for a case.")
563 | 
564 |     report_parser.add_argument(
565 |         "case_id",
566 |         metavar="CASE_ID",
567 |         type=int,
568 |         help="Generate a bug report for the given id.",
569 |     )
570 | 
571 |     report_parser.add_argument(
572 |         "--pull",
573 |         help="Pull the repo to check against upstream.",
574 |         action=argparse.BooleanOptionalAction,
575 |         default=False,
576 |     )
577 | 
578 |     tofile_parser = subparser.add_parser(
579 |         "tofile",
580 |         help="Save a case from the database into a file. This is a LOSSY operation.",
581 |     )
582 | 
583 |     tofile_parser.add_argument(
584 |         "case_id",
585 |         metavar="CASE_ID",
586 |         type=int,
587 |         help="Case to get a .tar from ",
588 |     )
589 | 
590 |     rereduce_parser = subparser.add_parser(
591 |         "rereduce",
592 |         help="Reduce code from outside the database w.r.t. a specified case.",
593 |     )
594 | 
595 |     rereduce_parser.add_argument(
596 |         "case_id",
597 |         metavar="CASE_ID",
598 |         type=int,
599 |         help="Case to work with.",
600 |     )
601 | 
602 |     rereduce_parser.add_argument(
603 |         "code_path",
604 |         metavar="CODE_PATH",
605 |         type=str,
606 |         help="Path to code to rereduce",
607 |     )
608 | 
609 |     diagnose_parser = subparser.add_parser(
610 |         "diagnose", help="Run tests on a specified case and print a summary."
611 |     )
612 | 
613 |     diagnose_parser.add_argument(
614 |         "-ci",
615 |         "--case-id",
616 |         metavar="CASE_ID",
617 |         type=int,
618 |         help="Case to work with.",
619 |     )
620 | 
621 |     diagnose_parser.add_argument(
622 |         "--file",
623 |         metavar="PATH",
624 |         type=str,
625 |         help="Path to case to work with",
626 |     )
627 | 
628 |     diagnose_parser.add_argument(
629 |         "-t",
630 |         "--targets",
631 |         help="Option to override the bad setting/target of the case. Only the first specified target will be used!",
632 |         nargs="+",
633 |         type=str,
634 |     )
635 | 
636 |     diagnose_parser.add_argument(
637 |         "-tdol",
638 |         "--targets-default-opt-levels",
639 |         help="Default optimization levels for the target that override the targets of the case.",
640 |         nargs="+",
641 |         default=[],
642 |         type=str,
643 |     )
644 | 
645 |     diagnose_parser.add_argument(
646 |         "-ac",
647 |         "--additional-compilers",
648 |         help="Override the attacking compilers of the case.",
649 |         nargs="*",
650 |         type=str,
651 |     )
652 | 
653 |     diagnose_parser.add_argument(
654 |         "-acdol",
655 |         "--additional-compilers-default-opt-levels",
656 |         help="Default optimization levels for the overriding attacking compilers.",
657 |         nargs="+",
658 |         default=[],
659 |         type=str,
660 |     )
661 | 
662 |     diagnose_parser.add_argument("-s", "--scenario", help="", type=str)
663 | 
664 |     checkreduced_parser = subparser.add_parser(
665 |         "checkreduced",
666 |         help="Check if code outside of the database passes the checks of a specified case.",
667 |     )
668 | 
669 |     checkreduced_parser.add_argument(
670 |         "case_id",
671 |         metavar="CASE_ID",
672 |         type=int,
673 |         help="Case to work with.",
674 |     )
675 | 
676 |     checkreduced_parser.add_argument(
677 |         "code_path",
678 |         metavar="CODE_PATH",
679 |         type=str,
680 |         help="Path to code to checkreduced",
681 |     )
682 | 
683 |     cache_parser = subparser.add_parser("cache", help="Perform actions on the cache.")
684 | 
685 |     cache_parser.add_argument(
686 |         "what",
687 |         choices=("clean", "stats"),
688 |         type=str,
689 |         help="What you want to do with the cache. `clean` will search and remove all unfinished cache entries; `stats` will print some statistics about the cache.",
690 |     )
691 | 
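    # For example, a hypothetical invocation "./main.py cache stats" parses
    # into Namespace(sub="cache", what="stats") via the subparsers declared
    # above (dest="sub").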
692 |     asm_parser = subparser.add_parser(
693 |         "asm",
694 |         help="Save assembly outputs (-S) for the good and bad settings for each code found in a case.",
695 |     )
696 |     asm_parser.add_argument(
697 |         "case_id",
698 |         metavar="CASE_ID",
699 |         type=int,
700 |         help="Case to work with.",
701 |     )
702 | 
703 |     set_parser = subparser.add_parser("set", help="Set values of a case.")
704 |     get_parser = subparser.add_parser(
705 |         "get", help="Print values of a case to the command line."
706 |     )
707 | 
708 |     get_parser.add_argument(
709 |         "what",
710 |         choices=("link", "fixed", "mcode", "rcode", "ocode", "bisection", "marker"),
711 |         type=str,
712 |         help="What you want to get. `ocode` is the original code. `rcode` is the reduced code. `mcode` is the massaged code. `fixed` is the commit the case was fixed with and `link` is the link to the bug report.",
713 |     )
714 | 
715 |     get_parser.add_argument(
716 |         "case_id",
717 |         metavar="CASE_ID",
718 |         type=int,
719 |         help="Case from which to get what you chose",
720 |     )
721 | 
722 |     set_parser.add_argument(
723 |         "what",
724 |         choices=("link", "fixed", "mcode", "rcode", "ocode", "bisection"),
725 |         type=str,
726 |         help="What you want to set. `ocode` is the original code. `rcode` is the reduced code. `mcode` is the massaged code. `fixed` is the commit the case was fixed with and `link` is the link to the bug report. `ocode`, `rcode` and `mcode` expect files; `link`, `fixed` and `bisection` expect strings.",
727 |     )
728 | 
729 |     set_parser.add_argument(
730 |         "case_id",
731 |         metavar="CASE_ID",
732 |         type=int,
733 |         help="Case to set the value of",
734 |     )
735 | 
736 |     set_parser.add_argument(
737 |         "var",
738 |         metavar="VAR",
739 |         type=str,
740 |         help="What to set the chosen value to. Expected input may change based on what you are setting.",
741 |     )
742 | 
743 |     build_parser = subparser.add_parser(
744 |         "build", help="Build a specific compiler version."
745 |     )
746 | 
747 |     build_parser.add_argument(
748 |         "project",
749 |         choices=("gcc", "llvm", "clang"),
750 |         type=str,
751 |         help="Which compiler to build",
752 |     )
753 |     build_parser.add_argument(
754 |         "rev", nargs="+", type=str, help="Which revision(s)/commit(s) to build"
755 |     )
756 |     build_parser.add_argument(
757 |         "--force",
758 |         action=argparse.BooleanOptionalAction,
759 |         help="Whether or not to force another build.",
760 |     )
761 | 
762 |     build_parser.add_argument(
763 |         "--add-patches",
764 |         help="Which patches to apply in addition to the ones found in patchDB",
765 |         nargs="+",
766 |         type=str,
767 |     )
768 | 
769 |     reduce_parser = subparser.add_parser(
770 |         "reduce", help="Reduce the initially found code of a case."
771 |     )
772 | 
773 |     reduce_parser.add_argument(
774 |         "case_id", nargs="+", type=int, help="Which case to reduce"
775 |     )
776 |     reduce_parser.add_argument(
777 |         "--force",
778 |         action=argparse.BooleanOptionalAction,
779 |         help="Whether or not to force another reduction. This will override the old reduced code.",
780 |     )
781 | 
782 |     bisect_parser = subparser.add_parser(
783 |         "bisect", help="Find the bisection commit for a specified case."
784 |     )
785 | 
786 |     bisect_parser.add_argument(
787 |         "case_id", nargs="+", type=int, help="Which case to bisect"
788 |     )
789 |     bisect_parser.add_argument(
790 |         "--force",
791 |         action=argparse.BooleanOptionalAction,
792 |         help="Whether or not to force another bisection. 
This will override the old bisection.", 793 | ) 794 | 795 | edit_parser = subparser.add_parser("edit", help="Open DEADs config in $EDITOR.") 796 | 797 | unreported_parser = subparser.add_parser( 798 | "unreported", help="List cases which have not been reported or fixed." 799 | ) 800 | 801 | unreported_parser.add_argument( 802 | "--id-only", 803 | action="store_true", 804 | help="Print only the case ids. Useful for scripting.", 805 | ) 806 | 807 | unrep_mut_ex_red = unreported_parser.add_mutually_exclusive_group() 808 | unrep_mut_ex_red.add_argument( 809 | "--not-reduced", 810 | action="store_true", 811 | help="If the listed cases should NOT be reduced", 812 | ) 813 | unrep_mut_ex_red.add_argument( 814 | "--reduced", action="store_true", help="If the listed cases should be reduced" 815 | ) 816 | 817 | unrep_mut_ex = unreported_parser.add_mutually_exclusive_group() 818 | # I'd call the options --gcc, --clang etc. but 819 | # running ./main.py unreported --gcc will complain about ambiguity 820 | # wrt to --gcc.repo etc. from the config. 821 | # However when running ./main.py unreported --gcc.repo it is an unknown option 822 | # as these flags are only parsed directly after ./main.py. 823 | unrep_mut_ex.add_argument( 824 | "--gcc-only", action="store_true", help="Print only GCC related bisections." 825 | ) 826 | unrep_mut_ex.add_argument( 827 | "--llvm-only", 828 | action="store_true", 829 | help="Print only LLVM related bisections. Same as --clang-only.", 830 | ) 831 | unrep_mut_ex.add_argument( 832 | "--clang-only", 833 | action="store_true", 834 | help="Print only clang related bisections. Same as --llvm-only.", 835 | ) 836 | 837 | unreported_parser.add_argument( 838 | "--OX-only", 839 | type=str, 840 | metavar="OPT_LEVEL", 841 | help="Print only bisections with OPT_LEVEL as bad setting.", 842 | ) 843 | 844 | unreported_parser.add_argument( 845 | "--good-version", 846 | type=str, 847 | metavar="REV", 848 | help="Print only bisections which have REV as a good compiler matching the opt level of the bad compiler.", 849 | ) 850 | 851 | reported_parser = subparser.add_parser( 852 | "reported", help="List cases which have been reported." 853 | ) 854 | 855 | reported_parser.add_argument( 856 | "--id-only", 857 | action="store_true", 858 | help="Print only the case ids. Useful for scripting.", 859 | ) 860 | 861 | rep_mut_ex = reported_parser.add_mutually_exclusive_group() 862 | rep_mut_ex.add_argument( 863 | "--gcc-only", action="store_true", help="Print only GCC related bisections." 864 | ) 865 | rep_mut_ex.add_argument( 866 | "--llvm-only", 867 | action="store_true", 868 | help="Print only LLVM related bisections. Same as --clang-only.", 869 | ) 870 | rep_mut_ex.add_argument( 871 | "--clang-only", 872 | action="store_true", 873 | help="Print only clang related bisections. Same as --llvm-only.", 874 | ) 875 | 876 | reported_parser.add_argument( 877 | "--good-settings", 878 | action="store_true", 879 | help="Print the good settings of the cases.", 880 | ) 881 | 882 | findby_parser = subparser.add_parser( 883 | "findby", help="Find case IDs given only a part of a case." 884 | ) 885 | findby_parser.add_argument( 886 | "what", 887 | type=str, 888 | choices=("link", "case", "code", "fixed"), 889 | ) 890 | 891 | findby_parser.add_argument( 892 | "var", 893 | type=str, 894 | metavar="VAR", 895 | help="Is a string, when choosing link or fixed, is a path when choosing case or code.", 896 | ) 897 | 898 | return parser 899 | --------------------------------------------------------------------------------
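A minimal sketch of how the `code` table defined in database.py round-trips source code. The in-memory database and the example C snippet are illustrative assumptions; the schema, hashing and compression mirror CaseDatabase.record_code and get_code_from_id:

    import hashlib
    import sqlite3
    import zlib

    con = sqlite3.connect(":memory:")  # assumption: throwaway DB for the demo
    con.execute(
        "CREATE TABLE IF NOT EXISTS code "
        "(code_sha1 CHAR(40) PRIMARY KEY, compressed_code BLOB)"
    )
    code = "int main(void) { return 0; }"
    # Hash before compressing, as record_code does, so the key stays stable
    # across changes in the compression library.
    code_sha1 = hashlib.sha1(code.encode("utf-8")).hexdigest()
    con.execute(
        "INSERT OR IGNORE INTO code VALUES (?, ?)",
        (code_sha1, zlib.compress(code.encode("utf-8"), level=9)),
    )
    (blob,) = con.execute(
        "SELECT compressed_code FROM code WHERE code_sha1 == ?", (code_sha1,)
    ).fetchone()
    assert zlib.decompress(blob).decode("utf-8") == code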