├── mypy.ini ├── requirements.txt ├── VERSIONS.py ├── .dockerignore ├── .gitignore ├── test_preprocessed.py ├── callchain_checker ├── src │ ├── CMakeLists.txt │ ├── CallChainChecker.hpp │ ├── CallChainCheckerTool.cpp │ └── CallChainChecker.cpp └── CMakeLists.txt ├── LICENSE ├── .gitlab-ci.yml ├── patches ├── gcc-libsanitizer.sh ├── gcc-simple-object-declaration.patch ├── llvm-MicrosoftDemangleNodes-missing-includes.patch ├── llvm-MicrosoftDemangleNodes.h-fix-includes.patch ├── gcc-fix-simple-object-decl-and-use-in-gcc-lto.patch ├── llvm-GCOpenMPRuntime.cpp-lambda-issues.patch └── gcc-ustat.patch ├── git-hooks └── pre-commit ├── .github └── workflows │ └── lint-python.yaml ├── run_parallel.sh ├── Dockerfile ├── dockerconfig.json ├── preprocessing.py ├── init.py ├── bugs.md ├── reducer.py ├── README.md ├── generator.py ├── bisector.py ├── checker.py ├── database.py └── parsers.py /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | strict = True 3 | 4 | files = 5 | *.py 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dead-instrumenter==0.1.0 2 | ccbuilder==0.0.9 3 | requests>=2.27.1 4 | -------------------------------------------------------------------------------- /VERSIONS.py: -------------------------------------------------------------------------------- 1 | instrumenter_version = 0 2 | generator_version = 0 3 | bisector_version = 0 4 | reducer_version = 0 5 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | ./gcc 2 | ./llvm-project 3 | ./logs 4 | ./compiler_cache 5 | ./docker_storage 6 | 7 | ./dce_instrumenter/build/ 8 | ./callchain_checker/build/ 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.tar 3 | *.c 4 | *.s 5 | *.o 6 | *.log 7 | *.sqlite3 8 | *.bak 9 | platform.info 10 | callchain_checker/build/ 11 | dce_instrumenter/build/ 12 | gcc/ 13 | llvm-project/ 14 | compiler_cache/ 15 | logs/ 16 | docker_storage/ 17 | -------------------------------------------------------------------------------- /test_preprocessed.py: -------------------------------------------------------------------------------- 1 | import preprocessing 2 | 3 | 4 | def test_extern_removal() -> None: 5 | with open("./gcc_preprocessed_code.c", "r") as f: 6 | lines = f.read().split("\n") 7 | 8 | with open("./preprocessed_oracle.c", "r") as f: 9 | oracle = f.read() 10 | assert oracle == preprocessing.preprocess_lines(lines).strip() 11 | -------------------------------------------------------------------------------- /callchain_checker/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_library(ccc-lib CallChainChecker.cpp) 2 | target_include_directories(ccc-lib PUBLIC ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) 3 | target_link_libraries(ccc-lib PUBLIC clang-cpp Boost::headers) 4 | 5 | add_executable(ccc CallChainCheckerTool.cpp) 6 | target_include_directories(ccc PRIVATE ${LLVM_INCLUDE_DIRS} ${CLANG_INCLUDE_DIRS}) 7 | target_link_libraries(ccc PUBLIC ccc-lib LLVM) 8 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright 2022 Yann Girsberger, Theodoros Theodoridis 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 4 | 5 | http://www.apache.org/licenses/LICENSE-2.0 6 | 7 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 8 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: "python:3.9" 2 | 3 | before_script: 4 | - python --version 5 | - pip3 install -r requirements_hook.txt 6 | 7 | stages: 8 | - Printing Environment 9 | - Format Checking 10 | - Static Type Checking 11 | - Static Analysis 12 | 13 | print_env: 14 | stage: Printing Environment 15 | script: 16 | - pwd 17 | - ls -l 18 | 19 | black: 20 | stage: Format Checking 21 | script: 22 | - python3 -m black --check *.py 23 | 24 | mypy: 25 | stage: Static Type Checking 26 | script: 27 | - python3 -m mypy --strict *.py 28 | 29 | pytype: 30 | stage: Static Analysis 31 | script: 32 | - python3 -m pytype *.py 33 | -------------------------------------------------------------------------------- /patches/gcc-libsanitizer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | OLD=libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 3 | NEW=libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp 4 | 5 | INPLACE="-i" 6 | if [ "$1" = "--check" ]; then 7 | INPLACE="" 8 | fi 9 | 10 | 11 | if [ -f "$OLD" ]; then 12 | #https://stackoverflow.com/a/15966279 13 | sed '/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/{s//\/\/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/;h};${x;/./{x;q0};x;q1}' \ 14 | $INPLACE $OLD > /dev/null 15 | elif [ -f "$NEW" ]; then 16 | sed '/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/{s//\/\/CHECK_SIZE_AND_OFFSET(ipc_perm, mode)/;h};${x;/./{x;q0};x;q1}' \ 17 | $INPLACE $NEW > /dev/null 18 | else 19 | exit 1 20 | fi 21 | -------------------------------------------------------------------------------- /git-hooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | CHANGED_FILES=$(git diff --cached --name-only --diff-filter=ACM -- '*.py') 4 | REF_SHEBANG="#!/usr/bin/env python3" 5 | 6 | if [[ -n "$CHANGED_FILES" ]]; then 7 | for f in $CHANGED_FILES; do 8 | SHEBANG=$(head -n 1 "$f" | awk '{ gsub(/^[ \t\n]+|[ \t\n]+$/, ""); print}') 9 | if [[ ( -x $f ) && ( "$SHEBANG" != "$REF_SHEBANG" ) ]]; then 10 | echo "Shebang of $f doesn't match $REF_SHEBANG" 11 | exit 1 12 | fi 13 | done 14 | 15 | isort --profile black $CHANGED_FILES 16 | black --target-version py39 --safe $CHANGED_FILES 17 | git add $CHANGED_FILES 18 | 19 | mypy --strict *.py 20 | pytype -j 10 *.py 21 | 22 | cat $CHANGED_FILES | awk '/pdb.set_trace/ || /import pdb/ || /reveal_type/ { print $0; f=1 } END { exit f }' 23 | 24 | fi 25 | -------------------------------------------------------------------------------- /patches/gcc-simple-object-declaration.patch: -------------------------------------------------------------------------------- 1 | From
9d1ebb15d6cbabe767ae28a86c15f63a1ba2851f Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Wed, 2 Feb 2022 11:19:26 +0100 4 | Subject: [PATCH] simple-object declaration 5 | 6 | --- 7 | include/simple-object.h | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/include/simple-object.h b/include/simple-object.h 11 | index db72f86de17..82f4722782a 100644 12 | --- a/include/simple-object.h 13 | +++ b/include/simple-object.h 14 | @@ -203,7 +203,7 @@ simple_object_release_write (simple_object_write *); 15 | extern const char * 16 | simple_object_copy_lto_debug_sections (simple_object_read *src_object, 17 | const char *dest, 18 | - int *err); 19 | + int *err, int rename); 20 | 21 | #ifdef __cplusplus 22 | } 23 | -- 24 | 2.34.1 25 | 26 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainChecker.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | namespace ccc { 6 | 7 | struct CallPair { 8 | const std::string Caller; 9 | const std::string Callee; 10 | 11 | CallPair(const std::string &Caller, const std::string &Callee) 12 | : Caller{Caller}, Callee{Callee} {} 13 | }; 14 | 15 | bool callChainExists(const std::vector<CallPair> &Calls, std::string From, 16 | std::string To); 17 | 18 | class CallChainCollector 19 | : public clang::ast_matchers::MatchFinder::MatchCallback { 20 | public: 21 | CallChainCollector(std::vector<CallPair> &Calls) : Calls{Calls} {} 22 | void registerMatchers(clang::ast_matchers::MatchFinder &Finder); 23 | void 24 | run(const clang::ast_matchers::MatchFinder::MatchResult &Result) override; 25 | 26 | private: 27 | std::vector<CallPair> &Calls; 28 | }; 29 | 30 | } // namespace ccc 31 | -------------------------------------------------------------------------------- /patches/llvm-MicrosoftDemangleNodes-missing-includes.patch: -------------------------------------------------------------------------------- 1 | From 0ca677077d621fa1d3ca1f0334b71a154ca1e35c Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 13:28:30 +0100 4 | Subject: [PATCH] MicrosoftDemangleNodes missing includes 5 | 6 | --- 7 | llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 2 ++ 8 | 1 file changed, 2 insertions(+) 9 | 10 | diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 11 | index da9d9d5bfdc0..3d47471f0ef0 100644 12 | --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 13 | +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 14 | @@ -16,6 +16,8 @@ 15 | #include "llvm/Demangle/DemangleConfig.h" 16 | #include "llvm/Demangle/StringView.h" 17 | #include 18 | +#include 19 | +#include 20 | 21 | namespace llvm { 22 | namespace itanium_demangle { 23 | -- 24 | 2.34.1 25 | 26 | -------------------------------------------------------------------------------- /.github/workflows/lint-python.yaml: -------------------------------------------------------------------------------- 1 | on: [ push, pull_request ] 2 | name: Lint Python and Format 3 | jobs: 4 | mypy: 5 | runs-on: ubuntu-latest 6 | container: 7 | image: python:3.10 8 | steps: 9 | - uses: actions/checkout@v2 10 | - run: pip3 install -r requirements.txt mypy 11 | - run: mypy --install-types --non-interactive 12 | - name: run mypy 13 | run: python3 -m mypy --strict *.py 14 | black: 15 | runs-on: ubuntu-latest 16 | container: 17 | image: python:3.10 18 | steps: 19 | - uses: actions/checkout@v2 20 | - run:
pip3 install -r requirements.txt 'black<=22.12.0' 21 | - name: Check formatting with black 22 | run: python3 -m black --check *.py 23 | pytest: 24 | runs-on: ubuntu-latest 25 | container: 26 | image: python:3.10 27 | steps: 28 | - uses: actions/checkout@v2 29 | - run: pip3 install -r requirements.txt pytest 30 | - name: run pytest 31 | run: python3 -m pytest 32 | -------------------------------------------------------------------------------- /patches/llvm-MicrosoftDemangleNodes.h-fix-includes.patch: -------------------------------------------------------------------------------- 1 | From 902e0e717fcb1796f540d4fea95b010ee821caa8 Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 14:13:13 +0100 4 | Subject: [PATCH] MicrosoftDemangleNodes.h fix includes 5 | 6 | --- 7 | llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 4 +++- 8 | 1 file changed, 3 insertions(+), 1 deletion(-) 9 | 10 | diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 11 | index 9e3478e9fd29..f54e8d161e04 100644 12 | --- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 13 | +++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h 14 | @@ -4,6 +4,8 @@ 15 | #include "llvm/Demangle/Compiler.h" 16 | #include "llvm/Demangle/StringView.h" 17 | #include 18 | +#include 19 | +#include 20 | 21 | class OutputStream; 22 | 23 | @@ -602,4 +604,4 @@ struct FunctionSymbolNode : public SymbolNode { 24 | } // namespace ms_demangle 25 | } // namespace llvm 26 | 27 | -#endif 28 | \ No newline at end of file 29 | +#endif 30 | -- 31 | 2.34.1 32 | 33 | -------------------------------------------------------------------------------- /callchain_checker/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.20) 2 | 3 | project(callchain_checker C CXX) 4 | 5 | find_package(Boost REQUIRED) 6 | message(STATUS "Found Boost ${Boost_VERSION_STRING}") 7 | 8 | find_package(LLVM REQUIRED CONFIG) 9 | 10 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 11 | message(STATUS "Using LLVMConfig.cmake in ${LLVM_DIR}") 12 | 13 | find_package(Clang REQUIRED CONFIG) 14 | message(STATUS "Using ClangConfig.cmake in ${Clang_DIR}") 15 | 16 | if (NOT CMAKE_BUILD_TYPE) 17 | set(CMAKE_BUILD_TYPE Release) 18 | endif () 19 | 20 | set(CMAKE_CXX_FLAGS "-Wall -Wextra -Wpedantic -march=native") 21 | set(CMAKE_CXX_FLAGS_DEBUG "-g") 22 | set(CMAKE_CXX_FLAGS_RELEASE "-O3") 23 | set(CMAKE_CXX_STANDARD 17) 24 | 25 | if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") 26 | add_compile_options (-fdiagnostics-color=always) 27 | elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") 28 | add_compile_options (-fcolor-diagnostics) 29 | endif () 30 | 31 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 32 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) 33 | 34 | add_subdirectory(src) 35 | -------------------------------------------------------------------------------- /patches/gcc-fix-simple-object-decl-and-use-in-gcc-lto.patch: -------------------------------------------------------------------------------- 1 | From b44637d9aac854de180e9125fcacb504a1c38aef Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 4 Feb 2022 11:15:44 +0100 4 | Subject: [PATCH] Fix simple-object decl and use in gcc-lto 5 | 6 | --- 7 | gcc/lto-wrapper.c | 2 +- 8 | include/simple-object.h | 2 +- 9 | 2 files changed, 2 insertions(+), 2 deletions(-) 10 | 11 | diff --git a/gcc/lto-wrapper.c
b/gcc/lto-wrapper.c 12 | index 7de58d47781..6759597a849 100644 13 | --- a/gcc/lto-wrapper.c 14 | +++ b/gcc/lto-wrapper.c 15 | @@ -1008,7 +1008,7 @@ debug_objcopy (const char *infile) 16 | } 17 | 18 | outfile = make_temp_file ("debugobjtem"); 19 | - errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err); 20 | + errmsg = simple_object_copy_lto_debug_sections (inobj, outfile, &err, 0); 21 | if (errmsg) 22 | { 23 | unlink_if_ordinary (outfile); 24 | diff --git a/include/simple-object.h b/include/simple-object.h 25 | index db72f86de17..82f4722782a 100644 26 | --- a/include/simple-object.h 27 | +++ b/include/simple-object.h 28 | @@ -203,7 +203,7 @@ simple_object_release_write (simple_object_write *); 29 | extern const char * 30 | simple_object_copy_lto_debug_sections (simple_object_read *src_object, 31 | const char *dest, 32 | - int *err); 33 | + int *err, int rename); 34 | 35 | #ifdef __cplusplus 36 | } 37 | -- 38 | 2.34.1 39 | 40 | -------------------------------------------------------------------------------- /run_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | function run_gcc(){ 4 | ./main.py -ll info --cores $CORES_PER_JOB run \ 5 | --no-parallel-generation\ 6 | -t gcc trunk 1 2 3 s\ 7 | -ac gcc releases/gcc-11.2.0\ 8 | gcc releases/gcc-10.3.0\ 9 | gcc releases/gcc-9.4.0\ 10 | gcc releases/gcc-8.5.0\ 11 | gcc releases/gcc-7.5.0\ 12 | -acdol 1 2 3 s\ 13 | --no-reducer &>> split_$1.log 14 | } 15 | 16 | function run_llvm(){ 17 | 18 | # Don't compile LLVM with less than 8 cores. 19 | if [[ $CORES_PER_JOB -lt 8 ]]; then 20 | CORES_PER_JOB=8 21 | fi 22 | ./main.py -ll info --cores $CORES_PER_JOB run \ 23 | --no-parallel-generation\ 24 | -t llvm trunk 1 2 3 s z\ 25 | -ac llvm llvmorg-13.0.1\ 26 | llvm llvmorg-12.0.1\ 27 | llvm llvmorg-11.1.0\ 28 | llvm llvmorg-10.0.1\ 29 | llvm llvmorg-7.1.0\ 30 | llvm llvmorg-6.0.1\ 31 | llvm llvmorg-5.0.2\ 32 | llvm llvmorg-4.0.1\ 33 | -acdol 1 2 3 s z\ 34 | --no-reducer &>> split_$1.log 35 | } 36 | 37 | export -f run_llvm 38 | export -f run_gcc 39 | 40 | PROJECT=$1 41 | TOTAL_CORES=$2 42 | JOBS=$3 43 | export CORES_PER_JOB=$(expr $TOTAL_CORES / $JOBS) 44 | 45 | if [ $PROJECT = "llvm" ]; then 46 | RUN_CMD='run_llvm "{}"' 47 | elif [ $PROJECT = "clang" ]; then 48 | RUN_CMD='run_llvm "{}"' 49 | else 50 | RUN_CMD='run_gcc "{}"' 51 | fi 52 | 53 | seq $JOBS | xargs --max-procs=$JOBS -I {} bash -c "$RUN_CMD" 54 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainCheckerTool.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "CallChainChecker.hpp" 8 | 9 | using namespace llvm; 10 | using namespace clang; 11 | using namespace clang::tooling; 12 | 13 | using namespace ccc; 14 | 15 | namespace { 16 | cl::OptionCategory CCCOptions("call-chain-checker options"); 17 | cl::opt<std::string> From("from", cl::desc("Beginning of call chain."), 18 | cl::value_desc("function name"), cl::cat(CCCOptions)); 19 | cl::opt<std::string> To("to", cl::desc("End of call chain."), 20 | cl::value_desc("function name"), cl::cat(CCCOptions)); 21 | 22 | } // namespace 23 | 24 | int main(int argc, const char **argv) { 25 | auto ExpectedParser = 26 | CommonOptionsParser::create(argc, argv, CCCOptions); 27 | if (!ExpectedParser) { 28 | llvm::errs() << ExpectedParser.takeError(); 29 | return 1; 30 | } 31 | CommonOptionsParser &OptionsParser = ExpectedParser.get(); 32 |
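// A hypothetical invocation sketch (the file and function names below are
// illustrative, not fixed values):
//   ./callchain_checker/build/bin/ccc --from main --to foo code.c --
// prints whether a static call chain from `main` to `foo` exists in code.c.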
33 | ClangTool Tool(OptionsParser.getCompilations(), 34 | OptionsParser.getSourcePathList()); 35 | 36 | std::vector<CallPair> Calls; 37 | CallChainCollector CCC{Calls}; 38 | ast_matchers::MatchFinder Finder; 39 | CCC.registerMatchers(Finder); 40 | auto ret = Tool.run(newFrontendActionFactory(&Finder).get()); 41 | if (ret != 0) 42 | return ret; 43 | if (callChainExists(Calls, From, To)) 44 | outs() << "call chain exists between " << From << " -> " << To << '\n'; 45 | else 46 | outs() << "no call chain between " << From << " -> " << To << '\n'; 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /patches/llvm-GCOpenMPRuntime.cpp-lambda-issues.patch: -------------------------------------------------------------------------------- 1 | From 6cce61ba38772cf4e4a9c0a389032b947e5ab71b Mon Sep 17 00:00:00 2001 2 | From: Theodoros Theodoridis 3 | Date: Fri, 28 Jan 2022 15:19:38 +0100 4 | Subject: [PATCH] GCOpenMPRuntime.cpp lambda issues 5 | 6 | --- 7 | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++--- 8 | 1 file changed, 3 insertions(+), 3 deletions(-) 9 | 10 | diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp 11 | index 40252171368b..40a73ef7429e 100644 12 | --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp 13 | +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp 14 | @@ -6271,7 +6271,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( 15 | // Generate the code for the opening of the data environment. Capture all the 16 | // arguments of the runtime call by reference because they are used in the 17 | // closing of the region. 18 | - auto &&BeginThenGen = [&D, &CGF, Device, &Info, &CodeGen, &NoPrivAction]( 19 | + auto &&BeginThenGen = [&D, Device, &Info, &CodeGen, &NoPrivAction]( 20 | CodeGenFunction &CGF, PrePostActionTy &) { 21 | // Fill up the arrays with all the mapped variables. 22 | MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 23 | @@ -6318,7 +6318,7 @@ void CGOpenMPRuntime::emitTargetDataCalls( 24 | }; 25 | 26 | // Generate code for the closing of the data region. 27 | - auto &&EndThenGen = [&CGF, Device, &Info](CodeGenFunction &CGF, 28 | + auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, 29 | PrePostActionTy &) { 30 | assert(Info.isValid() && "Invalid data environment closing arguments."); 31 | 32 | @@ -6397,7 +6397,7 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( 33 | "Expecting either target enter, exit data, or update directives."); 34 | 35 | // Generate the code for the opening of the data environment. 36 | - auto &&ThenGen = [&D, &CGF, Device](CodeGenFunction &CGF, PrePostActionTy &) { 37 | + auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) { 38 | // Fill up the arrays with all the mapped variables.
39 | MappableExprsHandler::MapBaseValuesArrayTy BasePointers; 40 | MappableExprsHandler::MapValuesArrayTy Pointers; 41 | -- 42 | 2.34.1 43 | 44 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM archlinux:latest 2 | 3 | RUN pacman -Syyu --noconfirm --noprogressbar &&\ 4 | pacman -S --noconfirm --needed --noprogressbar base-devel 5 | 6 | # Adding user 7 | RUN /usr/sbin/groupadd --system sudo && \ 8 | /usr/sbin/useradd --create-home \ 9 | --groups sudo \ 10 | --uid 1337 --user-group \ 11 | dead && \ 12 | /usr/sbin/sed -i -e "s/Defaults requiretty.*/ #Defaults requiretty/g" /etc/sudoers && \ 13 | /usr/sbin/echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 14 | 15 | USER dead 16 | WORKDIR /home/dead 17 | 18 | # Installing yay 11.1.1 19 | RUN sudo pacman -S --noconfirm --noprogressbar git 20 | RUN git clone https://aur.archlinux.org/yay.git &&\ 21 | cd yay &&\ 22 | git checkout cdf06b6781263e24d98754a99d70857aa959f691 &&\ 23 | makepkg -si --noconfirm --noprogressbar 24 | RUN rm -r yay/ 25 | 26 | # Installing dependencies 27 | # These need compilation 28 | RUN yay -S --noconfirm --noprogressbar csmith\ 29 | creduce-git\ 30 | compcert 31 | 32 | # These don't 33 | RUN yay -S --noconfirm --noprogressbar python\ 34 | python-pip\ 35 | gcc\ 36 | clang\ 37 | llvm\ 38 | compiler-rt\ 39 | cmake\ 40 | boost\ 41 | ninja\ 42 | entr 43 | 44 | COPY --chown=dead callchain_checker/ ./callchain_checker/ 45 | 46 | RUN mkdir /home/dead/callchain_checker/build/ &&\ 47 | cd /home/dead/callchain_checker/build/ &&\ 48 | cmake .. &&\ 49 | make -j 50 | 51 | COPY requirements.txt . 52 | RUN python3 -m pip install -r requirements.txt 53 | 54 | RUN mkdir /home/dead/.config/dead/ 55 | 56 | RUN python3 -c 'from pathlib import Path; from dead_instrumenter import utils; utils.make_config(Path.home() / ".config/dead/instrumenter.json", True)' 57 | 58 | COPY dockerconfig.json /home/dead/.config/dead/config.json 59 | 60 | COPY --chown=dead *.py /home/dead/ 61 | COPY --chown=dead patches/ /home/dead/patches/ 62 | 63 | COPY --chown=dead ./run_parallel.sh /home/dead/run_parallel.sh 64 | 65 | ENV PATH="/home/dead/.local/bin:${PATH}" 66 | -------------------------------------------------------------------------------- /dockerconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "gcc": { 3 | "name": "gcc", 4 | "main_branch": "master", 5 | "repo": "/persistent/gcc", 6 | "sane_version": "gcc", 7 | "releases": [ 8 | "trunk", 9 | "releases/gcc-11.2.0", 10 | "releases/gcc-11.1.0", 11 | "releases/gcc-10.3.0", 12 | "releases/gcc-10.2.0", 13 | "releases/gcc-10.1.0", 14 | "releases/gcc-9.4.0", 15 | "releases/gcc-9.3.0", 16 | "releases/gcc-9.2.0", 17 | "releases/gcc-9.1.0", 18 | "releases/gcc-8.5.0", 19 | "releases/gcc-8.4.0", 20 | "releases/gcc-8.3.0", 21 | "releases/gcc-8.2.0", 22 | "releases/gcc-8.1.0", 23 | "releases/gcc-7.5.0", 24 | "releases/gcc-7.4.0", 25 | "releases/gcc-7.3.0", 26 | "releases/gcc-7.2.0" 27 | ] 28 | }, 29 | "llvm": { 30 | "name": "clang", 31 | "main_branch": "main", 32 | "repo": "/persistent/llvm-project", 33 | "sane_version": "clang", 34 | "releases": [ 35 | "trunk", 36 | "llvmorg-13.0.1", 37 | "llvmorg-13.0.0", 38 | "llvmorg-12.0.1", 39 | "llvmorg-12.0.0", 40 | "llvmorg-11.1.0", 41 | "llvmorg-11.0.1", 42 | "llvmorg-11.0.0", 43 | "llvmorg-10.0.1", 44 | "llvmorg-10.0.0", 45 | "llvmorg-9.0.1", 46 | "llvmorg-9.0.0", 47 | "llvmorg-8.0.1", 48 
| "llvmorg-8.0.0", 49 | "llvmorg-7.1.0", 50 | "llvmorg-7.0.1", 51 | "llvmorg-7.0.0", 52 | "llvmorg-6.0.1", 53 | "llvmorg-6.0.0", 54 | "llvmorg-5.0.2", 55 | "llvmorg-5.0.1", 56 | "llvmorg-5.0.0", 57 | "llvmorg-4.0.1", 58 | "llvmorg-4.0.0" 59 | ] 60 | }, 61 | "repodir": "/persistent", 62 | "csmith": { 63 | "max_size": 50000, 64 | "min_size": 10000, 65 | "executable": "csmith", 66 | "include_path": "/usr/include/csmith-2.3.0" 67 | }, 68 | "dcei": "dead-instrument", 69 | "ccc": "./callchain_checker/build/bin/ccc", 70 | "patchdb": "/persistent/patchdb.json", 71 | "logdir": "/persistent/logs", 72 | "cachedir": "/persistent/compiler_cache", 73 | "creduce": "creduce", 74 | "ccomp": "ccomp", 75 | "casedb": "/persistent/casedb.sqlite3" 76 | } 77 | -------------------------------------------------------------------------------- /callchain_checker/src/CallChainChecker.cpp: -------------------------------------------------------------------------------- 1 | #include "CallChainChecker.hpp" 2 | 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace clang; 14 | using namespace clang::ast_matchers; 15 | 16 | using namespace boost; 17 | 18 | using StaticCallGraph = adjacency_list; 19 | 20 | namespace ccc { 21 | 22 | bool callChainExists(const std::vector &Calls, std::string From, 23 | std::string To) { 24 | std::unordered_map FunctionToIdx; 25 | size_t idx = 0; 26 | StaticCallGraph SCG; 27 | for (const auto &[Caller, Callee] : Calls) { 28 | if (not FunctionToIdx.count(Caller)) { 29 | SCG.added_vertex(idx); 30 | FunctionToIdx[Caller] = idx++; 31 | } 32 | if (not FunctionToIdx.count(Callee)) { 33 | SCG.added_vertex(idx); 34 | FunctionToIdx[Callee] = idx++; 35 | } 36 | boost::add_edge(FunctionToIdx[Caller], FunctionToIdx[Callee], SCG); 37 | } 38 | if (not FunctionToIdx.count(From)) { 39 | llvm::errs() << From << " is not part of the call graph\n"; 40 | return false; 41 | } 42 | if (not FunctionToIdx.count(To)) { 43 | llvm::errs() << To << " is not part of the call graph\n"; 44 | return false; 45 | } 46 | 47 | std::vector Colors(num_vertices(SCG)); 48 | iterator_property_map ColorMap(Colors.begin(), 49 | boost::get(boost::vertex_index, SCG)); 50 | breadth_first_search(SCG, FunctionToIdx.at(From), color_map(ColorMap)); 51 | return Colors[FunctionToIdx.at(To)] == default_color_type::black_color; 52 | } 53 | 54 | void CallChainCollector::registerMatchers( 55 | clang::ast_matchers::MatchFinder &Finder) { 56 | Finder.addMatcher(callExpr(clang::ast_matchers::isExpansionInMainFile(), 57 | callee(functionDecl().bind("callee")), 58 | hasAncestor(functionDecl().bind("caller"))), 59 | this); 60 | } 61 | 62 | void CallChainCollector::run( 63 | const clang::ast_matchers::MatchFinder::MatchResult &Result) { 64 | if (const auto *Callee = Result.Nodes.getNodeAs("callee")) 65 | if (const auto *Caller = Result.Nodes.getNodeAs("caller")) 66 | Calls.emplace_back(Caller->getNameAsString(), 67 | Callee->getNameAsString()); 68 | } 69 | 70 | } // namespace ccc 71 | -------------------------------------------------------------------------------- /patches/gcc-ustat.patch: -------------------------------------------------------------------------------- 1 | From 61f38c64c01a15560026115a157b7021ec67bd3b Mon Sep 17 00:00:00 2001 2 | From: hjl 3 | Date: Thu, 24 May 2018 20:21:54 +0000 4 | Subject: [PATCH] libsanitizer: Use pre-computed size of struct ustat for Linux 5 | 6 | Cherry-pick compiler-rt revision 333213: 7 | 8 | has been removed from glibc 2.28 by: 9 | 10 | 
commit cf2478d53ad7071e84c724a986b56fe17f4f4ca7 11 | Author: Adhemerval Zanella 12 | Date: Sun Mar 18 11:28:59 2018 +0800 13 | 14 | Deprecate ustat syscall interface 15 | 16 | This patch uses pre-computed size of struct ustat for Linux. 17 | 18 | PR sanitizer/85835 19 | * sanitizer_common/sanitizer_platform_limits_posix.cc: Don't 20 | include <sys/ustat.h> for Linux. 21 | (SIZEOF_STRUCT_USTAT): New. 22 | (struct_ustat_sz): Use SIZEOF_STRUCT_USTAT for Linux. 23 | 24 | 25 | 26 | git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-7-branch@260688 138bc75d-0d04-0410-961f-82ee72b054a4 27 | 28 | Downloaded from upstream commit 29 | https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=61f38c64c01a15560026115a157b7021ec67bd3b 30 | 31 | Signed-off-by: Bernd Kuhls 32 | --- 33 | libsanitizer/ChangeLog | 8 ++++++++ 34 | .../sanitizer_common/sanitizer_platform_limits_posix.cc | 15 +++++++++++++-- 35 | 2 files changed, 21 insertions(+), 2 deletions(-) 36 | 37 | diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 38 | index 31a5e69..8017afd 100644 39 | --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 40 | +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cc 41 | @@ -154,7 +154,6 @@ typedef struct user_fpregs elf_fpregset_t; 42 | # include 43 | #endif 44 | #include 45 | -#include <sys/ustat.h> 46 | #include 47 | #include 48 | #include 49 | @@ -247,7 +246,19 @@ namespace __sanitizer { 50 | #endif // SANITIZER_LINUX || SANITIZER_FREEBSD 51 | 52 | #if SANITIZER_LINUX && !SANITIZER_ANDROID 53 | - unsigned struct_ustat_sz = sizeof(struct ustat); 54 | + // Use pre-computed size of struct ustat to avoid <sys/ustat.h> which 55 | + // has been removed from glibc 2.28. 56 | +#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ 57 | + || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ 58 | + || defined(__x86_64__) 59 | +#define SIZEOF_STRUCT_USTAT 32 60 | +#elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ 61 | + || defined(__powerpc__) || defined(__s390__) 62 | +#define SIZEOF_STRUCT_USTAT 20 63 | +#else 64 | +#error Unknown size of struct ustat 65 | +#endif 66 | + unsigned struct_ustat_sz = SIZEOF_STRUCT_USTAT; 67 | unsigned struct_rlimit64_sz = sizeof(struct rlimit64); 68 | unsigned struct_statvfs64_sz = sizeof(struct statvfs64); 69 | #endif // SANITIZER_LINUX && !SANITIZER_ANDROID 70 | -- 71 | 2.9.3 72 | 73 | -------------------------------------------------------------------------------- /preprocessing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | import shutil 4 | import tempfile 5 | from pathlib import Path 6 | from typing import Iterable, Optional 7 | 8 | from ccbuilder import Builder 9 | 10 | import utils 11 | 12 | """ 13 | Functions to preprocess code for creduce. 14 | See creduce --help to see what it wants.
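A hedged usage sketch (the marker prefix, compiler setting, and builder
below are illustrative placeholders, not fixed values):

    pp_code = preprocess_csmith_code(code, "DCEMarker", compiler_setting, bldr)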
15 | """ 16 | 17 | 18 | class PreprocessError(Exception): 19 | pass 20 | 21 | 22 | def find_marker_decl_range(lines: list[str], marker_prefix: str) -> tuple[int, int]: 23 | p = re.compile(rf"void {marker_prefix}(.*)\(void\);") 24 | first = 0 25 | for i, line in enumerate(lines): 26 | if p.match(line): 27 | first = i 28 | break 29 | last = first + 1 30 | for i, line in enumerate(lines[first + 1 :], start=first + 1): 31 | if p.match(line): 32 | continue 33 | else: 34 | last = i 35 | break 36 | return first, last 37 | 38 | 39 | def find_platform_main_end(lines: Iterable[str]) -> Optional[int]: 40 | p = re.compile(r".*platform_main_end.*") 41 | for i, line in enumerate(lines): 42 | if p.match(line): 43 | return i 44 | return None 45 | 46 | 47 | def remove_platform_main_begin(lines: Iterable[str]) -> list[str]: 48 | p = re.compile(r".*platform_main_begin.*") 49 | return [line for line in lines if not p.match(line)] 50 | 51 | 52 | def remove_print_hash_value(lines: Iterable[str]) -> list[str]: 53 | p = re.compile(r".*print_hash_value = 1.*") 54 | return [line for line in lines if not p.match(line)] 55 | 56 | 57 | def preprocess_lines(lines: list[str]) -> str: 58 | start_patterns = [ 59 | re.compile(r"^extern.*"), 60 | re.compile(r"^typedef.*"), 61 | re.compile(r"^struct.*"), 62 | # The following patterns are to catch if the last of the previous 63 | # patterns in the file was tainted and we'd otherwise mark the rest 64 | # of the file as tainted, as we'll find no end in this case. 65 | re.compile(r"^static.*"), 66 | re.compile(r"^void.*"), 67 | ] 68 | taint_patterns = [ 69 | re.compile(r".*__access__.*"), # LLVM doesn't know about this 70 | re.compile(r".*__malloc__.*"), 71 | re.compile( 72 | r".*_[F|f]loat[0-9]{1,3}x{0,1}.*" 73 | ), # https://gcc.gnu.org/onlinedocs/gcc/Floating-Types.html#Floating-Types 74 | re.compile(r".*__asm__.*"), # CompCert has problems 75 | ] 76 | 77 | def is_start(l: str) -> bool: 78 | return any([p_start.match(l) for p_start in start_patterns]) 79 | 80 | lines_to_skip: list[int] = [] 81 | for i, line in enumerate(lines): 82 | for p in taint_patterns: 83 | if p.match(line): 84 | # Searching for start of tainted region 85 | up_i = i 86 | up_line = lines[up_i] 87 | while up_i > 0 and not is_start(up_line): 88 | up_i -= 1 89 | up_line = lines[up_i] 90 | 91 | # Searching for end of tainted region 92 | down_i = i + 1 93 | down_line = lines[down_i] 94 | while down_i < len(lines) and not is_start(down_line): 95 | down_i += 1 96 | down_line = lines[down_i] 97 | 98 | lines_to_skip.extend(list(range(up_i, down_i))) 99 | 100 | return "\n".join([line for i, line in enumerate(lines) if i not in lines_to_skip]) 101 | 102 | 103 | def preprocess_csmith_file( 104 | path: os.PathLike[str], 105 | marker_prefix: str, 106 | compiler_setting: utils.CompilerSetting, 107 | bldr: Builder, 108 | ) -> str: 109 | 110 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 111 | shutil.copy(path, tf.name) 112 | 113 | additional_flags = ( 114 | [] 115 | if compiler_setting.additional_flags is None 116 | else compiler_setting.additional_flags 117 | ) 118 | cmd = [ 119 | str(utils.get_compiler_executable(compiler_setting, bldr)), 120 | tf.name, 121 | "-P", 122 | "-E", 123 | ] + additional_flags 124 | lines = utils.run_cmd(cmd).split("\n") 125 | 126 | return preprocess_lines(lines) 127 | 128 | 129 | def preprocess_csmith_code( 130 | code: str, 131 | marker_prefix: str, 132 | compiler_setting: utils.CompilerSetting, 133 | bldr: Builder, 134 | ) -> Optional[str]: 135 | """Will *try* to preprocess code as 
if it comes from csmith. 136 | 137 | Args: 138 | code (str): code to preprocess 139 | marker_prefix (str): Marker prefix 140 | compiler_setting (utils.CompilerSetting): Setting to preprocess with 141 | bldr (builder.Builder): 142 | 143 | Returns: 144 | Optional[str]: preprocessed code if it was able to preprocess it. 145 | """ 146 | tf = utils.save_to_tmp_file(code) 147 | try: 148 | res = preprocess_csmith_file( 149 | Path(tf.name), marker_prefix, compiler_setting, bldr 150 | ) 151 | return res 152 | except PreprocessError: 153 | return None 154 | -------------------------------------------------------------------------------- /init.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import grp 4 | import json 5 | import os 6 | import shutil 7 | import stat 8 | from pathlib import Path 9 | from typing import Any 10 | 11 | from dead_instrumenter.utils import Binary, find_binary 12 | 13 | import utils 14 | 15 | 16 | def main() -> None: 17 | print( 18 | "Have you installed the following programs/projects: llvm, clang, compiler-rt, gcc, cmake, ccomp, csmith and creduce?" 19 | ) 20 | print("Press enter to continue if you believe you have") 21 | input() 22 | 23 | not_found = [] 24 | for p in ["clang", "gcc", "cmake", "ccomp", "csmith", "creduce"]: 25 | if not shutil.which(p): 26 | not_found.append(p) 27 | 28 | if not_found: 29 | print("Can't find", " ".join(not_found), " in $PATH.") 30 | 31 | if not Path("/usr/include/llvm/").exists(): 32 | print("Can't find /usr/include/llvm/") 33 | not_found.append("kill") 34 | 35 | if not_found: 36 | exit(1) 37 | 38 | print("Creating default ~/.config/dead/config.json...") 39 | 40 | path = Path.home() / ".config/dead/config.json" 41 | if path.exists(): 42 | print(f"{path} already exists! Aborting to prevent overriding data...") 43 | exit(1) 44 | 45 | config: dict[Any, Any] = {} 46 | # ====== GCC ====== 47 | gcc: dict[str, Any] = {} 48 | gcc["name"] = "gcc" 49 | gcc["main_branch"] = "master" 50 | 51 | # Git clone repo 52 | print("Cloning gcc to ./gcc ...") 53 | if not Path("./gcc").exists(): 54 | utils.run_cmd("git clone git://gcc.gnu.org/git/gcc.git") 55 | gcc["repo"] = "./gcc" 56 | 57 | if shutil.which("gcc"): 58 | gcc["sane_version"] = "gcc" 59 | else: 60 | gcc["sane_version"] = "???" 61 | print( 62 | "gcc is not in $PATH, you have to specify the executable yourself in gcc.sane_version" 63 | ) 64 | 65 | gcc["releases"] = [ 66 | "trunk", 67 | "releases/gcc-12.1.0", 68 | "releases/gcc-11.3.0", 69 | "releases/gcc-11.2.0", 70 | "releases/gcc-11.1.0", 71 | "releases/gcc-10.3.0", 72 | "releases/gcc-10.2.0", 73 | "releases/gcc-10.1.0", 74 | "releases/gcc-9.4.0", 75 | "releases/gcc-9.3.0", 76 | "releases/gcc-9.2.0", 77 | "releases/gcc-9.1.0", 78 | "releases/gcc-8.5.0", 79 | "releases/gcc-8.4.0", 80 | "releases/gcc-8.3.0", 81 | "releases/gcc-8.2.0", 82 | "releases/gcc-8.1.0", 83 | "releases/gcc-7.5.0", 84 | "releases/gcc-7.4.0", 85 | "releases/gcc-7.3.0", 86 | "releases/gcc-7.2.0", 87 | ] 88 | config["gcc"] = gcc 89 | 90 | # ====== LLVM ====== 91 | llvm: dict[str, Any] = {} 92 | llvm["name"] = "clang" 93 | llvm["main_branch"] = "main" 94 | 95 | # Git clone repo 96 | print("Cloning llvm to ./llvm-project ...") 97 | if not Path("./llvm-project").exists(): 98 | utils.run_cmd("git clone https://github.com/llvm/llvm-project") 99 | llvm["repo"] = "./llvm-project" 100 | 101 | if shutil.which("clang"): 102 | llvm["sane_version"] = "clang" 103 | else: 104 | llvm["sane_version"] = "???" 
105 | print( 106 | "clang is not in $PATH, you have to specify the executable yourself in llvm.sane_version" 107 | ) 108 | 109 | llvm["releases"] = [ 110 | "trunk", 111 | "llvmorg-14.0.3", 112 | "llvmorg-14.0.2", 113 | "llvmorg-14.0.1", 114 | "llvmorg-14.0.0", 115 | "llvmorg-13.0.1", 116 | "llvmorg-13.0.0", 117 | "llvmorg-12.0.1", 118 | "llvmorg-12.0.0", 119 | "llvmorg-11.1.0", 120 | "llvmorg-11.0.1", 121 | "llvmorg-11.0.0", 122 | "llvmorg-10.0.1", 123 | "llvmorg-10.0.0", 124 | "llvmorg-9.0.1", 125 | "llvmorg-9.0.0", 126 | "llvmorg-8.0.1", 127 | "llvmorg-8.0.0", 128 | "llvmorg-7.1.0", 129 | "llvmorg-7.0.1", 130 | "llvmorg-7.0.0", 131 | "llvmorg-6.0.1", 132 | "llvmorg-6.0.0", 133 | "llvmorg-5.0.2", 134 | "llvmorg-5.0.1", 135 | "llvmorg-5.0.0", 136 | "llvmorg-4.0.1", 137 | "llvmorg-4.0.0", 138 | ] 139 | 140 | config["llvm"] = llvm 141 | 142 | config["repodir"] = str(Path(os.getcwd()).absolute()) 143 | # ====== CSmith ====== 144 | csmith: dict[str, Any] = {} 145 | csmith["max_size"] = 50000 146 | csmith["min_size"] = 10000 147 | if shutil.which("csmith"): 148 | csmith["executable"] = "csmith" 149 | res = utils.run_cmd("csmith --version") 150 | # $ csmith --version csmith 2.3.0 151 | # Git version: 30dccd7 152 | version = res.split("\n")[0].split()[1] 153 | if Path("/usr/include/csmith").exists(): 154 | csmith["include_path"] = "/usr/include/csmith" 155 | else: 156 | csmith["include_path"] = "/usr/include/csmith-" + version 157 | else: 158 | print( 159 | "Can't find csmith in $PATH. You have to specify the executable and the include path yourself" 160 | ) 161 | csmith["executable"] = "???" 162 | csmith["include_path"] = "???" 163 | config["csmith"] = csmith 164 | 165 | # ====== Cpp programs ====== 166 | 167 | print("Building instrumenter...") 168 | find_binary(Binary.INSTRUMENTER, no_questions=True) 169 | config["dcei"] = "dead-instrument" 170 | 171 | print("Compiling callchain checker (ccc)...") 172 | os.makedirs("./callchain_checker/build", exist_ok=True) 173 | utils.run_cmd("cmake ..", working_dir=Path("./callchain_checker/build/")) 174 | utils.run_cmd("make -j", working_dir=Path("./callchain_checker/build/")) 175 | config["ccc"] = "./callchain_checker/build/bin/ccc" 176 | 177 | # ====== Rest ====== 178 | config["patchdb"] = "./patches/patchdb.json" 179 | 180 | os.makedirs("logs", exist_ok=True) 181 | config["logdir"] = "./logs" 182 | 183 | os.makedirs("compiler_cache", exist_ok=True) 184 | os.chmod("compiler_cache", 0o770 | stat.S_ISGID) 185 | config["cachedir"] = "./compiler_cache" 186 | 187 | config["creduce"] = "creduce" 188 | if not shutil.which("creduce"): 189 | print( 190 | "creduce was not found in $PATH. You have to specify the executable yourself" 191 | ) 192 | config["creduce"] = "???" 193 | 194 | config["ccomp"] = "ccomp" 195 | if not shutil.which("ccomp"): 196 | print( 197 | "ccomp was not found in $PATH. You have to specify the executable yourself" 198 | ) 199 | config["ccomp"] = "???" 
200 | 201 | config["casedb"] = "./casedb.sqlite3" 202 | 203 | Path(config["casedb"]).touch() 204 | os.chmod(config["casedb"], 0o660) 205 | 206 | print("Saving config...") 207 | os.makedirs(path.parent, exist_ok=True) 208 | with open(path, "w") as f: 209 | json.dump(config, f, indent=4) 210 | 211 | print("Done!") 212 | 213 | 214 | if __name__ == "__main__": 215 | main() 216 | -------------------------------------------------------------------------------- /bugs.md: -------------------------------------------------------------------------------- 1 | ### GCC 2 | - [99357](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99357) 3 | - [99373](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99373) 4 | - [99419](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99419) 5 | - [99428](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99428) 6 | - ~~[99776](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99776)~~ Fixed 7 | - [99788](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99788) 8 | - ~~[99793](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99793)~~ Fixed 9 | - [99834](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99834) 10 | - [99835](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99835) 11 | - ~~[99986](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99986)~~ Duplicate 12 | - [99987](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99987) 13 | - [99991](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99991) 14 | - [99993](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99993) 15 | - [100033](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100033) 16 | - [100034](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100034) 17 | - [100036](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100036) 18 | - ~~[100050](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100050)~~ Duplicate 19 | - ~~[100051](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100051)~~ Fixed 20 | - [100080](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100080) 21 | - [100082](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100082) 22 | - [100095](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100095) 23 | - [100100](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100100) 24 | - ~~[100112](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100112)~~ Fixed 25 | - [100113](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100113) 26 | - [100145](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100145) 27 | - [100162](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100162) 28 | - [100188](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100188) 29 | - [100191](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100191) 30 | - [100220](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100220) 31 | - ~~[100221](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100221)~~ Fixed 32 | - [100314](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100314) 33 | - ~~[100315](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100315)~~ Invalid 34 | - ~~[100359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100359)~~ Fixed 35 | - ~~[102540](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102540)~~ Fixed 36 | - ~~[102546](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102546)~~ Fixed 37 | - ~~[102648](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102648)~~ Fixed 38 | - ~~[102650](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102650)~~ Fixed 39 | - ~~[102703](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102703)~~ Fixed 40 | - [102705](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102705) 41 | - [102879](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102879) 42 | - ~~[102880](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102880)~~ Fixed 43 | - 
~~[102892](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102892)~~ ~~Fixed~~ Reopened 44 | - ~~[102895](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102895)~~ Duplicate 45 | - ~~[103280](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103280)~~ Duplicate 46 | - [103281](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103281) 47 | - ~~[102950](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102950)~~ Fixed 48 | - [102981](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102981) 49 | - [102982](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102982) 50 | - ~~[102983](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102983)~~ Fixed 51 | - ~~[103257](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103257)~~ Fixed 52 | - [103388](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103388) 53 | - ~~[103359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103359)~~ Fixed 54 | - ~~[104526](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104526)~~ Fixed 55 | - ~~[104530](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104530)~~ Fixed 56 | - [105086](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105086) 57 | - [105832](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105832) 58 | - [105833](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105833) 59 | - [105834](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105834) 60 | - ~~[105835](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105835)~~ Fixed 61 | - [107822](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107822) 62 | - [107823](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107823) 63 | - [108351](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108351) 64 | - ~~[108352](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108352)~~ Fixed 65 | - ~~[108353](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108353)~~ Fixed 66 | - [108354](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108354) 67 | - [108355](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108355) 68 | - [108356](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108356) 69 | - [108357](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108357) 70 | - [108358](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108358) 71 | - [108359](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108359) 72 | - [108360](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108360) 73 | - [108368](https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108368) 74 | 75 | 76 | ### LLVM 77 | - [49434](https://bugs.llvm.org/show_bug.cgi?id=49434) 78 | - [49436](https://bugs.llvm.org/show_bug.cgi?id=49436) 79 | - [49457](https://bugs.llvm.org/show_bug.cgi?id=49457) 80 | - ~~[49731](https://bugs.llvm.org/show_bug.cgi?id=49731)~~ Fixed 81 | - [49773](https://bugs.llvm.org/show_bug.cgi?id=49773) 82 | - [49775](https://bugs.llvm.org/show_bug.cgi?id=49775) 83 | - [49776](https://bugs.llvm.org/show_bug.cgi?id=49776) 84 | - [51090](https://bugs.llvm.org/show_bug.cgi?id=51090) 85 | - [51136](https://bugs.llvm.org/show_bug.cgi?id=51136) 86 | - [51137](https://bugs.llvm.org/show_bug.cgi?id=51137) 87 | - [51138](https://bugs.llvm.org/show_bug.cgi?id=51138) 88 | - ~~[52535](https://github.com/llvm/llvm-project/issues/51877)~~ Fixed 89 | - [51139](https://bugs.llvm.org/show_bug.cgi?id=51139) 90 | - [51140](https://bugs.llvm.org/show_bug.cgi?id=51140) 91 | - ~~[51141](https://bugs.llvm.org/show_bug.cgi?id=51141)~~ Fixed 92 | - ~~[52078](https://bugs.llvm.org/show_bug.cgi?id=52078)~~ Fixed 93 | - ~~[52102](https://bugs.llvm.org/show_bug.cgi?id=52102)~~ Fixed 94 | - ~~[52253](https://bugs.llvm.org/show_bug.cgi?id=52253)~~ Fixed 95 | - [52255](https://bugs.llvm.org/show_bug.cgi?id=52255) 96 | - 
~~[52260](https://bugs.llvm.org/show_bug.cgi?id=52260)~~ Fixed 97 | - ~~[52261](https://bugs.llvm.org/show_bug.cgi?id=52261)~~ Fixed 98 | - ~~[52289](https://bugs.llvm.org/show_bug.cgi?id=52289)~~ Fixed 99 | - [52347](https://bugs.llvm.org/show_bug.cgi?id=52347) 100 | - ~~[52543](https://bugs.llvm.org/show_bug.cgi?id=52543)~~ Fixed 101 | - [52580](https://bugs.llvm.org/show_bug.cgi?id=52580) 102 | - ~~[52592](https://bugs.llvm.org/show_bug.cgi?id=52592)~~ Fixed 103 | - ~~[51444](https://github.com/llvm/llvm-project/issues/51444)~~ Fixed 104 | - [51688](https://github.com/llvm/llvm-project/issues/51688) 105 | - ~~[52525](https://github.com/llvm/llvm-project/issues/52525)~~ Fixed 106 | - ~~[52965](https://github.com/llvm/llvm-project/issues/52965)~~ Fixed 107 | - ~~[53130](https://github.com/llvm/llvm-project/issues/53130)~~ Fixed 108 | - ~~[53131](https://github.com/llvm/llvm-project/issues/53131)~~ Fixed 109 | - ~~[53316](https://github.com/llvm/llvm-project/issues/53316)~~ Fixed 110 | - [53320](https://github.com/llvm/llvm-project/issues/53320) 111 | - ~~[53317](https://github.com/llvm/llvm-project/issues/53317)~~ Fixed 112 | - [53318](https://github.com/llvm/llvm-project/issues/53318) 113 | - [53322](https://github.com/llvm/llvm-project/issues/53322) 114 | - [53384](https://github.com/llvm/llvm-project/issues/53384) 115 | - [53385](https://github.com/llvm/llvm-project/issues/53385) 116 | - [53321](https://github.com/llvm/llvm-project/issues/53321) 117 | - ~~[53319](https://github.com/llvm/llvm-project/issues/53319)~~ Fixed 118 | - ~~[54980](https://github.com/llvm/llvm-project/issues/54980)~~ Fixed 119 | - ~~[56046](https://github.com/llvm/llvm-project/issues/56046)~~ Fixed 120 | - ~~[56048](https://github.com/llvm/llvm-project/issues/56048)~~ Fixed 121 | - [56049](https://github.com/llvm/llvm-project/issues/56049) 122 | - [56118](https://github.com/llvm/llvm-project/issues/56118) 123 | - ~~[56119](https://github.com/llvm/llvm-project/issues/56119)~~ Fixed 124 | - [56120](https://github.com/llvm/llvm-project/issues/56120) 125 | - [56761](https://github.com/llvm/llvm-project/issues/56761) 126 | - [56762](https://github.com/llvm/llvm-project/issues/56762) 127 | -------------------------------------------------------------------------------- /reducer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import logging 5 | import os 6 | import random 7 | import shutil 8 | import subprocess 9 | import tarfile 10 | import tempfile 11 | import time 12 | from copy import copy 13 | from dataclasses import dataclass 14 | from pathlib import Path 15 | from types import TracebackType 16 | from typing import Any, Optional 17 | 18 | import ccbuilder 19 | 20 | from ccbuilder.utils.utils import select_repo 21 | from ccbuilder import ( 22 | Builder, 23 | BuildException, 24 | CompilerProject, 25 | PatchDB, 26 | Repo, 27 | get_compiler_info, 28 | ) 29 | 30 | import generator 31 | import parsers 32 | import preprocessing 33 | import utils 34 | 35 | 36 | # ==================== Reducer ==================== 37 | class TempDirEnv: 38 | def __init__(self) -> None: 39 | self.td: tempfile.TemporaryDirectory[str] 40 | 41 | def __enter__(self) -> Path: 42 | self.td = tempfile.TemporaryDirectory() 43 | tempfile.tempdir = self.td.name 44 | return Path(self.td.name) 45 | 46 | def
__exit__( 47 | self, 48 | exc_type: Optional[type[BaseException]], 49 | exc_value: Optional[BaseException], 50 | exc_traceback: Optional[TracebackType], 51 | ) -> None: 52 | tempfile.tempdir = None 53 | 54 | 55 | @dataclass 56 | class Reducer: 57 | config: utils.NestedNamespace 58 | bldr: Builder 59 | 60 | def reduce_file(self, file: Path, force: bool = False) -> bool: 61 | """Reduce a case given in the .tar format. 62 | Interface for `reduced_code`. 63 | 64 | Args: 65 | file (Path): Path to .tar case. 66 | force (bool): Force a reduction (even if the case is already reduced). 67 | Returns: 68 | bool: If the reduction was successful. 69 | """ 70 | case = utils.Case.from_file(self.config, file) 71 | 72 | if self.reduce_case(case, force=force): 73 | case.to_file(file) 74 | return True 75 | return False 76 | 77 | def reduce_case(self, case: utils.Case, force: bool = False) -> bool: 78 | """Reduce a case. 79 | 80 | Args: 81 | case (utils.Case): Case to reduce. 82 | force (bool): Force a reduction (even if the case is already reduced). 83 | 84 | Returns: 85 | bool: If the reduction was successful. 86 | """ 87 | if not force and case.reduced_code: 88 | 89 | return True 90 | 91 | case.reduced_code = self.reduce_code( 92 | case.code, case.marker, case.bad_setting, case.good_settings, case.bisection 93 | ) 94 | return bool(case.reduced_code) 95 | 96 | def reduce_code( 97 | self, 98 | code: str, 99 | marker: str, 100 | bad_setting: utils.CompilerSetting, 101 | good_settings: list[utils.CompilerSetting], 102 | bisection: Optional[str] = None, 103 | preprocess: bool = True, 104 | ) -> Optional[str]: 105 | """Reduce given code w.r.t. `marker` 106 | 107 | Args: 108 | code (str): 109 | marker (str): Marker which exhibits the interesting behaviour. 110 | bad_setting (utils.CompilerSetting): Setting which can not eliminate the marker. 111 | good_settings (list[utils.CompilerSetting]): Settings which can eliminate the marker. 112 | bisection (Optional[str]): if present the reducer will also check for the bisection 113 | preprocess (bool): Whether or not to run the code through preprocessing. 114 | 115 | Returns: 116 | Optional[str]: Reduced code, if successful. 117 | """ 118 | 119 | bad_settings = [bad_setting] 120 | if bisection: 121 | bad_settings.append(copy(bad_setting)) 122 | bad_settings[-1].rev = bisection 123 | repo = select_repo( 124 | bad_setting.compiler_project, 125 | llvm_repo=self.bldr.llvm_repo, 126 | gcc_repo=self.bldr.gcc_repo, 127 | ) 128 | good_settings = good_settings + [copy(bad_setting)] 129 | good_settings[-1].rev = repo.rev_to_commit(f"{bisection}~") 130 | 131 | # creduce likes to kill unfinished processes with SIGKILL 132 | # so they can't clean up after themselves. 
133 | # Temporarily set a dedicated temporary directory so creduce is able to 134 | # clean up everything 135 | with TempDirEnv() as tmpdir: 136 | 137 | # preprocess file 138 | if preprocess: 139 | tmp = preprocessing.preprocess_csmith_code( 140 | code, 141 | utils.get_marker_prefix(marker), 142 | bad_setting, 143 | self.bldr, 144 | ) 145 | # Preprocessing may fail 146 | pp_code = tmp if tmp else code 147 | 148 | else: 149 | pp_code = code 150 | 151 | pp_code_path = tmpdir / "code_pp.c" 152 | with open(pp_code_path, "w") as f: 153 | f.write(pp_code) 154 | 155 | # save interesting_settings 156 | settings_path = tmpdir / "interesting_settings.json" 157 | 158 | int_settings: dict[str, Any] = {} 159 | int_settings["bad_settings"] = [ 160 | bs.to_jsonable_dict() for bs in bad_settings 161 | ] 162 | int_settings["good_settings"] = [ 163 | gs.to_jsonable_dict() for gs in good_settings 164 | ] 165 | with open(settings_path, "w") as f: 166 | json.dump(int_settings, f) 167 | 168 | # create script for creduce 169 | script_path = tmpdir / "check.sh" 170 | with open(script_path, "w") as f: 171 | print("#!/bin/sh", file=f) 172 | print("TMPD=$(mktemp -d)", file=f) 173 | print("trap '{ rm -rf \"$TMPD\"; }' INT TERM EXIT", file=f) 174 | print( 175 | "timeout 15 " 176 | f"{Path(__file__).parent.resolve()}/checker.py" 177 | f" --dont-preprocess" 178 | f" --config {self.config.config_path}" 179 | f" --marker {marker}" 180 | f" --interesting-settings {str(settings_path)}" 181 | f" --file code_pp.c", 182 | # f' --file {str(pp_code_path)}', 183 | file=f, 184 | ) 185 | 186 | os.chmod(script_path, 0o777) 187 | # run creduce 188 | creduce_cmd = [ 189 | self.config.creduce, 190 | "--n", 191 | f"{self.bldr.jobs}", 192 | str(script_path.name), 193 | str(pp_code_path.name), 194 | ] 195 | 196 | try: 197 | current_time = time.strftime("%Y%m%d-%H%M%S") 198 | build_log_path = ( 199 | Path(self.config.logdir) 200 | / f"{current_time}-creduce-{random.randint(0,1000)}.log" 201 | ) 202 | build_log_path.touch() 203 | # Set permissions of logfile 204 | os.chmod(build_log_path, 0o660) 205 | logging.info(f"creduce logfile at {build_log_path}") 206 | with open(build_log_path, "a") as build_log: 207 | utils.run_cmd_to_logfile( 208 | creduce_cmd, log_file=build_log, working_dir=Path(tmpdir) 209 | ) 210 | except subprocess.CalledProcessError as e: 211 | logging.info(f"Failed to process code.
Exception: {e}") 212 | return None 213 | 214 | # save result in tar 215 | with open(pp_code_path, "r") as f: 216 | reduced_code = f.read() 217 | 218 | return reduced_code 219 | 220 | 221 | if __name__ == "__main__": 222 | config, args = utils.get_config_and_parser(parsers.reducer_parser()) 223 | 224 | patchdb = PatchDB(Path(config.patchdb)) 225 | _, llvm_repo = get_compiler_info("llvm", Path(config.repodir)) 226 | _, gcc_repo = get_compiler_info("gcc", Path(config.repodir)) 227 | bldr = Builder( 228 | Path(config.cachedir), 229 | gcc_repo, 230 | llvm_repo, 231 | patchdb, 232 | args.cores, 233 | logdir=Path(config.logdir), 234 | ) 235 | gnrtr = generator.CSmithCaseGenerator(config, patchdb) 236 | rdcr = Reducer(config, bldr) 237 | 238 | if args.work_through: 239 | if args.output_directory is None: 240 | print("Missing output/work-through directory!") 241 | exit(1) 242 | else: 243 | output_dir = Path(os.path.abspath(args.output_directory)) 244 | os.makedirs(output_dir, exist_ok=True) 245 | 246 | tars = [ 247 | output_dir / d 248 | for d in os.listdir(output_dir) 249 | if tarfile.is_tarfile(output_dir / d) 250 | ] 251 | 252 | print(f"Processing {len(tars)} tars") 253 | for tf in tars: 254 | print(f"Processing {tf}") 255 | try: 256 | rdcr.reduce_file(tf, args.force) 257 | except BuildException as e: 258 | print(f"{e}") 259 | 260 | # if (We want to generate something and not only reduce a file) 261 | if args.generate: 262 | if args.output_directory is None: 263 | print("Missing output directory!") 264 | exit(1) 265 | else: 266 | output_dir = os.path.abspath(args.output_directory) 267 | os.makedirs(output_dir, exist_ok=True) 268 | 269 | scenario = utils.Scenario([], []) 270 | # When file is specified, use scenario of file as base 271 | if args.file: 272 | file = Path(args.file).absolute() 273 | scenario = utils.Case.from_file(config, file).scenario 274 | 275 | tmp = utils.get_scenario(config, args) 276 | if tmp.target_settings: 277 | scenario.target_settings = tmp.target_settings 278 | if tmp.attacker_settings: 279 | scenario.attacker_settings = tmp.attacker_settings 280 | 281 | gen = gnrtr.parallel_interesting_case_file( 282 | config, scenario, bldr.jobs, output_dir, start_stop=True 283 | ) 284 | if args.amount == 0: 285 | while True: 286 | path = next(gen) 287 | try: 288 | rdcr.reduce_file(path) 289 | except BuildException as e: 290 | print(f"{e}") 291 | else: 292 | for i in range(args.amount): 293 | path = next(gen) 294 | try: 295 | rdcr.reduce_file(path) 296 | except BuildException as e: 297 | print(f"{e}") 298 | 299 | elif not args.work_through: 300 | if not args.file: 301 | print( 302 | "--file is needed when just checking a single file. Have you forgotten to set --generate?"
303 |             )
304 |             exit(1)
305 |         file = Path(args.file).absolute()
306 |         if args.re_reduce:
307 |             case = utils.Case.from_file(config, file)
308 |             if not case.reduced_code:
309 |                 print("No reduced code available...")
310 |                 exit(1)
311 |             print(f"BEFORE\n{case.reduced_code}")
312 |             if reduce_code := rdcr.reduce_code(
313 |                 case.reduced_code,
314 |                 case.marker,
315 |                 case.bad_setting,
316 |                 case.good_settings,
317 |                 case.bisection,
318 |                 preprocess=False,
319 |             ):
320 |                 case.reduced_code = reduce_code
321 |                 print(f"AFTER\n{case.reduced_code}")
322 |                 case.to_file(file)
323 |         else:
324 |             if rdcr.reduce_file(file, args.force):
325 |                 print(file)
326 | 
327 |     gnrtr.terminate_processes()
328 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DEAD: Dead Code Elimination based Automatic Differential Testing
2 | 
3 | DEAD is a tool that automatically finds compiler regressions and other missed optimizations and processes them into reports.
4 | 
5 | It is based on the paper [Finding missed optimizations through the lens of dead code elimination](https://dl.acm.org/doi/10.1145/3503222.3507764) and was first written during a [master thesis](https://doi.org/10.3929/ethz-b-000547786) at the [AST Lab](https://ast.ethz.ch/).
6 | 
7 | For a list of reported bugs, see [bugs.md](./bugs.md).
8 | 
9 | ## Setup
10 | Clone the latest release with, for example, `git clone -b v0.0.2 https://github.com/DeadCodeProductions/dead`.
11 | 
12 | After navigating into the cloned repository, choose whether you want to run DEAD [locally](#local-setup) or in a [Docker container](#setup-with-docker).
13 | ### Setup with Docker
14 | ```
15 | ./build_docker.sh
16 | 
17 | # Enter the container
18 | docker run -it -v $(realpath ./docker_storage):/persistent deaddocker
19 | ```
20 | Continue by reading the [Run Section](#run).
21 | 
22 | ### Local Setup
23 | The following programs or libraries must be installed:
24 | - `python >= 3.10`
25 | - `gcc`
26 | - `clang`
27 | - `csmith`
28 | - `creduce`
29 | - `cmake`
30 | - `ccomp` (CompCert)
31 | - `llvm 13.0.0` or `llvm 14.0.0` (for the include files)
32 | - `compiler-rt` (for the sanitizer libraries; also part of LLVM)
33 | - `boost`
34 | - `ninja`
35 | 
36 | Optional programs:
37 | - `entr`
38 | 
39 | We are running on Arch Linux and have not (yet) tested any other distribution.
40 | 
41 | To achieve this in Arch with `yay` as AUR overlay helper, you can run:
42 | ```
43 | yay -Sy --noconfirm python\
44 |     python-pip\
45 |     gcc\
46 |     clang\
47 |     llvm\
48 |     compiler-rt\
49 |     cmake\
50 |     boost\
51 |     ninja\
52 |     csmith\
53 |     creduce-git\
54 |     compcert-git
55 | ```
56 | 
57 | Then run:
58 | ```
59 | # Create python environment
60 | python3 -m venv ./deadenv
61 | source ./deadenv/bin/activate
62 | pip install -r requirements.txt
63 | 
64 | # Initialize DEAD
65 | ./init.py
66 | ```
67 | `init.py` will:
68 | - Create a config file located at `~/.config/dead/config.json`
69 | - Compile the callchain-checker: `ccc`
70 | - Clone repositories of `gcc` and `llvm` into the local directory
71 | - Create the `compiler_cache` and `logs` directory
72 | - Check if it can find the programs and paths required in the prerequisites section and complain if not.
73 | 
74 | 
75 | ## Run
76 | As DEAD is based on differential testing, it requires two pieces of information to run:
77 | - Which compilers to find missed optimizations in. These are called *target* compilers. This is typically the current `trunk`.
78 | - Which compilers to use as a comparison to find missed optimizations in the target compilers. These are called *additional* or *attacking* compilers.
79 | 
80 | A compiler on the CLI is specified by writing `PROJECT REVISION [OPT_LEVEL ...]`. For example, to get `gcc 11.2.0` with all optimizations, write `gcc releases/gcc-11.2.0 1 2 3 s`. This can be repeated to specify more compilers.
81 | 
82 | ```sh
83 | # Don't run it yet
84 | ./main.py run --targets gcc trunk 1 2 3 s\
85 |     --additional-compilers\
86 |         gcc releases/gcc-11.2.0 1 2 3 s\
87 |         gcc releases/gcc-10.3.0 1 2 3 s
88 | ```
89 | To avoid repeating oneself, it is possible to specify default optimization levels.
90 | The optimization levels used for a given compiler are the union of the default levels and the explicitly specified ones.
91 | The flags are `--additional_compilers_default_opt_levels` and `--targets_default_opt_levels` or `-acdol` and `-tdol` respectively.
92 | 
93 | 
94 | ```sh
95 | # Don't run it yet
96 | ./main.py run --targets gcc trunk 1 2 3 s\
97 |     --additional-compilers\
98 |         gcc releases/gcc-11.2.0 \ # Opt levels: 3,s
99 |         gcc releases/gcc-10.3.0 1\ # Opt levels: 1,3,s
100 |     -acdol 3 s # Additional compilers
101 | ```
102 | 
103 | DEAD consists of three parts:
104 | - Generator, which finds missed optimizations from the given target and attacking compilers. We call such a missed optimization and any additional information related to it a *case*.
105 | - Bisector, which finds the introducing commit of the found case.
106 | - Reducer, which extracts a small part of the code that still exhibits the missed optimization.
107 | 
108 | By default, the Reducer is only enabled for cases which have a new bisection commit, as reducing takes a long time and is often not necessary.
109 | It can be enabled for all cases with `--reducer` and completely disabled with `--no-reducer`.
110 | 
111 | The last two important options are `--cores POSITIVE_INT` and `--log-level debug|info|warning|error|critical`.
112 | When not specified, `--cores` defaults to the number of logical cores on the machine.
113 | The default verbosity level is `warning`. However, to have a sense of progress, we suggest setting it to `info`.
114 | 
115 | Finally, to find missed optimizations in `trunk`, run
116 | ```sh
117 | # For GCC
118 | ./main.py -ll info\
119 |     --cores $CORES\
120 |     run --targets gcc trunk 1 2 3 s\
121 |     --additional-compilers\
122 |         gcc releases/gcc-11.2.0\
123 |         gcc releases/gcc-10.3.0\
124 |         gcc releases/gcc-9.4.0\
125 |         gcc releases/gcc-8.5.0\
126 |         gcc releases/gcc-7.5.0\
127 |     -acdol 1 2 3 s
128 | #--amount N # Terminate after finding N cases
129 | 
130 | # For LLVM
131 | ./main.py -ll info\
132 |     --cores $CORES\
133 |     run --targets llvm trunk 1 2 3 s z\
134 |     --additional-compilers\
135 |         llvm llvmorg-13.0.1\
136 |         llvm llvmorg-12.0.1\
137 |         llvm llvmorg-11.1.0\
138 |         llvm llvmorg-10.0.1\
139 |         llvm llvmorg-9.0.1\
140 |         llvm llvmorg-8.0.1\
141 |         llvm llvmorg-7.1.0\
142 |         llvm llvmorg-6.0.1\
143 |         llvm llvmorg-5.0.2\
144 |         llvm llvmorg-4.0.1\
145 |     -acdol 1 2 3 s z
146 | #--amount N # Terminate after finding N cases
147 | ```
148 | 
149 | Please run `./main.py run -h` and `./main.py -h` to see more options.
150 | 
151 | ### Performance considerations
152 | Assigning all cores of the machine to just one instance of DEAD can lead to less than optimal machine utilization. Some parts of the pipeline cannot always use all cores.
153 | 
154 | - The Bisector is written in a single-threaded way and only requires multiple cores when building a new compiler.
155 |   As the cache grows and many regressions have already been found, the cache hit rate increases drastically, making the Bisector an essentially single-threaded part.
156 | - GCC compilation includes several single-threaded parts. Compiling with sufficiently many cores will make it look like a mostly single-threaded task due to Amdahl's law. LLVM compilation also includes some single-threaded parts, but these are far less noticeable.
157 | - The Reducer uses `creduce` to shrink the case. `creduce` also does not always utilize the machine perfectly when using many threads.
158 | 
159 | Just oversubscribing the machine is not an option, as some checks are time-dependent. Failing these checks will especially impact the throughput of the Reducer.
160 | 
161 | One fairly good solution is to run multiple smaller instances in parallel.
162 | 
163 | For the Reducer, 8 logical cores per pipeline yielded good results.
164 | 
165 | Finding new cases in parallel has the big caveat that the instances wait on each other when one is building a compiler that the other needs. This dependence is very common when the cache is not populated enough. Running multiple instances in parallel too early is detrimental to machine utilization!
166 | 
167 | Pinpointing when the switch to multiple instances is beneficial is difficult.
168 | For this reason we provide `run_parallel.sh`, which spawns multiple instances with the appropriate number of cores assigned.
169 | ```sh
170 | ./run_parallel.sh llvm|gcc TOTAL_CORES AMOUNT_JOBS
171 | ```
172 | 
173 | ## Generating a report
174 | 
175 | Imagine DEAD ran for some time and it is now time to create a bug report.
176 | 
177 | Not-yet-reported cases can be explored with the `unreported` sub-command.
178 | 
179 | ```sh
180 | $ ./main.py unreported
181 | ID    Bisection                                   Count
182 | ----------------------------------------------------------------
183 | 2     0b92cf305dcf34387a8e2564e55ca8948df3b47a    45
184 | ...
185 | 39    008e7397dad971c03c08fc1b0a4a98fddccaaed8    1
186 | ----------------------------------------------------------------
187 | ID    Bisection                                   Count
188 | ```
189 | On the left you see an ID for a case that has the bisection commit shown in the bisection column.
190 | Oftentimes, many cases bisect to the same commit. The 'Count' column displays how many cases bisected to this particular commit.
191 | Note that a fix for a reported case may not fix all cases of the bisection!
192 | 
193 | Select one of the IDs and check if there is already a bug report which includes its bisection commit.
194 | 
195 | If this is not the case, run
196 | ```sh
197 | ./main.py report $ID > report.txt
198 | ```
199 | 
200 | It will pull the compiler project of the case, build `trunk` and test if the missed optimization can still be observed.
201 | You can disable pulling with `--no-pull`.
202 | If the optimization is still missed, it will output a copy-and-pasteable report into `report.txt` (don't forget to remove the title if there is one) and `case.txt`[^1], a copy of the reported code.
203 | [^1]: It is `.txt` instead of `.c` because GitHub does not allow `.c` files to be attached to issues.
204 | 
205 | When you have submitted the bug report, you can save the link to the report via
206 | ```
207 | ./main.py set link $ID $LINK
208 | ```
209 | so that the bisection isn't displayed anymore.
210 | 
211 | Hopefully, the missed optimization gets fixed. When this is the case, you can extract the case ID from the bug report and note down the fixing commit. Then save it with
212 | ```
213 | ./main.py set fixed $ID $COMMIT
214 | ```
215 | 
216 | Inspecting reported cases can be done via
217 | ```
218 | ./main.py reported
219 | ```
220 | 
221 | ### Massaging workflow
222 | Sometimes it is possible to further reduce the automatically reduced code manually. We call this step *massaging*, the product of which is *massaged code*.
223 | 
224 | Instead of directly generating the report after having selected an ID and checked if the bisection commit was already reported, get the reduced code and try to make it smaller.
225 | ```sh
226 | ./main.py get rcode $ID > rcode.c
227 | ```
228 | To continuously check if the changes still exhibit the missed optimization, open a separate terminal in the same directory and run
229 | ```sh
230 | echo rcode.c | entr -c ./main.py checkreduced $ID ./rcode.c
231 | ```
232 | This will rerun some checks whenever `rcode.c` is saved.
233 | 
234 | When the massaging is done, save it into DEAD with
235 | ```sh
236 | ./main.py set mcode $ID ./rcode.c
237 | ```
238 | DEAD will check if the massaged code still bisects to the same commit as before and will reject the change if not.
239 | Empirically, changes to cases whose bisection is rarely found often don't allow any further massaging.
240 | 
241 | ## Subcommand overview of `main.py`
242 | 
243 | - `run`: Find new regressions/missed optimizations.
244 | - `tofile ID`: Save a case into a tar-file.
245 | - `absorb PATH`: Read tar-files into the database of DEAD.
246 | - `report ID`: Generate a report for a given case.
247 | - `rereduce ID FILE`: Reduce a file (again) w.r.t. a case.
248 | - `diagnose`: Run a set of tests when something seems odd with a case.
249 | - `checkreduced ID FILE`: Run some lightweight tests based on a case on a piece of code.
250 | - `cache`: Cache-related functionality.
251 | - `asm ID`: Generate assembly for all code of a case.
252 | - `set | get {link,fixed,mcode,rcode,ocode,bisection}`: Set or get the specified field of a case.
253 | - `build PROJECT REV`: Build `REV` of compiler project `PROJECT`.
254 | - `reduce ID`: Reduce case `ID`.
255 | - `edit`: Open DEAD's configuration in `$EDITOR`.
256 | - `unreported`: List unreported cases grouped by bisection commit.
257 | - `reported`: List reported cases.
258 | - `findby`: Find a case ID given some part of the case.
259 | 
260 | ## Overview of important files
261 | - `bisector.py`: Bisects a given interesting case.
262 | - `builder.py`: Builds the compiler.
263 | - `checker.py`: Checks if a given case is interesting.
264 | - `generator.py`: Finds new interesting cases.
265 | - `patcher.py`: Automatically finds the region in the history where a patch needs to be applied.
266 | - `reducer.py`: Reduces the code of a given case.
267 | 
268 | ## Q&A for potential issues
269 | ### I set flag X which I found in the help, but DEAD says the option does not exist!
270 | Sadly, flags are position-dependent. You have to put it after the command whose help you found the flag in and before any other subcommand.
271 | ### I want to do XYZ. How?
272 | Maybe there's already an option for it. Consult the program with `--help` for all the options.
273 | 
274 | ### Why don't I see anything?
275 | Are you running with `-ll info`?
276 | 
277 | ### DEAD wants to work with a commit that doesn't exist!
278 | If you are checking things manually: Are you sure you are looking in the right repository?
279 | 280 | If you are processing a case and `git` throws an exception, try pulling `llvm-project` and `gcc` so you are sure to have all the commits. 281 | 282 | ### Why does this case fail? 283 | Maybe `./main.py diagnose -ci $ID` can illuminate the situation. 284 | 285 | ### This case does not reduce but `diagnose` says everything is fine! 286 | Try throwing your whole machine at it (`./main.py reduce ID`). 287 | 288 | ### The compilers should already be built and the logs just say `INFO:root: [...] is currently building; need to wait`. 289 | Stop DEAD, run `./main.py cache clean`, restart. 290 | What happened? The most likely scenario is that DEAD was interrupted while building a compiler and unable to run the clean-up procedure, confusing DEAD the next time the compiler has to be built. 291 | Do *not* run `cache clean` while DEAD is running. 292 | 293 | ### A compiler I want to build has a build issue; where do I find the build-logs? 294 | The logs can be found in the path specified by the `logdir` entry of the `$HOME/.config/dead/config.json`. 295 | For an installation with `init.py`, this is `$PROJECTDIR/logs`. 296 | For a docker installation this is `/persistent/logs` in the container. 297 | -------------------------------------------------------------------------------- /generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from __future__ import annotations 4 | 5 | import json 6 | import logging 7 | import os 8 | import signal 9 | import subprocess 10 | from multiprocessing import Process, Queue 11 | from os.path import join as pjoin 12 | from pathlib import Path 13 | from random import randint 14 | from tempfile import NamedTemporaryFile 15 | from typing import TYPE_CHECKING, Generator, Optional, Union 16 | 17 | from ccbuilder import Builder, PatchDB, get_compiler_info 18 | from dead_instrumenter.instrumenter import instrument_program 19 | 20 | import checker 21 | import parsers 22 | import utils 23 | 24 | 25 | def run_csmith(csmith: str) -> str: 26 | """Generate random code with csmith. 27 | 28 | Args: 29 | csmith (str): Path to executable or name in $PATH to csmith. 30 | 31 | Returns: 32 | str: csmith generated program. 33 | """ 34 | tries = 0 35 | while True: 36 | options = [ 37 | "arrays", 38 | "bitfields", 39 | "checksum", 40 | "comma-operators", 41 | "compound-assignment", 42 | "consts", 43 | "divs", 44 | "embedded-assigns", 45 | "jumps", 46 | "longlong", 47 | "force-non-uniform-arrays", 48 | "math64", 49 | "muls", 50 | "packed-struct", 51 | "paranoid", 52 | "pointers", 53 | "structs", 54 | "inline-function", 55 | "return-structs", 56 | "arg-structs", 57 | "dangling-global-pointers", 58 | ] 59 | 60 | cmd = [ 61 | csmith, 62 | "--no-unions", 63 | "--safe-math", 64 | "--no-argc", 65 | "--no-volatiles", 66 | "--no-volatile-pointers", 67 | ] 68 | for option in options: 69 | if randint(0, 1): 70 | cmd.append(f"--{option}") 71 | else: 72 | cmd.append(f"--no-{option}") 73 | result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 74 | if result.returncode == 0: 75 | return result.stdout.decode("utf-8") 76 | else: 77 | tries += 1 78 | if tries > 10: 79 | raise Exception("CSmith failed 10 times in a row!") 80 | 81 | 82 | def generate_file( 83 | config: utils.NestedNamespace, additional_flags: str 84 | ) -> tuple[str, str]: 85 | """Generate an instrumented csmith program. 
86 | 
87 |     Args:
88 |         config (utils.NestedNamespace): THE config
89 |         additional_flags (str): Additional flags to use when
90 |             compiling the program when checking.
91 | 
92 |     Returns:
93 |         tuple[str, str]: Marker prefix and instrumented code.
94 |     """
95 |     additional_flags += f" -I {config.csmith.include_path}"
96 |     while True:
97 |         try:
98 |             logging.debug("Generating new candidate...")
99 |             candidate = run_csmith(config.csmith.executable)
100 |             if len(candidate) > config.csmith.max_size:
101 |                 continue
102 |             if len(candidate) < config.csmith.min_size:
103 |                 continue
104 |             with NamedTemporaryFile(suffix=".c") as ntf:
105 |                 with open(ntf.name, "w") as f:
106 |                     print(candidate, file=f)
107 |                 logging.debug("Checking if program is sane...")
108 |                 if not checker.sanitize(
109 |                     config.gcc.sane_version,
110 |                     config.llvm.sane_version,
111 |                     config.ccomp,
112 |                     Path(ntf.name),
113 |                     additional_flags,
114 |                 ):
115 |                     continue
116 |                 logging.debug("Instrumenting candidate...")
117 |                 marker_prefix = instrument_program(
118 |                     Path(ntf.name), [f"-I{config.csmith.include_path}"]
119 |                 )
120 |                 with open(ntf.name, "r") as f:
121 |                     return marker_prefix, f.read()
122 | 
123 | 
124 |         except subprocess.TimeoutExpired:
125 |             pass
126 | 
127 | 
128 | class CSmithCaseGenerator:
129 |     def __init__(
130 |         self,
131 |         config: utils.NestedNamespace,
132 |         patchdb: PatchDB,
133 |         cores: Optional[int] = None,
134 |     ):
135 |         self.config: utils.NestedNamespace = config
136 | 
137 |         _, llvm_repo = get_compiler_info("llvm", Path(config.repodir))
138 |         _, gcc_repo = get_compiler_info("gcc", Path(config.repodir))
139 |         self.builder: Builder = Builder(
140 |             Path(config.cachedir),
141 |             gcc_repo,
142 |             llvm_repo,
143 |             patchdb,
144 |             cores,
145 |             logdir=Path(config.logdir),
146 |         )
147 |         self.chkr: checker.Checker = checker.Checker(config, self.builder)
148 |         self.procs: list[Process] = []
149 |         self.try_counter: int = 0
150 | 
151 |     def generate_interesting_case(self, scenario: utils.Scenario) -> utils.Case:
152 |         """Generate a case which is interesting, i.e. there is one compiler
153 |         (from the target settings) which does not eliminate a marker and at
154 |         least one (from the attacker settings) which does.
155 | 
156 |         Args:
157 |             scenario (utils.Scenario): Which compilers to compare.
158 | 
159 |         Returns:
160 |             utils.Case: Interesting case.
161 | """ 162 | # Because the resulting code will be of csmith origin, we have to add 163 | # the csmith include path to all settings 164 | csmith_include_flag = f"-I{self.config.csmith.include_path}" 165 | scenario.add_flags([csmith_include_flag]) 166 | 167 | self.try_counter = 0 168 | while True: 169 | self.try_counter += 1 170 | logging.debug("Generating new candidate...") 171 | marker_prefix, candidate_code = generate_file(self.config, "") 172 | 173 | # Find alive markers 174 | logging.debug("Getting alive markers...") 175 | try: 176 | target_alive_marker_list = [ 177 | ( 178 | tt, 179 | utils.find_alive_markers( 180 | candidate_code, tt, marker_prefix, self.builder 181 | ), 182 | ) 183 | for tt in scenario.target_settings 184 | ] 185 | 186 | tester_alive_marker_list = [ 187 | ( 188 | tt, 189 | utils.find_alive_markers( 190 | candidate_code, tt, marker_prefix, self.builder 191 | ), 192 | ) 193 | for tt in scenario.attacker_settings 194 | ] 195 | except utils.CompileError: 196 | continue 197 | 198 | target_alive_markers = set() 199 | for _, marker_set in target_alive_marker_list: 200 | target_alive_markers.update(marker_set) 201 | 202 | # Extract reduce cases 203 | logging.debug("Extracting reduce cases...") 204 | for marker in target_alive_markers: 205 | good: list[utils.CompilerSetting] = [] 206 | for good_setting, good_alive_markers in tester_alive_marker_list: 207 | if ( 208 | marker not in good_alive_markers 209 | ): # i.e. the setting eliminated the call 210 | good.append(good_setting) 211 | 212 | # Find bad cases 213 | if len(good) > 0: 214 | good_opt_levels = [gs.opt_level for gs in good] 215 | for bad_setting, bad_alive_markers in target_alive_marker_list: 216 | # XXX: Here you can enable inter-opt_level comparison! 217 | if ( 218 | marker in bad_alive_markers 219 | and bad_setting.opt_level in good_opt_levels 220 | ): # i.e. the setting didn't eliminate the call 221 | # Create reduce case 222 | case = utils.Case( 223 | code=candidate_code, 224 | marker=marker, 225 | bad_setting=bad_setting, 226 | good_settings=good, 227 | scenario=scenario, 228 | reduced_code=None, 229 | bisection=None, 230 | path=None, 231 | ) 232 | # TODO: Optimize interestingness test and document behaviour 233 | try: 234 | if self.chkr.is_interesting(case): 235 | logging.info( 236 | f"Try {self.try_counter}: Found case! LENGTH: {len(candidate_code)}" 237 | ) 238 | return case 239 | except utils.CompileError: 240 | continue 241 | else: 242 | logging.debug( 243 | f"Try {self.try_counter}: Found no case. Onto the next one!" 244 | ) 245 | 246 | def _wrapper_interesting(self, queue: Queue[str], scenario: utils.Scenario) -> None: 247 | """Wrapper for generate_interesting_case for easier use 248 | with python multiprocessing. 249 | 250 | Args: 251 | queue (Queue): The multiprocessing queue to do IPC with. 252 | scenario (utils.Scenario): Scenario 253 | """ 254 | logging.info("Starting worker...") 255 | while True: 256 | case = self.generate_interesting_case(scenario) 257 | queue.put(json.dumps(case.to_jsonable_dict())) 258 | 259 | def parallel_interesting_case_file( 260 | self, 261 | config: utils.NestedNamespace, 262 | scenario: utils.Scenario, 263 | processes: int, 264 | output_dir: os.PathLike[str], 265 | start_stop: Optional[bool] = False, 266 | ) -> Generator[Path, None, None]: 267 | """Generate interesting cases in parallel 268 | WARNING: If you use this method, you have to call `terminate_processes` 269 | 270 | Args: 271 | config (utils.NestedNamespace): THE config. 
272 |             scenario (utils.Scenario): Scenario.
273 |             processes (int): Number of jobs.
274 |             output_dir (os.PathLike): Directory where to output the found cases.
275 |             start_stop (Optional[bool]): Whether or not to stop the processes when
276 |                 finding a case. This is useful when running a pipeline and thus
277 |                 the processing power is needed somewhere else.
278 | 
279 |         Returns:
280 |             Generator[Path, None, None]: Interesting case generator giving paths.
281 |         """
282 |         gen = self.parallel_interesting_case(config, scenario, processes, start_stop)
283 | 
284 |         counter = 0
285 |         while True:
286 |             case = next(gen)
287 |             h = hash(str(case))
288 |             h = max(h, -h)  # i.e. abs(h): keep the hash in the filename non-negative
289 |             path = Path(pjoin(output_dir, f"case_{counter:08}-{h:019}.tar"))
290 |             logging.debug(f"Writing case to {path}...")
291 |             case.to_file(path)
292 |             yield path
293 |             counter += 1
294 | 
295 |     def parallel_interesting_case(
296 |         self,
297 |         config: utils.NestedNamespace,
298 |         scenario: utils.Scenario,
299 |         processes: int,
300 |         start_stop: Optional[bool] = False,
301 |     ) -> Generator[utils.Case, None, None]:
302 |         """Generate interesting cases in parallel
303 |         WARNING: If you use this method, you have to call `terminate_processes`
304 | 
305 |         Args:
306 |             config (utils.NestedNamespace): THE config.
307 |             scenario (utils.Scenario): Scenario.
308 |             processes (int): Number of jobs.
309 | 
310 |             start_stop (Optional[bool]): Whether or not to stop the processes when
311 |                 finding a case. This is useful when running a pipeline and thus
312 |                 the processing power is needed somewhere else.
313 | 
314 |         Returns:
315 |             Generator[utils.Case, None, None]: Interesting case generator giving Cases.
316 |         """
317 | 
318 |         queue: Queue[str] = Queue()
319 | 
320 |         # Create processes
321 |         self.procs = [
322 |             Process(
323 |                 target=self._wrapper_interesting,
324 |                 args=(queue, scenario),
325 |             )
326 |             for _ in range(processes)
327 |         ]
328 | 
329 |         # Start processes
330 |         for p in self.procs:
331 |             p.daemon = True
332 |             p.start()
333 | 
334 |         # read queue
335 |         while True:
336 |             # TODO: handle process failure
337 |             case_str: str = queue.get()
338 | 
339 |             case = utils.Case.from_jsonable_dict(config, json.loads(case_str))
340 | 
341 |             if start_stop:
342 |                 # Send processes to "sleep"
343 |                 logging.debug("Stopping workers...")
344 |                 for p in self.procs:
345 |                     if p.pid is None:
346 |                         continue
347 |                     os.kill(p.pid, signal.SIGSTOP)
348 |             yield case
349 |             if start_stop:
350 |                 logging.debug("Restarting workers...")
351 |                 # Wake the processes again for further search
352 |                 for p in self.procs:
353 |                     if p.pid is None:
354 |                         continue
355 |                     os.kill(p.pid, signal.SIGCONT)
356 | 
357 |     def terminate_processes(self) -> None:
358 |         for p in self.procs:
359 |             if p.pid is None:
360 |                 continue
361 |             # This is so cruel
362 |             os.kill(p.pid, signal.SIGCONT)
363 |             p.terminate()
364 | 
365 | 
366 | if __name__ == "__main__":
367 |     config, args = utils.get_config_and_parser(parsers.generator_parser())
368 | 
369 |     cores = args.cores
370 | 
371 |     patchdb = PatchDB(Path(config.patchdb))
372 |     case_generator = CSmithCaseGenerator(config, patchdb, cores)
373 | 
374 |     if args.interesting:
375 |         scenario = utils.Scenario([], [])
376 |         if args.scenario:
377 |             scenario = utils.Scenario.from_file(config, Path(args.scenario))
378 | 
379 |         if not args.scenario and args.targets is None:
380 |             print(
381 |                 "--targets is required for --interesting if you don't specify a scenario"
382 |             )
383 |             exit(1)
384 |         elif args.targets:
385 |             target_settings =
utils.get_compiler_settings( 386 | config, args.targets, default_opt_levels=args.targets_default_opt_levels 387 | ) 388 | scenario.target_settings = target_settings 389 | 390 | if not args.scenario and args.additional_compilers is None: 391 | print( 392 | "--additional-compilers is required for --interesting if you don't specify a scenario" 393 | ) 394 | exit(1) 395 | elif args.additional_compilers: 396 | additional_compilers = utils.get_compiler_settings( 397 | config, 398 | args.additional_compilers, 399 | default_opt_levels=args.additional_compilers_default_opt_levels, 400 | ) 401 | 402 | scenario.attacker_settings = additional_compilers 403 | 404 | if args.output_directory is None: 405 | print("Missing output directory!") 406 | exit(1) 407 | else: 408 | output_dir = os.path.abspath(args.output_directory) 409 | os.makedirs(output_dir, exist_ok=True) 410 | 411 | if args.parallel is not None: 412 | amount_cases = args.amount if args.amount is not None else 0 413 | amount_processes = max(1, args.parallel) 414 | gen = case_generator.parallel_interesting_case_file( 415 | config=config, 416 | scenario=scenario, 417 | processes=amount_processes, 418 | output_dir=output_dir, 419 | start_stop=False, 420 | ) 421 | if amount_cases == 0: 422 | while True: 423 | print(next(gen)) 424 | else: 425 | for i in range(amount_cases): 426 | print(next(gen)) 427 | 428 | else: 429 | print(case_generator.generate_interesting_case(scenario)) 430 | else: 431 | # TODO 432 | print("Not implemented yet") 433 | 434 | # This is not needed here but I don't know why. 435 | case_generator.terminate_processes() 436 | -------------------------------------------------------------------------------- /bisector.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import copy 4 | import functools 5 | import logging 6 | import math 7 | import os 8 | import subprocess 9 | import tarfile 10 | from pathlib import Path 11 | from typing import Optional 12 | 13 | import ccbuilder 14 | from ccbuilder import ( 15 | Builder, 16 | BuildException, 17 | CompilerProject, 18 | PatchDB, 19 | Repo, 20 | get_compiler_info, 21 | ) 22 | from ccbuilder.utils.utils import select_repo 23 | 24 | import checker 25 | import generator 26 | import parsers 27 | import reducer 28 | import utils 29 | 30 | 31 | class BisectionException(Exception): 32 | pass 33 | 34 | 35 | def find_cached_revisions( 36 | compiler_name: str, config: utils.NestedNamespace 37 | ) -> list[str]: 38 | if compiler_name == "llvm": 39 | compiler_name = "clang" 40 | compilers = [] 41 | for entry in Path(config.cachedir).iterdir(): 42 | if entry.is_symlink() or not entry.stem.startswith(compiler_name): 43 | continue 44 | if not (entry / "bin" / compiler_name).exists(): 45 | continue 46 | rev = str(entry).split("-")[-1] 47 | compilers.append(rev) 48 | return compilers 49 | 50 | 51 | class Bisector: 52 | """Class to bisect a given case.""" 53 | 54 | def __init__( 55 | self, 56 | config: utils.NestedNamespace, 57 | bldr: Builder, 58 | chkr: checker.Checker, 59 | ) -> None: 60 | self.config = config 61 | self.bldr = bldr 62 | self.chkr = chkr 63 | self.steps = 0 64 | 65 | def _is_interesting(self, case: utils.Case, rev: str) -> bool: 66 | """_is_interesting. 67 | 68 | Args: 69 | case (utils.Case): Case to check 70 | rev (str): What revision to check the case against. 71 | 72 | Returns: 73 | bool: True if the case is interesting wrt `rev`. 
74 | 75 | Raises: 76 | builder.CompileError: 77 | """ 78 | case_cpy = copy.deepcopy(case) 79 | case_cpy.bad_setting.rev = rev 80 | try: 81 | if case_cpy.reduced_code: 82 | case_cpy.code = case_cpy.reduced_code 83 | return self.chkr.is_interesting(case_cpy, preprocess=False) 84 | else: 85 | return self.chkr.is_interesting(case_cpy, preprocess=True) 86 | except subprocess.CalledProcessError as e: 87 | raise utils.CompileError(e) 88 | 89 | def bisect_file(self, file: Path, force: bool = False) -> bool: 90 | """Bisect case found in `file`. 91 | 92 | Args: 93 | file (Path): Path to case file to bisect. 94 | force (bool): Whether or not to force a bisection 95 | if there's already one. 96 | 97 | Returns: 98 | bool: True if the bisection of the case in `file` succeeded. 99 | """ 100 | case = utils.Case.from_file(self.config, file) 101 | if self.bisect_case(case, force): 102 | case.to_file(file) 103 | return True 104 | return False 105 | 106 | def bisect_case(self, case: utils.Case, force: bool = False) -> bool: 107 | """Bisect a given case. 108 | 109 | Args: 110 | case (utils.Case): Case to bisect. 111 | force (bool): Whether or not to force a bisection 112 | if there's already one. 113 | 114 | Returns: 115 | bool: True if the bisection succeeded. 116 | """ 117 | if not force and case.bisection: 118 | logging.info(f"Ignoring case: Already bisected") 119 | return True 120 | try: 121 | if res := self.bisect_code( 122 | case.code, case.marker, case.bad_setting, case.good_settings 123 | ): 124 | case.bisection = res 125 | return True 126 | except BisectionException: 127 | return False 128 | return False 129 | 130 | def bisect_code( 131 | self, 132 | code: str, 133 | marker: str, 134 | bad_setting: utils.CompilerSetting, 135 | good_settings: list[utils.CompilerSetting], 136 | ) -> Optional[str]: 137 | """Bisect a given code wrt. marker, the bad setting and the good settings. 138 | 139 | Args: 140 | self: 141 | code (str): code 142 | marker (str): marker 143 | bad_setting (utils.CompilerSetting): bad_setting 144 | good_settings (list[utils.CompilerSetting]): good_settings 145 | 146 | Returns: 147 | Optional[str]: Revision the code bisects to, if it is successful. 148 | None otherwise. 149 | 150 | Raises: 151 | BisectionException: Raised if the bisection failed somehow. 152 | """ 153 | case = utils.Case( 154 | code, 155 | marker, 156 | bad_setting, 157 | good_settings, 158 | utils.Scenario([bad_setting], good_settings), 159 | None, 160 | None, 161 | None, 162 | ) 163 | 164 | bad_compiler_config = case.bad_setting.compiler_project 165 | repo = select_repo( 166 | bad_setting.compiler_project, 167 | gcc_repo=self.bldr.gcc_repo, 168 | llvm_repo=self.bldr.llvm_repo, 169 | ) 170 | 171 | # ===== Get good and bad commits 172 | bad_commit = case.bad_setting.rev 173 | # Only the ones which are on the same opt_level and have the same compiler can be bisected 174 | possible_good_commits = [ 175 | gs.rev 176 | for gs in case.good_settings 177 | if gs.opt_level == case.bad_setting.opt_level 178 | and gs.compiler_project.to_string() == bad_compiler_config.to_string() 179 | ] 180 | 181 | if len(possible_good_commits) == 0: 182 | logging.info(f"No matching optimization level found. Aborting...") 183 | return None 184 | # Sort commits based on branch point wrt to the bad commit 185 | # Why? Look at the following commit graph 186 | # Bad 187 | # | Good_1 188 | # | / 189 | # A Good_2 190 | # | / 191 | # | / 192 | # B 193 | # | 194 | # We want to bisect between Bad and Good_1 because it's less bisection work. 
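        # In the graph above, Good_1's common ancestor with Bad is A and
        # Good_2's is B; since B is an ancestor of A, picking Good_1 leaves
        # only the shorter range A..Bad to bisect.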
195 |         possible_good_commits_t = [
196 |             (rev, repo.get_best_common_ancestor(bad_commit, rev))
197 |             for rev in possible_good_commits
198 |         ]
199 | 
200 |         good_commit: str
201 |         common_ancestor: str
202 | 
203 |         def cmp_func(x: tuple[str, str], y: tuple[str, str]) -> int:
204 |             return 1 if repo.is_ancestor(x[1], y[1]) else -1  # cmp_to_key needs an int comparator; "smaller" = more recent common ancestor
205 | 
206 |         good_commit, common_ancestor = min(
207 |             possible_good_commits_t,
208 |             key=functools.cmp_to_key(cmp_func),
209 |         )
210 | 
211 |         # ====== Figure out in which part the introducer or fixer lies
212 |         #
213 |         # Bad             Bad
214 |         #  |               |
215 |         #  |               |   Good
216 |         #  | or            |b1 /
217 |         #  |b0             |  / b2
218 |         #  |               | /
219 |         # Good             CA
220 |         #
221 |         # if good is_ancestor of bad:
222 |         #     case b0
223 |         #     searching regression
224 |         # else:
225 |         #     if CA is not interesting:
226 |         #         case b1
227 |         #         searching regression
228 |         #     else:
229 |         #         case b2
230 |         #         searching fixer
231 | 
232 |         try:
233 |             if repo.is_ancestor(good_commit, bad_commit):
234 |                 res = self._bisection(good_commit, bad_commit, case, repo)
235 |                 print(f"{res}")
236 |             else:
237 |                 if not self._is_interesting(case, common_ancestor):
238 |                     # b1 case
239 |                     logging.info("B1 Case")
240 |                     res = self._bisection(
241 |                         common_ancestor, bad_commit, case, repo, interesting_is_bad=True
242 |                     )
243 |                     print(f"{res}")
244 |                     self._check(case, res, repo)
245 |                 else:
246 |                     # b2 case
247 |                     logging.info("B2 Case")
248 |                     # TODO: Figure out how to save and handle b2
249 |                     logging.critical(f"Currently ignoring b2, sorry")
250 |                     raise BisectionException("Currently ignoring Case type B2, sorry")
251 | 
252 |                     # res = self._bisection(
253 |                     #     common_ancestor, good_commit, case, repo, interesting_is_bad=False
254 |                     # )
255 |                     # self._check(case, res, repo, interesting_is_bad=False)
256 |                     # print(f"First good commit {res}")
257 |         except utils.CompileError:
258 |             return None
259 | 
260 |         return res
261 | 
262 |     def _check(
263 |         self,
264 |         case: utils.Case,
265 |         rev: str,
266 |         repo: Repo,
267 |         interesting_is_bad: bool = True,
268 |     ) -> None:
269 |         """Sanity check that the bisected commit is actually
270 |         correct.
271 | 
272 |         Args:
273 |             case (utils.Case): Case to check.
274 |             rev (str): Revision believed to be the bisection commit.
275 |             repo (repository.Repo): Repository to get the previous commit from.
276 |             interesting_is_bad (bool): Whether or not to switch the expected result
277 |                 of the interestingness-test.
278 |         Raises:
279 |             AssertionError: Raised when the check fails.
280 |         """
281 |         # TODO(Yann): Don't use assertion errors.
282 | 
283 |         prev_commit = repo.rev_to_commit(f"{rev}~")
284 |         if interesting_is_bad:
285 |             assert self._is_interesting(case, rev) and not self._is_interesting(
286 |                 case, prev_commit
287 |             )
288 |         else:
289 |             assert not self._is_interesting(case, rev) and self._is_interesting(
290 |                 case, prev_commit
291 |             )
292 | 
293 |     def _bisection(
294 |         self,
295 |         good_rev: str,
296 |         bad_rev: str,
297 |         case: utils.Case,
298 |         repo: Repo,
299 |         interesting_is_bad: bool = True,
300 |         max_build_fail: int = 2,
301 |     ) -> str:
302 |         """Actual bisection part.
303 |         First bisects within the cache, then continues with a normal bisection.
304 | 
305 |         Args:
306 |             good_rev (str): Revision that is an ancestor of bad_rev.
307 |             bad_rev (str): Rev that comes later in the tree.
308 |             case (utils.Case): Case to bisect.
309 |             repo (repository.Repo): Repo to get the revisions from.
310 |             interesting_is_bad (bool): Whether or not to switch how to interpret
311 |                 the outcome of the interestingness-test.
312 | max_build_fail (int): How many times the builder can fail to build w/o 313 | aborting the bisection. 314 | """ 315 | 316 | self.steps = 0 317 | # check cache 318 | possible_revs = repo.direct_first_parent_path(good_rev, bad_rev) 319 | cached_revs = find_cached_revisions( 320 | case.bad_setting.compiler_project.to_string(), self.config 321 | ) 322 | cached_revs = [r for r in cached_revs if r in possible_revs] 323 | 324 | # Create enumeration dict to sort cached_revs with 325 | sort_dict = dict((r, v) for v, r in enumerate(possible_revs)) 326 | cached_revs = sorted(cached_revs, key=lambda x: sort_dict[x]) 327 | 328 | # bisect in cache 329 | len_region = len(repo.direct_first_parent_path(good_rev, bad_rev)) 330 | logging.info(f"Bisecting in cache...") 331 | midpoint = "" 332 | old_midpoint = "" 333 | failed_to_compile = False 334 | while True: 335 | if failed_to_compile: 336 | failed_to_compile = False 337 | cached_revs.remove(midpoint) 338 | 339 | logging.info(f"{len(cached_revs): 4}, bad: {bad_rev}, good: {good_rev}") 340 | if len(cached_revs) == 0: 341 | break 342 | midpoint_idx = len(cached_revs) // 2 343 | old_midpoint = midpoint 344 | midpoint = cached_revs[midpoint_idx] 345 | if old_midpoint == midpoint: 346 | break 347 | 348 | # There should be no build failure here, as we are working on cached builds 349 | # But there could be a CompileError 350 | self.steps += 1 351 | try: 352 | test: bool = self._is_interesting(case, midpoint) 353 | except utils.CompileError: 354 | logging.warning( 355 | f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}" 356 | ) 357 | failed_to_compile = True 358 | continue 359 | 360 | if test: 361 | # bad is always "on top" in the history tree 362 | # git rev-list returns commits in order of the parent relation 363 | # cached_revs is also sorted in that order 364 | # Thus when finding something bad i.e interesting, we have to cut the head 365 | # and when finding something good, we have to cut the tail 366 | if interesting_is_bad: 367 | bad_rev = midpoint 368 | cached_revs = cached_revs[midpoint_idx + 1 :] 369 | else: 370 | good_rev = midpoint 371 | cached_revs = cached_revs[:midpoint_idx] 372 | else: 373 | if interesting_is_bad: 374 | good_rev = midpoint 375 | cached_revs = cached_revs[:midpoint_idx] 376 | else: 377 | bad_rev = midpoint 378 | cached_revs = cached_revs[midpoint_idx + 1 :] 379 | 380 | len_region2 = len(repo.direct_first_parent_path(good_rev, bad_rev)) 381 | logging.info(f"Cache bisection: range size {len_region} -> {len_region2}") 382 | 383 | # bisect 384 | len_region = len(repo.direct_first_parent_path(good_rev, bad_rev)) 385 | logging.info(f"Bisecting for approx. {math.ceil(math.log2(len_region))} steps") 386 | midpoint = "" 387 | old_midpoint = "" 388 | failed_to_build_or_compile = False 389 | failed_to_build_counter = 0 390 | 391 | guaranteed_termination_counter = 0 392 | while True: 393 | if not failed_to_build_or_compile: 394 | old_midpoint = midpoint 395 | midpoint = repo.next_bisection_commit(good_rev, bad_rev) 396 | failed_to_build_counter = 0 397 | if midpoint == "" or midpoint == old_midpoint: 398 | break 399 | else: 400 | if failed_to_build_counter >= max_build_fail: 401 | raise BisectionException( 402 | "Failed too many times in a row while bisecting. Aborting bisection..." 
403 |                     )
404 |                 if failed_to_build_counter % 2 == 0:
405 |                     # Get size of range
406 |                     range_size = len(repo.direct_first_parent_path(midpoint, bad_rev))
407 | 
408 |                     # Move 10% towards the last bad
409 |                     step = max(int(0.9 * range_size), 1)
410 |                     midpoint = repo.rev_to_commit(f"{bad_rev}~{step}")
411 |                 else:
412 |                     # Symmetric to the case above but jumping 10% in the other direction, i.e. 20% from our position.
413 |                     range_size = len(repo.direct_first_parent_path(good_rev, midpoint))
414 |                     step = max(int(0.2 * range_size), 1)
415 |                     midpoint = repo.rev_to_commit(f"{midpoint}~{step}")
416 | 
417 |                 failed_to_build_counter += 1
418 |                 failed_to_build_or_compile = False
419 | 
420 |                 if guaranteed_termination_counter >= 20:
421 |                     raise BisectionException(
422 |                         "Failed too many times in a row while bisecting. Aborting bisection..."
423 |                     )
424 |                 guaranteed_termination_counter += 1
425 | 
426 |             logging.info(f"Midpoint: {midpoint}")
427 | 
428 |             try:
429 |                 test = self._is_interesting(case, midpoint)
430 |             except BuildException:
431 |                 logging.warning(
432 |                     f"Could not build {case.bad_setting.compiler_project.to_string()} {midpoint}!"
433 |                 )
434 |                 failed_to_build_or_compile = True
435 |                 continue
436 |             except utils.CompileError:
437 |                 logging.warning(
438 |                     f"Failed to compile code with {case.bad_setting.compiler_project.to_string()}-{midpoint}"
439 |                 )
440 |                 failed_to_build_or_compile = True
441 |                 continue
442 | 
443 |             if test:
444 |                 if interesting_is_bad:
445 |                     # "As if not_interesting_is_good does not exist"-case
446 |                     bad_rev = midpoint
447 |                 else:
448 |                     good_rev = midpoint
449 |             else:
450 |                 if interesting_is_bad:
451 |                     # "As if not_interesting_is_good does not exist"-case
452 |                     good_rev = midpoint
453 |                 else:
454 |                     bad_rev = midpoint
455 | 
456 |         return bad_rev
457 | 
458 | 
459 | if __name__ == "__main__":
460 |     config, args = utils.get_config_and_parser(parsers.bisector_parser())
461 | 
462 |     patchdb = PatchDB(Path(config.patchdb))
463 |     _, llvm_repo = get_compiler_info("llvm", Path(config.repodir))
464 |     _, gcc_repo = get_compiler_info("gcc", Path(config.repodir))
465 |     bldr = Builder(
466 |         Path(config.cachedir),
467 |         gcc_repo,
468 |         llvm_repo,
469 |         patchdb,
470 |         args.cores,
471 |         logdir=Path(config.logdir),
472 |     )
473 |     chkr = checker.Checker(config, bldr)
474 |     gnrtr = generator.CSmithCaseGenerator(config, patchdb, args.cores)
475 |     rdcr = reducer.Reducer(config, bldr)
476 |     bsctr = Bisector(config, bldr, chkr)
477 | 
478 |     # TODO: This is duplicate code
479 |     if args.work_through:
480 |         if args.output_directory is None:
481 |             print("Missing output/work-through directory!")
482 |             exit(1)
483 |         else:
484 |             output_dir = Path(os.path.abspath(args.output_directory))
485 |             os.makedirs(output_dir, exist_ok=True)
486 | 
487 |             tars = [
488 |                 output_dir / d
489 |                 for d in os.listdir(output_dir)
490 |                 if tarfile.is_tarfile(output_dir / d)
491 |             ]
492 | 
493 |             print(f"Processing {len(tars)} tars")
494 |             for tf in tars:
495 |                 print(f"Processing {tf}")
496 |                 try:
497 |                     bsctr.bisect_file(tf, force=args.force)
498 |                 except BisectionException as e:
499 |                     print(f"BisectionException in {tf}: '{e}'")
500 |                     continue
501 |                 except AssertionError as e:
502 |                     print(f"AssertionError in {tf}: '{e}'")
503 |                     continue
504 |                 except BuildException as e:
505 |                     print(f"BuildException in {tf}: '{e}'")
506 |                     continue
507 | 
508 |     if args.generate:
509 |         if args.output_directory is None:
510 |             print("Missing output directory!")
511 |             exit(1)
512 |         else:
513 |             output_dir = os.path.abspath(args.output_directory)
514 |             os.makedirs(output_dir,
exist_ok=True) 515 | 516 | scenario = utils.Scenario([], []) 517 | # When file is specified, use scenario of file as base 518 | if args.file: 519 | file = Path(args.file).absolute() 520 | scenario = utils.Case.from_file(config, file).scenario 521 | 522 | tmp = utils.get_scenario(config, args) 523 | if len(tmp.target_settings) > 0: 524 | scenario.target_settings = tmp.target_settings 525 | if len(tmp.attacker_settings) > 0: 526 | scenario.attacker_settings = tmp.attacker_settings 527 | 528 | gen = gnrtr.parallel_interesting_case_file( 529 | config, scenario, bldr.jobs, output_dir, start_stop=True 530 | ) 531 | 532 | if args.amount == 0: 533 | while True: 534 | path = next(gen) 535 | worked = False 536 | if args.reducer: 537 | try: 538 | worked = rdcr.reduce_file(path) 539 | except BuildException as e: 540 | print(f"BuildException in {path}: {e}") 541 | continue 542 | 543 | if not args.reducer or worked: 544 | try: 545 | bsctr.bisect_file(path, force=args.force) 546 | except BisectionException as e: 547 | print(f"BisectionException in {path}: '{e}'") 548 | continue 549 | except AssertionError as e: 550 | print(f"AssertionError in {path}: '{e}'") 551 | continue 552 | except BuildException as e: 553 | print(f"BuildException in {path}: '{e}'") 554 | continue 555 | else: 556 | for i in range(args.amount): 557 | path = next(gen) 558 | worked = False 559 | if args.reducer: 560 | try: 561 | worked = rdcr.reduce_file(path) 562 | except BuildException as e: 563 | print(f"BuildException in {path}: {e}") 564 | continue 565 | if not args.reducer or worked: 566 | try: 567 | bsctr.bisect_file(path, force=args.force) 568 | except BisectionException as e: 569 | print(f"BisectionException in {path}: '{e}'") 570 | continue 571 | except AssertionError as e: 572 | print(f"AssertionError in {path}: '{e}'") 573 | continue 574 | except BuildException as e: 575 | print(f"BuildException in {path}: '{e}'") 576 | continue 577 | 578 | elif args.file: 579 | file = Path(args.file) 580 | bsctr.bisect_file(file, force=args.force) 581 | 582 | gnrtr.terminate_processes() 583 | -------------------------------------------------------------------------------- /checker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import copy 4 | import logging 5 | import os 6 | import re 7 | import subprocess 8 | import sys 9 | import tarfile 10 | import tempfile 11 | from pathlib import Path 12 | from types import TracebackType 13 | from typing import Optional 14 | 15 | import ccbuilder 16 | from ccbuilder import ( 17 | Builder, 18 | BuildException, 19 | CompilerProject, 20 | PatchDB, 21 | get_compiler_info, 22 | Repo, 23 | ) 24 | from dead_instrumenter.instrumenter import annotate_with_static 25 | 26 | import parsers 27 | import preprocessing 28 | import utils 29 | 30 | 31 | # ==================== Sanitize ==================== 32 | def get_cc_output(cc: str, file: Path, flags: str, cc_timeout: int) -> tuple[int, str]: 33 | cmd = [ 34 | cc, 35 | str(file), 36 | "-c", 37 | "-o/dev/null", 38 | "-Wall", 39 | "-Wextra", 40 | "-Wpedantic", 41 | "-O3", 42 | "-Wno-builtin-declaration-mismatch", 43 | ] 44 | if flags: 45 | cmd.extend(flags.split()) 46 | try: 47 | # Not using utils.run_cmd because of redirects 48 | result = subprocess.run( 49 | cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, timeout=cc_timeout 50 | ) 51 | except subprocess.TimeoutExpired: 52 | return 1, "" 53 | except subprocess.CalledProcessError: 54 | # Possibly a compilation failure 55 | return 1, "" 56 
| return result.returncode, result.stdout.decode("utf-8") 57 | 58 | 59 | def check_compiler_warnings( 60 | clang: str, gcc: str, file: Path, flags: str, cc_timeout: int 61 | ) -> bool: 62 | """ 63 | Check if the compiler outputs any warnings that indicate 64 | undefined behaviour. 65 | 66 | Args: 67 | clang (str): Normal executable of clang. 68 | gcc (str): Normal executable of gcc. 69 | file (Path): File to compile. 70 | flags (str): (additional) flags to be used when compiling. 71 | cc_timeout (int): Timeout for the compilation in seconds. 72 | 73 | Returns: 74 | bool: True if no warnings were found. 75 | """ 76 | clang_rc, clang_output = get_cc_output(clang, file, flags, cc_timeout) 77 | gcc_rc, gcc_output = get_cc_output(gcc, file, flags, cc_timeout) 78 | 79 | if clang_rc != 0 or gcc_rc != 0: 80 | return False 81 | 82 | warnings = [ 83 | "conversions than data arguments", 84 | "incompatible redeclaration", 85 | "ordered comparison between pointer", 86 | "eliding middle term", 87 | "end of non-void function", 88 | "invalid in C99", 89 | "specifies type", 90 | "should return a value", 91 | "uninitialized", 92 | "incompatible pointer to", 93 | "incompatible integer to", 94 | "comparison of distinct pointer types", 95 | "type specifier missing", 96 | "uninitialized", 97 | "Wimplicit-int", 98 | "division by zero", 99 | "without a cast", 100 | "control reaches end", 101 | "return type defaults", 102 | "cast from pointer to integer", 103 | "useless type name in empty declaration", 104 | "no semicolon at end", 105 | "type defaults to", 106 | "too few arguments for format", 107 | "incompatible pointer", 108 | "ordered comparison of pointer with integer", 109 | "declaration does not declare anything", 110 | "expects type", 111 | "comparison of distinct pointer types", 112 | "pointer from integer", 113 | "incompatible implicit", 114 | "excess elements in struct initializer", 115 | "comparison between pointer and integer", 116 | "return type of ‘main’ is not ‘int’", 117 | "past the end of the array", 118 | "no return statement in function returning non-void", 119 | "undefined behavior", 120 | ] 121 | 122 | ws = [w for w in warnings if w in clang_output or w in gcc_output] 123 | if len(ws) > 0: 124 | logging.debug(f"Compiler warnings found: {ws}") 125 | return False 126 | 127 | return True 128 | 129 | 130 | class CCompEnv: 131 | def __init__(self) -> None: 132 | self.td: tempfile.TemporaryDirectory[str] 133 | 134 | def __enter__(self) -> Path: 135 | self.td = tempfile.TemporaryDirectory() 136 | tempfile.tempdir = self.td.name 137 | return Path(self.td.name) 138 | 139 | def __exit__( 140 | self, 141 | exc_type: Optional[type[BaseException]], 142 | exc_value: Optional[BaseException], 143 | exc_traceback: Optional[TracebackType], 144 | ) -> None: 145 | tempfile.tempdir = None 146 | 147 | 148 | def verify_with_ccomp( 149 | ccomp: str, file: Path, flags: str, compcert_timeout: int 150 | ) -> bool: 151 | """Check if CompCert is unhappy about something. 152 | 153 | Args: 154 | ccomp (str): Path to ccomp executable or name in $PATH. 155 | file (Path): File to compile. 156 | flags (str): Additional flags to use. 157 | compcert_timeout (int): Timeout in seconds. 158 | 159 | Returns: 160 | bool: True if CompCert does not complain. 
161 |     """
162 |     with CCompEnv() as tmpdir:
163 |         cmd = [
164 |             ccomp,
165 |             str(file),
166 |             "-interp",
167 |             "-fall",
168 |         ]
169 |         if flags:
170 |             cmd.extend(flags.split())
171 |         res = True
172 |         try:
173 |             utils.run_cmd(
174 |                 cmd,
175 |                 additional_env={"TMPDIR": str(tmpdir)},
176 |                 timeout=compcert_timeout,
177 |             )
178 |             res = True
179 |         except subprocess.CalledProcessError:
180 |             res = False
181 |         except subprocess.TimeoutExpired:
182 |             res = False
183 | 
184 |         logging.debug(f"CComp verification result: {res}")
185 |         return res
186 | 
187 | 
188 | def use_ub_sanitizers(
189 |     clang: str, file: Path, flags: str, cc_timeout: int, exe_timeout: int
190 | ) -> bool:
191 |     """Run clang undefined-behaviour tests.
192 | 
193 |     Args:
194 |         clang (str): Path to clang executable or name in $PATH.
195 |         file (Path): File to test.
196 |         flags (str): Additional flags to use.
197 |         cc_timeout (int): Timeout for compiling in seconds.
198 |         exe_timeout (int): Timeout for running the resulting exe in seconds.
199 | 
200 |     Returns:
201 |         bool: True if no undefined behaviour was found.
202 |     """
203 |     cmd = [clang, str(file), "-O0", "-fsanitize=undefined,address"]
204 |     if flags:
205 |         cmd.extend(flags.split())
206 | 
207 |     with CCompEnv():
208 |         with tempfile.NamedTemporaryFile(suffix=".exe", delete=False) as exe:
209 |             exe.close()
210 |             os.chmod(exe.name, 0o777)
211 |             cmd.append(f"-o{exe.name}")
212 |             result = subprocess.run(
213 |                 cmd,
214 |                 stdout=subprocess.DEVNULL,
215 |                 stderr=subprocess.DEVNULL,
216 |                 timeout=cc_timeout,
217 |             )
218 |             if result.returncode != 0:
219 |                 logging.debug(f"UB Sanitizer returncode {result.returncode}")
220 |                 if os.path.exists(exe.name):
221 |                     os.remove(exe.name)
222 |                 return False
223 |             result = subprocess.run(
224 |                 exe.name,
225 |                 stdout=subprocess.DEVNULL,
226 |                 stderr=subprocess.DEVNULL,
227 |                 timeout=exe_timeout,
228 |             )
229 |             os.remove(exe.name)
230 |             logging.debug(f"UB Sanitizer returncode {result.returncode}")
231 |             return result.returncode == 0
232 | 
233 | 
234 | def sanitize(
235 |     gcc: str,
236 |     clang: str,
237 |     ccomp: str,
238 |     file: Path,
239 |     flags: str,
240 |     cc_timeout: int = 8,
241 |     exe_timeout: int = 2,
242 |     compcert_timeout: int = 16,
243 | ) -> bool:
244 |     """Check if there is anything that could indicate undefined behaviour.
245 | 
246 |     Args:
247 |         gcc (str): Path to gcc executable or name in $PATH.
248 |         clang (str): Path to clang executable or name in $PATH.
249 |         ccomp (str): Path to ccomp executable or name in $PATH.
250 |         file (Path): File to check.
251 |         flags (str): Additional flags to use.
252 |         cc_timeout (int): Compiler timeout in seconds.
253 |         exe_timeout (int): Undefined-behaviour runtime timeout in seconds.
254 |         compcert_timeout (int): CompCert timeout in seconds.
255 | 
256 |     Returns:
257 |         bool: True if nothing indicative of undefined behaviour is found.
258 |     """
259 |     try:
260 |         return (
261 |             check_compiler_warnings(clang, gcc, file, flags, cc_timeout)
262 |             and use_ub_sanitizers(clang, file, flags, cc_timeout, exe_timeout)
263 |             and verify_with_ccomp(ccomp, file, flags, compcert_timeout)
264 |         )
265 |     except subprocess.TimeoutExpired:
266 |         return False
267 | 
268 | 
269 | # ==================== Checker ====================
270 | 
271 | 
272 | class Checker:
273 |     def __init__(self, config: utils.NestedNamespace, bldr: Builder):
274 |         self.config = config
275 |         self.builder = bldr
276 |         return
277 | 
278 |     def is_interesting_wrt_marker(self, case: utils.Case) -> bool:
279 |         """Checks if the marker is eliminated by all good compilers/settings
280 |         and not eliminated by the bad compiler/setting.
281 | 
282 |         Args:
283 |             case (utils.Case): Case to check.
284 | 
285 |         Returns:
286 |             bool: True if the marker is not eliminated by the bad setting and
287 |                 eliminated by all good settings.
288 | 
289 |         Raises:
290 |             builder.CompileError: Finding alive markers may fail.
291 |         """
292 |         # Checks if the bad_setting does include the marker and
293 |         # all the good settings do not.
294 | 
295 |         marker_prefix = utils.get_marker_prefix(case.marker)
296 |         found_in_bad = utils.find_alive_markers(
297 |             case.code, case.bad_setting, marker_prefix, self.builder
298 |         )
299 |         uninteresting = False
300 |         if case.marker not in found_in_bad:
301 |             return False
302 |         for good_setting in case.good_settings:
303 |             found_in_good = utils.find_alive_markers(
304 |                 case.code, good_setting, marker_prefix, self.builder
305 |             )
306 |             if case.marker in found_in_good:
307 |                 uninteresting = True
308 |                 break
309 |         return not uninteresting
310 | 
311 |     def is_interesting_wrt_ccc(self, case: utils.Case) -> bool:
312 |         """Check if there is a call chain between main and the marker.
313 | 
314 |         Args:
315 |             case (utils.Case): Case to check.
316 | 
317 |         Returns:
318 |             bool: True if there is a call chain between main and the marker.
319 |         """
320 |         with tempfile.NamedTemporaryFile(suffix=".c") as tf:
321 |             with open(tf.name, "w") as f:
322 |                 f.write(case.code)
323 | 
324 |             # TODO: Handle include_paths better
325 |             include_paths = utils.find_include_paths(
326 |                 self.config.llvm.sane_version, tf.name, case.bad_setting.get_flag_str()
327 |             )
328 |             cmd = [self.config.ccc, tf.name, "--from=main", f"--to={case.marker}"]
329 | 
330 |             for path in include_paths:
331 |                 cmd.append(f"--extra-arg=-isystem{path}")
332 |             try:
333 |                 result = utils.run_cmd(cmd, timeout=8)
334 |                 return (
335 |                     f"call chain exists between main -> {case.marker}".strip()
336 |                     == result.strip()
337 |                 )
338 |             except subprocess.CalledProcessError:
339 |                 logging.debug("CCC failed")
340 |                 return False
341 |             except subprocess.TimeoutExpired:
342 |                 logging.debug("CCC timed out")
343 |                 return False
344 | 
345 |     def is_interesting_with_static_globals(self, case: utils.Case) -> bool:
346 |         """Checks if the given case is still interesting, even when making all
347 |         variables and functions static.
348 | 
349 |         Args:
350 |             case (utils.Case): The case to check
351 | 
352 |         Returns:
353 |             bool: If the case is interesting when using static globals
354 | 
355 |         Raises:
356 |             builder.CompileError: Getting the assembly may fail.
357 | """ 358 | 359 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 360 | with open(tf.name, "w") as new_cfile: 361 | print(case.code, file=new_cfile) 362 | 363 | # TODO: Handle include_paths better 364 | annotate_with_static(Path(tf.name), case.bad_setting.get_flag_cmd()) 365 | 366 | with open(tf.name, "r") as annotated_file: 367 | static_code = annotated_file.read() 368 | 369 | asm_bad = utils.get_asm_str(static_code, case.bad_setting, self.builder) 370 | uninteresting = False 371 | if case.marker not in asm_bad: 372 | uninteresting = True 373 | for good_setting in case.good_settings: 374 | asm_good = utils.get_asm_str(static_code, good_setting, self.builder) 375 | if case.marker in asm_good: 376 | uninteresting = True 377 | break 378 | return not uninteresting 379 | 380 | def _empty_marker_code_str(self, case: utils.Case) -> str: 381 | marker_prefix = utils.get_marker_prefix(case.marker) 382 | p = re.compile(rf"void {marker_prefix}(.*)\((void|)\);(.*)") 383 | empty_body_code = "" 384 | for line in case.code.split("\n"): 385 | m = p.match(line) 386 | if m: 387 | empty_body_code += ( 388 | "\n" 389 | + rf"void {marker_prefix}{m.group(1)}({m.group(2)}){{}}" 390 | + "\n" 391 | + rf"{m.group(3)}" 392 | ) 393 | else: 394 | empty_body_code += f"\n{line}" 395 | 396 | return empty_body_code 397 | 398 | def is_interesting_with_empty_marker_bodies(self, case: utils.Case) -> bool: 399 | """Check if `case.code` does not exhibit undefined behaviour, 400 | compile errors or makes CompCert unhappy. 401 | To compile, all markers need to get an empty body, thus the name. 402 | 403 | Args: 404 | case (utils.Case): Case to check 405 | 406 | Returns: 407 | bool: True if the code passes the 'sanity-check' 408 | """ 409 | 410 | empty_body_code = self._empty_marker_code_str(case) 411 | 412 | with tempfile.NamedTemporaryFile(suffix=".c") as tf: 413 | with open(tf.name, "w") as f: 414 | f.write(empty_body_code) 415 | 416 | return sanitize( 417 | self.config.gcc.sane_version, 418 | self.config.llvm.sane_version, 419 | self.config.ccomp, 420 | Path(tf.name), 421 | case.bad_setting.get_flag_str(), 422 | ) 423 | 424 | def is_interesting(self, case: utils.Case, preprocess: bool = True) -> bool: 425 | """Check if a code passes all the 'interestingness'-checks. 426 | Preprocesses code by default to prevent surprises when preprocessing 427 | later. 428 | 429 | Args: 430 | self: 431 | case (utils.Case): Case to check. 432 | preprocess (bool): Whether or not to preprocess the code 433 | 434 | Returns: 435 | bool: True if the case passes all 'interestingness'-checks 436 | 437 | Raises: 438 | builder.CompileError 439 | """ 440 | # TODO: Optimization potential. Less calls to clang etc. 441 | # when tests are combined. 
442 | 
443 |         if preprocess:
444 |             code_pp = preprocessing.preprocess_csmith_code(
445 |                 case.code,
446 |                 utils.get_marker_prefix(case.marker),
447 |                 case.bad_setting,
448 |                 self.builder,
449 |             )
450 |             case_cpy = copy.deepcopy(case)
451 |             if code_pp:
452 |                 case_cpy.code = code_pp
453 |             case = case_cpy
454 |         # Taking advantage of short-circuit logic
455 |         return (
456 |             self.is_interesting_wrt_marker(case)
457 |             and self.is_interesting_wrt_ccc(case)
458 |             and self.is_interesting_with_static_globals(case)
459 |             and self.is_interesting_with_empty_marker_bodies(case)
460 |         )
461 | 
462 | 
463 | def copy_flag(
464 |     frm: utils.CompilerSetting, to: list[utils.CompilerSetting]
465 | ) -> list[utils.CompilerSetting]:
466 |     res: list[utils.CompilerSetting] = []
467 |     for setting in to:
468 |         cpy = copy.deepcopy(setting)
469 |         cpy.additional_flags = frm.additional_flags
470 |         res.append(cpy)
471 |     return res
472 | 
473 | 
474 | def override_bad(
475 |     case: utils.Case, override_settings: list[utils.CompilerSetting]
476 | ) -> list[utils.Case]:
477 |     res = []
478 |     bsettings = copy_flag(case.bad_setting, override_settings)
479 |     for s in bsettings:
480 |         cpy = copy.deepcopy(case)
481 |         cpy.bad_setting = s
482 |         res.append(cpy)
483 |     return res
484 | 
485 | 
486 | def override_good(
487 |     case: utils.Case, override_settings: list[utils.CompilerSetting]
488 | ) -> utils.Case:
489 |     gsettings = copy_flag(case.good_settings[0], override_settings)
490 |     cpy = copy.deepcopy(case)
491 |     cpy.good_settings = gsettings
492 |     return cpy
493 | 
494 | 
495 | if __name__ == "__main__":
496 |     config, args = utils.get_config_and_parser(parsers.checker_parser())
497 | 
498 |     patchdb = PatchDB(Path(config.patchdb))
499 |     _, llvm_repo = ccbuilder.get_compiler_info("llvm", Path(config.repodir))
500 |     _, gcc_repo = ccbuilder.get_compiler_info("gcc", Path(config.repodir))
501 |     bldr = Builder(
502 |         Path(config.cachedir),
503 |         gcc_repo,
504 |         llvm_repo,
505 |         patchdb,
506 |         args.cores,
507 |         logdir=Path(config.logdir),
508 |     )
509 |     chkr = Checker(config, bldr)
510 | 
511 |     file = Path(args.file)
512 | 
513 |     bad_settings = []
514 |     good_settings = []
515 | 
516 |     if args.check_pp:
517 |         file = Path(args.file).absolute()
518 |         case = utils.Case.from_file(config, file)
519 |         # preprocess file
520 |         pp_code = preprocessing.preprocess_csmith_code(
521 |             case.code,
522 |             utils.get_marker_prefix(case.marker),
523 |             case.bad_setting,
524 |             bldr,
525 |         )
526 | 
527 |         if pp_code:
528 |             case.code = pp_code
529 |         else:
530 |             print("Could not preprocess code. Exiting")
Exiting") 531 | exit(1) 532 | # Taking advantage of shortciruit logic 533 | a = chkr.is_interesting_wrt_marker(case) 534 | b = chkr.is_interesting_wrt_ccc(case) 535 | c = chkr.is_interesting_with_static_globals(case) 536 | d = chkr.is_interesting_with_empty_marker_bodies(case) 537 | print(f"Marker:\t{a}") 538 | print(f"CCC:\t{b}") 539 | print(f"Static:\t{c}") 540 | print(f"Empty:\t{d}") 541 | if not all((a, b, c, d)): 542 | exit(1) 543 | exit(0) 544 | 545 | if args.scenario: 546 | scenario = utils.Scenario.from_file(config, Path(args.scenario)) 547 | bad_settings = scenario.target_settings 548 | good_settings = scenario.attacker_settings 549 | elif args.interesting_settings: 550 | bad_settings, good_settings = utils.get_interesting_settings( 551 | config, args.interesting_settings 552 | ) 553 | 554 | if args.bad_settings: 555 | bad_settings = utils.get_compiler_settings( 556 | config, args.bad_settings, args.bad_settings_default_opt_levels 557 | ) 558 | 559 | if args.good_settings: 560 | good_settings = utils.get_compiler_settings( 561 | config, args.good_settings, args.good_settings_default_opt_levels 562 | ) 563 | 564 | cases_to_test: list[utils.Case] = [] 565 | check_marker: bool = False 566 | if args.bad_settings and args.good_settings or args.interesting_settings: 567 | # Override all options defined in the case 568 | scenario = utils.Scenario(bad_settings, good_settings) 569 | if tarfile.is_tarfile(file): 570 | case = utils.Case.from_file(config, file) 571 | code = case.code 572 | args.marker = case.marker 573 | if not bad_settings: 574 | bad_settings = copy_flag(case.scenario.target_settings[0], bad_settings) 575 | if not good_settings: 576 | good_settings = copy_flag( 577 | case.scenario.attacker_settings[0], good_settings 578 | ) 579 | else: 580 | with open(file, "r") as f: 581 | code = f.read() 582 | check_marker = True 583 | 584 | cases_to_test = [ 585 | utils.Case(code, args.marker, bs, good_settings, scenario, None, None, None) 586 | for bs in bad_settings 587 | ] 588 | 589 | elif args.bad_settings and not args.good_settings: 590 | # TODO: Get flags from somewhere. For now, 591 | # take the ones from the first config. 592 | case = utils.Case.from_file(config, file) 593 | 594 | cases_to_test = override_bad(case, bad_settings) 595 | 596 | elif not args.bad_settings and args.good_settings: 597 | case = utils.Case.from_file(config, file) 598 | 599 | cases_to_test = [override_good(case, good_settings)] 600 | 601 | else: 602 | cases_to_test = [utils.Case.from_file(config, file)] 603 | 604 | if args.marker is not None: 605 | for cs in cases_to_test: 606 | cs.marker = args.marker 607 | elif check_marker: 608 | raise Exception("You need to specify a marker") 609 | 610 | if not cases_to_test: 611 | print("No cases arrived. 
612 |         exit(2)
613 | 
614 |     if args.check_reduced:
615 |         for cs in cases_to_test:
616 |             if not cs.reduced_code:
617 |                 raise Exception("Case does not include reduced code!")
618 |             cs.code = cs.reduced_code
619 | 
620 |     if all(
621 |         chkr.is_interesting(
622 |             c, preprocess=(not (args.dont_preprocess or args.check_reduced))
623 |         )
624 |         for c in cases_to_test
625 |     ):
626 |         sys.exit(0)
627 |     else:
628 |         sys.exit(1)
629 | 
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | import hashlib
2 | import os
3 | import sqlite3
4 | import sys
5 | import zlib
6 | from dataclasses import dataclass
7 | from functools import cache, reduce
8 | from itertools import chain
9 | from pathlib import Path
10 | from typing import ClassVar, Optional
11 | 
12 | from ccbuilder import get_compiler_project
13 | 
14 | import utils
15 | from utils import Case, CompilerSetting, NestedNamespace, Scenario
16 | 
17 | 
18 | class DatabaseError(Exception):
19 |     pass
20 | 
21 | 
22 | @dataclass
23 | class ColumnInfo:
24 |     name: str
25 |     typename: str
26 |     constraints: str = ""
27 | 
28 |     def __str__(self) -> str:
29 |         return f"{self.name} {self.typename} {self.constraints}"
30 | 
31 | 
32 | RowID = int
33 | 
34 | 
35 | class CaseDatabase:
36 |     config: NestedNamespace
37 |     con: sqlite3.Connection
38 |     tables: ClassVar[dict[str, list[ColumnInfo]]] = {
39 |         "cases": [
40 |             ColumnInfo("case_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
41 |             ColumnInfo("code_sha1", "", "REFERENCES code(code_sha1) NOT NULL"),
42 |             ColumnInfo("marker", "TEXT", "NOT NULL"),
43 |             ColumnInfo("bad_setting_id", "INTEGER", "NOT NULL"),
44 |             ColumnInfo("scenario_id", "INTEGER", "NOT NULL"),
45 |             ColumnInfo("bisection", "CHAR(40)"),
46 |             ColumnInfo("reduced_code_sha1", "CHAR(40)"),
47 |             ColumnInfo("timestamp", "FLOAT", "NOT NULL"),
48 |             ColumnInfo(
49 |                 "UNIQUE(code_sha1, marker, bad_setting_id, scenario_id, bisection, reduced_code_sha1) "
50 |                 "ON CONFLICT REPLACE",
51 |                 "",
52 |             ),
53 |         ],
54 |         "code": [
55 |             ColumnInfo("code_sha1", "CHAR(40)", "PRIMARY KEY"),
56 |             ColumnInfo("compressed_code", "BLOB"),
57 |         ],
58 |         "reported_cases": [
59 |             ColumnInfo("case_id", "", "REFERENCES cases(case_id) PRIMARY KEY"),
60 |             ColumnInfo("massaged_code_sha1", "", "REFERENCES code(code_sha1)"),
61 |             ColumnInfo("bug_report_link", "TEXT"),
62 |             ColumnInfo("fixed_by", "CHAR(40)"),
63 |         ],
64 |         "compiler_setting": [
65 |             ColumnInfo("compiler_setting_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
66 |             ColumnInfo("compiler", "TEXT", "NOT NULL"),
67 |             ColumnInfo("rev", "CHAR(40)", "NOT NULL"),
68 |             ColumnInfo("opt_level", "TEXT", "NOT NULL"),
69 |             ColumnInfo("additional_flags", "TEXT"),
70 |         ],
71 |         "good_settings": [
72 |             ColumnInfo("case_id", "", "REFERENCES cases(case_id) NOT NULL"),
73 |             ColumnInfo(
74 |                 "compiler_setting_id",
75 |                 "",
76 |                 "REFERENCES compiler_setting(compiler_setting_id) NOT NULL",
77 |             ),
78 |         ],
79 |         "scenario_ids": [
80 |             ColumnInfo("scenario_id", "INTEGER", "PRIMARY KEY AUTOINCREMENT"),
81 |         ],
82 |         "scenario": [
83 |             ColumnInfo(
84 |                 "scenario_id", "", "REFERENCES scenario_ids(scenario_id) PRIMARY KEY"
85 |             ),
86 |             ColumnInfo("generator_version", "INTEGER", "NOT NULL"),
87 |             ColumnInfo("bisector_version", "INTEGER", "NOT NULL"),
88 |             ColumnInfo("reducer_version", "INTEGER", "NOT NULL"),
89 |             ColumnInfo("instrumenter_version", "INTEGER", "NOT NULL"),
90 |             ColumnInfo("csmith_min",
"INTEGER", "NOT NULL"), 91 | ColumnInfo("csmith_max", "INTEGER", "NOT NULL"), 92 | ColumnInfo("reduce_program", "TEXT", "NOT NULL"), 93 | ], 94 | "scenario_attacker": [ 95 | ColumnInfo( 96 | "scenario_id", "", "REFERENCES scenario_ids(scenario_id) NOT NULL" 97 | ), 98 | ColumnInfo( 99 | "compiler_setting_id", 100 | "", 101 | "REFERENCES compiler_setting(compiler_setting_id) NOT NULL", 102 | ), 103 | ], 104 | "scenario_target": [ 105 | ColumnInfo( 106 | "scenario_id", "", "REFERENCES scenario_ids(scenario_id) NOT NULL" 107 | ), 108 | ColumnInfo( 109 | "compiler_setting_id", 110 | "", 111 | "REFERENCES compiler_setting(compiler_setting_id) NOT NULL", 112 | ), 113 | ], 114 | "timing": [ 115 | ColumnInfo("case_id", "", "REFERENCES cases(case_id) PRIMARY KEY"), 116 | ColumnInfo("generator_time", "FLOAT"), 117 | ColumnInfo("generator_try_count", "INTEGER"), 118 | ColumnInfo("bisector_time", "FLOAT"), 119 | ColumnInfo("bisector_steps", "INTEGER"), 120 | ColumnInfo("reducer_time", "FLOAT"), 121 | ], 122 | } 123 | 124 | def __init__(self, config: NestedNamespace, db_path: Path) -> None: 125 | self.config = config 126 | self.con = sqlite3.connect(db_path, timeout=60) 127 | self.create_tables() 128 | 129 | def create_tables(self) -> None: 130 | def make_query(table: str, columns: list[ColumnInfo]) -> str: 131 | column_decl = ",".join(str(column) for column in columns) 132 | return f"CREATE TABLE IF NOT EXISTS {table} (" + column_decl + ")" 133 | 134 | for table, columns in CaseDatabase.tables.items(): 135 | self.con.execute(make_query(table, columns)) 136 | 137 | def record_code(self, code: str) -> str: 138 | """Inserts `code` into the database's `code`-table and returns its 139 | sha1-hash which serves as a key. 140 | 141 | Args: 142 | code (str): code to be inserted 143 | 144 | Returns: 145 | str: SHA1 of code which serves as the key. 146 | """ 147 | # Take the hash before the compression to handle changes 148 | # in the compression library. 149 | code_sha1 = hashlib.sha1(code.encode("utf-8")).hexdigest() 150 | compressed_code = zlib.compress(code.encode("utf-8"), level=9) 151 | 152 | self.con.execute( 153 | "INSERT OR IGNORE INTO code VALUES (?, ?)", (code_sha1, compressed_code) 154 | ) 155 | return code_sha1 156 | 157 | def get_code_from_id(self, code_id: str) -> Optional[str]: 158 | """Get code from the database if it exists. 159 | 160 | Args: 161 | code_id (str): SHA1 of code 162 | 163 | Returns: 164 | Optional[str]: Saved code if it exists, else None 165 | """ 166 | 167 | res = self.con.execute( 168 | "SELECT compressed_code FROM code WHERE code_sha1 == ?", (code_id,) 169 | ).fetchone() 170 | if res: 171 | code = zlib.decompress(res[0]).decode("utf-8") 172 | return code 173 | else: 174 | return None 175 | 176 | def record_reported_case( 177 | self, 178 | case_id: RowID, 179 | massaged_code: Optional[str], 180 | bug_report_link: Optional[str], 181 | fixed_by: Optional[str], 182 | ) -> None: 183 | """Save additional information for an already saved case. 184 | 185 | Args: 186 | case_id (RowID): case_id 187 | massaged_code (Optional[str]): adapted reduced code for better reduction. 188 | bug_report_link (Optional[str]): Link to the bug report. 189 | fixed_by (Optional[str]): If the case is already fixed. 
190 | 
191 |         Returns:
192 |             None:
193 |         """
194 |         code_sha1 = None
195 |         if massaged_code:
196 |             code_sha1 = self.record_code(massaged_code)
197 | 
198 |         with self.con:
199 |             self.con.execute(
200 |                 "INSERT OR REPLACE INTO reported_cases VALUES (?,?,?,?)",
201 |                 (
202 |                     case_id,
203 |                     code_sha1,
204 |                     bug_report_link,
205 |                     fixed_by,
206 |                 ),
207 |             )
208 | 
209 |     def record_case(self, case: Case) -> RowID:
210 |         """Save a case to the DB and get its ID.
211 | 
212 |         Args:
213 |             case (Case): Case to save.
214 | 
215 |         Returns:
216 |             RowID: ID of case.
217 |         """
218 | 
219 |         bad_setting_id = self.record_compiler_setting(case.bad_setting)
220 |         with self.con:
221 |             good_setting_ids = [
222 |                 self.record_compiler_setting(good_setting)
223 |                 for good_setting in case.good_settings
224 |             ]
225 |         scenario_id = self.record_scenario(case.scenario)
226 | 
227 |         with self.con:
228 |             cur = self.con.cursor()
229 |             bisection = case.bisection
230 |             reduced_code_sha1 = (
231 |                 self.record_code(case.reduced_code) if case.reduced_code else None
232 |             )
233 | 
234 |             code_sha1 = self.record_code(case.code)
235 | 
236 |             cur.execute(
237 |                 "INSERT INTO cases VALUES (NULL,?,?,?,?,?,?,?)",
238 |                 (
239 |                     code_sha1,
240 |                     case.marker,
241 |                     bad_setting_id,
242 |                     scenario_id,
243 |                     bisection,
244 |                     reduced_code_sha1,
245 |                     case.timestamp,
246 |                 ),
247 |             )
248 |             if not cur.lastrowid:
249 |                 raise DatabaseError("No last row id was returned")
250 |             case_id = RowID(cur.lastrowid)
251 |             cur.executemany(
252 |                 "INSERT INTO good_settings VALUES (?,?)",
253 |                 ((case_id, gs_id) for gs_id in good_setting_ids),
254 |             )
255 | 
256 |         return case_id
257 | 
258 |     def record_compiler_setting(self, compiler_setting: CompilerSetting) -> RowID:
259 |         """Save a compiler setting to the DB and get its ID.
260 | 
261 |         Args:
262 |             self:
263 |             compiler_setting (CompilerSetting): compiler setting to save.
264 | 
265 |         Returns:
266 |             RowID: ID of saved compiler setting.
267 |         """
268 |         if s_id := self.get_compiler_setting_id(compiler_setting):
269 |             return s_id
270 |         with self.con:
271 |             cur = self.con.cursor()
272 |             cur.execute(
273 |                 "INSERT INTO compiler_setting VALUES (NULL,?,?,?,?)",
274 |                 (
275 |                     compiler_setting.compiler_project.to_string(),
276 |                     compiler_setting.rev,
277 |                     compiler_setting.opt_level,
278 |                     "|".join(compiler_setting.get_flag_cmd()),  # "|"-joined to match get_compiler_setting_id and get_compiler_setting_from_id
279 |                 ),
280 |             )
281 |             if not cur.lastrowid:
282 |                 raise DatabaseError("No last row id was returned")
283 |             ns_id = RowID(cur.lastrowid)
284 | 
285 |         return ns_id
286 | 
287 |     def record_scenario(self, scenario: Scenario) -> RowID:
288 |         """Save a scenario to the DB and get its ID.
289 | 
290 |         Args:
291 |             scenario (Scenario): Scenario to save.
292 | 
293 |         Returns:
294 |             RowID: ID of `scenario`
295 |         """
296 |         if s_id := self.get_scenario_id(scenario):
297 |             return s_id
298 |         target_ids = [
299 |             self.record_compiler_setting(target_setting)
300 |             for target_setting in scenario.target_settings
301 |         ]
302 |         attacker_ids = [
303 |             self.record_compiler_setting(attacker_setting)
304 |             for attacker_setting in scenario.attacker_settings
305 |         ]
306 |         with self.con:
307 |             ns_id = self.get_new_scenario_id(no_commit=True)
308 | 
309 |             def insert_settings(table: str, settings: list[RowID]) -> None:
310 |                 self.con.executemany(
311 |                     f"INSERT INTO {table} VALUES (?,?)",
312 |                     ((ns_id, s) for s in settings),
313 |                 )
314 | 
315 |             insert_settings("scenario_target", target_ids)
316 |             insert_settings("scenario_attacker", attacker_ids)
317 | 
318 |             self.con.execute(
319 |                 "INSERT INTO scenario VALUES (?,?,?,?,?,?,?,?)",
320 |                 (
321 |                     ns_id,
322 |                     scenario.generator_version,
323 |                     scenario.bisector_version,
324 |                     scenario.reducer_version,
325 |                     scenario.instrumenter_version,
326 |                     self.config.csmith.min_size,
327 |                     self.config.csmith.max_size,
328 |                     os.path.basename(self.config.creduce),
329 |                 ),
330 |             )
331 |         return ns_id
332 | 
333 |     def get_new_scenario_id(self, no_commit: bool) -> RowID:
334 |         """Get a new scenario ID.
335 | 
336 |         Args:
337 |             no_commit (bool): Don't commit the change.
338 | 
339 |         Returns:
340 |             RowID: New scenario id
341 |         """
342 |         cur = self.con.cursor()
343 |         cur.execute("INSERT INTO scenario_ids VALUES (NULL)")
344 |         if not no_commit:
345 |             self.con.commit()
346 |         if not cur.lastrowid:
347 |             raise DatabaseError("No row id was returned")
348 |         return RowID(cur.lastrowid)
349 | 
350 |     def get_scenario_id(self, scenario: Scenario) -> Optional[RowID]:
351 |         """See if there is already an ID for `scenario` in the database
352 |         and return it if so.
353 | 
354 |         Args:
355 |             scenario (Scenario): scenario to get an ID for
356 | 
357 |         Returns:
358 |             Optional[RowID]: RowID if the scenario exists
359 |         """
360 | 
361 |         def get_scenario_ids(id_: RowID, table: str, id_str: str) -> set[int]:
362 |             cursor = self.con.cursor()
363 |             return set(
364 |                 s_id[0]
365 |                 for s_id in cursor.execute(
366 |                     f"SELECT scenario_id FROM {table} WHERE {id_str}== ? ",
367 |                     (id_,),
368 |                 ).fetchall()
369 |             )
370 | 
371 |         # Get all scenarios which have the same versions
372 |         candidate_ids: set[RowID] = set(
373 |             [
374 |                 r[0]
375 |                 for r in self.con.execute(
376 |                     "SELECT scenario_id FROM scenario"
377 |                     " WHERE generator_version == ?"
378 |                     " AND bisector_version == ?"
379 |                     " AND reducer_version == ?"
380 |                     " AND instrumenter_version == ?"
381 |                     " AND csmith_min == ?"
382 |                     " AND csmith_max == ?"
383 |                     " AND reduce_program == ?",
384 |                     (
385 |                         scenario.generator_version,
386 |                         scenario.bisector_version,
387 |                         scenario.reducer_version,
388 |                         scenario.instrumenter_version,
389 |                         self.config.csmith.min_size,
390 |                         self.config.csmith.max_size,
391 |                         os.path.basename(self.config.creduce),  # record_scenario stores only the basename
392 |                     ),
393 |                 ).fetchall()
394 |             ]
395 |         )
396 | 
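        # Worked example: if the first target setting occurs in scenarios
        # {1, 3} and the second in {3, 4}, only scenario 3 can match both,
        # since reduce(lambda x, y: x & y, [{1, 3}, {3, 4}]) == {3}. The
        # candidate set below is narrowed the same way, once for the target
        # settings and once more for the attacker settings.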
383 | " AND reduce_program == ?", 384 | ( 385 | scenario.generator_version, 386 | scenario.bisector_version, 387 | scenario.reducer_version, 388 | scenario.instrumenter_version, 389 | self.config.csmith.min_size, 390 | self.config.csmith.max_size, 391 | self.config.creduce, 392 | ), 393 | ).fetchall() 394 | ] 395 | ) 396 | 397 | # Get compiler setting ids of scenario 398 | target_ids: list[RowID] = [] 399 | for setting in scenario.target_settings: 400 | if not (s_id := self.get_compiler_setting_id(setting)): 401 | return None 402 | target_ids.append(s_id) 403 | 404 | attacker_ids: list[RowID] = [] 405 | for setting in scenario.attacker_settings: 406 | if not (s_id := self.get_compiler_setting_id(setting)): 407 | return None 408 | attacker_ids.append(s_id) 409 | 410 | # Compare compiler setting IDs 411 | candidate_ids = candidate_ids & reduce( 412 | lambda x, y: x & y, 413 | ( 414 | get_scenario_ids(target_id, "scenario_target", "compiler_setting_id") 415 | for target_id in target_ids 416 | ), 417 | ) 418 | if not candidate_ids: 419 | return None 420 | 421 | candidate_ids = reduce( 422 | lambda x, y: x & y, 423 | chain( 424 | ( 425 | get_scenario_ids( 426 | attacker_id, "scenario_attacker", "compiler_setting_id" 427 | ) 428 | for attacker_id in attacker_ids 429 | ), 430 | (candidate_ids,), 431 | ), 432 | ) 433 | 434 | if not candidate_ids: 435 | return None 436 | return RowID(next(candidate_ids.__iter__())) 437 | 438 | def get_compiler_setting_id( 439 | self, compiler_setting: CompilerSetting 440 | ) -> Optional[RowID]: 441 | """Get the ID of a given CompilerSetting, if it is in the DB. 442 | 443 | Args: 444 | compiler_setting (CompilerSetting): CompilerSetting to get the id of. 445 | 446 | Returns: 447 | Optional[RowID]: The ID, if found. 448 | """ 449 | result = self.con.execute( 450 | "SELECT compiler_setting_id " 451 | "FROM compiler_setting " 452 | "WHERE compiler == ? AND rev == ? AND opt_level == ? AND additional_flags == ?", 453 | ( 454 | compiler_setting.compiler_project.to_string(), 455 | compiler_setting.rev, 456 | compiler_setting.opt_level, 457 | "|".join(compiler_setting.get_flag_cmd()), 458 | ), 459 | ).fetchone() 460 | 461 | if not result: 462 | return None 463 | s_id = RowID(result[0]) 464 | 465 | return s_id 466 | 467 | @cache 468 | def get_compiler_setting_from_id( 469 | self, compiler_setting_id: int 470 | ) -> Optional[CompilerSetting]: 471 | """Get a compiler setting from a compiler_setting_id, if the ID exists. 472 | 473 | Args: 474 | self: 475 | compiler_setting_id (int): Compiler setting ID to get the compiler setting of 476 | 477 | Returns: 478 | Optional[CompilerSetting]: Compiler setting with ID `compiler_setting_id` 479 | """ 480 | 481 | res = self.con.execute( 482 | "SELECT compiler, rev, opt_level, additional_flags" 483 | " FROM compiler_setting" 484 | " WHERE compiler_setting_id == ?", 485 | (compiler_setting_id,), 486 | ).fetchone() 487 | 488 | if not res: 489 | return None 490 | 491 | compiler, rev, opt_level, flags = res 492 | return CompilerSetting( 493 | get_compiler_project(compiler), 494 | rev, 495 | opt_level, 496 | flags.split("|"), 497 | ) 498 | 499 | @cache 500 | def get_scenario_from_id(self, scenario_id: RowID) -> Optional[Scenario]: 501 | """Get a scenario from a specified ID. 
502 | 
503 |         Args:
504 |             scenario_id (RowID): ID of scenario to get
505 | 
506 |         Returns:
507 |             Optional[Scenario]: Scenario corresponding to RowID
508 |         """
509 | 
510 |         def get_settings(
511 |             self: CaseDatabase, table: str, s_id: int
512 |         ) -> list[CompilerSetting]:
513 | 
514 |             ids = self.con.execute(
515 |                 f"SELECT compiler_setting_id FROM {table} WHERE scenario_id == ?",
516 |                 (s_id,),
517 |             ).fetchall()
518 |             pre = [self.get_compiler_setting_from_id(row[0]) for row in ids]
519 | 
520 |             # For the type checker. It can't possibly know about the constraints
521 |             # in the DB.
522 |             settings = [c for c in pre if c]
523 | 
524 |             return settings
525 | 
526 |         target_settings = get_settings(self, "scenario_target", scenario_id)
527 |         attacker_settings = get_settings(self, "scenario_attacker", scenario_id)
528 |         scenario = Scenario(target_settings, attacker_settings)
529 | 
530 |         res = self.con.execute(
531 |             "SELECT generator_version, bisector_version, reducer_version, instrumenter_version FROM scenario WHERE scenario_id == ?",
532 |             (scenario_id,),
533 |         ).fetchone()
534 | 
535 |         if not res:
536 |             return None
537 | 
538 |         generator_version, bisector_version, reducer_version, instrumenter_version = res
539 | 
540 |         scenario.generator_version = generator_version
541 |         scenario.bisector_version = bisector_version
542 |         scenario.reducer_version = reducer_version
543 |         scenario.instrumenter_version = instrumenter_version
544 | 
545 |         return scenario
546 | 
547 |     def get_case_from_id(self, case_id: RowID) -> Optional[Case]:
548 |         """Get a case from the database based on its ID.
549 |         Note: the case will *NOT* replace reduced code with
550 |         massaged code.
551 | 
552 |         Args:
553 |             case_id (RowID): ID of wanted case
554 | 
555 |         Returns:
556 |             Optional[Case]: Returns case if it exists
557 |         """
558 |         if not (
559 |             res := self.con.execute(
560 |                 "SELECT * FROM cases WHERE case_id == ?", (case_id,)
561 |             ).fetchone()
562 |         ):
563 |             return None
564 | 
565 |         (
566 |             _,
567 |             code_sha1,
568 |             marker,
569 |             bad_setting_id,
570 |             scenario_id,
571 |             bisection,
572 |             reduced_code_sha1,
573 |             timestamp,
574 |         ) = res
575 | 
576 |         good_settings_ids = self.con.execute(
577 |             "SELECT compiler_setting_id FROM good_settings WHERE case_id == ?",
578 |             (case_id,),
579 |         ).fetchall()
580 | 
581 |         code = self.get_code_from_id(code_sha1)
582 |         if not code:
583 |             raise DatabaseError("Missing original code")
584 | 
585 |         reduced_code = self.get_code_from_id(reduced_code_sha1)
586 | 
587 |         scenario = self.get_scenario_from_id(scenario_id)
588 | 
589 |         # Get Settings
590 |         bad_setting = self.get_compiler_setting_from_id(bad_setting_id)
591 |         pre_good_settings = [
592 |             self.get_compiler_setting_from_id(row[0]) for row in good_settings_ids
593 |         ]
594 | 
595 |         # There should never be a problem here (TM) because of the DB
596 |         # FOREIGN KEY constraints.
597 | good_settings = [gs for gs in pre_good_settings if gs] 598 | if not bad_setting: 599 | raise DatabaseError("Bad setting id was not found") 600 | if not scenario: 601 | raise DatabaseError("Scenario id was not found") 602 | 603 | case = Case( 604 | code, 605 | marker, 606 | bad_setting, 607 | good_settings, 608 | scenario, 609 | reduced_code=reduced_code, 610 | bisection=bisection, 611 | path=None, 612 | timestamp=timestamp, 613 | ) 614 | 615 | return case 616 | 617 | def get_case_from_id_or_die(self, case_id: RowID) -> Case: 618 | pre_check_case = self.get_case_from_id(case_id) 619 | if not pre_check_case: 620 | print("No case with this ID.", file=sys.stderr) 621 | exit(1) 622 | else: 623 | case = pre_check_case 624 | return case 625 | 626 | def update_case(self, case_id: RowID, case: Case) -> None: 627 | """Update case with ID `case_id` with the values of `case` 628 | 629 | Args: 630 | case_id (str): ID of case to update 631 | case (Case): Case to get the info from 632 | 633 | Returns: 634 | None: 635 | """ 636 | code_sha1 = self.record_code(case.code) 637 | 638 | if case.reduced_code: 639 | reduced_code_sha1: Optional[str] = self.record_code(case.reduced_code) 640 | else: 641 | reduced_code_sha1 = None 642 | 643 | bad_setting_id = self.record_compiler_setting(case.bad_setting) 644 | scenario_id = self.record_scenario(case.scenario) 645 | 646 | with self.con: 647 | # REPLACE is just an alias for INSERT OR REPLACE 648 | self.con.execute( 649 | "INSERT OR REPLACE INTO cases VALUES (?,?,?,?,?,?,?,?)", 650 | ( 651 | case_id, 652 | code_sha1, 653 | case.marker, 654 | bad_setting_id, 655 | scenario_id, 656 | case.bisection, 657 | reduced_code_sha1, 658 | case.timestamp, 659 | ), 660 | ) 661 | 662 | def record_timing( 663 | self, 664 | case_id: RowID, 665 | generator_time: Optional[float] = None, 666 | generator_try_count: Optional[int] = None, 667 | bisector_time: Optional[float] = None, 668 | bisector_steps: Optional[int] = None, 669 | reducer_time: Optional[float] = None, 670 | ) -> None: 671 | """Record timing metric for `case_id` 672 | 673 | Args: 674 | case_id (RowID): 675 | generator_time (Optional[float]): Time the generator took 676 | generator_try_count (Optional[int]): How often the generator tried 677 | bisector_time (Optional[float]): How long the bisector took 678 | bisector_steps (Optional[int]): How many steps the bisector made 679 | reducer_time (Optional[float]): How long the reducer took 680 | 681 | Returns: 682 | None: 683 | """ 684 | 685 | with self.con: 686 | self.con.execute( 687 | "INSERT OR REPLACE INTO timing VALUES(?,?,?,?,?,?)", 688 | ( 689 | case_id, 690 | generator_time, 691 | generator_try_count, 692 | bisector_time, 693 | bisector_steps, 694 | reducer_time, 695 | ), 696 | ) 697 | 698 | def get_timing_from_id( 699 | self, case_id: RowID 700 | ) -> tuple[ 701 | Optional[float], Optional[int], Optional[float], Optional[int], Optional[float] 702 | ]: 703 | """Get the timing entries for a case. 
704 | 705 | Args: 706 | self: 707 | case_id (RowID): case_id 708 | 709 | Returns: 710 | tuple[ 711 | Optional[float], Optional[int], Optional[float], Optional[int], Optional[float] 712 | ]: Generator time, generator try count, bisector time, bisector steps, reducer time 713 | """ 714 | 715 | res = self.con.execute( 716 | "SELECT * FROM timing WHERE case_id == ?", (case_id,) 717 | ).fetchone() 718 | if not res: 719 | return (None, None, None, None, None) 720 | _, g_time, gtc, b_time, b_steps, r_time = res 721 | return g_time, gtc, b_time, b_steps, r_time 722 | 723 | def get_report_info_from_id( 724 | self, case_id: RowID 725 | ) -> tuple[Optional[str], Optional[str], Optional[str]]: 726 | """Get report infos for case_id. 727 | The order is massaged_code, link, fixed_by commit. 728 | 729 | Args: 730 | self: 731 | case_id (RowID): case_id 732 | 733 | Returns: 734 | tuple[Optional[str], Optional[str], Optional[str]]: 735 | """ 736 | 737 | res = self.con.execute( 738 | "SELECT * FROM reported_cases WHERE case_id == ?", (case_id,) 739 | ).fetchone() 740 | if not res: 741 | return (None, None, None) 742 | 743 | _, massaged_code_sha1, link, fixed_by = res 744 | 745 | massaged_code = self.get_code_from_id(massaged_code_sha1) 746 | return massaged_code, link, fixed_by 747 | -------------------------------------------------------------------------------- /parsers.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import multiprocessing as mp 3 | from typing import Any, Sequence 4 | 5 | 6 | def config_parser( 7 | expected_entries: Sequence[tuple[Any, ...]] 8 | ) -> argparse.ArgumentParser: 9 | parser = argparse.ArgumentParser(add_help=False) 10 | 11 | for _, path, desc in expected_entries: 12 | parser.add_argument("--" + ".".join(path), help=desc) 13 | parser.add_argument("--config", type=str, help="Path to config.json") 14 | 15 | parser.add_argument( 16 | "-ll", 17 | "--log-level", 18 | type=str, 19 | choices=("debug", "info", "warning", "error", "critical"), 20 | help="Log level", 21 | ) 22 | 23 | parser.add_argument( 24 | "--cores", 25 | help="Amount of build cores to use. Defaults to all.", 26 | type=int, 27 | default=mp.cpu_count(), 28 | ) 29 | 30 | return parser 31 | 32 | 33 | def builder_parser() -> argparse.ArgumentParser: 34 | parser = argparse.ArgumentParser(add_help=False) 35 | 36 | parser.add_argument( 37 | "-c", "--compiler", help="Which compiler project to use", nargs=1, type=str 38 | ) 39 | 40 | parser.add_argument( 41 | "-r", 42 | "--revision", 43 | help="Which revision of the compiler project to use. 
Use 'trunk' to use the latest commit",
44 |         nargs="+",
45 |         type=str,
46 |     )
47 | 
48 |     parser.add_argument(
49 |         "--build-releases", help="Build release versions", action="store_true"
50 |     )
51 | 
52 |     parser.add_argument(
53 |         "--add-patches",
54 |         help="Which patches to apply in addition to the ones found in patchDB",
55 |         nargs="+",
56 |         type=str,
57 |     )
58 | 
59 |     parser.add_argument(
60 |         "-f",
61 |         "--force",
62 |         help="Force build even if patch combo is known to be bad",
63 |         action="store_true",
64 |     )
65 |     return parser
66 | 
67 | 
68 | def patcher_parser() -> argparse.ArgumentParser:
69 |     parser = argparse.ArgumentParser(add_help=False)
70 | 
71 |     mut_excl_group = parser.add_mutually_exclusive_group(required=True)
72 | 
73 |     # ====================
74 |     mut_excl_group.add_argument(
75 |         "--find-range",
76 |         help="Try to find the range where a patch is required",
77 |         action="store_true",
78 |     )
79 | 
80 |     parser.add_argument(
81 |         "-c",
82 |         "--compiler",
83 |         help="Which compiler project to use",
84 |         nargs=1,
85 |         type=str,
86 |         required=True,
87 |     )
88 | 
89 |     parser.add_argument(
90 |         "-pr",
91 |         "--patchable-revision",
92 |         help="Which revision is patchable with the commit specified in --patches",
93 |         type=str,
94 |     )
95 | 
96 |     parser.add_argument(
97 |         "--patches",
98 |         nargs="*",
99 |         help="Which patch(es) to apply.",
100 |         type=str,
101 |     )
102 |     # ====================
103 |     mut_excl_group.add_argument(
104 |         "--find-introducer",
105 |         help="Try to find the introducer commit of a build failure.",
106 |         action="store_true",
107 |     )
108 | 
109 |     parser.add_argument(
110 |         "-br", "--broken-revision", help="Which revision is broken", type=str
111 |     )
112 |     # ====================
113 | 
114 |     return parser
115 | 
116 | 
117 | def generator_parser() -> argparse.ArgumentParser:
118 |     parser = argparse.ArgumentParser(add_help=False)
119 | 
120 |     parser.add_argument(
121 |         "-a", "--amount", help="Amount of cases to generate.", type=int, default=0
122 |     )
123 | 
124 |     parser.add_argument(
125 |         "--interesting",
126 |         help="If the generated case should be an interesting one.",
127 |         action=argparse.BooleanOptionalAction,
128 |         default=True,
129 |     )
130 | 
131 |     parser.add_argument(
132 |         "-t",
133 |         "--targets",
134 |         help="Project name and revision of compiler to use.",
135 |         nargs="+",
136 |         type=str,
137 |     )
138 | 
139 |     parser.add_argument(
140 |         "-tdol",
141 |         "--targets-default-opt-levels",
142 |         help="Default optimization levels for the target to be checked against.",
143 |         nargs="+",
144 |         default=[],
145 |         type=str,
146 |     )
147 | 
148 |     parser.add_argument(
149 |         "-ac",
150 |         "--additional-compilers",
151 |         help="Additional compiler to compare the target against.",
152 |         nargs="*",
153 |         type=str,
154 |     )
155 | 
156 |     parser.add_argument(
157 |         "-acdol",
158 |         "--additional-compilers-default-opt-levels",
159 |         help="Default optimization levels for the additional compilers to be checked against.",
160 |         nargs="+",
161 |         default=[],
162 |         type=str,
163 |     )
164 | 
165 |     parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str)
166 | 
167 |     parser.add_argument(
168 |         "-p",
169 |         "--parallel",
170 |         help="Run the search in parallel for --parallel processes. Works only in combination with --interesting.",
Works only in combination with --interesting.", 171 | type=int, 172 | ) 173 | 174 | parser.add_argument( 175 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 176 | ) 177 | 178 | return parser 179 | 180 | 181 | def checker_parser() -> argparse.ArgumentParser: 182 | parser = argparse.ArgumentParser(add_help=False) 183 | 184 | group = parser.add_mutually_exclusive_group() 185 | 186 | parser.add_argument( 187 | "-f", "--file", help="Which file to work on.", type=str, required=True 188 | ) 189 | 190 | parser.add_argument("-m", "--marker", help="Marker to check for.", type=str) 191 | 192 | group.add_argument( 193 | "-s", 194 | "--scenario", 195 | help="Which scenario to use as testing replacement.", 196 | type=str, 197 | ) 198 | 199 | group.add_argument( 200 | "-is", 201 | "--interesting-settings", 202 | help="Which interesting settings to use.", 203 | type=str, 204 | ) 205 | 206 | parser.add_argument( 207 | "-bad", 208 | "--bad-settings", 209 | help="Settings which are supposed to *not* eliminate the marker", 210 | nargs="+", 211 | type=str, 212 | ) 213 | 214 | parser.add_argument( 215 | "-bsdol", 216 | "--bad-settings-default-opt-levels", 217 | help="Default optimization levels for the bad-settings to be checked against.", 218 | nargs="+", 219 | default=[], 220 | type=str, 221 | ) 222 | 223 | parser.add_argument( 224 | "-good", 225 | "--good-settings", 226 | help="Settings which are supposed to eliminate the marker", 227 | nargs="+", 228 | type=str, 229 | ) 230 | 231 | parser.add_argument( 232 | "-gsdol", 233 | "--good-settings-default-opt-levels", 234 | help="Default optimization levels for the good-settings to be checked against.", 235 | nargs="+", 236 | default=[], 237 | type=str, 238 | ) 239 | 240 | parser.add_argument( 241 | "-cr", 242 | "--check-reduced", 243 | help="Instead of checking the original file, check the latest reduced code.", 244 | action="store_true", 245 | ) 246 | 247 | parser.add_argument( 248 | "--check-pp", 249 | help="Run the preprocessed version through the checker.", 250 | action="store_true", 251 | ) 252 | 253 | parser.add_argument( 254 | "--dont-preprocess", 255 | help="Force no preprocessing", 256 | action="store_true", 257 | ) 258 | 259 | return parser 260 | 261 | 262 | def reducer_parser() -> argparse.ArgumentParser: 263 | parser = argparse.ArgumentParser(add_help=False) 264 | 265 | parser.add_argument("-f", "--file", help="Which file to work on.", type=str) 266 | 267 | parser.add_argument( 268 | "-g", 269 | "--generate", 270 | help="Whether or not to generate and reduce cases", 271 | action="store_true", 272 | ) 273 | 274 | parser.add_argument( 275 | "--work-through", 276 | help="Look at all cases found in directory specified in --output-directory and reduce them when they are not.", 277 | action="store_true", 278 | ) 279 | 280 | parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str) 281 | 282 | parser.add_argument( 283 | "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0 284 | ) 285 | 286 | parser.add_argument( 287 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 288 | ) 289 | 290 | parser.add_argument( 291 | "-t", 292 | "--targets", 293 | help="Project name and revision of compiler to use.", 294 | nargs="+", 295 | type=str, 296 | ) 297 | 298 | parser.add_argument( 299 | "-tdol", 300 | "--targets-default-opt-levels", 301 | help="Default optimization levels for the target to be checked against.", 302 | nargs="+", 303 | default=[], 304 | 
305 |     )
306 | 
307 |     parser.add_argument(
308 |         "-ac",
309 |         "--additional-compilers",
310 |         help="Additional compiler to compare the target against.",
311 |         nargs="*",
312 |         type=str,
313 |     )
314 | 
315 |     parser.add_argument(
316 |         "-acdol",
317 |         "--additional-compilers-default-opt-levels",
318 |         help="Default optimization levels for the additional compilers to be checked against.",
319 |         nargs="+",
320 |         default=[],
321 |         type=str,
322 |     )
323 | 
324 |     parser.add_argument(
325 |         "--force",
326 |         help="Force another reduction even if one already exists.",
327 |         action="store_true",
328 |     )
329 | 
330 |     parser.add_argument(
331 |         "-rr",
332 |         "--re-reduce",
333 |         help="Re-reduce the last reduced code",
334 |         action="store_true",
335 |     )
336 | 
337 |     return parser
338 | 
339 | 
340 | def bisector_parser() -> argparse.ArgumentParser:
341 |     parser = argparse.ArgumentParser(add_help=False)
342 | 
343 |     parser.add_argument("-f", "--file", help="Which file to work on.", type=str)
344 | 
345 |     parser.add_argument(
346 |         "-d", "--output-directory", help="Where the cases should be saved to.", type=str
347 |     )
348 | 
349 |     parser.add_argument(
350 |         "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0
351 |     )
352 | 
353 |     parser.add_argument(
354 |         "-g",
355 |         "--generate",
356 |         help="Whether or not to generate, reduce and bisect cases",
357 |         action="store_true",
358 |     )
359 | 
360 |     parser.add_argument("-s", "--scenario", help="Which scenario to work on.", type=str)
361 | 
362 |     parser.add_argument(
363 |         "-t",
364 |         "--targets",
365 |         help="Project name and revision of compiler to use.",
366 |         nargs="+",
367 |         type=str,
368 |     )
369 | 
370 |     parser.add_argument(
371 |         "-tdol",
372 |         "--targets-default-opt-levels",
373 |         help="Default optimization levels for the target to be checked against.",
374 |         nargs="+",
375 |         default=[],
376 |         type=str,
377 |     )
378 | 
379 |     parser.add_argument(
380 |         "-ac",
381 |         "--additional-compilers",
382 |         help="Additional compiler to compare the target against.",
383 |         nargs="*",
384 |         type=str,
385 |     )
386 | 
387 |     parser.add_argument(
388 |         "-acdol",
389 |         "--additional-compilers-default-opt-levels",
390 |         help="Default optimization levels for the additional compilers to be checked against.",
391 |         nargs="+",
392 |         default=[],
393 |         type=str,
394 |     )
395 | 
396 |     parser.add_argument(
397 |         "--work-through",
398 |         help="Look at all cases found in the directory specified in --output-directory and bisect those that have not been bisected yet.",
399 |         action="store_true",
400 |     )
401 | 
402 |     parser.add_argument(
403 |         "--force",
404 |         help="Force another bisection even if one already exists",
405 |         action="store_true",
406 |     )
407 | 
408 |     parser.add_argument(
409 |         "--reducer",
410 |         help="If the generated case should be reduced or not.",
411 |         action=argparse.BooleanOptionalAction,
412 |         default=True,
413 |     )
414 | 
415 |     return parser
416 | 
417 | 
418 | def debugtool_parser() -> argparse.ArgumentParser:
419 |     parser = argparse.ArgumentParser(add_help=False)
420 | 
421 |     parser.add_argument("-f", "--file", help="Which file to work on.", type=str)
422 | 
423 |     parser.add_argument(
424 |         "-crb",
425 |         "--clean-reduced-bisections",
426 |         help="Delete all files related to reduction and bisection",
427 |         action="store_true",
428 |     )
429 | 
430 |     parser.add_argument(
431 |         "--reduced",
432 |         help="Work on reduced files. (where applicable)",
433 |         action="store_true",
434 |     )
435 | 
436 |     parser.add_argument(
437 |         "--preprocessed",
438 |         help="Work on preprocessed files. 
(where applicable)", 439 | action="store_true", 440 | ) 441 | 442 | parser.add_argument( 443 | "--asm", 444 | help="Get assembly for a case asmgood.s and asmbad.s", 445 | action="store_true", 446 | ) 447 | 448 | parser.add_argument( 449 | "--static", 450 | help="Get code where functions and global variables are static in static.c", 451 | action="store_true", 452 | ) 453 | 454 | # TODO: help information for --viz 455 | parser.add_argument("--viz", help="", action="store_true") 456 | 457 | parser.add_argument("--preprocess-code", help="", action="store_true") 458 | 459 | parser.add_argument( 460 | "-di", "--diagnose", help="Run general tests.", action="store_true" 461 | ) 462 | 463 | parser.add_argument( 464 | "--empty-marker-code", 465 | help="Get empty marker body code in empty_body.c", 466 | action="store_true", 467 | ) 468 | 469 | return parser 470 | 471 | 472 | def main_parser() -> argparse.ArgumentParser: 473 | parser = argparse.ArgumentParser(add_help=False) 474 | 475 | subparser = parser.add_subparsers(title="sub", dest="sub") 476 | run_parser = subparser.add_parser("run", help="Let DEAD search for cases.") 477 | 478 | run_parser.add_argument( 479 | "-d", "--output-directory", help="Where the cases should be saved to.", type=str 480 | ) 481 | 482 | run_parser.add_argument( 483 | "-a", "--amount", help="How many cases to find and reduce.", type=int, default=0 484 | ) 485 | 486 | run_parser.add_argument( 487 | "-s", "--scenario", help="Which scenario to work on.", type=str 488 | ) 489 | run_parser.add_argument( 490 | "-t", 491 | "--targets", 492 | help="Project name and revision of compiler to use.", 493 | nargs="+", 494 | type=str, 495 | ) 496 | 497 | run_parser.add_argument( 498 | "-tdol", 499 | "--targets-default-opt-levels", 500 | help="Default optimization levels for the target to be checked against.", 501 | nargs="+", 502 | default=[], 503 | type=str, 504 | ) 505 | 506 | run_parser.add_argument( 507 | "-ac", 508 | "--additional-compilers", 509 | help="Additional compiler to compare the target against.", 510 | nargs="*", 511 | type=str, 512 | ) 513 | 514 | run_parser.add_argument( 515 | "-acdol", 516 | "--additional-compilers-default-opt-levels", 517 | help="Default optimization levels for the additional compilers to be checked against.", 518 | nargs="+", 519 | default=[], 520 | type=str, 521 | ) 522 | 523 | run_parser.add_argument( 524 | "--reducer", 525 | help="If the generated case should be reduced or not.", 526 | action=argparse.BooleanOptionalAction, 527 | default=None, 528 | ) 529 | 530 | run_parser.add_argument( 531 | "--bisector", 532 | help="If the generated case should be bisected or not.", 533 | action=argparse.BooleanOptionalAction, 534 | default=True, 535 | ) 536 | 537 | run_parser.add_argument( 538 | "-pg", 539 | "--parallel-generation", 540 | action=argparse.BooleanOptionalAction, 541 | default=True, 542 | help="Run the case generation part in parallel. This will disable timing for the generation part.", 543 | ) 544 | 545 | run_parser.add_argument( 546 | "--update-trunk-after-X-hours", 547 | help="Enable automatic updating target compilers which are at the current trunk after X hours of running.", 548 | metavar="X", 549 | type=int, 550 | ) 551 | 552 | absorb_parser = subparser.add_parser( 553 | "absorb", help="Read cases outside of the database into the database." 
554 |     )
555 | 
556 |     absorb_parser.add_argument(
557 |         "absorb_object",
558 |         metavar="DIR|FILE",
559 |         help="Directory or file to read .tar cases from into the database.",
560 |     )
561 | 
562 |     report_parser = subparser.add_parser("report", help="Generate a report for a case.")
563 | 
564 |     report_parser.add_argument(
565 |         "case_id",
566 |         metavar="CASE_ID",
567 |         type=int,
568 |         help="Generate a bug report for the given id.",
569 |     )
570 | 
571 |     report_parser.add_argument(
572 |         "--pull",
573 |         help="Pull the repo to check against upstream.",
574 |         action=argparse.BooleanOptionalAction,
575 |         default=False,
576 |     )
577 | 
578 |     tofile_parser = subparser.add_parser(
579 |         "tofile",
580 |         help="Save a case from the database into a file. This is a LOSSY operation.",
581 |     )
582 | 
583 |     tofile_parser.add_argument(
584 |         "case_id",
585 |         metavar="CASE_ID",
586 |         type=int,
587 |         help="Case to get a .tar from ",
588 |     )
589 | 
590 |     rereduce_parser = subparser.add_parser(
591 |         "rereduce",
592 |         help="Reduce code from outside the database w.r.t. a specified case.",
593 |     )
594 | 
595 |     rereduce_parser.add_argument(
596 |         "case_id",
597 |         metavar="CASE_ID",
598 |         type=int,
599 |         help="Case to work with.",
600 |     )
601 | 
602 |     rereduce_parser.add_argument(
603 |         "code_path",
604 |         metavar="CODE_PATH",
605 |         type=str,
606 |         help="Path to code to rereduce",
607 |     )
608 | 
609 |     diagnose_parser = subparser.add_parser(
610 |         "diagnose", help="Run tests on a specified case and print a summary."
611 |     )
612 | 
613 |     diagnose_parser.add_argument(
614 |         "-ci",
615 |         "--case-id",
616 |         metavar="CASE_ID",
617 |         type=int,
618 |         help="Case to work with.",
619 |     )
620 | 
621 |     diagnose_parser.add_argument(
622 |         "--file",
623 |         metavar="PATH",
624 |         type=str,
625 |         help="Path to case to work with",
626 |     )
627 | 
628 |     diagnose_parser.add_argument(
629 |         "-t",
630 |         "--targets",
631 |         help="Option to override the bad setting/target of the case. Only the first specified target will be used!",
632 |         nargs="+",
633 |         type=str,
634 |     )
635 | 
636 |     diagnose_parser.add_argument(
637 |         "-tdol",
638 |         "--targets-default-opt-levels",
639 |         help="Default optimization levels for the target that override the targets of the case.",
640 |         nargs="+",
641 |         default=[],
642 |         type=str,
643 |     )
644 | 
645 |     diagnose_parser.add_argument(
646 |         "-ac",
647 |         "--additional-compilers",
648 |         help="Override the attacking compilers of the case.",
649 |         nargs="*",
650 |         type=str,
651 |     )
652 | 
653 |     diagnose_parser.add_argument(
654 |         "-acdol",
655 |         "--additional-compilers-default-opt-levels",
656 |         help="Default optimization levels for the overriding attacking compilers.",
657 |         nargs="+",
658 |         default=[],
659 |         type=str,
660 |     )
661 | 
662 |     diagnose_parser.add_argument("-s", "--scenario", help="", type=str)
663 | 
664 |     checkreduced_parser = subparser.add_parser(
665 |         "checkreduced",
666 |         help="Check if code outside of the database passes the checks of a specified case.",
667 |     )
668 | 
669 |     checkreduced_parser.add_argument(
670 |         "case_id",
671 |         metavar="CASE_ID",
672 |         type=int,
673 |         help="Case to work with.",
674 |     )
675 | 
676 |     checkreduced_parser.add_argument(
677 |         "code_path",
678 |         metavar="CODE_PATH",
679 |         type=str,
680 |         help="Path to code to checkreduced",
681 |     )
682 | 
683 |     cache_parser = subparser.add_parser("cache", help="Perform actions on the cache.")
684 | 
685 |     cache_parser.add_argument(
686 |         "what",
687 |         choices=("clean", "stats"),
688 |         type=str,
689 |         help="What you want to do with the cache. `clean` will search and remove all unfinished cache entries; `stats` will print some statistics about the cache.",
690 |     )
691 | 
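    # For example, a hypothetical invocation "./main.py cache stats" parses
    # into Namespace(sub="cache", what="stats") via the subparsers declared
    # above (dest="sub").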
692 |     asm_parser = subparser.add_parser(
693 |         "asm",
694 |         help="Save assembly outputs (-S) for the good and bad settings for each code found in a case.",
695 |     )
696 |     asm_parser.add_argument(
697 |         "case_id",
698 |         metavar="CASE_ID",
699 |         type=int,
700 |         help="Case to work with.",
701 |     )
702 | 
703 |     set_parser = subparser.add_parser("set", help="Set values of a case.")
704 |     get_parser = subparser.add_parser(
705 |         "get", help="Print values of a case to the command line."
706 |     )
707 | 
708 |     get_parser.add_argument(
709 |         "what",
710 |         choices=("link", "fixed", "mcode", "rcode", "ocode", "bisection", "marker"),
711 |         type=str,
712 |         help="What you want to get. `ocode` is the original code. `rcode` is the reduced code. `mcode` is the massaged code. `fixed` is the commit the case was fixed with and `link` is the link to the bug report.",
713 |     )
714 | 
715 |     get_parser.add_argument(
716 |         "case_id",
717 |         metavar="CASE_ID",
718 |         type=int,
719 |         help="Case from which to get what you chose",
720 |     )
721 | 
722 |     set_parser.add_argument(
723 |         "what",
724 |         choices=("link", "fixed", "mcode", "rcode", "ocode", "bisection"),
725 |         type=str,
726 |         help="What you want to set. `ocode` is the original code. `rcode` is the reduced code. `mcode` is the massaged code. `fixed` is the commit the case was fixed with and `link` is the link to the bug report. `ocode`, `rcode` and `mcode` expect files; `link`, `fixed` and `bisection` expect strings.",
727 |     )
728 | 
729 |     set_parser.add_argument(
730 |         "case_id",
731 |         metavar="CASE_ID",
732 |         type=int,
733 |         help="Case to set the value of",
734 |     )
735 | 
736 |     set_parser.add_argument(
737 |         "var",
738 |         metavar="VAR",
739 |         type=str,
740 |         help="What to set the chosen value to. Expected input may change based on what you are setting.",
741 |     )
742 | 
743 |     build_parser = subparser.add_parser(
744 |         "build", help="Build a specific compiler version."
745 |     )
746 | 
747 |     build_parser.add_argument(
748 |         "project",
749 |         choices=("gcc", "llvm", "clang"),
750 |         type=str,
751 |         help="Which compiler to build",
752 |     )
753 |     build_parser.add_argument(
754 |         "rev", nargs="+", type=str, help="Which revision(s)/commit(s) to build"
755 |     )
756 |     build_parser.add_argument(
757 |         "--force",
758 |         action=argparse.BooleanOptionalAction,
759 |         help="Whether or not to force another build.",
760 |     )
761 | 
762 |     build_parser.add_argument(
763 |         "--add-patches",
764 |         help="Which patches to apply in addition to the ones found in patchDB",
765 |         nargs="+",
766 |         type=str,
767 |     )
768 | 
769 |     reduce_parser = subparser.add_parser(
770 |         "reduce", help="Reduce the initially found code of a case."
771 |     )
772 | 
773 |     reduce_parser.add_argument(
774 |         "case_id", nargs="+", type=int, help="Which case to reduce"
775 |     )
776 |     reduce_parser.add_argument(
777 |         "--force",
778 |         action=argparse.BooleanOptionalAction,
779 |         help="Whether or not to force another reduction. This will override the old reduced code.",
780 |     )
781 | 
782 |     bisect_parser = subparser.add_parser(
783 |         "bisect", help="Find the bisection commit for a specified case."
784 |     )
785 | 
786 |     bisect_parser.add_argument(
787 |         "case_id", nargs="+", type=int, help="Which case to bisect"
788 |     )
789 |     bisect_parser.add_argument(
790 |         "--force",
791 |         action=argparse.BooleanOptionalAction,
792 |         help="Whether or not to force another bisection. 
This will override the old bisection.", 793 | ) 794 | 795 | edit_parser = subparser.add_parser("edit", help="Open DEADs config in $EDITOR.") 796 | 797 | unreported_parser = subparser.add_parser( 798 | "unreported", help="List cases which have not been reported or fixed." 799 | ) 800 | 801 | unreported_parser.add_argument( 802 | "--id-only", 803 | action="store_true", 804 | help="Print only the case ids. Useful for scripting.", 805 | ) 806 | 807 | unrep_mut_ex_red = unreported_parser.add_mutually_exclusive_group() 808 | unrep_mut_ex_red.add_argument( 809 | "--not-reduced", 810 | action="store_true", 811 | help="If the listed cases should NOT be reduced", 812 | ) 813 | unrep_mut_ex_red.add_argument( 814 | "--reduced", action="store_true", help="If the listed cases should be reduced" 815 | ) 816 | 817 | unrep_mut_ex = unreported_parser.add_mutually_exclusive_group() 818 | # I'd call the options --gcc, --clang etc. but 819 | # running ./main.py unreported --gcc will complain about ambiguity 820 | # wrt to --gcc.repo etc. from the config. 821 | # However when running ./main.py unreported --gcc.repo it is an unknown option 822 | # as these flags are only parsed directly after ./main.py. 823 | unrep_mut_ex.add_argument( 824 | "--gcc-only", action="store_true", help="Print only GCC related bisections." 825 | ) 826 | unrep_mut_ex.add_argument( 827 | "--llvm-only", 828 | action="store_true", 829 | help="Print only LLVM related bisections. Same as --clang-only.", 830 | ) 831 | unrep_mut_ex.add_argument( 832 | "--clang-only", 833 | action="store_true", 834 | help="Print only clang related bisections. Same as --llvm-only.", 835 | ) 836 | 837 | unreported_parser.add_argument( 838 | "--OX-only", 839 | type=str, 840 | metavar="OPT_LEVEL", 841 | help="Print only bisections with OPT_LEVEL as bad setting.", 842 | ) 843 | 844 | unreported_parser.add_argument( 845 | "--good-version", 846 | type=str, 847 | metavar="REV", 848 | help="Print only bisections which have REV as a good compiler matching the opt level of the bad compiler.", 849 | ) 850 | 851 | reported_parser = subparser.add_parser( 852 | "reported", help="List cases which have been reported." 853 | ) 854 | 855 | reported_parser.add_argument( 856 | "--id-only", 857 | action="store_true", 858 | help="Print only the case ids. Useful for scripting.", 859 | ) 860 | 861 | rep_mut_ex = reported_parser.add_mutually_exclusive_group() 862 | rep_mut_ex.add_argument( 863 | "--gcc-only", action="store_true", help="Print only GCC related bisections." 864 | ) 865 | rep_mut_ex.add_argument( 866 | "--llvm-only", 867 | action="store_true", 868 | help="Print only LLVM related bisections. Same as --clang-only.", 869 | ) 870 | rep_mut_ex.add_argument( 871 | "--clang-only", 872 | action="store_true", 873 | help="Print only clang related bisections. Same as --llvm-only.", 874 | ) 875 | 876 | reported_parser.add_argument( 877 | "--good-settings", 878 | action="store_true", 879 | help="Print the good settings of the cases.", 880 | ) 881 | 882 | findby_parser = subparser.add_parser( 883 | "findby", help="Find case IDs given only a part of a case." 884 | ) 885 | findby_parser.add_argument( 886 | "what", 887 | type=str, 888 | choices=("link", "case", "code", "fixed"), 889 | ) 890 | 891 | findby_parser.add_argument( 892 | "var", 893 | type=str, 894 | metavar="VAR", 895 | help="Is a string, when choosing link or fixed, is a path when choosing case or code.", 896 | ) 897 | 898 | return parser 899 | --------------------------------------------------------------------------------
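A minimal sketch of how the `code` table defined in database.py round-trips source code. The in-memory database and the example C snippet are illustrative assumptions; the schema, hashing and compression mirror CaseDatabase.record_code and get_code_from_id:

    import hashlib
    import sqlite3
    import zlib

    con = sqlite3.connect(":memory:")  # assumption: throwaway DB for the demo
    con.execute(
        "CREATE TABLE IF NOT EXISTS code "
        "(code_sha1 CHAR(40) PRIMARY KEY, compressed_code BLOB)"
    )
    code = "int main(void) { return 0; }"
    # Hash before compressing, as record_code does, so the key stays stable
    # across changes in the compression library.
    code_sha1 = hashlib.sha1(code.encode("utf-8")).hexdigest()
    con.execute(
        "INSERT OR IGNORE INTO code VALUES (?, ?)",
        (code_sha1, zlib.compress(code.encode("utf-8"), level=9)),
    )
    (blob,) = con.execute(
        "SELECT compressed_code FROM code WHERE code_sha1 == ?", (code_sha1,)
    ).fetchone()
    assert zlib.decompress(blob).decode("utf-8") == code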