├── .adacore-gitlab-ci.yml ├── .clang-format ├── .dockerignore ├── .github └── workflows │ ├── check_style.yml │ ├── create_push_docker_image.yml │ └── run_tests.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── README.md ├── Vagrantfile ├── compiler ├── Main.cpp ├── Pass.cpp ├── Pass.h ├── Runtime.cpp ├── Runtime.h ├── Symbolizer.cpp ├── Symbolizer.h ├── sym++.in └── symcc.in ├── docs ├── 32-bit.txt ├── C++.txt ├── Concreteness.txt ├── Configuration.txt ├── Experiments.txt ├── Fuzzing.txt ├── Ideas.txt ├── Libc.txt ├── Optimization.txt └── Testing.txt ├── sample.cpp ├── test ├── CMakeLists.txt ├── README ├── arrays.c ├── arrays.test32 ├── bcopy_bcmp_bzero.c ├── bool_cast.c ├── bswap.c ├── bswap.test32 ├── concrete_structs.ll ├── file_input.c ├── file_input.test32 ├── floats.c ├── floats.test32 ├── globals.c ├── globals.test32 ├── if.c ├── if.test32 ├── integers.c ├── integers.test32 ├── large_alloc.c ├── large_alloc.test32 ├── lit.cfg ├── lit.site.cfg.in ├── load_store.ll ├── loop.c ├── loop.test32 ├── memcpy.c ├── memcpy.test32 ├── memory_input.c ├── pointers.c ├── pointers.test32 ├── propagation_select.c ├── read.c ├── read.test32 ├── regression │ └── cxa_vector.ll ├── strings.c ├── strings.test32 ├── structs.c ├── structs.test32 ├── switch.c ├── switch.test32 ├── symbolic_structs.ll ├── test_case_handler.c ├── uadd_sat.ll ├── uadd_sat.test32 ├── usub_sat.ll └── usub_sat.test32 └── util ├── pure_concolic_execution.sh ├── quicktest.sh └── symcc_fuzzing_helper ├── .gitignore ├── Cargo.lock ├── Cargo.toml └── src ├── main.rs └── symcc.rs /.adacore-gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | include: 2 | - component: $CI_SERVER_FQDN/eng/gitlab-templates/build@~latest 3 | inputs: 4 | anod-args: build symcc 5 | generic-anod-ci-args: --add-dep eng/fuzz/qsym 6 | -------------------------------------------------------------------------------- /.clang-format: 
-------------------------------------------------------------------------------- 1 | --- 2 | BasedOnStyle: LLVM 3 | ... 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore the third-party software that's shipped with Qsym; we don't use it anyway. 2 | runtime/qsym_backend/qsym/third_party 3 | # Similarly, we don't run its tests. 4 | runtime/qsym_backend/qsym/tests 5 | 6 | # Rust stores build artifacts in the tree 7 | util/symcc_fuzzing_helper/target 8 | 9 | # Some build utilities that we can ignore 10 | TAGS 11 | compile_commands.json 12 | 13 | # The Dockerfile itself doesn't need to be copied 14 | Dockerfile 15 | 16 | # Do not include build directories 17 | build/ 18 | cmake-* 19 | -------------------------------------------------------------------------------- /.github/workflows/check_style.yml: -------------------------------------------------------------------------------- 1 | name: Check coding style 2 | on: [pull_request] 3 | jobs: 4 | coding_style: 5 | runs-on: ubuntu-22.04 6 | steps: 7 | - uses: actions/checkout@v4 8 | with: 9 | fetch-depth: 0 10 | - name: Run clang-format 11 | shell: bash 12 | run: | 13 | format_changes=$(git clang-format-14 --quiet --diff \ 14 | ${{ github.event.pull_request.base.sha }} \ 15 | ${{ github.event.pull_request.head.sha }} | wc -c) 16 | if [[ $format_changes -ne 0 ]]; then 17 | echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing" 18 | exit 1 19 | fi 20 | -------------------------------------------------------------------------------- /.github/workflows/create_push_docker_image.yml: -------------------------------------------------------------------------------- 1 | name: Publish SymCC Docker image 2 | on: 3 | push: 4 | branches: ['master'] 5 | 6 | jobs: 7 | upload_dockerhub: 8 | if: ${{ (github.repository == 
'eurecom-s3/symcc') && (github.ref == 'refs/heads/master') }} 9 | runs-on: ubuntu-latest 10 | steps: 11 | - 12 | name: Checkout project sources 13 | uses: actions/checkout@v4 14 | - 15 | name: Login to Docker Hub 16 | uses: docker/login-action@v3 17 | with: 18 | username: ${{ secrets.DOCKER_USERNAME }} 19 | password: ${{ secrets.DOCKER_PASSWORD }} 20 | - 21 | name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v3 23 | - 24 | name: Build and push 25 | uses: docker/build-push-action@v5 26 | with: 27 | context: . 28 | file: ./Dockerfile 29 | push: true 30 | tags: ${{ secrets.DOCKER_USERNAME }}/symcc:latest 31 | -------------------------------------------------------------------------------- /.github/workflows/run_tests.yml: -------------------------------------------------------------------------------- 1 | name: Compile and test SymCC 2 | on: [pull_request, workflow_dispatch] 3 | jobs: 4 | # Building and running the tests with Dockerfile 5 | build_and_test_symcc: 6 | runs-on: ubuntu-24.04 7 | steps: 8 | - uses: actions/checkout@v2 9 | - name: Setup docker compilation environment 10 | run: docker build --target builder -t symcc . 11 | - name: Build and test SymCC with simple backend 12 | run: docker build --target builder_simple -t symcc . 13 | - name: Build libcxx using SymCC simple backend 14 | run: docker build --target builder_libcxx -t symcc . 15 | - name: Build and test SymCC with Qsym backend 16 | run: docker build --target builder_qsym -t symcc . 17 | - name: Creation of the final SymCC docker image with Qsym backend and libcxx 18 | run: docker build -t symcc . 
19 | 20 | # checking compatibility with ubuntu llvm packages 21 | llvm_compatibility: 22 | runs-on: ubuntu-24.04 23 | strategy: 24 | matrix: 25 | llvm_version: [15, 16, 17, 18, 19] 26 | steps: 27 | - uses: actions/checkout@v4 28 | with: 29 | submodules: true 30 | - name: Install dependencies 31 | run: | 32 | sudo apt-get update 33 | sudo apt-get install -y \ 34 | llvm-${{ matrix.llvm_version }}-dev \ 35 | libz3-dev \ 36 | git 37 | 38 | - name: Build SymCC with the QSYM backend 39 | run: | 40 | git submodule update --init --recursive runtime 41 | mkdir build 42 | cd build 43 | cmake \ 44 | -DCMAKE_BUILD_TYPE=Release \ 45 | -DZ3_TRUST_SYSTEM_VERSION=ON \ 46 | -DSYMCC_RT_BACKEND=qsym \ 47 | -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \ 48 | .. 49 | make 50 | 51 | # TODO Re-enable the section below when LLVM releases a version that isn't 52 | # supported by Ubuntu packages in our runner image. 53 | 54 | # # checking compatibility (compilation only) with more recent packages 55 | # llvm_compatibility_latest_llvm: 56 | # runs-on: ubuntu-22.04 57 | # strategy: 58 | # matrix: 59 | # llvm_version: [16, 17, 18] 60 | # steps: 61 | # - uses: actions/checkout@v4 62 | # with: 63 | # submodules: true 64 | # - name: Add LLVM project deb repository 65 | # uses: myci-actions/add-deb-repo@11 66 | # with: 67 | # repo: deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm_version }} main 68 | # repo-name: llvm 69 | # update: false 70 | # keys-asc: https://apt.llvm.org/llvm-snapshot.gpg.key 71 | # - name: Install dependencies 72 | # run: | 73 | # sudo apt-get update 74 | # sudo apt-get install -y \ 75 | # llvm-${{ matrix.llvm_version }}-dev \ 76 | # libz3-dev \ 77 | # git 78 | # - name: Build SymCC with the QSYM backend 79 | # run: | 80 | # git submodule update --init --recursive runtime 81 | # mkdir build 82 | # cd build 83 | # cmake \ 84 | # -DCMAKE_BUILD_TYPE=Release \ 85 | # -DZ3_TRUST_SYSTEM_VERSION=ON \ 86 | # -DSYMCC_RT_BACKEND=qsym \ 87 | # 
-DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \ 88 | # .. 89 | # make 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # Tags 35 | TAGS 36 | 37 | # CLion project 38 | .idea 39 | 40 | # Clang tooling 41 | compile_commands.json 42 | .clangd 43 | .cache 44 | 45 | # Build directories 46 | *build* 47 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "runtime"] 2 | path = runtime 3 | url = https://github.com/eurecom-s3/symcc-rt.git 4 | branch = main 5 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This file is part of SymCC. 2 | # 3 | # SymCC is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License along with 13 | # SymCC. 
If not, see . 14 | 15 | cmake_minimum_required(VERSION 3.16) 16 | project(SymCC) 17 | 18 | set(LLVM_VERSION "" CACHE STRING "LLVM version to use. The corresponding LLVM dev package must be installed.") 19 | set(SYMCC_RT_BACKEND "qsym" CACHE STRING "The symbolic backend to use. Please check symcc-rt to get a list of the available backends.") 20 | option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF) 21 | 22 | # We need to build the runtime as an external project because CMake otherwise 23 | # doesn't allow us to build it twice with different options (one 32-bit version 24 | # and one 64-bit variant). 25 | include(ExternalProject) 26 | 27 | # Find LLVM 28 | find_package(LLVM ${LLVM_VERSION} REQUIRED CONFIG) 29 | 30 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 31 | message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") 32 | 33 | if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 17) 34 | message(WARNING "The software has been developed for LLVM 8 through 17; \ 35 | it is unlikely to work with other versions!") 36 | endif() 37 | 38 | set(SYM_RUNTIME_BUILD_ARGS 39 | -DCMAKE_AR=${CMAKE_AR} 40 | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} 41 | -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} 42 | -DCMAKE_C_FLAGS_INIT=${CMAKE_C_FLAGS_INIT} 43 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} 44 | -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} 45 | -DCMAKE_CXX_FLAGS_INIT=${CMAKE_CXX_FLAGS_INIT} 46 | -DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS} 47 | -DCMAKE_EXE_LINKER_FLAGS_INIT=${CMAKE_EXE_LINKER_FLAGS_INIT} 48 | -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} 49 | -DCMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS} 50 | -DCMAKE_MODULE_LINKER_FLAGS_INIT=${CMAKE_MODULE_LINKER_FLAGS_INIT} 51 | -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} 52 | -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT} 53 | -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} 54 | -DCMAKE_SYSROOT=${CMAKE_SYSROOT} 55 | -DSYMCC_RT_BACKEND=${SYMCC_RT_BACKEND} 56 | 
-DLLVM_VERSION=${LLVM_PACKAGE_VERSION} 57 | -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} 58 | -DZ3_TRUST_SYSTEM_VERSION=${Z3_TRUST_SYSTEM_VERSION}) 59 | 60 | ExternalProject_Add(SymCCRuntime 61 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime 62 | CMAKE_ARGS 63 | ${SYM_RUNTIME_BUILD_ARGS} 64 | -DCMAKE_EXPORT_COMPILE_COMMANDS=${CMAKE_EXPORT_COMPILE_COMMANDS} 65 | -DZ3_DIR=${Z3_DIR} 66 | -DLLVM_DIR=${LLVM_DIR} 67 | INSTALL_COMMAND "" 68 | BUILD_ALWAYS TRUE) 69 | 70 | ExternalProject_Get_Property(SymCCRuntime BINARY_DIR) 71 | set(SYMCC_RUNTIME_DIR ${BINARY_DIR}) 72 | 73 | if (${TARGET_32BIT}) 74 | ExternalProject_Add(SymCCRuntime32 75 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime 76 | CMAKE_ARGS 77 | ${SYM_RUNTIME_BUILD_ARGS} 78 | -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS} -m32" 79 | -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -m32" 80 | -DZ3_DIR=${Z3_32BIT_DIR} 81 | -DLLVM_DIR=${LLVM_32BIT_DIR} 82 | INSTALL_COMMAND "" 83 | BUILD_ALWAYS TRUE) 84 | 85 | ExternalProject_Get_Property(SymCCRuntime32 BINARY_DIR) 86 | set(SYMCC_RUNTIME_32BIT_DIR ${BINARY_DIR}) 87 | endif() 88 | 89 | find_package(LLVM REQUIRED CONFIG) 90 | 91 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") 92 | message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}") 93 | 94 | if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 18) 95 | message(WARNING "The software has been developed for LLVM 8 through 18; \ 96 | it is unlikely to work with other versions!") 97 | endif() 98 | 99 | add_definitions(${LLVM_DEFINITIONS}) 100 | include_directories(SYSTEM ${LLVM_INCLUDE_DIRS}) 101 | 102 | set(CMAKE_CXX_STANDARD 17) 103 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ 104 | -Wredundant-decls -Wcast-align -Wmissing-include-dirs -Wswitch-default \ 105 | -Wextra -Wall -Winvalid-pch -Wredundant-decls -Wformat=2 \ 106 | -Wmissing-format-attribute -Wformat-nonliteral -Werror -Wno-error=deprecated-declarations") 107 | 108 | # Mark nodelete to work around unload bug in upstream LLVM 5.0+ 109 | set(CMAKE_MODULE_LINKER_FLAGS 
"${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete") 110 | 111 | # This is the compiler pass that we later load into clang or opt. If LLVM is 112 | # built without RTTI we have to disable it for our library too, otherwise we'll 113 | # get linker errors. 114 | add_library(SymCC MODULE 115 | compiler/Symbolizer.cpp 116 | compiler/Pass.cpp 117 | compiler/Runtime.cpp 118 | compiler/Main.cpp) 119 | 120 | set_target_properties(SymCC PROPERTIES OUTPUT_NAME "symcc") 121 | if (NOT LLVM_ENABLE_RTTI) 122 | set_target_properties(SymCC PROPERTIES COMPILE_FLAGS "-fno-rtti") 123 | endif() 124 | 125 | find_program(CLANG_BINARY "clang" 126 | HINTS ${LLVM_TOOLS_BINARY_DIR} 127 | DOC "The clang binary to use in the symcc wrapper script.") 128 | find_program(CLANGPP_BINARY "clang++" 129 | HINTS ${LLVM_TOOLS_BINARY_DIR} 130 | DOC "The clang binary to use in the sym++ wrapper script.") 131 | if (NOT CLANG_BINARY) 132 | message(FATAL_ERROR "Clang not found; please make sure that the version corresponding to your LLVM installation is available.") 133 | endif() 134 | 135 | if (${LLVM_VERSION_MAJOR} LESS 13) 136 | set(CLANG_LOAD_PASS "-Xclang -load -Xclang ") 137 | else() 138 | set(CLANG_LOAD_PASS "-fpass-plugin=") 139 | endif() 140 | 141 | configure_file("compiler/symcc.in" "symcc" @ONLY) 142 | configure_file("compiler/sym++.in" "sym++" @ONLY) 143 | 144 | add_subdirectory(test) 145 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to SymCC 2 | 3 | We encourage everyone to contribute improvements and bug fixes to SymCC. Our 4 | preferred way of accepting contributions is via GitHub pull requests. Please be 5 | sure to run clang-format on any C/C++ code you change; an easy way to do so is 6 | with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can 7 | get `git-clang-format` via `apt install clang-format`.) 
Ideally, also add a test 8 | to your patch (see the 9 | [docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for 10 | details). Unfortunately, since the project is a bit short on developers at the 11 | moment, we have to ask for your patience while we review your PR. 12 | 13 | Please note that any contributions you make are licensed under the same terms as 14 | the code you're contributing to, as per the GitHub Terms of Service, [section 15 | D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license). 16 | At the time of writing, this means LGPL (version 3 or later) for the SymCC 17 | runtime, and GPL (version 3 or later) for the rest of SymCC. 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # This file is part of SymCC. 2 | # 3 | # SymCC is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License along with 13 | # SymCC. If not, see . 14 | 15 | # 16 | # The base image 17 | # 18 | FROM ubuntu:22.04 AS builder 19 | 20 | # Install dependencies 21 | RUN apt-get update \ 22 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 23 | cargo \ 24 | cmake \ 25 | g++ \ 26 | git \ 27 | libz3-dev \ 28 | ninja-build \ 29 | python3-pip \ 30 | zlib1g-dev \ 31 | wget 32 | RUN pip3 install lit 33 | 34 | WORKDIR / 35 | 36 | # Build AFL. 
37 | RUN git clone -b v2.56b https://github.com/google/AFL.git afl \ 38 | && cd afl \ 39 | && make 40 | 41 | # This is passed along to symcc and qsym backend 42 | # Version 15 is buggy https://github.com/eurecom-s3/symcc/issues/164 43 | arg LLVM_VERSION=12 44 | 45 | # installing/building with the right LLVM version, currently: 46 | # - no plan to support < 11 47 | # - 12 to 15 are in official packages, 48 | # - 16 and 17 provided by llvm.org 49 | # - TODO 18 should be fixed 50 | RUN if [ $LLVM_VERSION -le 11 ]; then echo "LLVM <= 11 not supported" ; false ;fi 51 | RUN if [ $LLVM_VERSION -ge 18 ]; then echo "LLVM >= 18 currently not supported" ; false ;fi 52 | RUN if [ $LLVM_VERSION -eq 12 ] || [ $LLVM_VERSION -eq 13 ] || [ $LLVM_VERSION -eq 14 ] || [ $LLVM_VERSION -eq 15 ]; then \ 53 | apt install -y llvm-${LLVM_VERSION} clang-${LLVM_VERSION} ; \ 54 | else \ 55 | false ; \ 56 | fi 57 | 58 | RUN rm -rf /var/lib/apt/lists/* 59 | # Download the LLVM sources already so that we don't need to get them again when 60 | # SymCC changes 61 | RUN git clone -b llvmorg-$LLVM_VERSION.0.0 --depth 1 https://github.com/llvm/llvm-project.git /llvm_source 62 | 63 | # Build a version of SymCC with the simple backend to compile libc++ 64 | COPY . 
/symcc_source 65 | 66 | # Init submodules if they are not initialized yet 67 | WORKDIR /symcc_source 68 | RUN git submodule update --init --recursive 69 | 70 | # 71 | # Build SymCC with the simple backend 72 | # 73 | FROM builder AS builder_simple 74 | WORKDIR /symcc_build_simple 75 | RUN cmake -G Ninja \ 76 | -DSYMCC_RT_BACKEND=simple \ 77 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \ 78 | -DZ3_TRUST_SYSTEM_VERSION=on \ 79 | /symcc_source \ 80 | && ninja check 81 | 82 | # 83 | # Build libc++ with SymCC using the simple backend 84 | # 85 | FROM builder_simple AS builder_libcxx 86 | WORKDIR /libcxx_symcc 87 | RUN export SYMCC_REGULAR_LIBCXX=yes SYMCC_NO_SYMBOLIC_INPUT=yes \ 88 | && mkdir /libcxx_symcc_build \ 89 | && cd /libcxx_symcc_build \ 90 | && cmake -G Ninja /llvm_source/llvm \ 91 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \ 92 | -DLLVM_TARGETS_TO_BUILD="X86" \ 93 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \ 94 | -DCMAKE_BUILD_TYPE=Release \ 95 | -DCMAKE_INSTALL_PREFIX=/libcxx_symcc_install \ 96 | -DCMAKE_C_COMPILER=/symcc_build_simple/symcc \ 97 | -DCMAKE_CXX_COMPILER=/symcc_build_simple/sym++ \ 98 | && ninja distribution \ 99 | && ninja install-distribution 100 | 101 | 102 | # 103 | # Build SymCC with the Qsym backend 104 | # 105 | FROM builder_libcxx AS builder_qsym 106 | WORKDIR /symcc_build 107 | RUN cmake -G Ninja \ 108 | -DSYMCC_RT_BACKEND=qsym \ 109 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \ 110 | -DZ3_TRUST_SYSTEM_VERSION=on \ 111 | /symcc_source \ 112 | && ninja check \ 113 | && cargo install --path /symcc_source/util/symcc_fuzzing_helper 114 | 115 | 116 | # 117 | # The final image 118 | # 119 | FROM ubuntu:22.04 as symcc 120 | 121 | RUN apt-get update \ 122 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 123 | build-essential \ 124 | g++ \ 125 | zlib1g \ 126 | sudo \ 127 | && useradd -m -s /bin/bash ubuntu \ 128 | && echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu 129 | 130 | arg LLVM_VERSION=15 131 | 132 | RUN apt-get 
update \ 133 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \ 134 | llvm-$LLVM_VERSION \ 135 | clang-$LLVM_VERSION \ 136 | && rm -rf /var/lib/apt/lists/* 137 | 138 | COPY --from=builder_qsym /symcc_build /symcc_build 139 | COPY --from=builder_qsym /root/.cargo/bin/symcc_fuzzing_helper /symcc_build/ 140 | COPY util/pure_concolic_execution.sh /symcc_build/ 141 | COPY --from=builder_qsym /libcxx_symcc_install /libcxx_symcc_install 142 | COPY --from=builder_qsym /afl /afl 143 | 144 | # fix permissions 145 | RUN chmod -R og+rX /symcc_build 146 | 147 | ENV PATH /symcc_build:$PATH 148 | ENV AFL_PATH /afl 149 | ENV AFL_CC clang-$LLVM_VERSION 150 | ENV AFL_CXX clang++-$LLVM_VERSION 151 | ENV SYMCC_LIBCXX_PATH=/libcxx_symcc_install 152 | 153 | USER ubuntu 154 | WORKDIR /home/ubuntu 155 | COPY --chown=ubuntu:ubuntu sample.cpp /home/ubuntu/ 156 | 157 | RUN mkdir /tmp/output 158 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Compile and test SymCC](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml/badge.svg)](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml) 2 | 3 | Note: The SymCC project is currently understaffed and therefore maintained in a 4 | best effort mode. In fact, we are hiring, in case you are interested to join 5 | the [S3 group at Eurecom](https://www.s3.eurecom.fr/) to work on this (and other 6 | projects in the group) please [contact us](mailto:aurelien.francillon@eurecom.fr). 7 | We nevertheless appreciate PRs and apologize in advance for the slow processing 8 | of PRs, we will try to merge them when possible. 9 | 10 | # SymCC: efficient compiler-based symbolic execution 11 | 12 | SymCC is a compiler pass which embeds symbolic execution into the program 13 | during compilation, and an associated run-time support library. 
In essence, the 14 | compiler inserts code that computes symbolic expressions for each value in the 15 | program. The actual computation happens through calls to the support library at 16 | run time. 17 | 18 | To build the pass and the support library, install LLVM (any version between 8 19 | and 18) and Z3 (version 4.5 or later), as well as a C++ compiler with support 20 | for C++17. LLVM lit is only needed to run the tests; if it's not packaged with 21 | your LLVM, you can get it with `pip install lit`. 22 | 23 | Under Ubuntu Groovy the following one-liner should install all required 24 | packages: 25 | 26 | ``` 27 | sudo apt install -y git cargo clang-14 cmake g++ git libz3-dev llvm-14-dev llvm-14-tools ninja-build python3-pip zlib1g-dev && sudo pip3 install lit 28 | ``` 29 | 30 | Alternatively, see below for using the provided Dockerfile, or the file 31 | `util/quicktest.sh` for exact steps to perform under Ubuntu (or use with the 32 | provided Vagrant file). 33 | 34 | Make sure to pull the SymCC Runtime: 35 | 36 | ``` 37 | $ git submodule update --init --recursive 38 | ``` 39 | 40 | Note that it is not necessary or recommended to build the QSYM submodule - our 41 | build system will automatically extract the right source files and include them 42 | in the build. 43 | 44 | Create a build directory somewhere, and execute the following commands inside 45 | it: 46 | 47 | ``` 48 | $ cmake -G Ninja -DSYMCC_RT_BACKEND=qsym /path/to/compiler/sources 49 | $ ninja check 50 | ``` 51 | 52 | If LLVM is installed in a non-standard location, add the CMake parameter 53 | `-DLLVM_DIR=/path/to/llvm/cmake/module`. Similarly, you can point to a 54 | non-standard Z3 installation with `-DZ3_DIR=/path/to/z3/cmake/module` (which 55 | requires Z3 to be built with CMake). 56 | 57 | The main build artifact from the user's point of view is `symcc`, a wrapper 58 | script around clang that sets the right options to load our pass and link 59 | against the run-time library. 
(See below for additional C++ support.) 60 | 61 | To try the compiler, take some simple C code like the following: 62 | 63 | ``` c 64 | #include 65 | #include 66 | #include 67 | 68 | int foo(int a, int b) { 69 | if (2 * a < b) 70 | return a; 71 | else if (a % b) 72 | return b; 73 | else 74 | return a + b; 75 | } 76 | 77 | int main(int argc, char* argv[]) { 78 | int x; 79 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 80 | printf("Failed to read x\n"); 81 | return -1; 82 | } 83 | printf("%d\n", foo(x, 7)); 84 | return 0; 85 | } 86 | ``` 87 | 88 | Save the code as `test.c`. To compile it with symbolic execution built in, we 89 | call symcc as we would normally call clang: 90 | 91 | ``` 92 | $ ./symcc test.c -o test 93 | ``` 94 | 95 | Before starting the analysis, create a directory for the results and tell SymCC 96 | about it: 97 | 98 | ``` 99 | $ mkdir results 100 | $ export SYMCC_OUTPUT_DIR=`pwd`/results 101 | ``` 102 | 103 | Then run the program like any other binary, providing arbitrary input: 104 | 105 | ``` 106 | $ echo 'aaaa' | ./test 107 | ``` 108 | 109 | The program will execute the same computations as an uninstrumented version 110 | would, but additionally the injected code will track computations symbolically 111 | and attempt to compute diverging inputs at each branch point. All data that the 112 | program reads from standard input is treated as symbolic; alternatively, you can 113 | set the environment variable SYMCC_INPUT_FILE to the name of a file whose 114 | contents will be treated as symbolic when read. 115 | 116 | Note that due to how the QSYM backend is implemented, all input has to be available 117 | from the start. In particular, when providing symbolic data on standard input 118 | interactively, you need to terminate your input by pressing Ctrl+D before the 119 | program starts to execute. 120 | 121 | When execution is finished, the result directory will contain the new test cases 122 | generated during program execution. 
Try running the program again on one of 123 | those (or use [util/pure_concolic_execution.sh](util/pure_concolic_execution.sh) 124 | to automate the process). For better results, combine SymCC with a fuzzer (see 125 | [docs/Fuzzing.txt](docs/Fuzzing.txt)). 126 | 127 | 128 | ## Documentation 129 | 130 | The directory [docs](docs) contains documentation on several internal aspects of 131 | SymCC, as well as [building C++ code](docs/C++.txt), [compiling 32-bit binaries 132 | on a 64-bit host](docs/32-bit.txt), and [running SymCC with a 133 | fuzzer](docs/Fuzzing.txt). There is also a [list of all configuration 134 | options](docs/Configuration.txt). 135 | 136 | If you're interested in the research paper that we wrote about SymCC, have a 137 | look at our group's 138 | [website](http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html). It also 139 | contains detailed instructions to replicate our experiments, as well as the raw 140 | results that we obtained. 141 | 142 | ### Video demonstration 143 | On YouTube you can find [a practical introduction to 144 | SymCC](https://www.youtube.com/watch?v=htDrNBiL7Y8) as well as a video on [how 145 | to combine AFL and SymCC](https://www.youtube.com/watch?v=zmC-ptp3W3k). 146 | 147 | ## Building a Docker image 148 | 149 | If you prefer a Docker container over building SymCC natively, just tell Docker 150 | to build the image after pulling the QSYM code as above. (Be warned though: the 151 | Docker image enables optional C++ support from source, so creating the image can 152 | take quite some time!) 153 | 154 | ``` 155 | $ docker build -t symcc . 156 | $ docker run -it --rm symcc 157 | ``` 158 | 159 | Alternatively, you can pull an existing image (current master branch) from 160 | Docker Hub: 161 | 162 | ``` 163 | $ docker pull eurecoms3/symcc 164 | $ docker run -it --rm eurecoms3/symcc 165 | ``` 166 | 167 | This will build (or pull) a Docker image and run an ephemeral container to try out SymCC. 
168 | Inside the container, `symcc` is available as a drop-in replacement for `clang`, 169 | using the QSYM backend; similarly, `sym++` can be used instead of `clang++`. Now 170 | try something like the following inside the container: 171 | 172 | ``` 173 | container$ cat sample.cpp 174 | (Note that "root" is the input we're looking for.) 175 | container$ sym++ -o sample sample.cpp 176 | container$ echo test | ./sample 177 | ... 178 | container$ cat /tmp/output/000008-optimistic 179 | root 180 | ``` 181 | 182 | The Docker image also has AFL and `symcc_fuzzing_helper` preinstalled, so you 183 | can use it to run SymCC with a fuzzer as described in [the 184 | docs](docs/Fuzzing.txt). (The AFL binaries are located in `/afl`.) 185 | 186 | While the Docker image is very convenient for _using_ SymCC, I recommend a local 187 | build outside Docker for _development_. Docker will rebuild most of the image on 188 | every change to SymCC (which is, in principle, the right thing to do), whereas in 189 | many cases it is sufficient to let the build system figure out what to rebuild 190 | (and recompile, e.g., libc++ only when necessary). 191 | 192 | ## FAQ / BUGS / TODOs 193 | 194 | ### Why is SymCC only exploring one path and not all paths? 195 | 196 | SymCC is currently a concolic executor. As such, it follows the concrete 197 | path. In theory, it would be possible to make it a forking executor - 198 | see [issue #14](https://github.com/eurecom-s3/symcc/issues/14). 199 | 200 | ### Why does SymCC not generate some test cases? 201 | 202 | There are multiple possible reasons: 203 | 204 | #### QSym backend performs pruning 205 | 206 | When built with the QSym backend, SymCC's exploration (e.g., of loops) is 207 | subject to path pruning. This is part of the optimizations that make 208 | SymCC/QSym fast, but it isn't sound. This is not a problem for use in 209 | hybrid fuzzing, but it may be a problem for other uses. 
See for 210 | example [issue #88](https://github.com/eurecom-s3/symcc/issues/88). 211 | 212 | When building with the simple backend the paths should be found. If 213 | the paths are not found with the simple backend this may be a bug (or 214 | possibly a limitation of the simple backend). 215 | 216 | #### Incomplete symbolic handling of functions and system interactions 217 | 218 | The current symbolic understanding of libc is incomplete. So when an 219 | unsupported libc function is called, SymCC can't trace the computations 220 | that happen in the function. There are several ways to address this: 221 | 222 | 1. Add the function to the [collection of wrapped libc 223 | functions](https://github.com/eurecom-s3/symcc-rt/blob/main/src/LibcWrappers.cpp) 224 | and [register the 225 | wrapper](https://github.com/eurecom-s3/symcc/blob/b29dc4db2803830ebf50798e72b336473a567655/compiler/Runtime.cpp#L159) 226 | in the compiler. 227 | 2. Build a fully instrumented libc. 228 | 3. Cherry-pick individual libc functions from a libc implementation (e.g., musl). 229 | 230 | See [issue #23](https://github.com/eurecom-s3/symcc/issues/23) for more details. 231 | 232 | 233 | ### Rust support? 234 | 235 | It would be possible to support Rust; see [issue 236 | #1](https://github.com/eurecom-s3/symcc/issues/1) for tracking this. 237 | 238 | ### Bug reporting 239 | 240 | We appreciate bug reports with test cases and steps to reproduce, and PRs with 241 | corresponding test cases. SymCC is currently understaffed; we hope to 242 | catch up and get back to active development at some point. 243 | 244 | ## Contact 245 | 246 | Feel free to use GitHub issues and pull requests for improvements, bug reports, 247 | etc. Alternatively, you can send an email to Sebastian Poeplau 248 | (sebastian.poeplau@eurecom.fr) and Aurélien Francillon 249 | (aurelien.francillon@eurecom.fr). 
250 | 251 | 252 | ## Reference 253 | 254 | To cite SymCC in scientific work, please use the following BibTeX: 255 | 256 | ``` bibtex 257 | @inproceedings {poeplau2020symcc, 258 | author = {Sebastian Poeplau and Aurélien Francillon}, 259 | title = {Symbolic execution with {SymCC}: Don't interpret, compile!}, 260 | booktitle = {29th {USENIX} Security Symposium ({USENIX} Security 20)}, 261 | isbn = {978-1-939133-17-5}, 262 | pages = {181--198}, 263 | year = 2020, 264 | url = {https://www.usenix.org/conference/usenixsecurity20/presentation/poeplau}, 265 | publisher = {{USENIX} Association}, 266 | month = aug, 267 | } 268 | ``` 269 | 270 | More information on the paper is available 271 | [here](http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html). 272 | 273 | 274 | ## Other projects using SymCC 275 | 276 | [SymQEMU](https://github.com/eurecom-s3/symqemu) relies on SymCC. 277 | 278 | LibAFL supports concolic execution with [SymCC](https://aflplus.plus/libafl-book/advanced_features/concolic/concolic.html), 279 | but this requires external patches (for now). 280 | 281 | [AdaCore](https://www.adacore.com/) published [a paper describing](https://dl.acm.org/doi/10.1145/3631483.3631500) 282 | SymCC integration in GNATfuzz for test case generation and [plans to release this 283 | as part of the GNATfuzz beta release](https://docs.adacore.com/live/wave/roadmap/html/roadmap/roadmap_25_GNAT%20Pro.html#symbolic-execution-to-retrieve-input-values).
294 | 295 | You should have received a copy of the GNU General Public License and the GNU 296 | Lesser General Public License along with SymCC. If not, see 297 | <https://www.gnu.org/licenses/>. 298 | 299 | The following pieces of software have additional or alternate copyrights, 300 | licenses, and/or restrictions: 301 | 302 | | Program | Directory | 303 | |---------------|-----------------------------| 304 | | SymCC Runtime | `runtime` | 305 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | Vagrant.configure("2") do |config| 5 | config.vm.box = "ubuntu/groovy64" 6 | config.vm.provision "shell", path: "util/quicktest.sh" 7 | end 8 | -------------------------------------------------------------------------------- /compiler/Main.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 | 15 | #include 16 | #if LLVM_VERSION_MAJOR <= 15 17 | #include 18 | #endif 19 | #include 20 | #include 21 | 22 | #if LLVM_VERSION_MAJOR >= 13 23 | #include 24 | #include 25 | 26 | #if LLVM_VERSION_MAJOR >= 14 27 | #include 28 | #else 29 | using OptimizationLevel = llvm::PassBuilder::OptimizationLevel; 30 | #endif 31 | #endif 32 | 33 | #if LLVM_VERSION_MAJOR >= 15 34 | #include 35 | #else 36 | #include 37 | #endif 38 | 39 | #include "Pass.h" 40 | 41 | using namespace llvm; 42 | 43 | // 44 | // Legacy pass registration (up to and including LLVM 15, see the guard below) 45 | // 46 | 47 | #if LLVM_VERSION_MAJOR <= 15 48 | // Schedule scalarization and atomic lowering, then the symbolizer pass itself. 49 | void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */, 50 | legacy::PassManagerBase &PM) { 51 | PM.add(createScalarizerPass()); 52 | PM.add(createLowerAtomicPass()); 53 | PM.add(new SymbolizeLegacyPass()); 54 | } 55 | 56 | // Make the pass known to opt. 57 | static RegisterPass X("symbolize", "Symbolization Pass"); 58 | // Tell frontends to run the pass automatically (vectorizer start, and at -O0). 59 | static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart, 60 | addSymbolizeLegacyPass); 61 | static struct RegisterStandardPasses 62 | Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass); 63 | 64 | #endif 65 | 66 | // 67 | // New pass registration (LLVM 13 and above) 68 | // 69 | 70 | #if LLVM_VERSION_MAJOR >= 13 71 | // Describe the plugin and register the module-level and function-level passes. 72 | PassPluginLibraryInfo getSymbolizePluginInfo() { 73 | return {LLVM_PLUGIN_API_VERSION, "Symbolization Pass", LLVM_VERSION_STRING, 74 | [](PassBuilder &PB) { 75 | // We need to act on the entire module as well as on each function. 76 | // Those actions are independent from each other, so we register a 77 | // module pass at the start of the pipeline and a function pass just 78 | // before the vectorizer. (There doesn't seem to be a way to run 79 | // module passes at the start of the vectorizer, hence the split.)
80 | PB.registerPipelineStartEPCallback( 81 | [](ModulePassManager &PM, OptimizationLevel) { 82 | PM.addPass(SymbolizePass()); 83 | }); 84 | PB.registerVectorizerStartEPCallback( 85 | [](FunctionPassManager &PM, OptimizationLevel) { 86 | PM.addPass(ScalarizerPass()); 87 | PM.addPass(LowerAtomicPass()); 88 | PM.addPass(SymbolizePass()); 89 | }); 90 | }}; 91 | } 92 | // Exported plugin entry point; simply forwards to getSymbolizePluginInfo(). 93 | extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() { 94 | return getSymbolizePluginInfo(); 95 | } 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /compiler/Pass.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 | 15 | #include "Pass.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | 28 | #if LLVM_VERSION_MAJOR < 14 29 | #include 30 | #else 31 | #include 32 | #endif 33 | 34 | #include "Runtime.h" 35 | #include "Symbolizer.h" 36 | 37 | using namespace llvm; 38 | 39 | #ifndef NDEBUG 40 | #define DEBUG(X) \ 41 | do { \ 42 | X; \ 43 | } while (false) 44 | #else 45 | #define DEBUG(X) ((void)0) 46 | #endif 47 | 48 | char SymbolizeLegacyPass::ID = 0; 49 | 50 | namespace { 51 | 52 | static constexpr char kSymCtorName[] = "__sym_ctor"; 53 | 54 | bool instrumentModule(Module &M) { 55 | DEBUG(errs() << "Symbolizer module instrumentation\n"); 56 | 57 | // Redirect calls to external functions to the corresponding wrappers and 58 | // rename internal functions. 59 | for (auto &function : M.functions()) { 60 | auto name = function.getName(); 61 | if (isInterceptedFunction(function)) 62 | function.setName(name + "_symbolized"); 63 | } 64 | 65 | // Insert a constructor that initializes the runtime and any globals. 
66 | Function *ctor; 67 | std::tie(ctor, std::ignore) = createSanitizerCtorAndInitFunctions( 68 | M, kSymCtorName, "_sym_initialize", {}, {}); 69 | appendToGlobalCtors(M, ctor, 0); 70 | 71 | return true; 72 | } 73 | 74 | bool canLower(const CallInst *CI) { 75 | const Function *Callee = CI->getCalledFunction(); 76 | if (!Callee) 77 | return false; 78 | 79 | switch (Callee->getIntrinsicID()) { 80 | case Intrinsic::expect: 81 | case Intrinsic::ctpop: 82 | case Intrinsic::ctlz: 83 | case Intrinsic::cttz: 84 | case Intrinsic::prefetch: 85 | case Intrinsic::pcmarker: 86 | case Intrinsic::dbg_declare: 87 | case Intrinsic::dbg_label: 88 | case Intrinsic::annotation: 89 | case Intrinsic::ptr_annotation: 90 | case Intrinsic::assume: 91 | #if LLVM_VERSION_MAJOR > 11 92 | case Intrinsic::experimental_noalias_scope_decl: 93 | #endif 94 | case Intrinsic::var_annotation: 95 | case Intrinsic::sqrt: 96 | case Intrinsic::log: 97 | case Intrinsic::log2: 98 | case Intrinsic::log10: 99 | case Intrinsic::exp: 100 | case Intrinsic::exp2: 101 | case Intrinsic::pow: 102 | case Intrinsic::sin: 103 | case Intrinsic::cos: 104 | case Intrinsic::floor: 105 | case Intrinsic::ceil: 106 | case Intrinsic::trunc: 107 | case Intrinsic::round: 108 | #if LLVM_VERSION_MAJOR > 10 109 | case Intrinsic::roundeven: 110 | #endif 111 | case Intrinsic::copysign: 112 | #if LLVM_VERSION_MAJOR < 16 113 | case Intrinsic::flt_rounds: 114 | #else 115 | case Intrinsic::get_rounding: 116 | #endif 117 | case Intrinsic::invariant_start: 118 | case Intrinsic::lifetime_start: 119 | case Intrinsic::invariant_end: 120 | case Intrinsic::lifetime_end: 121 | return true; 122 | default: 123 | return false; 124 | } 125 | 126 | llvm_unreachable("Control cannot reach here"); 127 | } 128 | 129 | void liftInlineAssembly(CallInst *CI) { 130 | // TODO When we don't have to worry about the old pass manager anymore, move 131 | // the initialization to the pass constructor. 
(Currently there are two 132 | // passes, but only if we're on a recent enough LLVM...) 133 | 134 | Function *F = CI->getFunction(); 135 | Module *M = F->getParent(); 136 | auto triple = M->getTargetTriple(); 137 | 138 | std::string error; 139 | auto target = TargetRegistry::lookupTarget(triple, error); 140 | if (!target) { 141 | errs() << "Warning: can't get target info to lift inline assembly\n"; 142 | return; 143 | } 144 | 145 | auto cpu = F->getFnAttribute("target-cpu").getValueAsString(); 146 | auto features = F->getFnAttribute("target-features").getValueAsString(); 147 | 148 | std::unique_ptr TM( 149 | target->createTargetMachine(triple, cpu, features, TargetOptions(), {})); 150 | auto subTarget = TM->getSubtargetImpl(*F); 151 | if (subTarget == nullptr) 152 | return; 153 | 154 | auto targetLowering = subTarget->getTargetLowering(); 155 | if (targetLowering == nullptr) 156 | return; 157 | 158 | targetLowering->ExpandInlineAsm(CI); 159 | } 160 | 161 | bool instrumentFunction(Function &F) { 162 | auto functionName = F.getName(); 163 | if (functionName == kSymCtorName) 164 | return false; 165 | 166 | DEBUG(errs() << "Symbolizing function "); 167 | DEBUG(errs().write_escaped(functionName) << '\n'); 168 | 169 | SmallVector allInstructions; 170 | allInstructions.reserve(F.getInstructionCount()); 171 | for (auto &I : instructions(F)) 172 | allInstructions.push_back(&I); 173 | 174 | IntrinsicLowering IL(F.getParent()->getDataLayout()); 175 | for (auto *I : allInstructions) { 176 | if (auto *CI = dyn_cast(I)) { 177 | if (canLower(CI)) { 178 | IL.LowerIntrinsicCall(CI); 179 | } else if (isa(CI->getCalledOperand())) { 180 | liftInlineAssembly(CI); 181 | } 182 | } 183 | } 184 | 185 | allInstructions.clear(); 186 | for (auto &I : instructions(F)) 187 | allInstructions.push_back(&I); 188 | 189 | Symbolizer symbolizer(*F.getParent()); 190 | symbolizer.symbolizeFunctionArguments(F); 191 | 192 | for (auto &basicBlock : F) 193 | 
symbolizer.insertBasicBlockNotification(basicBlock); 194 | 195 | for (auto *instPtr : allInstructions) 196 | symbolizer.visit(instPtr); 197 | 198 | symbolizer.finalizePHINodes(); 199 | symbolizer.shortCircuitExpressionUses(); 200 | 201 | // DEBUG(errs() << F << '\n'); 202 | assert(!verifyFunction(F, &errs()) && 203 | "SymbolizePass produced invalid bitcode"); 204 | 205 | return true; 206 | } 207 | 208 | } // namespace 209 | // Legacy pass manager entry points: module-level setup, then per-function instrumentation. 210 | bool SymbolizeLegacyPass::doInitialization(Module &M) { 211 | return instrumentModule(M); 212 | } 213 | 214 | bool SymbolizeLegacyPass::runOnFunction(Function &F) { 215 | return instrumentFunction(F); 216 | } 217 | 218 | #if LLVM_VERSION_MAJOR >= 13 219 | // New pass manager entry points; all analyses are reported preserved only when nothing was changed. 220 | PreservedAnalyses SymbolizePass::run(Function &F, FunctionAnalysisManager &) { 221 | return instrumentFunction(F) ? PreservedAnalyses::none() 222 | : PreservedAnalyses::all(); 223 | } 224 | 225 | PreservedAnalyses SymbolizePass::run(Module &M, ModuleAnalysisManager &) { 226 | return instrumentModule(M) ? PreservedAnalyses::none() 227 | : PreservedAnalyses::all(); 228 | } 229 | 230 | #endif 231 | -------------------------------------------------------------------------------- /compiler/Pass.h: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 | 15 | #ifndef PASS_H 16 | #define PASS_H 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #if LLVM_VERSION_MAJOR >= 13 23 | #include 24 | #endif 25 | /// Symbolization pass for the legacy pass manager (a function pass with module-level initialization). 26 | class SymbolizeLegacyPass : public llvm::FunctionPass { 27 | public: 28 | static char ID; 29 | 30 | SymbolizeLegacyPass() : FunctionPass(ID) {} 31 | 32 | virtual bool doInitialization(llvm::Module &M) override; 33 | virtual bool runOnFunction(llvm::Function &F) override; 34 | }; 35 | 36 | #if LLVM_VERSION_MAJOR >= 13 37 | /// Symbolization pass for the new pass manager; it can run on functions as well as on whole modules. 38 | class SymbolizePass : public llvm::PassInfoMixin { 39 | public: 40 | llvm::PreservedAnalyses run(llvm::Function &F, 41 | llvm::FunctionAnalysisManager &); 42 | llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &); 43 | 44 | static bool isRequired() { return true; } 45 | }; 46 | 47 | #endif 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /compiler/Runtime.cpp: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>. 14 | 15 | #include "Runtime.h" 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace llvm; 22 | 23 | namespace { 24 | /// Get or insert the function `name` with return type `ret` and parameter types `args` in M. 25 | template 26 | SymFnT import(llvm::Module &M, llvm::StringRef name, llvm::Type *ret, 27 | ArgsTy...
args) { 28 | #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11 29 | return M.getOrInsertFunction(name, ret, args...).getCallee(); 30 | #else 31 | return M.getOrInsertFunction(name, ret, args...); 32 | #endif 33 | } 34 | 35 | } // namespace 36 | 37 | Runtime::Runtime(Module &M) { 38 | IRBuilder<> IRB(M.getContext()); 39 | auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext()); 40 | auto *ptrT = IRB.getInt8Ty()->getPointerTo(); 41 | auto *int8T = IRB.getInt8Ty(); 42 | auto *int1T = IRB.getInt1Ty(); 43 | auto *voidT = IRB.getVoidTy(); 44 | 45 | buildInteger = import(M, "_sym_build_integer", ptrT, IRB.getInt64Ty(), int8T); 46 | buildInteger128 = import(M, "_sym_build_integer128", ptrT, IRB.getInt64Ty(), 47 | IRB.getInt64Ty()); 48 | buildFloat = import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), int1T); 49 | buildNullPointer = import(M, "_sym_build_null_pointer", ptrT); 50 | buildTrue = import(M, "_sym_build_true", ptrT); 51 | buildFalse = import(M, "_sym_build_false", ptrT); 52 | buildBool = import(M, "_sym_build_bool", ptrT, int1T); 53 | buildSExt = import(M, "_sym_build_sext", ptrT, ptrT, int8T); 54 | buildZExt = import(M, "_sym_build_zext", ptrT, ptrT, int8T); 55 | buildTrunc = import(M, "_sym_build_trunc", ptrT, ptrT, int8T); 56 | buildBswap = import(M, "_sym_build_bswap", ptrT, ptrT); 57 | buildIntToFloat = 58 | import(M, "_sym_build_int_to_float", ptrT, ptrT, int1T, int1T); 59 | buildFloatToFloat = import(M, "_sym_build_float_to_float", ptrT, ptrT, int1T); 60 | buildBitsToFloat = import(M, "_sym_build_bits_to_float", ptrT, ptrT, int1T); 61 | buildFloatToBits = import(M, "_sym_build_float_to_bits", ptrT, ptrT); 62 | buildFloatToSignedInt = 63 | import(M, "_sym_build_float_to_signed_integer", ptrT, ptrT, int8T); 64 | buildFloatToUnsignedInt = 65 | import(M, "_sym_build_float_to_unsigned_integer", ptrT, ptrT, int8T); 66 | buildFloatAbs = import(M, "_sym_build_fp_abs", ptrT, ptrT); 67 | buildBoolAnd = import(M, "_sym_build_bool_and", ptrT, ptrT, 
ptrT); 68 | buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT); 69 | buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT); 70 | buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT); 71 | buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT); 72 | buildConcat = 73 | import(M, "_sym_concat_helper", ptrT, ptrT, 74 | ptrT); // doesn't follow naming convention for historic reasons 75 | pushPathConstraint = 76 | import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType); 77 | 78 | // Overflow arithmetic 79 | buildAddOverflow = 80 | import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, int1T, int1T); 81 | buildSubOverflow = 82 | import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, int1T, int1T); 83 | buildMulOverflow = 84 | import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, int1T, int1T); 85 | 86 | // Saturating arithmetic 87 | buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT); 88 | buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT); 89 | buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT); 90 | buildUSubSat = import(M, "_sym_build_usub_sat", ptrT, ptrT, ptrT); 91 | buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT); 92 | buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT); 93 | 94 | buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT); 95 | buildFshr = 96 | import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT); 97 | buildAbs = import(M, "_sym_build_abs", ptrT, ptrT); 98 | 99 | setParameterExpression = 100 | import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT); 101 | getParameterExpression = 102 | import(M, "_sym_get_parameter_expression", ptrT, int8T); 103 | setReturnExpression = import(M, "_sym_set_return_expression", voidT, ptrT); 104 | getReturnExpression = import(M, "_sym_get_return_expression", ptrT); 105 | 106 | #define LOAD_BINARY_OPERATOR_HANDLER(constant, name) \ 107 | 
binaryOperatorHandlers[Instruction::constant] = \ 108 | import(M, "_sym_build_" #name, ptrT, ptrT, ptrT); 109 | 110 | LOAD_BINARY_OPERATOR_HANDLER(Add, add) 111 | LOAD_BINARY_OPERATOR_HANDLER(Sub, sub) 112 | LOAD_BINARY_OPERATOR_HANDLER(Mul, mul) 113 | LOAD_BINARY_OPERATOR_HANDLER(UDiv, unsigned_div) 114 | LOAD_BINARY_OPERATOR_HANDLER(SDiv, signed_div) 115 | LOAD_BINARY_OPERATOR_HANDLER(URem, unsigned_rem) 116 | LOAD_BINARY_OPERATOR_HANDLER(SRem, signed_rem) 117 | LOAD_BINARY_OPERATOR_HANDLER(Shl, shift_left) 118 | LOAD_BINARY_OPERATOR_HANDLER(LShr, logical_shift_right) 119 | LOAD_BINARY_OPERATOR_HANDLER(AShr, arithmetic_shift_right) 120 | LOAD_BINARY_OPERATOR_HANDLER(And, and) 121 | LOAD_BINARY_OPERATOR_HANDLER(Or, or) 122 | LOAD_BINARY_OPERATOR_HANDLER(Xor, xor) 123 | 124 | // Floating-point arithmetic 125 | LOAD_BINARY_OPERATOR_HANDLER(FAdd, fp_add) 126 | LOAD_BINARY_OPERATOR_HANDLER(FSub, fp_sub) 127 | LOAD_BINARY_OPERATOR_HANDLER(FMul, fp_mul) 128 | LOAD_BINARY_OPERATOR_HANDLER(FDiv, fp_div) 129 | LOAD_BINARY_OPERATOR_HANDLER(FRem, fp_rem) 130 | 131 | #undef LOAD_BINARY_OPERATOR_HANDLER 132 | 133 | #define LOAD_UNARY_OPERATOR_HANDLER(constant, name) \ 134 | unaryOperatorHandlers[Instruction::constant] = \ 135 | import(M, "_sym_build_" #name, ptrT, ptrT); 136 | 137 | LOAD_UNARY_OPERATOR_HANDLER(FNeg, fp_neg) 138 | 139 | #undef LOAD_UNARY_OPERATOR_HANDLER 140 | 141 | #define LOAD_COMPARISON_HANDLER(constant, name) \ 142 | comparisonHandlers[CmpInst::constant] = \ 143 | import(M, "_sym_build_" #name, ptrT, ptrT, ptrT); 144 | 145 | LOAD_COMPARISON_HANDLER(ICMP_EQ, equal) 146 | LOAD_COMPARISON_HANDLER(ICMP_NE, not_equal) 147 | LOAD_COMPARISON_HANDLER(ICMP_UGT, unsigned_greater_than) 148 | LOAD_COMPARISON_HANDLER(ICMP_UGE, unsigned_greater_equal) 149 | LOAD_COMPARISON_HANDLER(ICMP_ULT, unsigned_less_than) 150 | LOAD_COMPARISON_HANDLER(ICMP_ULE, unsigned_less_equal) 151 | LOAD_COMPARISON_HANDLER(ICMP_SGT, signed_greater_than) 152 | LOAD_COMPARISON_HANDLER(ICMP_SGE, 
signed_greater_equal) 153 | LOAD_COMPARISON_HANDLER(ICMP_SLT, signed_less_than) 154 | LOAD_COMPARISON_HANDLER(ICMP_SLE, signed_less_equal) 155 | 156 | // Floating-point comparisons 157 | LOAD_COMPARISON_HANDLER(FCMP_OGT, float_ordered_greater_than) 158 | LOAD_COMPARISON_HANDLER(FCMP_OGE, float_ordered_greater_equal) 159 | LOAD_COMPARISON_HANDLER(FCMP_OLT, float_ordered_less_than) 160 | LOAD_COMPARISON_HANDLER(FCMP_OLE, float_ordered_less_equal) 161 | LOAD_COMPARISON_HANDLER(FCMP_OEQ, float_ordered_equal) 162 | LOAD_COMPARISON_HANDLER(FCMP_ONE, float_ordered_not_equal) 163 | LOAD_COMPARISON_HANDLER(FCMP_ORD, float_ordered) 164 | LOAD_COMPARISON_HANDLER(FCMP_UNO, float_unordered) 165 | LOAD_COMPARISON_HANDLER(FCMP_UGT, float_unordered_greater_than) 166 | LOAD_COMPARISON_HANDLER(FCMP_UGE, float_unordered_greater_equal) 167 | LOAD_COMPARISON_HANDLER(FCMP_ULT, float_unordered_less_than) 168 | LOAD_COMPARISON_HANDLER(FCMP_ULE, float_unordered_less_equal) 169 | LOAD_COMPARISON_HANDLER(FCMP_UEQ, float_unordered_equal) 170 | LOAD_COMPARISON_HANDLER(FCMP_UNE, float_unordered_not_equal) 171 | 172 | #undef LOAD_COMPARISON_HANDLER 173 | 174 | memcpy = import(M, "_sym_memcpy", voidT, ptrT, ptrT, intPtrType); 175 | memset = import(M, "_sym_memset", voidT, ptrT, ptrT, intPtrType); 176 | memmove = import(M, "_sym_memmove", voidT, ptrT, ptrT, intPtrType); 177 | readMemory = 178 | import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int1T); 179 | writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType, 180 | ptrT, int1T); 181 | buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType); 182 | buildInsert = 183 | import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int1T); 184 | buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(), 185 | IRB.getInt64Ty(), int1T); 186 | 187 | notifyCall = import(M, "_sym_notify_call", voidT, intPtrType); 188 | notifyRet = import(M, "_sym_notify_ret", voidT, intPtrType); 189 | 
notifyBasicBlock = import(M, "_sym_notify_basic_block", voidT, intPtrType); 190 | } 191 | 192 | /// Decide whether a function is called symbolically: true iff its name is in the fixed list below. 193 | bool isInterceptedFunction(const Function &f) { 194 | static const StringSet<> kInterceptedFunctions = { 195 | "malloc", "calloc", "mmap", "mmap64", "open", "read", 196 | "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek", 197 | "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy", 198 | "memset", "strncpy", "strchr", "memcmp", "memmove", "ntohl", 199 | "fgets", "fgetc", "getchar", "bcopy", "bcmp", "bzero"}; 200 | 201 | return (kInterceptedFunctions.count(f.getName()) > 0); 202 | } 203 | -------------------------------------------------------------------------------- /compiler/Runtime.h: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 | 15 | #ifndef RUNTIME_H 16 | #define RUNTIME_H 17 | 18 | #include 19 | #include 20 | 21 | #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11 22 | using SymFnT = llvm::Value *; 23 | #else 24 | using SymFnT = llvm::FunctionCallee; 25 | #endif 26 | 27 | /// Handles to the run-time library functions; the constructor imports them into the given module. 28 | struct Runtime { 29 | Runtime(llvm::Module &M); 30 | 31 | SymFnT buildInteger{}; 32 | SymFnT buildInteger128{}; 33 | SymFnT buildFloat{}; 34 | SymFnT buildNullPointer{}; 35 | SymFnT buildTrue{}; 36 | SymFnT buildFalse{}; 37 | SymFnT buildBool{}; 38 | SymFnT buildSExt{}; 39 | SymFnT buildZExt{}; 40 | SymFnT buildTrunc{}; 41 | SymFnT buildBswap{}; 42 | SymFnT buildIntToFloat{}; 43 | SymFnT buildFloatToFloat{}; 44 | SymFnT buildBitsToFloat{}; 45 | SymFnT buildFloatToBits{}; 46 | SymFnT buildFloatToSignedInt{}; 47 | SymFnT buildFloatToUnsignedInt{}; 48 | SymFnT buildFloatAbs{}; 49 | SymFnT buildBoolAnd{}; 50 | SymFnT buildBoolOr{}; 51 | SymFnT buildBoolXor{}; 52 | SymFnT buildBoolToBit{}; 53 | SymFnT buildBitToBool{}; 54 | SymFnT buildAddOverflow{}; 55 | SymFnT buildSubOverflow{}; 56 | SymFnT buildMulOverflow{}; 57 | SymFnT buildSAddSat{}; 58 | SymFnT buildUAddSat{}; 59 | SymFnT buildSSubSat{}; 60 | SymFnT buildUSubSat{}; 61 | SymFnT buildSShlSat{}; 62 | SymFnT buildUShlSat{}; 63 | SymFnT buildFshl{}; 64 | SymFnT buildFshr{}; 65 | SymFnT buildAbs{}; 66 | SymFnT buildConcat{}; 67 | SymFnT pushPathConstraint{}; 68 | SymFnT getParameterExpression{}; 69 | SymFnT setParameterExpression{}; 70 | SymFnT setReturnExpression{}; 71 | SymFnT getReturnExpression{}; 72 | SymFnT memcpy{}; 73 | SymFnT memset{}; 74 | SymFnT memmove{}; 75 | SymFnT readMemory{}; 76 | SymFnT writeMemory{}; 77 | SymFnT buildZeroBytes{}; 78 | SymFnT buildInsert{}; 79 | SymFnT buildExtract{}; 80 | SymFnT notifyCall{}; 81 | SymFnT notifyRet{}; 82 | SymFnT notifyBasicBlock{}; 83 | 84 | /// Mapping from comparison predicates (icmp and fcmp) to the functions that build the corresponding 85 | /// symbolic expressions.
86 | std::array comparisonHandlers{}; 87 | 88 | /// Mapping from binary operators to the functions that build the 89 | /// corresponding symbolic expressions. 90 | std::array binaryOperatorHandlers{}; 91 | 92 | /// Mapping from unary operators to the functions that build the 93 | /// corresponding symbolic expressions. 94 | std::array unaryOperatorHandlers{}; 95 | }; 96 | /// Decide whether a function is called symbolically (defined in Runtime.cpp). 97 | bool isInterceptedFunction(const llvm::Function &f); 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /compiler/Symbolizer.h: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see <https://www.gnu.org/licenses/>. 14 | 15 | #ifndef SYMBOLIZE_H 16 | #define SYMBOLIZE_H 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "Runtime.h" 26 | 27 | class Symbolizer : public llvm::InstVisitor { 28 | public: 29 | explicit Symbolizer(llvm::Module &M) 30 | : runtime(M), dataLayout(M.getDataLayout()), 31 | ptrBits(M.getDataLayout().getPointerSizeInBits()), 32 | intPtrType(M.getDataLayout().getIntPtrType(M.getContext())) {} 33 | 34 | /// Insert code to obtain the symbolic expressions for the function arguments.
35 | void symbolizeFunctionArguments(llvm::Function &F); 36 | 37 | /// Insert a call to the run-time library to notify it of the basic block 38 | /// entry. 39 | void insertBasicBlockNotification(llvm::BasicBlock &B); 40 | 41 | /// Finish the processing of PHI nodes. 42 | /// 43 | /// This assumes that there is a dummy PHI node for each such instruction in 44 | /// the function, and that we have recorded all PHI nodes in the member 45 | /// phiNodes. In other words, the function has to be called after all 46 | /// instructions have been processed in order to fix up PHI nodes. See the 47 | /// documentation of member phiNodes for why we process PHI nodes in two 48 | /// steps. 49 | /// 50 | /// Important! Calling this function invalidates symbolicExpressions. 51 | void finalizePHINodes(); 52 | 53 | /// Rewrite symbolic computation to only occur if some operand is symbolic. 54 | /// 55 | /// We don't want to build up formulas for symbolic computation if all 56 | /// operands are concrete. Therefore, this function rewrites all places that 57 | /// build up formulas (as recorded during the main pass) to skip formula 58 | /// construction if all operands are concrete. Additionally, it inserts code 59 | /// that constructs formulas for concrete operands if necessary. 60 | /// 61 | /// The basic idea is to transform code like this... 62 | /// 63 | /// res_expr = call _sym_some_computation(expr1, expr2, ...) 64 | /// res = some_computation(val1, val2, ...) 65 | /// 66 | /// ...into this: 67 | /// 68 | /// start: 69 | /// expr1_symbolic = icmp ne 0, expr1 70 | /// ... 71 | /// some_symbolic = or expr1_symbolic, ... 72 | /// br some_symbolic, check_arg1, end 73 | /// 74 | /// check_arg1: 75 | /// need_expr1 = icmp eq 0, expr1 76 | /// br need_expr1, create_expr1, check_arg2 77 | /// 78 | /// create_expr1: 79 | /// new_expr1 = ... 
(based on val1) 80 | /// br check_arg2 81 | /// 82 | /// check_arg2: 83 | /// good_expr1 = phi [expr1, check_arg1], [new_expr1, create_expr1] 84 | /// need_expr2 = ... 85 | /// ... 86 | /// 87 | /// sym_computation: 88 | /// sym_expr = call _sym_some_computation(good_expr1, good_expr2, ...) 89 | /// br end 90 | /// 91 | /// end: 92 | /// final_expr = phi [null, start], [sym_expr, sym_computation] 93 | /// 94 | /// The resulting code is much longer but avoids solver calls for all 95 | /// operations without symbolic data. 96 | void shortCircuitExpressionUses(); 97 | 98 | void handleIntrinsicCall(llvm::CallBase &I); 99 | void handleInlineAssembly(llvm::CallInst &I); 100 | void handleFunctionCall(llvm::CallBase &I, llvm::Instruction *returnPoint); 101 | 102 | // 103 | // Implementation of InstVisitor 104 | // 105 | void visitBinaryOperator(llvm::BinaryOperator &I); 106 | void visitUnaryOperator(llvm::UnaryOperator &I); 107 | void visitSelectInst(llvm::SelectInst &I); 108 | void visitCmpInst(llvm::CmpInst &I); 109 | void visitReturnInst(llvm::ReturnInst &I); 110 | void visitBranchInst(llvm::BranchInst &I); 111 | void visitIndirectBrInst(llvm::IndirectBrInst &I); 112 | void visitCallInst(llvm::CallInst &I); 113 | void visitInvokeInst(llvm::InvokeInst &I); 114 | void visitAllocaInst(llvm::AllocaInst &); 115 | void visitLoadInst(llvm::LoadInst &I); 116 | void visitStoreInst(llvm::StoreInst &I); 117 | void visitGetElementPtrInst(llvm::GetElementPtrInst &I); 118 | void visitBitCastInst(llvm::BitCastInst &I); 119 | void visitTruncInst(llvm::TruncInst &I); 120 | void visitIntToPtrInst(llvm::IntToPtrInst &I); 121 | void visitPtrToIntInst(llvm::PtrToIntInst &I); 122 | void visitSIToFPInst(llvm::SIToFPInst &I); 123 | void visitUIToFPInst(llvm::UIToFPInst &I); 124 | void visitFPExtInst(llvm::FPExtInst &I); 125 | void visitFPTruncInst(llvm::FPTruncInst &I); 126 | void visitFPToSI(llvm::FPToSIInst &I); 127 | void visitFPToUI(llvm::FPToUIInst &I); 128 | void 
visitCastInst(llvm::CastInst &I); 129 | void visitPHINode(llvm::PHINode &I); 130 | void visitInsertValueInst(llvm::InsertValueInst &I); 131 | void visitExtractValueInst(llvm::ExtractValueInst &I); 132 | void visitSwitchInst(llvm::SwitchInst &I); 133 | void visitUnreachableInst(llvm::UnreachableInst &); 134 | void visitInstruction(llvm::Instruction &I); 135 | 136 | private: 137 | static constexpr unsigned kExpectedMaxPHINodesPerFunction = 16; 138 | static constexpr unsigned kExpectedSymbolicArgumentsPerComputation = 2; 139 | 140 | /// A symbolic input. 141 | struct Input { 142 | llvm::Value *concreteValue; 143 | unsigned operandIndex; 144 | llvm::Instruction *user; 145 | 146 | Input() = default; 147 | 148 | Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user) 149 | : concreteValue(concrete), operandIndex(idx), user(user) { 150 | assert(getSymbolicOperand()->getType() == 151 | llvm::Type::getInt8Ty(user->getContext())->getPointerTo()); 152 | } 153 | 154 | llvm::Value *getSymbolicOperand() const { 155 | return user->getOperand(operandIndex); 156 | } 157 | 158 | void replaceOperand(llvm::Value *newOperand) { 159 | user->setOperand(operandIndex, newOperand); 160 | } 161 | }; 162 | 163 | /// A symbolic computation with its inputs. 164 | struct SymbolicComputation { 165 | llvm::Instruction *firstInstruction = nullptr, *lastInstruction = nullptr; 166 | llvm::SmallVector inputs; 167 | 168 | SymbolicComputation() = default; 169 | 170 | SymbolicComputation(llvm::Instruction *first, llvm::Instruction *last, 171 | llvm::ArrayRef in) 172 | : firstInstruction(first), lastInstruction(last), 173 | inputs(in.begin(), in.end()) {} 174 | 175 | /// Append another symbolic computation to this one. 176 | /// 177 | /// The computation that is to be appended must occur after the one that 178 | /// this method is called on. 
179 | void merge(const SymbolicComputation &other) { 180 | if (&other == this) 181 | return; 182 | 183 | if (firstInstruction == nullptr) 184 | firstInstruction = other.firstInstruction; 185 | lastInstruction = other.lastInstruction; 186 | 187 | for (const auto &input : other.inputs) 188 | inputs.push_back(input); 189 | } 190 | 191 | friend llvm::raw_ostream & 192 | operator<<(llvm::raw_ostream &out, 193 | const Symbolizer::SymbolicComputation &computation) { 194 | out << "\nComputation starting at " << *computation.firstInstruction 195 | << "\n...ending at " << *computation.lastInstruction 196 | << "\n...with inputs:\n"; 197 | for (const auto &input : computation.inputs) { 198 | out << '\t' << *input.concreteValue << " => " << *input.user << '\n'; 199 | } 200 | return out; 201 | } 202 | }; 203 | 204 | /// Create an expression that represents the concrete value. 205 | llvm::Instruction *createValueExpression(llvm::Value *V, 206 | llvm::IRBuilder<> &IRB); 207 | 208 | /// Get the (already created) symbolic expression for a value. 209 | llvm::Value *getSymbolicExpression(llvm::Value *V) const { 210 | auto exprIt = symbolicExpressions.find(V); 211 | return (exprIt != symbolicExpressions.end()) ? exprIt->second : nullptr; 212 | } 213 | 214 | llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) const { 215 | auto *expr = getSymbolicExpression(V); 216 | if (expr == nullptr) 217 | return llvm::ConstantPointerNull::get( 218 | llvm::IntegerType::getInt8Ty(V->getContext())->getPointerTo()); 219 | return expr; 220 | } 221 | 222 | bool isLittleEndian(llvm::Type *type) { 223 | return (!type->isAggregateType() && dataLayout.isLittleEndian()); 224 | } 225 | 226 | /// Like buildRuntimeCall, but the call is always generated. 227 | SymbolicComputation forceBuildRuntimeCall( 228 | llvm::IRBuilder<> &IRB, SymFnT function, 229 | llvm::ArrayRef> args) const; 230 | 231 | /// Create a call to the specified function in the run-time library. 
232 | /// 233 | /// Each argument is specified as a pair of Value and Boolean. The Boolean 234 | /// specifies whether the Value is a symbolic argument, in which case the 235 | /// corresponding symbolic expression will be passed to the run-time function. 236 | /// Moreover, the use of symbolic expressions will be recorded in the 237 | /// resulting SymbolicComputation. If all symbolic arguments are known to be 238 | /// concrete (e.g., because they are compile-time constants), no call 239 | /// instruction is emitted and the function returns null. 240 | std::optional 241 | buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, 242 | llvm::ArrayRef> args) const { 243 | if (std::all_of(args.begin(), args.end(), 244 | [this](std::pair arg) { 245 | return (getSymbolicExpression(arg.first) == nullptr); 246 | })) { 247 | return {}; 248 | } 249 | 250 | return forceBuildRuntimeCall(IRB, function, args); 251 | } 252 | 253 | /// Convenience overload that treats all arguments as symbolic. 254 | std::optional 255 | buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function, 256 | llvm::ArrayRef symbolicArgs) const { 257 | std::vector> args; 258 | for (const auto &arg : symbolicArgs) { 259 | args.emplace_back(arg, true); 260 | } 261 | 262 | return buildRuntimeCall(IRB, function, args); 263 | } 264 | 265 | /// Register the result of the computation as the symbolic expression 266 | /// corresponding to the concrete value and record the computation for 267 | /// short-circuiting. 268 | void registerSymbolicComputation(const SymbolicComputation &computation, 269 | llvm::Value *concrete = nullptr) { 270 | if (concrete != nullptr) 271 | symbolicExpressions[concrete] = computation.lastInstruction; 272 | expressionUses.push_back(computation); 273 | } 274 | 275 | /// Convenience overload for chaining with buildRuntimeCall. 
276 | void registerSymbolicComputation( 277 | const std::optional &computation, 278 | llvm::Value *concrete = nullptr) { 279 | if (computation) 280 | registerSymbolicComputation(*computation, concrete); 281 | } 282 | 283 | /// Generate code that makes the solver try an alternative value for V. 284 | void tryAlternative(llvm::IRBuilder<> &IRB, llvm::Value *V); 285 | 286 | /// Helper to use a pointer to a host object as integer (truncating!). 287 | /// 288 | /// Note that the conversion will truncate the most significant bits of the 289 | /// pointer if the host uses larger addresses than the target. Therefore, use 290 | /// this function only when such loss is acceptable (e.g., when generating 291 | /// site identifiers to be passed to the backend, where collisions of the 292 | /// least significant bits are reasonably unlikely). 293 | /// 294 | /// Why not do a lossless conversion and make the backend accept 64-bit 295 | /// integers? 296 | /// 297 | /// 1. Performance: 32-bit architectures will process 32-bit values faster 298 | /// than 64-bit values. 299 | /// 300 | /// 2. Pragmatism: Changing the backend to accept and process 64-bit values 301 | /// would require modifying code that we don't control (in the case of Qsym). 302 | llvm::ConstantInt *getTargetPreferredInt(void *pointer) { 303 | return llvm::ConstantInt::get(intPtrType, 304 | reinterpret_cast(pointer)); 305 | } 306 | 307 | /// Compute the offset of a member in a (possibly nested) aggregate. 308 | uint64_t aggregateMemberOffset(llvm::Type *aggregateType, 309 | llvm::ArrayRef indices) const; 310 | 311 | /// Emit code that converts the bit-vector expression represented by I to an 312 | /// expression that is appropriate for T; return the instruction that computes 313 | /// the result (which may be I if no conversion is needed). 314 | /// 315 | /// The solver doesn't represent all values as bit vectors. 
For example, 316 | /// floating-point values and Booleans are of separate kinds, so we emit code 317 | /// that changes the solver kind of the expression to whatever is needed. 318 | llvm::Instruction *convertBitVectorExprForType(llvm::IRBuilder<> &IRB, 319 | llvm::Instruction *I, 320 | llvm::Type *T) const; 321 | 322 | /// Emit code that converts the expression Expr for V to a bit-vector 323 | /// expression. Return the SymbolicComputation representing the conversion 324 | /// (if a conversion is necessary); the last instruction computes the result. 325 | /// 326 | /// This is the inverse operation of convertBitVectorExprForType (see details 327 | /// there). 328 | std::optional 329 | convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, llvm::Value *V, 330 | llvm::Value *Expr) const; 331 | 332 | const Runtime runtime; 333 | 334 | /// The data layout of the currently processed module. 335 | const llvm::DataLayout &dataLayout; 336 | 337 | /// The width in bits of pointers in the module. 338 | unsigned ptrBits; 339 | 340 | /// An integer type at least as wide as a pointer. 341 | llvm::IntegerType *intPtrType; 342 | 343 | /// Mapping from SSA values to symbolic expressions. 344 | /// 345 | /// For pointer values, the stored value is an expression describing the value 346 | /// of the pointer itself (i.e., the address, not the referenced value). For 347 | /// structure values, the expression is a single large bit vector. 348 | /// 349 | /// TODO This member adds a lot of complexity: various methods rely on it, and 350 | /// finalizePHINodes invalidates it. We may want to pass the map around 351 | /// explicitly. 352 | llvm::ValueMap symbolicExpressions; 353 | 354 | /// A record of all PHI nodes in this function. 355 | /// 356 | /// PHI nodes may refer to themselves, in which case we run into an infinite 357 | /// loop when trying to generate symbolic expressions recursively. 
Therefore, 358 | /// we only insert a dummy symbolic expression for each PHI node and fix it 359 | /// after all instructions have been processed. 360 | llvm::SmallVector phiNodes; 361 | 362 | /// A record of expression uses that can be short-circuited. 363 | /// 364 | /// Most values in a program are concrete, even if they're not constant (in 365 | /// which case we would know that they're concrete at compile time already). 366 | /// There is no point in building up formulas if all values involved in a 367 | /// computation are concrete, so we short-circuit those cases. Since this 368 | /// process requires splitting basic blocks, we can't do it during the main 369 | /// analysis phase (because InstVisitor gets out of step if we try). 370 | /// Therefore, we keep a record of all the places that construct expressions 371 | /// and insert the fast path later. 372 | std::vector expressionUses; 373 | }; 374 | 375 | #endif 376 | -------------------------------------------------------------------------------- /compiler/sym++.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file is part of SymCC. 4 | # 5 | # SymCC is free software: you can redistribute it and/or modify it under the 6 | # terms of the GNU General Public License as published by the Free Software 7 | # Foundation, either version 3 of the License, or (at your option) any later 8 | # version. 9 | # 10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License along with 15 | # SymCC. If not, see . 
16 | 17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYMCC_RUNTIME_DIR@}" 18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYMCC_RUNTIME_32BIT_DIR@}" 19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libsymcc.so" 20 | libcxx_var=SYMCC_LIBCXX_PATH 21 | compiler="${SYMCC_CLANGPP:-@CLANGPP_BINARY@}" 22 | 23 | # Find out if we're cross-compiling for a 32-bit architecture 24 | runtime_dir="$runtime_64bit_dir" 25 | for arg in "$@"; do 26 | if [[ $arg == "-m32" ]]; then 27 | if [ -z "$runtime_32bit_dir" ]; then 28 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2 29 | exit 255 30 | else 31 | runtime_dir="$runtime_32bit_dir" 32 | libcxx_var=SYMCC_LIBCXX_32BIT_PATH 33 | break 34 | fi 35 | fi 36 | done 37 | 38 | if [[ -v SYMCC_REGULAR_LIBCXX ]]; then 39 | stdlib_cflags= 40 | stdlib_ldflags= 41 | elif [[ ! -v $libcxx_var ]]; then 42 | >&2 echo "Please set $libcxx_var to the directory containing libc++ or confirm usage of the system library by setting SYMCC_REGULAR_LIBCXX!" 43 | exit 255 44 | else 45 | # It is important that the resulting binaries load libstdc++ before libc++; 46 | # otherwise our backend calls the instrumented library in cases where 47 | # exported names collide. 
48 | stdlib_cflags="-isystem ${!libcxx_var}/include/c++/v1 -nostdlib++" 49 | stdlib_ldflags="-L${!libcxx_var}/lib -Wl,-rpath,${!libcxx_var}/lib -lstdc++ -lc++ -stdlib=libc++" 50 | fi 51 | 52 | if [ $# -eq 0 ]; then 53 | echo "Use sym++ as a drop-in replacement for clang++, e.g., sym++ -O2 -o foo foo.cpp" >&2 54 | exit 1 55 | fi 56 | 57 | exec $compiler \ 58 | @CLANG_LOAD_PASS@"$pass" \ 59 | $stdlib_cflags \ 60 | "$@" \ 61 | $stdlib_ldflags \ 62 | -L"$runtime_dir" \ 63 | -lsymcc-rt \ 64 | -Wl,-rpath,"$runtime_dir" \ 65 | -Qunused-arguments 66 | -------------------------------------------------------------------------------- /compiler/symcc.in: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This file is part of SymCC. 4 | # 5 | # SymCC is free software: you can redistribute it and/or modify it under the 6 | # terms of the GNU General Public License as published by the Free Software 7 | # Foundation, either version 3 of the License, or (at your option) any later 8 | # version. 9 | # 10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License along with 15 | # SymCC. If not, see . 
16 | 17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYMCC_RUNTIME_DIR@}" 18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYMCC_RUNTIME_32BIT_DIR@}" 19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libsymcc.so" 20 | compiler="${SYMCC_CLANG:-@CLANG_BINARY@}" 21 | 22 | # Find out if we're cross-compiling for a 32-bit architecture 23 | runtime_dir="$runtime_64bit_dir" 24 | for arg in "$@"; do 25 | if [[ $arg == "-m32" ]]; then 26 | if [ -z "$runtime_32bit_dir" ]; then 27 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2 28 | exit 255 29 | else 30 | runtime_dir="$runtime_32bit_dir" 31 | break 32 | fi 33 | fi 34 | done 35 | 36 | if [ $# -eq 0 ]; then 37 | echo "Use symcc as a drop-in replacement for clang, e.g., symcc -O2 -o foo foo.c" >&2 38 | exit 1 39 | fi 40 | 41 | exec "$compiler" \ 42 | @CLANG_LOAD_PASS@"$pass" \ 43 | "$@" \ 44 | -L"$runtime_dir" \ 45 | -lsymcc-rt \ 46 | -Wl,-rpath,"$runtime_dir" \ 47 | -Qunused-arguments 48 | -------------------------------------------------------------------------------- /docs/32-bit.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Compiling 32-bit programs on a 64-bit host 4 | (and cross-compilation in general) 5 | 6 | 7 | In theory, SymCC can use clang to cross-compile programs for any architecture 8 | that LLVM supports. Note, however, that this requires cross-compiling the 9 | symbolic backend and its dependencies as well, and then linking the right 10 | backend into the target programs. We currently provide automatic handling only 11 | for the common case of compiling 32-bit libraries and programs on a 64-bit host 12 | machine. In all other cases, we recommend building SymCC on the target machine, 13 | so that cross-compilation is not needed. 14 | 15 | Let's assume that you're running SymCC on a 64-bit x86 machine, wanting to 16 | create 32-bit binaries from some code under test. 
First of all, make sure that 17 | 32-bit versions of the backend's dependencies are available: LLVM (usually 18 | obtainable via packages of your Linux distribution) and Z3 (which is reasonably 19 | easy to build following its CMake instructions and exporting CFLAGS="-m32" and 20 | CXXFLAGS="-m32"). Beware of Z3 version 4.8.7 - it contains a bug that breaks the 21 | 32-bit build with an error related to "__builtin_ctz" (see 22 | https://github.com/Z3Prover/z3/issues/2727). If you build Z3 yourself, note that 23 | it is sufficient to point SymCC to the build directory - there is no need to 24 | install the 32-bit version of Z3 in your system. 25 | 26 | Once the dependencies for 32-bit SymCC are available (as well as the 64-bit 27 | dependencies mentioned in the main README), configure and build SymCC as usual 28 | but add "-DTARGET_32BIT=ON" to the CMake invocation. If the build system doesn't 29 | find your 32-bit versions of LLVM and Z3, specify their locations with 30 | "-DLLVM_32BIT_DIR=/some/path" and "-DZ3_32BIT_DIR=/some/other/path", 31 | respectively - analogously to how you would hint at the 64-bit versions. 32 | 33 | The resulting "symcc" and "sym++" scripts work like regular SymCC, but they 34 | additionally understand the "-m32" switch, which tells Clang to build 32-bit 35 | artifacts. If you build anything with "-m32", SymCC will make sure that the 36 | 32-bit version of the symbolic backend is linked to it instead of the 64-bit 37 | variant that would normally be used. Note that, in order to compile C++ code 38 | with "sym++" in 32-bit mode, you also need to build a 32-bit version of libc++ 39 | (following the instructions for the 64-bit build from docs/C++.txt but 40 | additionally passing "-DLLVM_BUILD_32_BITS=ON" to CMake); communicate its 41 | location to SymCC via the environment variable SYMCC_LIBCXX_32BIT_PATH.
42 | 43 | If your 64-bit host is equipped to run 32-bit binaries, you can simply execute 44 | the instrumented programs produced by SymCC like any other program. 45 | -------------------------------------------------------------------------------- /docs/C++.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Compiling C++ 4 | 5 | 6 | SymCC has full support for C++ code and provides a wrapper "sym++" around 7 | clang++. Since C++ programs typically depend on the C++ standard library, we 8 | have two options when building them with SymCC: 9 | 10 | 1. Use the C++ standard library provided by the system. This is the easiest, 11 | requiring no additional effort, but it has an important drawback: data that 12 | passes through the standard library will be concretized, i.e., we lose track 13 | of the corresponding symbolic expressions. 14 | 2. The alternative is to build an instrumented C++ standard library. This means 15 | that we can track data through the library, but it requires building the 16 | library and compiling all code against it. 17 | 18 | We discuss both approaches in more detail below. 19 | 20 | 21 | Building against the system's C++ standard library 22 | 23 | 24 | In order to use the regular (uninstrumented) C++ standard library that the 25 | system provides, just call sym++ as a drop-in replacement for clang++: 26 | 27 | $ export SYMCC_REGULAR_LIBCXX=yes 28 | $ sym++ -o myprogram mysource.cpp 29 | $ ./myprogram 30 | 31 | The program will execute and produce alternative outputs as usual with SymCC, 32 | but it will not be able to trace operations that happen in C++ standard classes, 33 | such as std::vector. 34 | 35 | 36 | Instrumenting the C++ standard library 37 | 38 | 39 | Building an instrumented C++ standard library is a one-time effort; the library 40 | can then be used in all subsequent C++ compilations. We use "libc++", the LLVM 41 | project's implementation of the standard library. 
First, get the source code: 42 | 43 | $ git clone --depth 1 https://github.com/llvm/llvm-project.git 44 | 45 | Then build the library with SymCC: 46 | 47 | $ mkdir libcxx_symcc 48 | $ cd libcxx_symcc 49 | $ export SYMCC_REGULAR_LIBCXX=yes 50 | $ export SYMCC_NO_SYMBOLIC_INPUT=yes 51 | $ cmake -G Ninja /path-to-llvm-project/llvm \ 52 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \ 53 | -DLLVM_TARGETS_TO_BUILD="X86" \ 54 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \ 55 | -DCMAKE_BUILD_TYPE=Release \ 56 | -DCMAKE_INSTALL_PREFIX=/some/convenient/location \ 57 | -DCMAKE_C_COMPILER=/path-to-symcc-with-simple-backend/symcc \ 58 | -DCMAKE_CXX_COMPILER=/path-to-symcc-with-simple-backend/sym++ 59 | $ ninja distribution 60 | $ ninja install-distribution 61 | $ unset SYMCC_REGULAR_LIBCXX SYMCC_NO_SYMBOLIC_INPUT 62 | 63 | Note the two environment variables: SYMCC_REGULAR_LIBCXX avoids a 64 | chicken-and-egg problem - without it, SymCC would expect to compile against the 65 | instrumented C++ standard library. SYMCC_NO_SYMBOLIC_INPUT disables symbolic 66 | handling of input data - the build process of libc++ involves the creation of 67 | helper programs that are subsequently run, and we do not want them to perform 68 | symbolic analysis. 69 | 70 | A word on the choice of backends: While the instrumented libc++ will work with 71 | both backends, building it currently doesn't work with the QSYM backend. Just 72 | use the simple backend for the build process - there is no problem in using the 73 | library with the QSYM backend later. For very interested readers, here is an 74 | explanation of the problem: libc++ is an LLVM project and as such uses LLVM 75 | support code. During the build process, it builds a code-generation tool that is 76 | subsequently invoked (hence the recommendation to set SYMCC_NO_SYMBOLIC_INPUT). 77 | At run-time, the tool loads code built from the LLVM sources we obtained via git 78 | above. Why is this a problem for the QSYM backend? 
QSYM uses support code from 79 | LLVM as well, which means that the QSYM backend is linked against your system's 80 | LLVM libraries. If we build libc++ with the QSYM backend, the code-generation 81 | tool loads the QSYM code at run time and, via dependency resolution, also the 82 | system's LLVM installation. The end result is that we have two versions of LLVM 83 | support code in the same process - the system version and the one built from git 84 | - which will most likely collide. Using the simple backend avoids the problem 85 | because it doesn't depend on the system installation of LLVM. 86 | 87 | Once the library is ready, tell SymCC where to find it and compile C++ code as 88 | usual: 89 | 90 | $ export SYMCC_LIBCXX_PATH=/path-provided-as-cmake-install-prefix-for-libcxx 91 | $ sym++ -o myprogram mysource.cpp 92 | $ ./myprogram 93 | 94 | Now the program will use the instrumented C++ standard library, which enables it 95 | to trace computations inside the library. Note that you need to compile all code 96 | against the instrumented standard library - attempts to mix it with code 97 | compiled against the system's C++ standard library will lead to linker errors. 98 | And if you're so brave as to mix it with code compiled against an uninstrumented 99 | libc++, a run-time crash is the best you can hope for... 100 | -------------------------------------------------------------------------------- /docs/Concreteness.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Concreteness Checks 4 | 5 | 6 | If we do not distinguish between symbolic and concrete values in the program 7 | under test, then we end up passing every computation to the solver, including 8 | concrete ones. Since all parameters are known in the purely concrete case, the 9 | solver will just repeat the computation (most likely in a less efficient way) 10 | and conclude that there is no way to change its outcome. 
We can avoid such 11 | wasted effort by only passing symbolic computations to the solver. 12 | 13 | There are two stages at which data can be identified as concrete: 14 | 1. At compile time, if a value is known to be a constant we can conclude that it 15 | will always be concrete at run time. 16 | 2. At run time, a value that is not a constant may still turn out to be 17 | concrete. For example, data read from memory can be symbolic or concrete. 18 | 19 | If we detect in the compiler pass that a value is a compile-time constant (case 20 | 1 above), we do not emit code for symbolic handling at all. However, for any 21 | other type of data, we need to generate code that handles the case of it being 22 | symbolic at run time. Concretely (no pun intended), we mark concrete values at 23 | run time by setting their corresponding symbolic expression in shadow memory to 24 | null. This makes it very cheap to check concreteness during execution: just run 25 | a null check on the symbolic expression. 26 | 27 | The code that we inject into the program under test performs concreteness checks 28 | on the arguments of each instruction. For example, when the program adds two 29 | values, the generated code performs the addition and additionally represents it 30 | symbolically according to the concreteness of the two addends. There are 31 | multiple cases to distinguish: 32 | 1. If all arguments of a computation are concrete, we can skip symbolic 33 | processing altogether and just set the result expression to null, indicating 34 | that the result is a concrete value. 35 | 2. If at least one argument is symbolic, we need to generate an expression 36 | representing the result. Therefore, we generate expressions for all arguments 37 | (since the concrete arguments will have null expressions) and call into the 38 | run-time support library to produce an expression according to the performed 39 | computation. 
There are several opportunities for optimization, e.g., when 40 | the computation only has a single argument that is not a compile-time 41 | constant we do not need to check it for concreteness again. 42 | 43 | It is important to note that these checks cannot be performed by the compiler 44 | because the concreteness of non-constant data is not known at compile time. 45 | Instead, the compiler emits code that performs the required checks at run time 46 | and acts accordingly. 47 | -------------------------------------------------------------------------------- /docs/Configuration.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Configuration options 4 | 5 | 6 | SymCC is configured at two different stages: 7 | 8 | 1. At compile time, you decide which features to enable, which mainly affects 9 | compilation time and the set of dependencies. This is done via arguments to 10 | CMake. 11 | 12 | 2. When you run programs that have been compiled with SymCC, the environment 13 | variables control various aspects of the execution and analysis. 14 | 15 | We list all available options for each stage in turn. 16 | 17 | 18 | Compile-time options 19 | 20 | 21 | Each of these is passed to CMake with "-D" when configuring the build: 22 | 23 | - QSYM_BACKEND=ON/OFF (default OFF): Compile either the QSYM backend or our 24 | simple Z3 wrapper (see docs/Backends.txt for details). Note that binaries 25 | produced by the SymCC compiler are backend-agnostic; you can use 26 | LD_LIBRARY_PATH to switch between backends per execution. 27 | 28 | - TARGET_32BIT=ON/OFF (default OFF): Enable support for 32-bit compilation on 29 | 64-bit hosts. This will essentially make the compiler switch "-m32" work as 30 | expected; see docs/32-bit.txt for details. 31 | 32 | - LLVM_DIR/LLVM_32BIT_DIR (default empty): Hints for the build system to find 33 | LLVM if it's in a non-standard location. 
34 | 35 | - Z3_DIR/Z3_32BIT_DIR (default empty): Hints for the build system to find Z3 if 36 | it's in a non-standard location. 37 | 38 | - Z3_TRUST_SYSTEM_VERSION (default OFF): Trust that the system provides a 39 | suitable version of Z3 if the corresponding CMake module can't be found. Use 40 | this with Linux distributions that don't package the CMake module but still 41 | ship an otherwise usable development setup (e.g., Fedora before F33). Note 42 | that we can't check the Z3 version for compatibility in this case, so prepare 43 | for compiler errors if the system-wide installation of Z3 is too old. 44 | 45 | 46 | Run-time options 47 | 48 | 49 | "Run time" refers to the time when you run programs compiled with SymCC, not 50 | when you run SymCC itself. In other words, these are settings that you can 51 | change on every execution of an instrumented program. They are specified via 52 | environment variables. 53 | 54 | - SYMCC_NO_SYMBOLIC_INPUT=0/1 (default 0): When set to 1, input is never marked 55 | as symbolic; in other words, instrumented programs will run just like their 56 | uninstrumented counterparts. 57 | 58 | - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC 59 | will store new inputs (QSYM backend only). If you prefer to handle them 60 | programmatically, make your program call symcc_set_test_case_handler; the 61 | handler will be called instead of the default handler each time the backend 62 | generates a new input. 63 | 64 | - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from 65 | standard input as symbolic; when set to a file name, any data read from that 66 | file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1. 67 | 68 | - SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under 69 | test to communicate symbolic inputs with one or more calls to 70 | symcc_make_symbolic. Can't be combined with SYMCC_INPUT_FILE. 
Ignored if 71 | SYMCC_NO_SYMBOLIC_INPUT is set to 1. 72 | 73 | - SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the 74 | file (or overwrites any existing file!) and uses it to log backend activity 75 | including solver output (simple backend only). 76 | 77 | - SYMCC_ENABLE_LINEARIZATION=0/1 (default 0): Enable QSYM's basic-block pruning, 78 | a call-stack-aware strategy to reduce solver queries when executing code 79 | repeatedly (QSYM backend only). See the QSYM paper for details; highly 80 | recommended for fuzzing and enabled automatically by the fuzzing helper. 81 | 82 | - SYMCC_AFL_COVERAGE_MAP (default empty): When set to the file name of an 83 | AFL-style coverage map, load the map before executing the target program and 84 | use it to skip solver queries for paths that have already been covered (QSYM 85 | backend only). The map is updated in place, so beware of races when running 86 | multiple instances of SymCC! The fuzzing helper uses this to remember the 87 | state of exploration across multiple executions of the target program. 88 | Warning: This setting has a misleading name - while the format of the map 89 | follows (classic) AFL, the variable isn't meant to point at a map file that 90 | AFL uses too! 91 | 92 | (Most people should stop reading here.) 93 | 94 | 95 | Advanced options 96 | 97 | 98 | There is actually a third category of options: when compiling with SymCC, you 99 | can specify the location of its various components via environment variables. 100 | This is not necessary in most cases because the build system makes sure that all 101 | components know about each other; however, in some advanced setups you may need 102 | to move files around after building them, and in that case, you can use the 103 | variables documented below to communicate the new locations: 104 | 105 | - SYMCC_RUNTIME_DIR and SYMCC_RUNTIME32_DIR: The directory that contains the 106 | run-time support library (i.e., libsymcc-rt.so).
107 | 108 | - SYMCC_PASS_DIR: The directory containing the compiler pass (i.e., 109 | libsymcc.so). 110 | 111 | - SYMCC_CLANG and SYMCC_CLANGPP: The clang and clang++ binaries to use during 112 | compilation. Be very careful with this one: if the version of the compiler you 113 | specify here doesn't match the one you built SymCC against, you'll most likely 114 | get linker errors. 115 | -------------------------------------------------------------------------------- /docs/Experiments.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Experiments 4 | 5 | 6 | Here we document how to reproduce the experiments that we show in the paper 7 | "Symbolic execution with SymCC: Don't interpret, compile!" The same instructions 8 | are available on our website [1], which also provides our raw results. Feel free 9 | to reach out to us if you encounter problems with reproducing the benchmarks. 10 | 11 | The datasets are also archived on figshare [10]. 12 | 13 | In the paper, we describe two sets of experiments: we first benchmark SymCC on 14 | the CGC programs, then we run it on real-world software. 15 | 16 | 17 | CGC experiments 18 | 19 | We used the Linux port of the CGC programs by Trail of Bits [2]. SymCC needs to 20 | be built with support for 32-bit compilation (see docs/32-bit.txt; this is not 21 | part of the Dockerfile because it would double the build time of the container 22 | while providing value to just a few users). Then you can simply export 23 | CC=/path/to/symcc, CXX=/path/to/sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, and build 24 | the CGC programs as usual (i.e., by invoking their build.sh script). 25 | 26 | Run the programs on the raw PoV inputs [3] with SYMCC_NO_SYMBOLIC_INPUT=1 to 27 | measure pure execution time, and unset the environment variable for symbolic 28 | execution.
To assess coverage, we ran afl-showmap with the AFL-instrumented CGC 29 | programs on each generated input and accumulated the resulting coverage maps per 30 | program, resulting in a set of covered map entries for each CGC program. The 31 | sizes of those sets can then be fed to the scoring formula presented in the 32 | paper. 33 | 34 | For KLEE and QSYM, we used the setup described in our IR study [3] but with the 35 | regular 32-bit binaries built by cb-multios. 36 | 37 | 38 | Real-world software 39 | 40 | The analysis of real-world software always follows the same procedure. Assuming 41 | you have exported CC=symcc, CXX=sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, first 42 | download the code, then build it using its own build system, and finally unset 43 | SYMCC_NO_SYMBOLIC_INPUT and analyze the program in concert with AFL (which 44 | requires building a second time for AFL, see docs/Fuzzing.txt). We used AFL 45 | 2.56b and built the targets with AFL_USE_ASAN=1. Note that the fuzzing helper is 46 | already installed in the Docker container. 47 | 48 | OpenJPEG [4]: we used revision 1f1e9682, built with CMake as described in the 49 | project's INSTALL.md (adding "-DBUILD_THIRDPARTY=ON" to make sure that 50 | third-party libraries are compiled with SymCC as well), and analyzed 51 | "bin/opj_decompress -i @@ -o /tmp/image.pgm"; the corpus consisted of test 52 | files file1.jp2 and file8.jp2 [5]. 53 | 54 | libarchive [6]: we used revision 9ebb2484, built with CMake as described in the 55 | project's INSTALL (but adding "-DCMAKE_BUILD_TYPE=Release"), and analyzed 56 | "bin/bsdtar tf @@"; the corpus consisted of just a single dummy file 57 | containing the character "A". 58 | 59 | tcpdump: we built both tcpdump [7] and libpcap [8]; in order to make the former 60 | find the latter, just place the source directories next to each other in the 61 | same folder. We used revision d615abec of libpcap and revision d57927e1 of 62 | tcpdump. 
We built first libpcap and then tcpdump with "./configure && make", 63 | and analyzed "tcpdump/tcpdump -e -r @@"; the corpus consisted of just a single 64 | dummy file containing the character "A". 65 | 66 | All experiments used one AFL master process, one secondary AFL process, and one 67 | SymCC process. We let them run for 24 hours and repeated each of them 30 times 68 | to create the graphs in the paper; AFL map density was extracted from the 69 | secondary AFL process' "plot_data" file, column "map_size". 70 | 71 | The QSYM experiments used an analogous setup, replacing SymCC with QSYM and 72 | running it with AFL according to the QSYM authors' instructions [9]. 73 | 74 | [1] http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html 75 | [2] https://github.com/trailofbits/cb-multios 76 | [3] http://www.s3.eurecom.fr/tools/symbolic_execution/ir_study.html 77 | [4] https://github.com/uclouvain/openjpeg.git 78 | [5] https://github.com/uclouvain/openjpeg-data/blob/master/input/conformance 79 | [6] https://github.com/libarchive/libarchive.git 80 | [7] https://github.com/the-tcpdump-group/tcpdump.git 81 | [8] https://github.com/the-tcpdump-group/libpcap.git 82 | [9] https://github.com/sslab-gatech/qsym#run-hybrid-fuzzing-with-afl 83 | [10] https://doi.org/10.6084/m9.figshare.24270709.v1 or https://figshare.com/articles/dataset/SymCC_evaluation_data/24270709 84 | -------------------------------------------------------------------------------- /docs/Fuzzing.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Combining SymCC with a fuzzer 4 | 5 | 6 | Programs instrumented with SymCC generate new test inputs on every run. This is 7 | the core building block for program testing, but a full analysis requires 8 | additional components: new test cases need to be checked for whether they 9 | trigger vulnerabilities in the target program, we have to sort them by relevance 10 | and feed them back to symbolic execution. 
These tasks are essentially the same 11 | as in fuzzing, except that we use a smarter (yet more expensive) strategy to 12 | generate new inputs. Here we show how to reuse an existing fuzzer for the 13 | management tasks but additionally generate new inputs with SymCC. 14 | 15 | 16 | Setup 17 | 18 | 19 | We use AFL, a popular gray-box fuzzer, in its parallel mode. See AFL's 20 | documentation on parallel fuzzing for details on this mode - the basic idea is 21 | that SymCC and the fuzzer periodically exchange new inputs. SymCC comes with a 22 | helper that coordinates the collaboration with the fuzzer. It is written in 23 | Rust, so rustc and cargo (the Rust package manager) have to be installed. On 24 | Debian-based distributions, for example, a simple "apt install rustc cargo" is 25 | all you need. Build the tool by executing the following command in the root of 26 | SymCC's source repository: 27 | 28 | $ cargo install --path util/symcc_fuzzing_helper 29 | 30 | Afterwards, you should have a self-contained binary 31 | ~/.cargo/bin/symcc_fuzzing_helper. If you are interested in the tool's 32 | internals, you can render documentation as follows: 33 | 34 | $ cargo doc --manifest-path util/symcc_fuzzing_helper/Cargo.toml \ 35 | --document-private-items --open 36 | 37 | This is all on the SymCC side. Now just make sure that AFL is installed - we've 38 | tested with version 2.56b. 39 | 40 | 41 | Testing an example program 42 | 43 | 44 | Suppose we wanted to search for memory-related vulnerabilities in tcpdump's 45 | link-layer parsers. The program can be instructed to read from a pcap and print 46 | relevant headers like so: 47 | 48 | $ tcpdump -e -r <pcap file> 49 | 50 | Compile tcpdump and libpcap, the library it uses for pcap reading, once with 51 | SymCC and once with one of AFL's compiler wrappers (e.g., afl-clang).
In order 52 | to detect memory corruptions, enable address sanitizer in the AFL-instrumented 53 | version by exporting AFL_USE_ASAN=1 before compiling: 54 | 55 | $ git clone https://github.com/the-tcpdump-group/libpcap.git 56 | $ git clone https://github.com/the-tcpdump-group/tcpdump.git 57 | 58 | $ mkdir symcc_build; cd symcc_build 59 | $ cp -r ../{libpcap,tcpdump} . 60 | $ cd libpcap 61 | $ CC=/path/to/symcc ./configure 62 | $ make 63 | $ cd ../tcpdump 64 | $ CC=/path/to/symcc ./configure 65 | $ make 66 | $ cd .. 67 | 68 | $ mkdir afl_build; cd afl_build 69 | $ export AFL_USE_ASAN=1 70 | $ cp -r ../{libpcap,tcpdump} . 71 | $ cd libpcap 72 | $ CC=/path/to/afl-clang ./configure 73 | $ make 74 | $ cd ../tcpdump 75 | $ CC=/path/to/afl-clang ./configure 76 | $ make 77 | $ cd .. 78 | 79 | Note that we need two copies of the source code because the projects build 80 | in-tree. Also, it is important to place the source code directories next to each 81 | other, so that tcpdump's build system can find and statically link the 82 | previously built libpcap. 83 | 84 | Create a corpus of dummy files somewhere (say, in a directory called "corpus"); 85 | for tcpdump, we just start with an empty corpus containing only a dummy file for 86 | AFL: 87 | 88 | $ mkdir corpus 89 | $ echo A > corpus/dummy 90 | 91 | Then launch one AFL master and one AFL secondary instance, both writing their 92 | outputs to the arbitrarily named directory "afl_out": 93 | 94 | $ afl-fuzz -M afl-master -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@ 95 | $ afl-fuzz -S afl-secondary -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@ 96 | 97 | For simplicity, we disable memory limits (with "-m none"); be sure to read AFL's 98 | notes on address sanitizer to learn about the implications. Alternatively, you 99 | can compile the target program without address sanitizer, in which case you 100 | don't need to disable the memory limit. 
101 | 102 | Finally, we can run SymCC using the helper: 103 | 104 | $ ~/.cargo/bin/symcc_fuzzing_helper -o afl_out -a afl-secondary -n symcc -- symcc_build/tcpdump/tcpdump -e -r @@ 105 | 106 | It will run SymCC on the most promising inputs generated by the secondary AFL 107 | instance and feed any interesting results back to AFL. In AFL's status screen, 108 | you should see the counter "imported" in the "path geometry" section increase 109 | after a short time - this means that the fuzzer instances and SymCC are 110 | exchanging inputs. Crashes will be stored in afl_out/*/crashes as usual. 111 | 112 | It is possible to run SymCC with only an AFL master or only a secondary AFL 113 | instance; see the AFL docs for the implications. Moreover, the number of fuzzer 114 | and SymCC instances can be increased - just make sure that each has a unique 115 | name. 116 | 117 | Note that there are currently a few gotchas with the fuzzing helper: 118 | 119 | 1. It expects afl-showmap to be in the same directory as afl-fuzz (which is 120 | usually the case), and it finds that directory via your afl-fuzz command. If 121 | afl-fuzz is on your PATH (as we assumed in the example above), all is good 122 | and you can ignore this point. Otherwise, you need to either call afl-fuzz 123 | with an absolute path (e.g., /afl/afl-fuzz in the Docker image) or, if you 124 | use a relative path, start afl-fuzz from the same working directory as the 125 | fuzzing helper. 126 | 127 | 2. The helper needs to know how to call the AFL-instrumented version of the 128 | target, and it finds that information by scanning your afl-fuzz command. To 129 | this end, it _requires_ the double dash that we used in the example above to 130 | separate afl-fuzz options from the target command; if you omit it, you'll 131 | likely get errors from the helper when it tries to run afl-showmap. 
132 | -------------------------------------------------------------------------------- /docs/Ideas.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Ideas for future work 4 | 5 | 6 | Here we collect ideas around improvements and new features that could be 7 | interesting to implement. 8 | 9 | 10 | Position in the optimizer pipeline 11 | 12 | Intuitively, we should run towards the end of the pipeline, so that the target 13 | program has been simplified as much as possible. However, SymCC currently runs 14 | just before the vectorizer - a position later in the pipeline would require 15 | supporting LLVM vector instructions, so for now we choose implementation 16 | simplicity over potential performance gains. Still, it would be very interesting 17 | to check whether moving to the end of the pipeline accelerates the system 18 | significantly, and how much it would cost in terms of complexity. 19 | 20 | 21 | Optimize injected code 22 | 23 | We should schedule a few optimization passes after inserting our 24 | instrumentation, so that the instrumentation code gets optimized as well. This 25 | becomes more important the further we move our pass to the end of the pipeline. 26 | We could take inspiration from popular sanitizers like ASan and MSan regarding 27 | the concrete passes to run, and their order. Also, we should enable link-time 28 | optimization to inline some simple run-time support functions. 29 | 30 | 31 | Free symbolic expressions in memory 32 | 33 | SymCC currently doesn't free symbolic expressions. This is fine most of the time 34 | because intermediate values are rarely computed without being used: typically, 35 | they end up being inputs to future computations, so we couldn't free the 36 | corresponding expressions anyway. 
A notable exception is the computation of 37 | values only for output - the expressions for such values could be freed after 38 | the value is output, which would reduce memory consumption, especially with 39 | output-heavy target programs. 40 | 41 | 42 | Better fuzzer integration 43 | 44 | Our current coordination with the fuzzer is very crude: we use AFL's distributed 45 | mode to make it periodically pull new inputs from SymCC, and we try to 46 | prioritize the most interesting inputs from AFL's queue for execution in SymCC. 47 | However, a better integration would consider the trade-offs of symbolic 48 | execution: it's expensive but uses more sophisticated reasoning. As long as the 49 | fuzzer makes good progress (for some progress metric), CPU power should be 50 | allocated only to the fuzzer; the price of symbolic execution should be paid 51 | only when necessary. Moreover, a faster synchronization mechanism than AFL's 52 | file-system based approach would be nice. 53 | 54 | 55 | Work with other fuzzers 56 | 57 | Integrating with AFL is easy because its distributed mode only requires working 58 | with files and directories. Other fuzzers might not provide such easy 59 | mechanisms, but by integrating with them we would gain whatever performance 60 | improvements they have made over AFL (e.g., AFL++ or Honggfuzz). 61 | 62 | 63 | Forking version 64 | 65 | Instead of working with a fuzzer, we could also implement forking and some 66 | scheduling strategies ourselves. Georgia Tech has developed some OS-level 67 | primitives that could help to implement such a feature: 68 | https://github.com/sslab-gatech/perf-fuzz. 
69 | -------------------------------------------------------------------------------- /docs/Libc.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Libc Call Interception 4 | 5 | 6 | While we can, in principle, compile all code with instrumentation, it is unclear 7 | how difficult this is for the C standard library. The LLVM sanitizers don't try, 8 | even if MSan otherwise requires all code to be instrumented, and we take this as 9 | a sign that there may be unforeseen challenges. For now, we take the same route 10 | as the sanitizers and intercept calls to libc functions, wrapping them with 11 | symbolic handling. For example, the wrapper for "memset" obtains the symbolic 12 | expression for the value to be written in memory and pushes it to the shadow 13 | region of the destination memory. In the future, we may experiment with 14 | compiling (parts of) the libc to avoid the effort of manually defining wrappers. 15 | 16 | Initially, we tried the interception mechanism that the LLVM sanitizers use, 17 | implemented in the compiler-rt library. The Linux version basically just defines 18 | a function with the name of the libc function. The dynamic loader resolves 19 | symbols to the first function with the right name that it finds; given an 20 | appropriate link order, the wrapper (or "interceptor" in compiler-rt parlance) 21 | will be called instead of the libc function. Calling the real function is just a 22 | matter of asking the loader for alternative resolutions (i.e., calling "dlsym" 23 | with flag "RTLD_NEXT"). The problem for us is that this approach *globally* 24 | replaces a given libc function, in the executable and in all libraries that it 25 | loads. However, our run-time support library is loaded into the same process and 26 | makes heavy use of libc, so we need the ability to use wrappers in one part of 27 | the program and concrete functions in another. 
This turned out to complicate the 28 | compiler-rt-based implementation so much that we eventually abandoned the 29 | approach. 30 | 31 | Function renaming provided a convenient alternative: we control all code that is 32 | supposed to call wrappers rather than the libc functions proper, so we just rename 33 | the targets of their calls. For example, a call to "memset" in the program under 34 | test is turned into a call to "memset_symbolized", which we can easily define as 35 | a regular function wrapping "memset". Calls from our run-time library, on the 36 | other hand, use the regular function names and thus end up in libc as usual. 37 | -------------------------------------------------------------------------------- /docs/Optimization.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Optimization 4 | 5 | 6 | A popular technique for experimenting with compiler passes is to produce bitcode 7 | with "clang -emit-llvm" and run the pass on the resulting bitcode with opt. Note 8 | that this approach does not mix well with optimization: simply running "opt -O3" 9 | on the instrumented bitcode yields inferior results. Why? In principle, the 10 | instrumentation that adds symbolic execution capabilities does not interfere 11 | with the compiler's regular optimization. However, while "opt -O3" runs the same 12 | middle-end optimizations as clang does internally, "clang -O3" performs 13 | additional analysis before invoking the middle end. In particular, type-based 14 | alias analysis (TBAA) adds metadata to the bitcode that enables the SROA pass to 15 | promote a lot of stack-allocated variables into SSA values.
16 | 17 | In order to produce bitcode that can later be properly optimized with opt, pass 18 | the desired optimization flag at each stage of the workflow: 19 | 20 | $ clang -O3 -Xclang -disable-llvm-passes -emit-llvm -S test.c -o test.ll 21 | $ opt -load ./libSymbolize.so -symbolize < test.ll > test_instrumented.bc 22 | $ opt -O3 < test_instrumented.bc > test_instrumented_optimized.bc 23 | $ clang -O3 test_instrumented_optimized.bc -o test 24 | $ ./test 25 | -------------------------------------------------------------------------------- /docs/Testing.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Testing 4 | 5 | 6 | The short version: 7 | 8 | $ ninja check 9 | 10 | 11 | The slightly longer version: 12 | 13 | We use the LLVM tools "lit" and "FileCheck", integrated with CMake. Since there 14 | is little documentation on setting up the combination of those tools (apart from 15 | the LLVM source code), here is an overview of the setup: 16 | 17 | 1. Tests are C or C++ source files in the "test" subdirectory of the project. 18 | They contain instructions for FileCheck in comments; see that tool's 19 | documentation. 20 | 21 | 2. We run the individual tests through lit, LLVM's configurable test runner. It 22 | finds each test file, performs some variable substitutions (see below), and 23 | runs the tests. The main source of configuration is "test/lit.cfg". 24 | 25 | 3. At configuration time, CMake creates an additional config file for lit, 26 | containing site-specific configuration such as the output directory of the 27 | build. The template is "test/lit.site.cfg.in". 28 | 29 | 4. CMake adds the "check" target, which invokes lit on the test suite. (It would 30 | be nice to call the target "test", but this is a reserved name in some 31 | versions of CMake, and the built-in test mechanism that it is reserved for 32 | doesn't track dependencies the way we need.) 
33 | 34 | Test files can use the following patterns: 35 | 36 | %s The test file itself. 37 | %t A temporary file. 38 | %symcc Invocation of clang with our custom pass loaded. 39 | %filecheck Invocation of FileCheck with the right arguments for the backend. 40 | 41 | Since we support multiple symbolic backends, the tests must account for 42 | different output from different backends. To this end, we rely on FileCheck's 43 | prefix mechanism: test files use different prefixes to specify requirements on 44 | different backends. The following prefixes are supported: 45 | 46 | SIMPLE: Active when we test with our own backend. 47 | QSYM: Active when we test with the QSYM backend. 48 | ANY: Always active. 49 | 50 | The build system makes sure that "%filecheck" always expands to an invocation of 51 | FileCheck that activates the right prefixes for the current build configuration. 52 | 53 | Note that we run the tests only with the backend selected at configuration time, 54 | so a full test requires building the project in multiple configurations. Also, 55 | be aware that the backends write all log messages to standard error; therefore, 56 | checks should not depend on the relative ordering of backend logs and messages 57 | that the test program writes to standard output (use stderr instead). 58 | 59 | 60 | Regression tests 61 | 62 | In addition to the hand-written tests that exercise compiler functionality via C 63 | code, we have a directory "test/regression" where we can collect LLVM bitcode 64 | files that triggered bugs in real SymCC use. Generate the bitcode by running the 65 | crashing compiler command with additional arguments "-emit-llvm -S -o-", pipe 66 | the result through "opt -S -instnamer", and add a comment at the top to tell lit 67 | how to compile it. The instruction naming is necessary because different LLVM 68 | versions treat numbered (i.e., unnamed) instructions differently and may 69 | complain if the numbering sequence doesn't match expectations. 
70 | -------------------------------------------------------------------------------- /sample.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | int main(int argc, char *argv[]) { 4 | std::cout << "What's your name?" << std::endl; 5 | std::string name; 6 | std::cin >> name; 7 | 8 | if (name == "root") 9 | std::cout << "What is your command?" << std::endl; 10 | else 11 | std::cout << "Hello, " << name << "!" << std::endl; 12 | 13 | return 0; 14 | } 15 | -------------------------------------------------------------------------------- /test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # This file is part of SymCC. 2 | # 3 | # SymCC is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License along with 13 | # SymCC. If not, see . 14 | 15 | if (SYMCC_RT_BACKEND STREQUAL "qsym") 16 | set(SYM_TEST_FILECHECK_ARGS "--check-prefix=QSYM --check-prefix=ANY") 17 | elseif (SYMCC_RT_BACKEND STREQUAL "simple") 18 | set(SYM_TEST_FILECHECK_ARGS "--check-prefix=SIMPLE --check-prefix=ANY") 19 | else() 20 | message(FATAL_ERROR "Unknown backend to test: ${SYMCC_RT_BACKEND}") 21 | endif() 22 | 23 | if (${LLVM_VERSION_MAJOR} VERSION_GREATER_EQUAL 14) 24 | # FileCheck used to be fine with unused prefixes when more than one prefix was 25 | # defined. 
This changed in LLVM version 14, requiring the new option 26 | # "--allow-unused-prefixes" (added in LLVM 13) to restore the old behavior. 27 | set(SYM_TEST_FILECHECK_ARGS "${SYM_TEST_FILECHECK_ARGS} --allow-unused-prefixes") 28 | endif() 29 | 30 | configure_file("lit.site.cfg.in" "lit.site.cfg") 31 | 32 | add_custom_target(check 33 | lit 34 | --verbose 35 | --path=${LLVM_TOOLS_BINARY_DIR} 36 | ${CMAKE_CURRENT_BINARY_DIR} 37 | COMMENT "Testing the system..." 38 | USES_TERMINAL) 39 | 40 | add_dependencies(check SymCCRuntime SymCC) 41 | if (TARGET SymCCRuntime32) 42 | add_dependencies(check SymCCRuntime32 SymCC) 43 | endif() 44 | -------------------------------------------------------------------------------- /test/README: -------------------------------------------------------------------------------- 1 | ../docs/Testing.txt -------------------------------------------------------------------------------- /test/arrays.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x01\x02\x03\x04\x00" | %t 2>&1 | %filecheck %s 17 | #include 18 | #include 19 | #include 20 | 21 | int main(int argc, char* argv[]) { 22 | uint8_t input[4]; 23 | if (read(STDIN_FILENO, input, sizeof(input)) != sizeof(input)) { 24 | fprintf(stderr, "Failed to read the input\n"); 25 | return -1; 26 | } 27 | 28 | uint8_t offset; 29 | if (read(STDIN_FILENO, &offset, sizeof(offset)) != sizeof(offset)) { 30 | fprintf(stderr, "Failed to read the offset\n"); 31 | return -1; 32 | } 33 | 34 | // This is just to make the base pointer symbolic. 35 | uint8_t *p = input + offset; 36 | 37 | fprintf(stderr, "%s\n", (p[0] == 1) ? "yes" : "no"); 38 | // SIMPLE: Trying to solve 39 | // QSYM-COUNT-2: SMT 40 | // ANY: yes 41 | 42 | // If our GetElementPointer computations are incorrect, this will create 43 | // path constraints that conflict with those generated by the previous array 44 | // access. 45 | fprintf(stderr, "%s\n", (p[2] == 3) ? "yes" : "no"); 46 | // SIMPLE: Trying to solve 47 | // QSYM-COUNT-2: SMT 48 | // ANY: yes 49 | 50 | // Use the pointer in a condition to see if contradicting constraints have 51 | // been created. The QSYM backend will log an error in this case (see 52 | // below), the simple backend just aborts. 53 | fprintf(stderr, "%s\n", (p == input) ? 
"yes" : "no"); 54 | // SIMPLE: Trying to solve 55 | // QSYM-NOT: Incorrect constraints are inserted 56 | // QSYM-COUNT-2: SMT 57 | // ANY: yes 58 | 59 | return 0; 60 | } 61 | -------------------------------------------------------------------------------- /test/arrays.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/arrays.c -o %t_32 2 | RUN: echo -ne "\x01\x02\x03\x04\x00" | %t_32 2>&1 | %filecheck %S/arrays.c 3 | -------------------------------------------------------------------------------- /test/bcopy_bcmp_bzero.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s 17 | // 18 | // Test symbolic handling of bcopy, bcmp, and bzero. We copy symbolic data with 19 | // bcopy, then compare it with bcmp, expecting the solver to be triggered 20 | // (indicating that the two functions are represented correctly); then we bzero 21 | // the region and perform another comparison, which should not result in a 22 | // solver query (indicating that bzero concretized as expected).
23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | 29 | void symcc_make_symbolic(const void *start, size_t byte_length); 30 | typedef void (*TestCaseHandler)(const void *, size_t); 31 | void symcc_set_test_case_handler(TestCaseHandler handler); 32 | 33 | int solved = 0; 34 | 35 | void handle_test_case(const void *data, size_t data_length) { 36 | assert(data_length == 4); 37 | assert(bcmp(data, "bar", 4) == 0); 38 | solved = 1; 39 | } 40 | 41 | int main(int argc, char *argv[]) { 42 | symcc_set_test_case_handler(handle_test_case); 43 | 44 | const char input[] = "foo"; 45 | symcc_make_symbolic(input, 4); 46 | 47 | // Make a copy and compare it in order to trigger the solver. 48 | char copy[4]; 49 | bcopy(input, copy, 4); 50 | int bcmp_result = bcmp(copy, "bar", 4); 51 | assert(bcmp_result != 0); 52 | 53 | // Zero out the symbolic data and compare again (which should not trigger the 54 | // solver this time). 55 | bzero(copy, 4); 56 | bcmp_result = bcmp(copy, "abc", 4); 57 | assert(bcmp_result != 0); 58 | 59 | // The simple backend doesn't support test-case handlers, so we only expect a 60 | // solution with the QSYM backend. 61 | printf("Solved: %d\n", solved); 62 | // SIMPLE: Solved: 0 63 | // QSYM: Solved: 1 64 | return 0; 65 | } 66 | -------------------------------------------------------------------------------- /test/bool_cast.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O1 %s -o %t 16 | // RUN: echo b | %t 2>&1 | %filecheck %s 17 | // 18 | // Check that bool cast is handled correctly (Issue #108) 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | int bar(unsigned char a) { 25 | if (a == 0xCA) return -1; 26 | else return 0; 27 | } 28 | 29 | int main() { 30 | unsigned char input = 0; 31 | read(0, &input, sizeof(input)); 32 | int r = bar(input); 33 | // SIMPLE: Trying to solve 34 | // SIMPLE: Found diverging input 35 | // SIMPLE: stdin0 -> #xca 36 | // QSYM-COUNT-2: SMT 37 | // QSYM: New testcase 38 | if (r == -1) printf("Bingo!\n"); 39 | else printf("Ok\n"); 40 | // ANY: Ok 41 | return r; 42 | } 43 | -------------------------------------------------------------------------------- /test/bswap.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc %s -o %t 16 | // RUN: echo -ne "\x04\x03\x02\x01" | %t 2>&1 | %filecheck %s 17 | // RUN: %symcc %s -S -emit-llvm -o - | FileCheck --check-prefix=BITCODE %s 18 | // 19 | // Here we test that the "bswap" intrinsic is handled correctly. 
20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | int main(int argc, char* argv[]) { 28 | uint32_t x; 29 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 30 | fprintf(stderr, "Failed to read x\n"); 31 | return -1; 32 | } 33 | x = ntohl(x); 34 | 35 | // BITCODE: llvm.bswap.i32 36 | uint32_t y = __builtin_bswap32(x); 37 | 38 | // ANY: 0x04030201 0x01020304 39 | fprintf(stderr, "0x%08x 0x%08x\n", x, y); 40 | 41 | // SIMPLE: Trying to solve 42 | // SIMPLE: Found diverging input 43 | // SIMPLE-DAG: stdin0 -> #xef 44 | // SIMPLE-DAG: stdin1 -> #xbe 45 | // SIMPLE-DAG: stdin2 -> #xfe 46 | // SIMPLE-DAG: stdin3 -> #xca 47 | // QSYM-COUNT-2: SMT 48 | // ANY: Not quite. 49 | if (y == 0xcafebeef) 50 | fprintf(stderr, "Correct test input.\n"); 51 | else 52 | fprintf(stderr, "Not quite.\n"); 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /test/bswap.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 %S/bswap.c -o %t_32 2 | RUN: echo -ne "\x04\x03\x02\x01" | %t_32 2>&1 | %filecheck %S/bswap.c 3 | RUN: %symcc %S/bswap.c -m32 -S -emit-llvm -o - | FileCheck --check-prefix=BITCODE %S/bswap.c 4 | -------------------------------------------------------------------------------- /test/concrete_structs.ll: -------------------------------------------------------------------------------- 1 | ; This file is part of SymCC. 2 | ; 3 | ; SymCC is free software: you can redistribute it and/or modify it under the 4 | ; terms of the GNU General Public License as published by the Free Software 5 | ; Foundation, either version 3 of the License, or (at your option) any later 6 | ; version. 7 | ; 8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
11 | ; 12 | ; You should have received a copy of the GNU General Public License along with 13 | ; SymCC. If not, see . 14 | 15 | ; Verify that we create correct expressions from struct values. For each kind of 16 | ; value, we trigger expression creation by inserting a symbolic value into the 17 | ; struct. Compiling this code with SymCC and verifying that the resulting binary 18 | ; exits cleanly shows that SymCC's instrumentation doesn't break the execution 19 | ; of the program. Moreover, we store a struct value to memory, load one of its 20 | ; elements back into a register, and branch based on it in order to trigger the 21 | ; solver; by checking the generated test case we can verify that the expression 22 | ; was correct. 23 | ; 24 | ; This test reproduces a bug where creating expressions for some structs would 25 | ; lead to a program crash. 26 | ; 27 | ; Since the bitcode is written by hand, we first run llc on it because it 28 | ; performs a validity check, whereas Clang doesn't. 29 | 30 | ; RUN: llc %s -o /dev/null 31 | ; RUN: %symcc %s -o %t 32 | ; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s 33 | 34 | target triple = "x86_64-pc-linux-gnu" 35 | 36 | ; The struct type which we'll create expressions for. Include a floating-point 37 | ; value and a Boolean because they're represented with non-bitvector solver 38 | ; variables (reproducing eurecom-s3/symcc#138). 39 | %struct_type = type { i8, i32, i8, float, i1 } 40 | 41 | ; Global variable to record whether we've found a solution. Since the simple 42 | ; backend doesn't support test-case handlers, we start with "true". 43 | @solved = global i1 1 44 | 45 | ; Our test-case handler verifies that the new test case is a 32-bit integer 46 | ; with the value 42. 
47 | define void @test_case_handler(i8* %data, i64 %data_length) { 48 | %correct_length = icmp eq i64 %data_length, 4 49 | br i1 %correct_length, label %check_data, label %failed 50 | 51 | check_data: 52 | %value_pointer = bitcast i8* %data to i32* 53 | %value = load i32, i32* %value_pointer 54 | %correct_value = icmp eq i32 %value, 42 55 | br i1 %correct_value, label %all_good, label %failed 56 | 57 | all_good: 58 | store i1 1, i1* @solved 59 | ret void 60 | 61 | failed: 62 | store i1 0, i1* @solved 63 | ret void 64 | } 65 | 66 | define i32 @main(i32 %argc, i8** %argv) { 67 | ; Register our test-case handler. 68 | call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler) 69 | ; SIMPLE: Warning: test-case handlers 70 | 71 | ; Create a symbolic value that we can use to trigger the creation of struct 72 | ; expressions. 73 | %symbolic_value_mem = alloca i32 74 | store i32 1, i32* %symbolic_value_mem 75 | call void @symcc_make_symbolic(i32* %symbolic_value_mem, i64 4) 76 | %symbolic_value = load i32, i32* %symbolic_value_mem 77 | %symbolic_byte = trunc i32 %symbolic_value to i8 78 | 79 | ; Undef struct 80 | insertvalue %struct_type undef, i32 %symbolic_value, 1 81 | 82 | ; Struct with concrete value 83 | insertvalue %struct_type { i8 1, i32 undef, i8 2, float undef, i1 undef }, i32 %symbolic_value, 1 84 | 85 | ; Write a struct to memory and load one of its elements back into a register. 86 | ; It's important to also insert a symbolic value into the struct, so that we 87 | ; generate an expression in the first place. 
88 | %struct_mem = alloca %struct_type 89 | %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef, float undef, i1 undef }, i8 %symbolic_byte, 2 90 | store %struct_type %struct_value, %struct_type* %struct_mem 91 | %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 92 | %value_loaded = load i32, i32* %value_address 93 | %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value 94 | br i1 %is_forty_two, label %never_executed, label %done 95 | ; QSYM: SMT 96 | 97 | never_executed: 98 | br label %done 99 | 100 | done: 101 | %solved = load i1, i1* @solved 102 | %result = select i1 %solved, i32 0, i32 1 103 | ret i32 %result 104 | } 105 | 106 | declare void @symcc_make_symbolic(i32*, i64) 107 | declare void @symcc_set_test_case_handler(void (i8*, i64)*) 108 | -------------------------------------------------------------------------------- /test/file_input.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: /bin/echo -ne "\x00\x00\x00\x05aaaa" > %T/%basename_t.input 16 | // RUN: %symcc -O2 %s -o %t 17 | // RUN: env SYMCC_INPUT_FILE=%T/%basename_t.input %t %T/%basename_t.input 2>&1 | %filecheck %s 18 | 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | #include 25 | // Reads the input file twice: first with open/read/lseek, then with fopen/fread/fseek, branching on the values read each time. 26 | int main(int argc, char* argv[]) { 27 | // 28 | // Read from the input file using Unix primitives. 29 | // 30 | 31 | // ANY-NOT: Warning 32 | int fd = open(argv[1], O_RDONLY); 33 | if (fd < 0) { 34 | perror("failed to open the input file"); 35 | return -1; 36 | } 37 | 38 | int input; 39 | if (read(fd, &input, sizeof(input)) != 4) { 40 | perror("failed to read from the input file"); 41 | return -1; 42 | } 43 | input = ntohl(input); 44 | 45 | int four_as; 46 | if (read(fd, &four_as, sizeof(four_as)) != 4) { 47 | perror("failed to read from the input file"); 48 | return -1; 49 | } 50 | 51 | int eof = 42; 52 | if (read(fd, &eof, sizeof(eof)) != 0) { 53 | perror("this should be exactly the end of the file"); 54 | return -1; 55 | } 56 | 57 | // Make sure that we haven't created a symbolic expression 58 | if (eof == 42) 59 | fprintf(stderr, "All is good.\n"); 60 | else 61 | fprintf(stderr, "Why was the variable overwritten?\n"); 62 | // SIMPLE-NOT: Trying to solve 63 | // QSYM-NOT: SMT 64 | // ANY: All is good. 65 | 66 | // SIMPLE: Trying to solve 67 | // SIMPLE: Found diverging input 68 | // QSYM-COUNT-2: SMT 69 | // QSYM: New testcase 70 | // ANY: Not sure 71 | if (input >= 42) 72 | fprintf(stderr, "This may be the answer.\n"); 73 | else 74 | fprintf(stderr, "Not sure this is correct...\n"); 75 | 76 | // 77 | // Rewind and read again. 
78 | // 79 | 80 | if (lseek(fd, 4, SEEK_SET) != 4) { 81 | perror("failed to rewind the file"); 82 | return -1; 83 | } 84 | 85 | if (read(fd, &four_as, sizeof(four_as)) < 0) { 86 | perror("failed to read from the input file"); 87 | return -1; 88 | } 89 | 90 | // SIMPLE: Trying to solve 91 | // SIMPLE: Found diverging input 92 | // QSYM-COUNT-2: SMT 93 | // QSYM: New testcase 94 | // ANY: No. 95 | if (four_as != (int)0x61616161) 96 | fprintf(stderr, "The matrix has changed.\n"); 97 | else 98 | fprintf(stderr, "No.\n"); 99 | 100 | // 101 | // Read with the C standard library. 102 | // 103 | 104 | // ANY: Warning 105 | FILE *file = fopen(argv[1], "r"); 106 | if (file == NULL) { 107 | perror("failed to open the input file"); 108 | return -1; 109 | } 110 | 111 | int same_input; 112 | if (fread(&same_input, sizeof(same_input), 1, file) != 1) { // fread returns the count of complete items read (a size_t, never negative), so failure is anything other than the 1 item requested 113 | perror("failed to read from the input file"); 114 | return -1; 115 | } 116 | same_input = ntohl(same_input); 117 | 118 | // SIMPLE: Trying to solve 119 | // QSYM-COUNT-2: SMT 120 | // ANY: Yep 121 | if (same_input == 5) 122 | fprintf(stderr, "Yep, it's the test input.\n"); 123 | else 124 | fprintf(stderr, "Not the test input!\n"); 125 | 126 | // 127 | // Rewind and read again. 128 | // 129 | 130 | // fseek doesn't return the current offset (unlike lseek) - it just returns 0 131 | // on success! 
132 | if (fseek(file, 4, SEEK_SET) != 0) { 133 | perror("failed to rewind the file"); 134 | return -1; 135 | } 136 | 137 | int same_four_as; 138 | if (fread(&same_four_as, sizeof(same_four_as), 1, file) != 1) { // compare against the requested item count; fread never returns a negative value 139 | perror("failed to read from the input file"); 140 | return -1; 141 | } 142 | 143 | // SIMPLE: Trying to solve 144 | // QSYM-COUNT-2: SMT 145 | // ANY: Still 146 | if (same_four_as == (int)0x61616161) 147 | fprintf(stderr, "Still the test input.\n"); 148 | else 149 | fprintf(stderr, "Not the test input!\n"); 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /test/file_input.test32: -------------------------------------------------------------------------------- 1 | RUN: /bin/echo -ne "\x00\x00\x00\x05aaaa" > %T/%basename_t.input 2 | RUN: %symcc -m32 -O2 %S/file_input.c -o %t_32 3 | RUN: env SYMCC_INPUT_FILE=%T/%basename_t.input %t_32 %T/%basename_t.input 2>&1 | %filecheck %S/file_input.c 4 | -------------------------------------------------------------------------------- /test/floats.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | float g_value = 0.1234; 25 | 26 | int main(int argc, char *argv[]) { 27 | int x; 28 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 29 | fprintf(stderr, "Failed to read x\n"); 30 | return -1; 31 | } 32 | x = ntohl(x); 33 | 34 | g_value += x; 35 | fprintf(stderr, "%f\n", g_value); 36 | // ANY: 5.1234 37 | 38 | fprintf(stderr, "%s\n", ((g_value < 7) && (g_value > 6)) ? "yes" : "no"); 39 | // SIMPLE: Trying to solve 40 | // SIMPLE: Found diverging input 41 | // SIMPLE: #x06 42 | // Qsym doesn't support symbolic floats! 43 | // QSYM-NOT: SMT 44 | // ANY: no 45 | 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /test/floats.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/floats.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/floats.c 3 | -------------------------------------------------------------------------------- /test/globals.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s 17 | // 18 | // Test that global variables are handled correctly. The special challenge is 19 | // that we need to initialize the symbolic expression corresponding to any 20 | // global variable that has an initial value. 21 | 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | int g_increment = 17; 29 | int g_uninitialized; 30 | int g_more_than_one_byte_int = 512; 31 | 32 | char g_values[] = {1, 2, 3}; 33 | int g_non_char_values[] = {300, 400, 500}; 34 | 35 | int increment(int x) { 36 | int result = x + g_increment; 37 | if (result < 30) 38 | return result; 39 | else 40 | return 42; 41 | } 42 | 43 | void sum(int x) { 44 | int result = 0; 45 | for (size_t i = 0; i < (sizeof(g_values) / sizeof(g_values[0])); i++) { 46 | result += g_values[i]; 47 | } 48 | 49 | fprintf(stderr, "%s\n", (result < x) ? "foo" : "bar"); 50 | } 51 | 52 | void sum_ints(int x) { 53 | int result = 0; 54 | for (size_t i = 0; i < (sizeof(g_non_char_values) / sizeof(g_non_char_values[0])); i++) { 55 | result += g_non_char_values[i]; 56 | } 57 | 58 | fprintf(stderr, "%s\n", (result < x) ? "foo" : "bar"); 59 | } 60 | 61 | int main(int argc, char* argv[]) { 62 | int x; 63 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 64 | fprintf(stderr, "Failed to read x\n"); 65 | return -1; 66 | } 67 | x = ntohl(x); 68 | 69 | fprintf(stderr, "%d\n", increment(x)); 70 | // SIMPLE: Trying to solve 71 | // SIMPLE: (bvadd #x{{0*}}11 72 | // SIMPLE: Found diverging input 73 | // QSYM-COUNT-2: SMT 74 | // QSYM: New testcase 75 | // ANY: 22 76 | 77 | g_increment = 18; 78 | fprintf(stderr, "%d\n", increment(x)); 79 | // SIMPLE: Trying to solve 80 | // SIMPLE: Found diverging input 81 | // We can't check for 0x12 here because with some versions of clang we end 82 | // up in a situation where (x + 18) >= 30 is folded into x >= 12. 
83 | // QSYM-COUNT-2: SMT 84 | // QSYM: New testcase 85 | // ANY: 23 86 | 87 | g_uninitialized = 101; 88 | fprintf(stderr, "%s\n", (x < g_uninitialized) ? "smaller" : "greater or equal"); 89 | // SIMPLE: Trying to solve 90 | // SIMPLE: (bvsle #x{{0*}}65 91 | // QSYM-COUNT-2: SMT 92 | // QSYM: New testcase 93 | // ANY: smaller 94 | 95 | sum(x); 96 | // SIMPLE: Trying to solve 97 | // SIMPLE-NOT: Can't find 98 | // SIMPLE: Found diverging input 99 | // QSYM-COUNT-2: SMT 100 | // QSYM: New testcase 101 | // ANY: bar 102 | 103 | fprintf(stderr, "%s\n", (x < g_more_than_one_byte_int) ? "true" : "false"); 104 | // SIMPLE: Trying to solve 105 | // SIMPLE: #x{{0*}}200 106 | // SIMPLE: Can't find 107 | // QSYM-COUNT-2: SMT 108 | // ANY: true 109 | 110 | sum_ints(x); 111 | // SIMPLE: Trying to solve 112 | // SIMPLE: #x{{0*}}4b0 113 | // SIMPLE: Can't find 114 | // QSYM-COUNT-2: SMT 115 | // ANY: bar 116 | 117 | return 0; 118 | } 119 | -------------------------------------------------------------------------------- /test/globals.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/globals.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/globals.c 3 | -------------------------------------------------------------------------------- /test/if.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s 17 | // This test is disabled until we can move the pass behind the optimizer in the pipeline: 18 | // RUN-disabled: %symcc -O2 -emit-llvm -S %s -o - | FileCheck --check-prefix=BITCODE %s 19 | // 20 | // Here we test two things: 21 | // 1. We can compile the file, and executing it symbolically results in solving 22 | // path constraints. 23 | // 2. The bitcode is optimized, i.e., the instrumentation we insert does not 24 | // break compiler optimizations. 25 | #include 26 | #include 27 | #include 28 | 29 | int foo(int a, int b) { 30 | // BITCODE-NOT: alloca 31 | // BITCODE-NOT: load 32 | // BITCODE-NOT: store 33 | // SIMPLE: Trying to solve 34 | // QSYM-COUNT-2: SMT 35 | // BITCODE: shl 36 | if (2 * a < b) 37 | return a; 38 | // SIMPLE: Trying to solve 39 | // QSYM-COUNT-2: SMT 40 | else if (a % b) 41 | return b; 42 | else 43 | return a + b; 44 | } 45 | 46 | int main(int argc, char* argv[]) { 47 | int x; 48 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 49 | fprintf(stderr, "Failed to read x\n"); 50 | return -1; 51 | } 52 | fprintf(stderr, "%d\n", x); 53 | fprintf(stderr, "%d\n", foo(x, 7)); 54 | // ANY: 7 55 | return 0; 56 | } 57 | -------------------------------------------------------------------------------- /test/if.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/if.c -o %t_32 2 | RUN: echo -ne "\x05\x00\x00\x00" | %t_32 2>&1 | %filecheck %S/if.c 3 | -------------------------------------------------------------------------------- /test/integers.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 
2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t 2>&1 | %filecheck %s 17 | #include 18 | #include 19 | #include 20 | 21 | uint64_t g_value = 0xaaaabbbbccccdddd; 22 | 23 | int main(int argc, char *argv[]) { 24 | uint64_t x; 25 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 26 | fprintf(stderr, "Failed to read x\n"); 27 | return -1; 28 | } 29 | 30 | fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); 31 | // SIMPLE: Trying to solve 32 | // SIMPLE: Found diverging input 33 | // Make sure that we don't truncate integers. 34 | // SIMPLE-DAG: #xaa 35 | // SIMPLE-DAG: #xbb 36 | // SIMPLE-DAG: #xcc 37 | // SIMPLE-DAG: #xdd 38 | // QSYM-COUNT-2: SMT 39 | // ANY: no 40 | 41 | return 0; 42 | } 43 | -------------------------------------------------------------------------------- /test/integers.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/integers.c -o %t_32 2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %S/integers.c 3 | -------------------------------------------------------------------------------- /test/large_alloc.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 
2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x2a" | %t 2>&1 | %filecheck %s 17 | // 18 | // Make sure that we can handle large allocations symbolically. Also, test 19 | // memory-related library functions. 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include 26 | #include 27 | 28 | int main(int argc, char *argv[]) { 29 | int x; 30 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 31 | fprintf(stderr, "Failed to read x\n"); 32 | return -1; 33 | } 34 | int netlongX = x; 35 | x = ntohl(x); 36 | 37 | char *largeAllocation = malloc(10000); 38 | memset(largeAllocation, (char)x, 10000); 39 | 40 | fprintf(stderr, "%s\n", (largeAllocation[9999] < 100) ? "worked" : "error"); 41 | // SIMPLE: Trying to solve 42 | // SIMPLE: Found diverging input 43 | // QSYM-COUNT-2: SMT 44 | // QSYM: New testcase 45 | // ANY: worked 46 | 47 | memset(largeAllocation, 'A', 10000); 48 | fprintf(stderr, "%s\n", (largeAllocation[5000] == 17) ? "true" : "false"); 49 | // SIMPLE-NOT: Trying to solve 50 | // QSYM-NOT: SMT 51 | // ANY: false 52 | 53 | memset(largeAllocation, x, 10000); 54 | fprintf(stderr, "%s\n", (largeAllocation[5000] > 100) ? 
"true" : "false"); 55 | // SIMPLE: Trying to solve 56 | // SIMPLE: Can't find a diverging input at this point 57 | // QSYM-COUNT-2: SMT 58 | // (Qsym finds a new test case with the optimistic strategy.) 59 | // ANY: false 60 | 61 | memcpy(largeAllocation + x, &x, sizeof(x)); 62 | // SIMPLE: Trying to solve 63 | // SIMPLE: Found diverging input 64 | // QSYM-COUNT-2: SMT 65 | // QSYM: New testcase 66 | 67 | // Make x little-endian. 68 | x = __builtin_bswap32(netlongX); 69 | 70 | memcpy(largeAllocation, &x, sizeof(x)); 71 | // SIMPLE-NOT: Trying to solve 72 | // QSYM-NOT: SMT 73 | 74 | memmove(largeAllocation + 1, largeAllocation, sizeof(x)); 75 | fprintf(stderr, "%s\n", (largeAllocation[0] == largeAllocation[2]) ? "true" : "false"); 76 | // SIMPLE: Trying to solve 77 | // QSYM-COUNT-2: SMT 78 | // TODO should find new inputs 79 | // ANY: false 80 | 81 | return 0; 82 | } 83 | -------------------------------------------------------------------------------- /test/large_alloc.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 %S/large_alloc.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x2a" | %t_32 2>&1 | %filecheck %S/large_alloc.c 3 | -------------------------------------------------------------------------------- /test/lit.cfg: -------------------------------------------------------------------------------- 1 | # -*- Python -*- 2 | 3 | # This file is part of SymCC. 4 | # 5 | # SymCC is free software: you can redistribute it and/or modify it under the 6 | # terms of the GNU General Public License as published by the Free Software 7 | # Foundation, either version 3 of the License, or (at your option) any later 8 | # version. 9 | # 10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
13 | # 14 | # You should have received a copy of the GNU General Public License along with 15 | # SymCC. If not, see <https://www.gnu.org/licenses/>. 16 | 17 | import lit.formats.shtest 18 | 19 | config.name = "compiler" 20 | config.test_format = lit.formats.shtest.ShTest() 21 | config.suffixes = [".c", ".cpp", ".ll"] 22 | config.substitutions += [ 23 | ("%symcc", config.test_exec_root + "/../symcc"), 24 | ] 25 | -------------------------------------------------------------------------------- /test/lit.site.cfg.in: -------------------------------------------------------------------------------- 1 | # -*- Python -*- 2 | 3 | # This file is part of SymCC. 4 | # 5 | # SymCC is free software: you can redistribute it and/or modify it under the 6 | # terms of the GNU General Public License as published by the Free Software 7 | # Foundation, either version 3 of the License, or (at your option) any later 8 | # version. 9 | # 10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License along with 15 | # SymCC. If not, see <https://www.gnu.org/licenses/>. 
16 | 17 | import os 18 | from os import path 19 | 20 | # Used by lit to locate tests and output locations 21 | config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@" 22 | config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" 23 | 24 | outputDir = "@CMAKE_CURRENT_BINARY_DIR@/SymccOutput" 25 | try: 26 | os.mkdir(outputDir) 27 | except OSError: 28 | # Directory exists - fine 29 | pass 30 | 31 | config.environment["SYMCC_OUTPUT_DIR"] = outputDir 32 | 33 | # Delegate to the generic configuration file 34 | lit_config.load_config(config, path.join(config.test_source_root, "lit.cfg")) 35 | 36 | # Depending on the backend, the tests have to look for different output 37 | config.substitutions += [ 38 | ("%filecheck", "FileCheck @SYM_TEST_FILECHECK_ARGS@"), 39 | ] 40 | 41 | if "@TARGET_32BIT@" == "ON": 42 | config.suffixes.add(".test32") 43 | -------------------------------------------------------------------------------- /test/load_store.ll: -------------------------------------------------------------------------------- 1 | ; This file is part of SymCC. 2 | ; 3 | ; SymCC is free software: you can redistribute it and/or modify it under the 4 | ; terms of the GNU General Public License as published by the Free Software 5 | ; Foundation, either version 3 of the License, or (at your option) any later 6 | ; version. 7 | ; 8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | ; 12 | ; You should have received a copy of the GNU General Public License along with 13 | ; SymCC. If not, see <https://www.gnu.org/licenses/>. 14 | 15 | ; Verify that loading and storing concrete values of various types works. For 16 | ; each type, we allocate space on the stack, then store a constant value into 17 | ; it, and finally load it back. 
Compiling this code with SymCC and verifying 18 | ; that the resulting binary exits cleanly shows that SymCC's instrumentation 19 | ; doesn't break the load/store operations. 20 | ; 21 | ; This test reproduces a bug where loading a concrete Boolean would lead to a 22 | ; program crash. 23 | ; 24 | ; Since the bitcode is written by hand, we first run llc on it because it 25 | ; performs a validity check, whereas Clang doesn't. 26 | ; 27 | ; RUN: llc %s -o /dev/null 28 | ; RUN: %symcc %s -o %t 29 | ; RUN: %t 2>&1 30 | 31 | target triple = "x86_64-pc-linux-gnu" 32 | 33 | define i32 @main(i32 %argc, i8** %argv) { 34 | ; Load and store a Boolean. 35 | %stack_bool = alloca i1 36 | store i1 0, i1* %stack_bool 37 | %copy_of_stack_bool = load i1, i1* %stack_bool 38 | 39 | ; Load and store a float. 40 | %stack_float = alloca float 41 | store float 0.0, float* %stack_float 42 | %copy_of_stack_float = load float, float* %stack_float 43 | 44 | ret i32 0 45 | } 46 | -------------------------------------------------------------------------------- /test/loop.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s 17 | // 18 | // Make sure that our instrumentation works with back-jumps. Also, test support 19 | // for 128-bit integers (if available). 20 | 21 | #include 22 | #include 23 | 24 | #include 25 | #include 26 | 27 | #ifdef __SIZEOF_INT128__ 28 | #define MYINT __int128 29 | #else 30 | #define MYINT int64_t 31 | #endif 32 | 33 | int fac(int x) { 34 | MYINT result = 1; 35 | 36 | // SIMPLE-COUNT-5: Found diverging input 37 | // SIMPLE-NOT: Found diverging input 38 | // QSYM-COUNT-5: New testcase 39 | for (MYINT i = 2; i <= x; i++) 40 | result *= i; 41 | 42 | return result; 43 | } 44 | 45 | int main(int argc, char* argv[]) { 46 | int x; 47 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 48 | fprintf(stderr, "Failed to read x\n"); 49 | return -1; 50 | } 51 | x = ntohl(x); 52 | fprintf(stderr, "%d\n", fac(x)); 53 | // ANY: 120 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /test/loop.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/loop.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/loop.c 3 | -------------------------------------------------------------------------------- /test/memcpy.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03" | %t 2>&1 | %filecheck %s 17 | // 18 | // Test that we generate alternative inputs for the parameters to memcpy (which 19 | // should assert that the concept works for other functions as well). Also, make 20 | // sure that we handle the different parameter sizes for mmap correctly. 21 | 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | int main(int argc, char *argv[]) { 31 | char values[] = {1, 2, 3}; 32 | char values_copy[3]; 33 | 34 | int dest_offset; 35 | if (read(STDIN_FILENO, &dest_offset, sizeof(dest_offset)) != 36 | sizeof(dest_offset)) { 37 | fprintf(stderr, "Failed to read dest_offset\n"); 38 | return -1; 39 | } 40 | dest_offset = ntohl(dest_offset); 41 | int src_offset; 42 | if (read(STDIN_FILENO, &src_offset, sizeof(src_offset)) != 43 | sizeof(src_offset)) { 44 | fprintf(stderr, "Failed to read src_offset\n"); 45 | return -1; 46 | } 47 | src_offset = ntohl(src_offset); 48 | int length; 49 | if (read(STDIN_FILENO, &length, sizeof(length)) != sizeof(length)) { 50 | fprintf(stderr, "Failed to read length\n"); 51 | return -1; 52 | } 53 | length = ntohl(length); 54 | 55 | memcpy(values_copy + dest_offset, values + src_offset, length); 56 | fprintf(stderr, "%d\n", values_copy[0]); 57 | // SIMPLE: Trying to solve 58 | // SIMPLE: Found diverging input 59 | // SIMPLE: stdin{{[0-3]}} 60 | // QSYM-COUNT-2: SMT 61 | // QSYM: New testcase 62 | // SIMPLE: Trying to solve 63 | // SIMPLE: Found diverging input 64 | // SIMPLE-DAG: stdin{{[0-3]}} -> #x00 65 | // SIMPLE-DAG: stdin{{[4-7]}} -> #x{{.?[^0].?}} 66 | // QSYM-COUNT-2: SMT 67 | // QSYM: New testcase 68 | // SIMPLE: Trying to solve 69 | // SIMPLE: Found diverging input 70 | // 
SIMPLE-DAG: stdin{{[0-7]}} -> #x00 71 | // QSYM-COUNT-2: SMT 72 | // QSYM: New testcase 73 | // ANY: 1 74 | 75 | void *pointer = mmap(NULL, 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 76 | puts(pointer == MAP_FAILED ? "failed" : "succeeded"); 77 | // ANY: succeeded 78 | 79 | return 0; 80 | } 81 | -------------------------------------------------------------------------------- /test/memcpy.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/memcpy.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03" | %t_32 2>&1 | %filecheck %S/memcpy.c 3 | -------------------------------------------------------------------------------- /test/memory_input.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s 17 | #include 18 | #include 19 | #include 20 | 21 | void symcc_make_symbolic(const void *start, size_t byte_length); 22 | 23 | uint64_t g_value = 0xaaaabbbbccccdddd; 24 | 25 | int main(int argc, char *argv[]) { 26 | uint64_t x = 10; 27 | uint8_t y = 0; 28 | 29 | symcc_make_symbolic(&x, sizeof(x)); 30 | symcc_make_symbolic(&y, sizeof(y)); 31 | 32 | fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); 33 | // SIMPLE: Trying to solve 34 | // SIMPLE: Found diverging input 35 | // SIMPLE-DAG: #xaa 36 | // SIMPLE-DAG: #xbb 37 | // SIMPLE-DAG: #xcc 38 | // SIMPLE-DAG: #xdd 39 | // QSYM-COUNT-2: SMT 40 | // ANY: no 41 | 42 | fprintf(stderr, "%s\n", (y == 10) ? "yes" : "no"); 43 | // SIMPLE: Trying to solve 44 | // SIMPLE: Found diverging input 45 | // y should be part of the input, just after x 46 | // SIMPLE: stdin8 -> #x0a 47 | // QSYM-COUNT-2: SMT 48 | // ANY: no 49 | 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /test/pointers.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05\x12\x34\x56\x78\x90\xab\xcd\xef" | %t 2>&1 | %filecheck %s 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | volatile int g_value; 25 | 26 | int main(int argc, char* argv[]) { 27 | int x; 28 | void *ptr; 29 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 30 | fprintf(stderr, "Failed to read x\n"); 31 | return -1; 32 | } 33 | x = ntohl(x); 34 | if (read(STDIN_FILENO, &ptr, sizeof(ptr)) != sizeof(ptr)) { 35 | fprintf(stderr, "Failed to read ptr\n"); 36 | return -1; 37 | } 38 | g_value = htonl(0x1200ab00); 39 | uint8_t *charPtr = (uint8_t*)&g_value; 40 | 41 | charPtr += 2; 42 | fprintf(stderr, "%x\n", *charPtr); 43 | // ANY: ab 44 | 45 | fprintf(stderr, "%s\n", (*charPtr == x) ? "equal" : "different"); 46 | // SIMPLE: Trying to solve 47 | // SIMPLE: Found diverging input 48 | // SIMPLE: #xab 49 | // QSYM-COUNT-2: SMT 50 | // QSYM: New testcase 51 | // ANY: different 52 | 53 | volatile int local = 0x12345678; 54 | charPtr = (uint8_t*)&local; 55 | charPtr++; 56 | fprintf(stderr, "%s\n", (*charPtr == x) ? "equal" : "different"); 57 | // SIMPLE: Trying to solve 58 | // SIMPLE: Found diverging input 59 | // SIMPLE: #x56 60 | // QSYM-COUNT-2: SMT 61 | // QSYM: New testcase 62 | // ANY: different 63 | 64 | fprintf(stderr, "%s\n", !ptr ? "null" : "not null"); 65 | // SIMPLE: Trying to solve 66 | // SIMPLE: Found diverging input 67 | // 68 | // We expect a null pointer, but since pointer length varies between 32 and 69 | // 64-bit architectures we can't just expect N times #x00. Instead, we use a 70 | // regular expression that disallows nonzero values for anything but stdin0 71 | // to stdin3 (which are part of x, not ptr). 
72 | // 73 | // SIMPLE-NOT: stdin{{[4-9]|1[0-9]}} -> #x{{.?[^0].?}} 74 | // QSYM-COUNT-2: SMT 75 | // QSYM: New testcase 76 | // ANY: not null 77 | 78 | return 0; 79 | } 80 | -------------------------------------------------------------------------------- /test/pointers.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/pointers.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05\x12\x34\x56\x78" | %t_32 2>&1 | %filecheck %S/pointers.c 3 | -------------------------------------------------------------------------------- /test/propagation_select.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O1 %s -o %t 16 | // RUN: echo xxx | %t 2>&1 | %filecheck %s 17 | // 18 | // Check that select instruction is propagating the symbolic value (issue #109) 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | char bar(char a, char b, char c) { return (a == 0xA) ? 
b : c; } 25 | 26 | int main() { 27 | char input[3] = {0}; 28 | read(0, &input, sizeof(input)); 29 | // SIMPLE: Trying to solve 30 | // SIMPLE: Found diverging input 31 | // SIMPLE: stdin0 -> #x0a 32 | // QSYM-COUNT-2: SMT 33 | // QSYM: New testcase 34 | char r = bar(input[0], input[1], input[2]); 35 | // SIMPLE: Trying to solve 36 | // SIMPLE: Found diverging input 37 | // SIMPLE-DAG: stdin2 -> #x0b 38 | // SIMPLE-DAG: stdin0 -> #x00 39 | // QSYM-COUNT-2: SMT 40 | // QSYM: New testcase 41 | // ANY: KO 42 | if (r == 0xB) 43 | printf("OK!\n"); 44 | else 45 | printf("KO\n"); 46 | return 0; 47 | } 48 | -------------------------------------------------------------------------------- /test/read.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo b | %t 2>&1 | %filecheck %s 17 | // 18 | // Check the symbolic handling of "read" 19 | 20 | #include 21 | #include 22 | 23 | int main(int argc, char* argv[]) { 24 | char c; 25 | 26 | ssize_t nbytes = read(STDIN_FILENO, &c, 1); 27 | if (nbytes != 1) 28 | return 1; 29 | 30 | // SIMPLE: Trying to solve 31 | // SIMPLE: Found diverging input 32 | // SIMPLE: stdin0 -> #x61 33 | // QSYM-COUNT-2: SMT 34 | // QSYM: New testcase 35 | if (c == 'a') 36 | fprintf(stderr, "Correct\n"); 37 | else 38 | fprintf(stderr, "Next time...\n"); 39 | // ANY: Next time... 40 | return 0; 41 | } 42 | -------------------------------------------------------------------------------- /test/read.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/read.c -o %t_32 2 | RUN: echo b | %t_32 2>&1 | %filecheck %S/read.c 3 | -------------------------------------------------------------------------------- /test/strings.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -n test | %t 2>&1 | %filecheck %s 17 | // 18 | // Test the symbolic versions of string functions. 
19 | 20 | #include 21 | #include 22 | #include 23 | 24 | int main(int argc, char *argv[]) { 25 | char buffer[5]; 26 | 27 | if (read(STDIN_FILENO, buffer, sizeof(buffer) - 1) != 28 | sizeof(buffer) - 1) { 29 | fprintf(stderr, "Failed to read the input\n"); 30 | return -1; 31 | } 32 | 33 | buffer[4] = '\0'; 34 | 35 | // Fully concrete 36 | fputs(strchr("foobar", 'o') != NULL ? "found" : "nope", stderr); 37 | // SIMPLE-NOT: Trying to solve 38 | // QSYM-NOT: SMT 39 | // ANY: found 40 | 41 | // Symbolic buffer, concrete char 42 | fputs(strchr(buffer, 'x') != NULL ? "found" : "nope", stderr); 43 | // SIMPLE-COUNT-4: Found diverging input 44 | // QSYM: SMT 45 | // ANY: nope 46 | 47 | // Concrete buffer, symbolic char 48 | fputs(strchr("test", buffer[0]) != NULL ? "found" : "nope", stderr); 49 | // SIMPLE: Trying to solve 50 | // 51 | // QSYM's back-off mechanism kicks in because we're generating too many 52 | // queries; let's not check them anymore. 53 | // 54 | // ANY: found 55 | 56 | // Symbolic buffer, symbolic char 57 | fputs(strchr(buffer, buffer[1]) != NULL ? "found" : "nope", stderr); 58 | // SIMPLE-COUNT-2: Trying to solve 59 | // ANY: found 60 | 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /test/strings.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/strings.c -o %t_32 2 | RUN: echo -n test | %t_32 2>&1 | %filecheck %S/strings.c 3 | -------------------------------------------------------------------------------- /test/structs.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 
7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | 24 | struct point { 25 | int x; 26 | int y; 27 | }; 28 | 29 | struct line { 30 | struct point start; 31 | struct point end; 32 | }; 33 | 34 | static struct point g_point = {1, 2}; 35 | static struct point g_point_array[] = {{1, 2}, {3, 4}, {5, 6}}; 36 | 37 | int main(int argc, char* argv[]) { 38 | int x; 39 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 40 | fprintf(stderr, "Failed to read x\n"); 41 | return -1; 42 | } 43 | x = ntohl(x); 44 | 45 | struct point p = {x, 17}; 46 | 47 | fprintf(stderr, "%s\n", (p.x < 100) ? "yes" : "no"); 48 | // SIMPLE: Trying to solve 49 | // SIMPLE: Found diverging input 50 | // QSYM-COUNT-2: SMT 51 | // QSYM: New testcase 52 | // ANY: yes 53 | 54 | fprintf(stderr, "%s\n", (p.y < 100) ? "yes" : "no"); 55 | // SIMPLE-NOT: Trying to solve 56 | // QSYM-NOT: SMT 57 | // ANY: yes 58 | 59 | fprintf(stderr, "%s\n", (p.x < p.y) ? "yes" : "no"); 60 | // SIMPLE: Trying to solve 61 | // SIMPLE: Found diverging input 62 | // QSYM-COUNT-2: SMT 63 | // QSYM: New testcase 64 | // ANY: yes 65 | 66 | fprintf(stderr, "%s\n", ((p.x < g_point.x) || (p.y < g_point.y)) ? "yes" : "no"); 67 | // SIMPLE: Trying to solve 68 | // SIMPLE: Found diverging input 69 | // QSYM-COUNT-2: SMT 70 | // QSYM: New testcase 71 | // ANY: no 72 | 73 | fprintf(stderr, "%s\n", (g_point_array[1].x < x) ? 
"yes" : "no"); 74 | // SIMPLE: Trying to solve 75 | // SIMPLE: Found diverging input 76 | // QSYM-COUNT-2: SMT 77 | // QSYM: New testcase 78 | // ANY: yes 79 | 80 | // Nested structs 81 | 82 | struct line l = {{0, 0}, {5, 5}}; 83 | 84 | fprintf(stderr, "%s\n", (l.end.x > x) ? "yes" : "no"); 85 | // SIMPLE: Trying to solve 86 | // SIMPLE: Found diverging input 87 | // QSYM-COUNT-2: SMT 88 | // QSYM: New testcase 89 | // ANY: no 90 | 91 | return 0; 92 | } 93 | -------------------------------------------------------------------------------- /test/structs.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/structs.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/structs.c 3 | -------------------------------------------------------------------------------- /test/switch.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s 17 | // 18 | // Check the symbolic handling of switch statements 19 | 20 | #include 21 | #include 22 | 23 | #include 24 | #include 25 | 26 | int main(int argc, char* argv[]) { 27 | int x; 28 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 29 | fprintf(stderr, "Failed to read x\n"); 30 | return -1; 31 | } 32 | x = ntohl(x); 33 | 34 | int foo = 0; 35 | switch (x) { 36 | case 3: 37 | foo = 0; 38 | fprintf(stderr, "x is 3\n"); 39 | break; 40 | case 4: 41 | foo = 1; 42 | // Deliberately not printing anything here, which will generate a direct 43 | // jump to the block after the switch statement. 44 | break; 45 | case 5: 46 | foo = 2; 47 | fprintf(stderr, "x is 5\n"); 48 | break; 49 | default: 50 | foo = 3; 51 | fprintf(stderr, "x is something else\n"); 52 | break; 53 | } 54 | // SIMPLE: Trying to solve 55 | // SIMPLE: Found diverging input 56 | // QSYM-COUNT-2: SMT 57 | // QSYM: New testcase 58 | // ANY: x is 5 59 | 60 | fprintf(stderr, "%d\n", foo); 61 | // ANY: 2 62 | 63 | // When the value to branch on is concrete there should be no solver 64 | // interaction. 
65 | volatile int y = 17; 66 | switch (y) { 67 | case 3: 68 | fprintf(stderr, "y is 3\n"); 69 | break; 70 | case 4: 71 | fprintf(stderr, "y is 4\n"); 72 | break; 73 | case 5: 74 | fprintf(stderr, "y is 5\n"); 75 | break; 76 | default: 77 | fprintf(stderr, "y is something else\n"); 78 | break; 79 | } 80 | // SIMPLE-NOT: Trying to solve 81 | // QSYM-NOT: SMT 82 | // ANY: y is something else 83 | 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /test/switch.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/switch.c -o %t_32 2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/switch.c 3 | -------------------------------------------------------------------------------- /test/symbolic_structs.ll: -------------------------------------------------------------------------------- 1 | ; This file is part of SymCC. 2 | ; 3 | ; SymCC is free software: you can redistribute it and/or modify it under the 4 | ; terms of the GNU General Public License as published by the Free Software 5 | ; Foundation, either version 3 of the License, or (at your option) any later 6 | ; version. 7 | ; 8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | ; 12 | ; You should have received a copy of the GNU General Public License along with 13 | ; SymCC. If not, see . 14 | 15 | ; Verify that we correctly insert into symbolic struct values. We insert values 16 | ; of various types into a symbolic struct, thus triggering expression updates. 17 | ; Compiling this code with SymCC and verifying that the resulting binary exits 18 | ; cleanly shows that SymCC's instrumentation doesn't break the execution of the 19 | ; program. 
20 | ; 21 | ; This test reproduces a bug where inserting a concrete floating-point value 22 | ; into a symbolic struct would lead to a program crash (eurecom-s3/symcc#138). 23 | ; 24 | ; Since the bitcode is written by hand, we first run llc on it because it 25 | ; performs a validity check, whereas Clang doesn't. 26 | 27 | ; RUN: llc %s -o /dev/null 28 | ; RUN: %symcc %s -o %t 29 | ; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 30 | 31 | target triple = "x86_64-pc-linux-gnu" 32 | 33 | ; The struct type of our symbolic value. Include a floating-point value and a 34 | ; Boolean because they're represented with non-bitvector solver variables 35 | ; (reproducing eurecom-s3/symcc#138). 36 | %struct_type = type { i8, i32, i8, float, i1 } 37 | 38 | define i32 @main(i32 %argc, i8** %argv) { 39 | ; Create a symbolic struct value that we can subsequently insert values into. 40 | %struct_value_mem = alloca %struct_type 41 | call void @symcc_make_symbolic(%struct_type* %struct_value_mem, i64 20) 42 | %symbolic_struct = load %struct_type, %struct_type* %struct_value_mem 43 | 44 | ; Insert values of various types, triggering the creation of new expressions. 45 | insertvalue %struct_type %symbolic_struct, i32 5, 1 46 | insertvalue %struct_type %symbolic_struct, float 42.0, 3 47 | insertvalue %struct_type %symbolic_struct, i1 1, 4 48 | 49 | ret i32 0 50 | } 51 | 52 | declare void @symcc_make_symbolic(%struct_type*, i64) 53 | -------------------------------------------------------------------------------- /test/test_case_handler.c: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 
7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 14 | 15 | // RUN: %symcc -O2 %s -o %t 16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s 17 | #include 18 | #include 19 | #include 20 | 21 | #define MAGIC 0xab 22 | 23 | void symcc_make_symbolic(const void *start, size_t byte_length); 24 | typedef void (*TestCaseHandler)(const void *, size_t); 25 | void symcc_set_test_case_handler(TestCaseHandler handler); 26 | 27 | int solved = 0; 28 | int num_test_cases = 0; 29 | 30 | void handle_test_case(const void *data, size_t data_length) { 31 | num_test_cases++; 32 | if (data_length == 1 && ((const uint8_t *)data)[0] == MAGIC) 33 | solved = 1; 34 | } 35 | 36 | int main(int argc, char *argv[]) { 37 | symcc_set_test_case_handler(handle_test_case); 38 | // SIMPLE: Warning: test-case handlers 39 | 40 | uint8_t input = 0; 41 | symcc_make_symbolic(&input, sizeof(input)); 42 | 43 | fprintf(stderr, "%s\n", (input == MAGIC) ? 
"yes" : "no"); 44 | // SIMPLE: Trying to solve 45 | // SIMPLE: Found diverging input 46 | // SIMPLE: stdin0 -> #xab 47 | // QSYM: SMT 48 | // ANY: no 49 | 50 | fprintf(stderr, "%d\n", solved); 51 | // QSYM: 1 52 | // SIMPLE: 0 53 | 54 | fprintf(stderr, "%d\n", num_test_cases); 55 | // QSYM: 1 56 | // SIMPLE: 0 57 | 58 | return 0; 59 | } 60 | -------------------------------------------------------------------------------- /test/uadd_sat.ll: -------------------------------------------------------------------------------- 1 | ; RUN: %symcc -O2 %s -o %t 2 | ; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s 3 | 4 | %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } 5 | %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } 6 | 7 | @g_value = dso_local local_unnamed_addr global i16 40, align 2 8 | @stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 9 | @.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 10 | @.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 11 | @.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 12 | @.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 13 | 14 | ; Function Attrs: nofree nounwind uwtable 15 | define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { 16 | entry: 17 | %x = alloca i16, align 2 18 | %0 = bitcast i16* %x to i8* 19 | %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 20 | %cmp.not = icmp eq i64 %call, 2 21 | %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 22 | br i1 %cmp.not, label %if.end, label %if.then 23 | 24 | if.then: ; preds = %entry 25 | %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) 
#6 26 | br label %cleanup 27 | 28 | if.end: ; preds = %entry 29 | %3 = load i16, i16* %x, align 2 30 | %4 = load i16, i16* @g_value, align 2 31 | %add = call i16 @llvm.uadd.sat.i16(i16 %3, i16 %4) 32 | %cmp = icmp eq i16 %add, 43981 33 | %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) 34 | ; SIMPLE: Trying to solve 35 | ; SIMPLE: Found diverging input 36 | ; SIMPLE-DAG: stdin0 -> #xa5 37 | ; SIMPLE-DAG: stdin1 -> #xab 38 | ; ANY: no 39 | %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 40 | br label %cleanup 41 | 42 | cleanup: ; preds = %if.end, %if.then 43 | %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] 44 | ret i32 %retval.0 45 | } 46 | 47 | declare i64 @read(i32, i8* nocapture, i64) 48 | declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) 
49 | declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) 50 | declare i16 @llvm.uadd.sat.i16(i16, i16) 51 | -------------------------------------------------------------------------------- /test/uadd_sat.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/uadd_sat.ll -o %t_32 2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s 3 | -------------------------------------------------------------------------------- /test/usub_sat.ll: -------------------------------------------------------------------------------- 1 | ; RUN: %symcc -O2 %s -o %t 2 | ; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s 3 | 4 | %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } 5 | %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } 6 | 7 | @g_value = dso_local local_unnamed_addr global i16 40, align 2 8 | @stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8 9 | @.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1 10 | @.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 11 | @.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1 12 | @.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1 13 | 14 | ; Function Attrs: nofree nounwind uwtable 15 | define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { 16 | entry: 17 | %x = alloca i16, align 2 18 | %0 = bitcast i16* %x to i8* 19 | %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 20 | %cmp.not = icmp eq i64 %call, 2 21 | %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 22 | br i1 %cmp.not, label %if.end, label %if.then 23 | 24 | if.then: ; preds = %entry 25 | %2 = call i64 @fwrite(i8* 
getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6 26 | br label %cleanup 27 | 28 | if.end: ; preds = %entry 29 | %3 = load i16, i16* %x, align 2 30 | %4 = load i16, i16* @g_value, align 2 31 | %add = call i16 @llvm.usub.sat.i16(i16 %3, i16 %4) 32 | %cmp = icmp eq i16 %add, 43981 33 | %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0) 34 | ; SIMPLE: Trying to solve 35 | ; SIMPLE: Found diverging input 36 | ; SIMPLE-DAG: stdin0 -> #xf5 37 | ; SIMPLE-DAG: stdin1 -> #xab 38 | ; ANY: no 39 | %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6 40 | br label %cleanup 41 | 42 | cleanup: ; preds = %if.end, %if.then 43 | %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] 44 | ret i32 %retval.0 45 | } 46 | 47 | declare i64 @read(i32, i8* nocapture, i64) 48 | declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...) 49 | declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) 50 | declare i16 @llvm.usub.sat.i16(i16, i16) 51 | -------------------------------------------------------------------------------- /test/usub_sat.test32: -------------------------------------------------------------------------------- 1 | RUN: %symcc -m32 -O2 %S/usub_sat.ll -o %t_32 2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s 3 | -------------------------------------------------------------------------------- /util/pure_concolic_execution.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -u 4 | 5 | function usage() { 6 | echo "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] [-f FAILED_DIR] TARGET..." 
7 | echo 8 | echo "Run SymCC-instrumented TARGET in a loop, feeding newly generated inputs back " 9 | echo "into it. Initial inputs are expected in INPUT_DIR, and new inputs are " 10 | echo "continuously read from there. If OUTPUT_DIR is specified, a copy of the corpus " 11 | echo "and of each generated input is preserved there. TARGET may contain the special " 12 | echo "string \"@@\", which is replaced with the name of the current input file." 13 | echo "If FAILED_DIR is specified, a copy of the failing test cases is preserved there." 14 | echo 15 | echo "Note that SymCC never changes the length of the input, so be sure that the " 16 | echo "initial inputs cover all required input lengths." 17 | } 18 | 19 | while getopts "i:o:f:" opt; do 20 | case "$opt" in 21 | i) 22 | in=$OPTARG 23 | ;; 24 | o) 25 | out=$OPTARG 26 | ;; 27 | f) 28 | failed_dir=$OPTARG 29 | ;; 30 | *) 31 | usage 32 | exit 1 33 | ;; 34 | esac 35 | done 36 | shift $((OPTIND-1)) 37 | target_args=("$@") 38 | target_args[0]=$(realpath "${target_args[0]}") 39 | target="${target_args[*]}" # keep "target" a plain string: reusing the array name only overwrote element 0 and duplicated every argument 40 | timeout="timeout -k 5 90" 41 | 42 | if [[ ! -v in ]]; then 43 | echo "Please specify the input directory!" 44 | usage 45 | exit 1 46 | fi 47 | 48 | # Create the work environment 49 | work_dir=$(mktemp -d) 50 | mkdir $work_dir/{next,symcc_out} 51 | touch $work_dir/analyzed_inputs 52 | if [[ -v out ]]; then 53 | mkdir -p $out 54 | fi 55 | if [[ -v failed_dir ]]; then 56 | mkdir -p "$failed_dir" 57 | fi 58 | 59 | function cleanup() { 60 | rm -rf --preserve-root -- $work_dir 61 | } 62 | 63 | trap cleanup EXIT 64 | 65 | # Copy one file to the destination directory, renaming it according to its hash. 66 | function copy_file_with_unique_name() { 67 | local file_name="$1" 68 | local dest_dir="$2" 69 | 70 | local dest="$dest_dir/$(sha256sum "$file_name" | cut -d' ' -f1)" 71 | cp "$file_name" "$dest" 72 | 73 | } 74 | 75 | # Copy all files in the source directory to the destination directory, renaming 76 | # them according to their hash. 
77 | function copy_with_unique_name() { 78 | local source_dir="$1" 79 | local dest_dir="$2" 80 | 81 | if [ -n "$(ls -A $source_dir)" ]; then 82 | local f 83 | for f in $source_dir/*; do 84 | copy_file_with_unique_name "$f" "$dest_dir" 85 | done 86 | fi 87 | } 88 | 89 | # Copy files from the source directory into the next generation. 90 | function add_to_next_generation() { 91 | local source_dir="$1" 92 | copy_with_unique_name "$source_dir" "$work_dir/next" 93 | } 94 | 95 | # If an output directory is set, copy the files in the source directory there. 96 | function maybe_export() { 97 | local source_dir="$1" 98 | if [[ -v out ]]; then 99 | copy_with_unique_name "$source_dir" "$out" 100 | fi 101 | } 102 | 103 | # Remove input files that have already been analysed (exact name match); this prevents an infinite loop. 104 | function remove_analysed() { 105 | local source_dir="$1" 106 | local f 107 | for f in $source_dir/*; do 108 | if grep -qxF -- "$(basename "$f")" "$work_dir/analyzed_inputs"; then 109 | rm -- "$f" 110 | fi 111 | done 112 | } 113 | 114 | # Copy those files from the input directory to the next generation that haven't 115 | # been analyzed yet. 116 | function maybe_import() { 117 | if [ -n "$(ls -A $in)" ]; then 118 | local f 119 | for f in $in/*; do 120 | if grep -qxF -- "$(basename "$f")" "$work_dir/analyzed_inputs"; then 121 | continue 122 | fi 123 | 124 | if [ -e "$work_dir/next/$(basename $f)" ]; then 125 | continue 126 | fi 127 | 128 | echo "Importing $f from the input directory" 129 | cp "$f" "$work_dir/next" 130 | done 131 | fi 132 | } 133 | 134 | # If the input file caused non 0 return code, then copy it to the FAILED_DIR. 
135 | function save_failed() { 136 | local ret_code=$1 137 | local input_file="$2" 138 | if [ $ret_code -ne 0 ] && [[ -v failed_dir ]] ; then 139 | copy_file_with_unique_name "$input_file" "$failed_dir" 140 | fi 141 | } 142 | 143 | # Set up the shell environment 144 | export SYMCC_OUTPUT_DIR=$work_dir/symcc_out 145 | export SYMCC_ENABLE_LINEARIZATION=1 146 | # export SYMCC_AFL_COVERAGE_MAP=$work_dir/map 147 | 148 | # Run generation after generation until we don't generate new inputs anymore 149 | gen_count=0 150 | while true; do 151 | # Initialize the generation 152 | maybe_import 153 | mv $work_dir/{next,cur} 154 | mkdir $work_dir/next 155 | 156 | # Run it (or wait if there's nothing to run on) 157 | if [ -n "$(ls -A $work_dir/cur)" ]; then 158 | echo "Generation $gen_count..." 159 | 160 | for f in $work_dir/cur/*; do 161 | echo "Running on $f" 162 | if [[ "$target " =~ " @@ " ]]; then 163 | env SYMCC_INPUT_FILE=$f $timeout ${target[@]/@@/$f} >/dev/null 2>&1 164 | ret_code=$? 165 | else 166 | $timeout $target <$f >/dev/null 2>&1 167 | ret_code=$? 168 | fi 169 | 170 | # Make the new test cases part of the next generation 171 | add_to_next_generation $work_dir/symcc_out 172 | maybe_export $work_dir/symcc_out 173 | remove_analysed $work_dir/next 174 | save_failed $ret_code "$f" 175 | echo $(basename $f) >> $work_dir/analyzed_inputs 176 | rm -f $f 177 | done 178 | 179 | rm -rf $work_dir/cur 180 | gen_count=$((gen_count+1)) 181 | else 182 | echo "Waiting for more input..." 
183 | rmdir $work_dir/cur 184 | sleep 5 185 | fi 186 | done 187 | -------------------------------------------------------------------------------- /util/quicktest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script is to make a quick test of SymCC with Qsym backend, it 4 | # is supposed to work on ubuntu groovy, e.g., after running: 5 | 6 | # vagrant init ubuntu/groovy64 7 | # vagrant up 8 | # vagrant ssh 9 | 10 | # exit when any command fails 11 | set -e 12 | 13 | if ! uname -a | grep ubuntu-groovy>> /dev/null ; then 14 | echo "** Warning: Script known to work for ubuntu groovy only **" 15 | fi 16 | 17 | # update system 18 | sudo apt-get update 19 | sudo apt-get upgrade -y 20 | 21 | # install requirements 22 | sudo apt-get install -y git cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python3-pip zlib1g-dev 23 | sudo pip3 install lit 24 | 25 | # Clone project 26 | git clone https://github.com/eurecom-s3/symcc.git 27 | cd symcc 28 | 29 | # init/update submodules 30 | git submodule update --init --recursive 31 | 32 | # build 33 | mkdir ../symcc-build 34 | cd ../symcc-build 35 | cmake -G Ninja -DSYMCC_RT_BACKEND=qsym -DZ3_TRUST_SYSTEM_VERSION=on ../symcc 36 | ninja check 37 | 38 | # create a test case 39 | cat > test.c << 'EOF' 40 | #include 41 | #include 42 | #include 43 | 44 | int foo(int a, int b) { 45 | if (2 * a < b) 46 | return a; 47 | else if (a % b) 48 | return b; 49 | else 50 | return a + b; 51 | } 52 | 53 | int main(int argc, char* argv[]) { 54 | int x; 55 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { 56 | printf("Failed to read x\n"); 57 | return -1; 58 | } 59 | printf("%d\n", foo(x, 7)); 60 | return 0; 61 | } 62 | 63 | EOF 64 | 65 | # test it 66 | ./symcc test.c -o test.out 67 | mkdir -p results 68 | export SYMCC_OUTPUT_DIR=`pwd`/results 69 | echo 'aaaa' | ./test.out 70 | cat ${SYMCC_OUTPUT_DIR}/000000 | ./test.out 71 | 72 | # TODO: this is not a 
very precise regression test, generated testcase 73 | # may be incorrect, but binding to a specific test case may be too 74 | # narrow (fail if test isn't exactly the expected result, but a 75 | # different valid one), this should be improved. 76 | if [ -f ${SYMCC_OUTPUT_DIR}/000001 ]; then 77 | echo "SUCCESS: looks like this build of SymCC is working, type vagrant ssh to interact with it !" 78 | fi 79 | -------------------------------------------------------------------------------- /util/symcc_fuzzing_helper/.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # These are backup files generated by rustfmt 6 | **/*.rs.bk -------------------------------------------------------------------------------- /util/symcc_fuzzing_helper/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.8" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anyhow" 16 | version = "1.0.26" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" 19 | 20 | [[package]] 21 | name = "atty" 22 | version = "0.2.14" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" 25 | dependencies = [ 26 | "hermit-abi", 27 | "libc", 28 | "winapi", 29 | ] 30 | 31 | [[package]] 32 | name = "autocfg" 33 | version = "1.1.0" 34 | source = "registry+https://github.com/rust-lang/crates.io-index" 35 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 36 | 37 | [[package]] 38 | name = "bitflags" 39 | version = "1.2.1" 40 | source = "registry+https://github.com/rust-lang/crates.io-index" 41 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" 42 | 43 | [[package]] 44 | name = "c2-chacha" 45 | version = "0.2.3" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | checksum = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" 48 | dependencies = [ 49 | "ppv-lite86", 50 | ] 51 | 52 | [[package]] 53 | name = "cfg-if" 54 | version = "0.1.10" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 57 | 58 | [[package]] 59 | name = "clap" 60 | version = "3.1.18" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b" 63 | dependencies = [ 64 | "atty", 65 | "bitflags", 66 | 
"clap_derive", 67 | "clap_lex", 68 | "indexmap", 69 | "lazy_static", 70 | "strsim", 71 | "termcolor", 72 | "textwrap", 73 | ] 74 | 75 | [[package]] 76 | name = "clap_derive" 77 | version = "3.1.18" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c" 80 | dependencies = [ 81 | "heck", 82 | "proc-macro-error", 83 | "proc-macro2", 84 | "quote", 85 | "syn", 86 | ] 87 | 88 | [[package]] 89 | name = "clap_lex" 90 | version = "0.2.0" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213" 93 | dependencies = [ 94 | "os_str_bytes", 95 | ] 96 | 97 | [[package]] 98 | name = "env_logger" 99 | version = "0.7.1" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" 102 | dependencies = [ 103 | "atty", 104 | "humantime", 105 | "log", 106 | "regex", 107 | "termcolor", 108 | ] 109 | 110 | [[package]] 111 | name = "getrandom" 112 | version = "0.1.14" 113 | source = "registry+https://github.com/rust-lang/crates.io-index" 114 | checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" 115 | dependencies = [ 116 | "cfg-if", 117 | "libc", 118 | "wasi", 119 | ] 120 | 121 | [[package]] 122 | name = "hashbrown" 123 | version = "0.11.2" 124 | source = "registry+https://github.com/rust-lang/crates.io-index" 125 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" 126 | 127 | [[package]] 128 | name = "heck" 129 | version = "0.4.0" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" 132 | 133 | [[package]] 134 | name = "hermit-abi" 135 | version = "0.1.6" 136 | source = "registry+https://github.com/rust-lang/crates.io-index" 137 | checksum = 
"eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772" 138 | dependencies = [ 139 | "libc", 140 | ] 141 | 142 | [[package]] 143 | name = "humantime" 144 | version = "1.3.0" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" 147 | dependencies = [ 148 | "quick-error", 149 | ] 150 | 151 | [[package]] 152 | name = "indexmap" 153 | version = "1.8.2" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a" 156 | dependencies = [ 157 | "autocfg", 158 | "hashbrown", 159 | ] 160 | 161 | [[package]] 162 | name = "lazy_static" 163 | version = "1.4.0" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 166 | 167 | [[package]] 168 | name = "libc" 169 | version = "0.2.66" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" 172 | 173 | [[package]] 174 | name = "log" 175 | version = "0.4.8" 176 | source = "registry+https://github.com/rust-lang/crates.io-index" 177 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" 178 | dependencies = [ 179 | "cfg-if", 180 | ] 181 | 182 | [[package]] 183 | name = "memchr" 184 | version = "2.3.0" 185 | source = "registry+https://github.com/rust-lang/crates.io-index" 186 | checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223" 187 | 188 | [[package]] 189 | name = "os_str_bytes" 190 | version = "6.1.0" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" 193 | 194 | [[package]] 195 | name = "ppv-lite86" 196 | version = "0.2.6" 197 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 199 | 200 | [[package]] 201 | name = "proc-macro-error" 202 | version = "1.0.4" 203 | source = "registry+https://github.com/rust-lang/crates.io-index" 204 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" 205 | dependencies = [ 206 | "proc-macro-error-attr", 207 | "proc-macro2", 208 | "quote", 209 | "syn", 210 | "version_check", 211 | ] 212 | 213 | [[package]] 214 | name = "proc-macro-error-attr" 215 | version = "1.0.4" 216 | source = "registry+https://github.com/rust-lang/crates.io-index" 217 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" 218 | dependencies = [ 219 | "proc-macro2", 220 | "quote", 221 | "version_check", 222 | ] 223 | 224 | [[package]] 225 | name = "proc-macro2" 226 | version = "1.0.39" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" 229 | dependencies = [ 230 | "unicode-ident", 231 | ] 232 | 233 | [[package]] 234 | name = "quick-error" 235 | version = "1.2.3" 236 | source = "registry+https://github.com/rust-lang/crates.io-index" 237 | checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" 238 | 239 | [[package]] 240 | name = "quote" 241 | version = "1.0.18" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" 244 | dependencies = [ 245 | "proc-macro2", 246 | ] 247 | 248 | [[package]] 249 | name = "rand" 250 | version = "0.7.3" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 253 | dependencies = [ 254 | "getrandom", 255 | "libc", 256 | "rand_chacha", 257 | "rand_core", 258 | "rand_hc", 259 | ] 260 | 261 | [[package]] 
262 | name = "rand_chacha" 263 | version = "0.2.1" 264 | source = "registry+https://github.com/rust-lang/crates.io-index" 265 | checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" 266 | dependencies = [ 267 | "c2-chacha", 268 | "rand_core", 269 | ] 270 | 271 | [[package]] 272 | name = "rand_core" 273 | version = "0.5.1" 274 | source = "registry+https://github.com/rust-lang/crates.io-index" 275 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 276 | dependencies = [ 277 | "getrandom", 278 | ] 279 | 280 | [[package]] 281 | name = "rand_hc" 282 | version = "0.2.0" 283 | source = "registry+https://github.com/rust-lang/crates.io-index" 284 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 285 | dependencies = [ 286 | "rand_core", 287 | ] 288 | 289 | [[package]] 290 | name = "redox_syscall" 291 | version = "0.1.56" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" 294 | 295 | [[package]] 296 | name = "regex" 297 | version = "1.3.4" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" 300 | dependencies = [ 301 | "aho-corasick", 302 | "memchr", 303 | "regex-syntax", 304 | "thread_local", 305 | ] 306 | 307 | [[package]] 308 | name = "regex-syntax" 309 | version = "0.6.14" 310 | source = "registry+https://github.com/rust-lang/crates.io-index" 311 | checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" 312 | 313 | [[package]] 314 | name = "remove_dir_all" 315 | version = "0.5.2" 316 | source = "registry+https://github.com/rust-lang/crates.io-index" 317 | checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" 318 | dependencies = [ 319 | "winapi", 320 | ] 321 | 322 | [[package]] 323 | name = "strsim" 324 | version = "0.10.0" 325 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 326 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 327 | 328 | [[package]] 329 | name = "symcc_fuzzing_helper" 330 | version = "0.1.0" 331 | dependencies = [ 332 | "anyhow", 333 | "clap", 334 | "env_logger", 335 | "log", 336 | "regex", 337 | "tempfile", 338 | ] 339 | 340 | [[package]] 341 | name = "syn" 342 | version = "1.0.96" 343 | source = "registry+https://github.com/rust-lang/crates.io-index" 344 | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" 345 | dependencies = [ 346 | "proc-macro2", 347 | "quote", 348 | "unicode-ident", 349 | ] 350 | 351 | [[package]] 352 | name = "tempfile" 353 | version = "3.1.0" 354 | source = "registry+https://github.com/rust-lang/crates.io-index" 355 | checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" 356 | dependencies = [ 357 | "cfg-if", 358 | "libc", 359 | "rand", 360 | "redox_syscall", 361 | "remove_dir_all", 362 | "winapi", 363 | ] 364 | 365 | [[package]] 366 | name = "termcolor" 367 | version = "1.1.3" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 370 | dependencies = [ 371 | "winapi-util", 372 | ] 373 | 374 | [[package]] 375 | name = "textwrap" 376 | version = "0.15.0" 377 | source = "registry+https://github.com/rust-lang/crates.io-index" 378 | checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" 379 | 380 | [[package]] 381 | name = "thread_local" 382 | version = "1.0.1" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14" 385 | dependencies = [ 386 | "lazy_static", 387 | ] 388 | 389 | [[package]] 390 | name = "unicode-ident" 391 | version = "1.0.0" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | checksum = 
"d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" 394 | 395 | [[package]] 396 | name = "version_check" 397 | version = "0.9.4" 398 | source = "registry+https://github.com/rust-lang/crates.io-index" 399 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 400 | 401 | [[package]] 402 | name = "wasi" 403 | version = "0.9.0+wasi-snapshot-preview1" 404 | source = "registry+https://github.com/rust-lang/crates.io-index" 405 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 406 | 407 | [[package]] 408 | name = "winapi" 409 | version = "0.3.8" 410 | source = "registry+https://github.com/rust-lang/crates.io-index" 411 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" 412 | dependencies = [ 413 | "winapi-i686-pc-windows-gnu", 414 | "winapi-x86_64-pc-windows-gnu", 415 | ] 416 | 417 | [[package]] 418 | name = "winapi-i686-pc-windows-gnu" 419 | version = "0.4.0" 420 | source = "registry+https://github.com/rust-lang/crates.io-index" 421 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 422 | 423 | [[package]] 424 | name = "winapi-util" 425 | version = "0.1.3" 426 | source = "registry+https://github.com/rust-lang/crates.io-index" 427 | checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" 428 | dependencies = [ 429 | "winapi", 430 | ] 431 | 432 | [[package]] 433 | name = "winapi-x86_64-pc-windows-gnu" 434 | version = "0.4.0" 435 | source = "registry+https://github.com/rust-lang/crates.io-index" 436 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 437 | -------------------------------------------------------------------------------- /util/symcc_fuzzing_helper/Cargo.toml: -------------------------------------------------------------------------------- 1 | # This file is part of SymCC. 
2 | # 3 | # SymCC is free software: you can redistribute it and/or modify it under the 4 | # terms of the GNU General Public License as published by the Free Software 5 | # Foundation, either version 3 of the License, or (at your option) any later 6 | # version. 7 | # 8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | # 12 | # You should have received a copy of the GNU General Public License along with 13 | # SymCC. If not, see . 14 | 15 | [package] 16 | name = "symcc_fuzzing_helper" 17 | version = "0.1.0" 18 | authors = ["Sebastian Poeplau "] 19 | edition = "2018" 20 | license = "GPL-3.0-or-later" 21 | 22 | [dependencies] 23 | clap = { version = "3", features = ["derive"] } 24 | tempfile = "3.1" 25 | anyhow = "1.0" 26 | log = "0.4.0" 27 | env_logger = "0.7.1" 28 | regex = "1" 29 | -------------------------------------------------------------------------------- /util/symcc_fuzzing_helper/src/main.rs: -------------------------------------------------------------------------------- 1 | // This file is part of SymCC. 2 | // 3 | // SymCC is free software: you can redistribute it and/or modify it under the 4 | // terms of the GNU General Public License as published by the Free Software 5 | // Foundation, either version 3 of the License, or (at your option) any later 6 | // version. 7 | // 8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY 9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR 10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details. 11 | // 12 | // You should have received a copy of the GNU General Public License along with 13 | // SymCC. If not, see . 
14 | 15 | mod symcc; 16 | 17 | use anyhow::{Context, Result}; 18 | use clap::{self, StructOpt}; 19 | use std::collections::HashSet; 20 | use std::fs; 21 | use std::fs::File; 22 | use std::io::Write; 23 | use std::path::{Path, PathBuf}; 24 | use std::thread; 25 | use std::time::{Duration, Instant}; 26 | use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir}; 27 | use tempfile::tempdir; 28 | 29 | const STATS_INTERVAL_SEC: u64 = 60; 30 | 31 | // TODO extend timeout when idle? Possibly reprocess previously timed-out 32 | // inputs. 33 | 34 | #[derive(Debug, StructOpt)] 35 | #[clap(about = "Make SymCC collaborate with AFL.")] 36 | struct CLI { 37 | /// The name of the fuzzer to work with 38 | #[clap(short = 'a')] 39 | fuzzer_name: String, 40 | 41 | /// The AFL output directory 42 | #[clap(short = 'o')] 43 | output_dir: PathBuf, 44 | 45 | /// Name to use for SymCC 46 | #[clap(short = 'n')] 47 | name: String, 48 | 49 | /// Enable verbose logging 50 | #[clap(short = 'v')] 51 | verbose: bool, 52 | 53 | /// Program under test 54 | command: Vec, 55 | } 56 | 57 | /// Execution statistics. 58 | #[derive(Debug, Default)] 59 | struct Stats { 60 | /// Number of successful executions. 61 | total_count: u32, 62 | 63 | /// Time spent in successful executions of SymCC. 64 | total_time: Duration, 65 | 66 | /// Time spent in the solver as part of successfully running SymCC. 67 | solver_time: Option, 68 | 69 | /// Number of failed executions. 70 | failed_count: u32, 71 | 72 | /// Time spent in failed SymCC executions. 
73 | failed_time: Duration, 74 | } 75 | 76 | impl Stats { 77 | fn add_execution(&mut self, result: &symcc::SymCCResult) { 78 | if result.killed { 79 | self.failed_count += 1; 80 | self.failed_time += result.time; 81 | } else { 82 | self.total_count += 1; 83 | self.total_time += result.time; 84 | self.solver_time = match (self.solver_time, result.solver_time) { 85 | (None, None) => None, 86 | (Some(t), None) => Some(t), // no queries in this execution 87 | (None, Some(t)) => Some(t), 88 | (Some(a), Some(b)) => Some(a + b), 89 | }; 90 | } 91 | } 92 | 93 | fn log(&self, out: &mut impl Write) -> Result<()> { 94 | writeln!(out, "Successful executions: {}", self.total_count)?; 95 | writeln!( 96 | out, 97 | "Time in successful executions: {}ms", 98 | self.total_time.as_millis() 99 | )?; 100 | 101 | if self.total_count > 0 { 102 | writeln!( 103 | out, 104 | "Avg time per successful execution: {}ms", 105 | (self.total_time / self.total_count).as_millis() 106 | )?; 107 | } 108 | 109 | if let Some(st) = self.solver_time { 110 | writeln!( 111 | out, 112 | "Solver time (successful executions): {}ms", 113 | st.as_millis() 114 | )?; 115 | 116 | if self.total_time.as_secs() > 0 { 117 | let solver_share = 118 | st.as_millis() as f64 / self.total_time.as_millis() as f64 * 100_f64; 119 | writeln!( 120 | out, 121 | "Solver time share (successful executions): {:.2}% (-> {:.2}% in execution)", 122 | solver_share, 123 | 100_f64 - solver_share 124 | )?; 125 | writeln!( 126 | out, 127 | "Avg solver time per successful execution: {}ms", 128 | (st / self.total_count).as_millis() 129 | )?; 130 | } 131 | } 132 | 133 | writeln!(out, "Failed executions: {}", self.failed_count)?; 134 | writeln!( 135 | out, 136 | "Time spent on failed executions: {}ms", 137 | self.failed_time.as_millis() 138 | )?; 139 | 140 | if self.failed_count > 0 { 141 | writeln!( 142 | out, 143 | "Avg time in failed executions: {}ms", 144 | (self.failed_time / self.failed_count).as_millis() 145 | )?; 146 | } 147 | 148 | 
writeln!( 149 | out, 150 | "--------------------------------------------------------------------------------" 151 | )?; 152 | 153 | Ok(()) 154 | } 155 | } 156 | 157 | /// Mutable run-time state. 158 | /// 159 | /// This is a collection of the state we update during execution. 160 | struct State { 161 | /// The cumulative coverage of all test cases generated so far. 162 | current_bitmap: AflMap, 163 | 164 | /// The AFL test cases that have been analyzed so far. 165 | processed_files: HashSet, 166 | 167 | /// The place to put new and useful test cases. 168 | queue: TestcaseDir, 169 | 170 | /// The place for new test cases that time out. 171 | hangs: TestcaseDir, 172 | 173 | /// The place for new test cases that crash. 174 | crashes: TestcaseDir, 175 | 176 | /// Run-time statistics. 177 | stats: Stats, 178 | 179 | /// When did we last output the statistics? 180 | last_stats_output: Instant, 181 | 182 | /// Write statistics to this file. 183 | stats_file: File, 184 | } 185 | 186 | impl State { 187 | /// Initialize the run-time environment in the given output directory. 188 | /// 189 | /// This involves creating the output directory and all required 190 | /// subdirectories. 191 | fn initialize(output_dir: impl AsRef) -> Result { 192 | let symcc_dir = output_dir.as_ref(); 193 | 194 | fs::create_dir(&symcc_dir).with_context(|| { 195 | format!("Failed to create SymCC's directory {}", symcc_dir.display()) 196 | })?; 197 | let symcc_queue = 198 | TestcaseDir::new(symcc_dir.join("queue")).context("Failed to create SymCC's queue")?; 199 | let symcc_hangs = TestcaseDir::new(symcc_dir.join("hangs"))?; 200 | let symcc_crashes = TestcaseDir::new(symcc_dir.join("crashes"))?; 201 | let stats_file = File::create(symcc_dir.join("stats"))?; 202 | 203 | Ok(State { 204 | current_bitmap: AflMap::new(), 205 | processed_files: HashSet::new(), 206 | queue: symcc_queue, 207 | hangs: symcc_hangs, 208 | crashes: symcc_crashes, 209 | stats: Default::default(), // Is this bad style? 
210 | last_stats_output: Instant::now(), 211 | stats_file, 212 | }) 213 | } 214 | 215 | /// Run a single input through SymCC and process the new test cases it 216 | /// generates. 217 | fn test_input( 218 | &mut self, 219 | input: impl AsRef, 220 | symcc: &SymCC, 221 | afl_config: &AflConfig, 222 | ) -> Result<()> { 223 | log::info!("Running on input {}", input.as_ref().display()); 224 | 225 | let tmp_dir = tempdir() 226 | .context("Failed to create a temporary directory for this execution of SymCC")?; 227 | 228 | let mut num_interesting = 0u64; 229 | let mut num_total = 0u64; 230 | 231 | let symcc_result = symcc 232 | .run(&input, tmp_dir.path().join("output")) 233 | .context("Failed to run SymCC")?; 234 | for new_test in symcc_result.test_cases.iter() { 235 | let res = process_new_testcase(&new_test, &input, &tmp_dir, &afl_config, self)?; 236 | 237 | num_total += 1; 238 | if res == TestcaseResult::New { 239 | log::debug!("Test case is interesting"); 240 | num_interesting += 1; 241 | } 242 | } 243 | 244 | log::info!( 245 | "Generated {} test cases ({} new)", 246 | num_total, 247 | num_interesting 248 | ); 249 | 250 | if symcc_result.killed { 251 | log::info!( 252 | "The target process was killed (probably timeout or out of memory); \ 253 | archiving to {}", 254 | self.hangs.path.display() 255 | ); 256 | symcc::copy_testcase(&input, &mut self.hangs, &input) 257 | .context("Failed to archive the test case")?; 258 | } 259 | 260 | self.processed_files.insert(input.as_ref().to_path_buf()); 261 | self.stats.add_execution(&symcc_result); 262 | Ok(()) 263 | } 264 | } 265 | 266 | fn main() -> Result<()> { 267 | let options = CLI::parse(); 268 | env_logger::builder() 269 | .filter_level(if options.verbose { 270 | log::LevelFilter::Debug 271 | } else { 272 | log::LevelFilter::Info 273 | }) 274 | .init(); 275 | 276 | if !options.output_dir.is_dir() { 277 | log::error!( 278 | "The directory {} does not exist!", 279 | options.output_dir.display() 280 | ); 281 | return Ok(()); 
282 | } 283 | 284 | let afl_queue = options.output_dir.join(&options.fuzzer_name).join("queue"); 285 | if !afl_queue.is_dir() { 286 | log::error!("The AFL queue {} does not exist!", afl_queue.display()); 287 | return Ok(()); 288 | } 289 | 290 | let symcc_dir = options.output_dir.join(&options.name); 291 | if symcc_dir.is_dir() { 292 | log::error!( 293 | "{} already exists; we do not currently support resuming", 294 | symcc_dir.display() 295 | ); 296 | return Ok(()); 297 | } 298 | 299 | let symcc = SymCC::new(symcc_dir.clone(), &options.command); 300 | log::debug!("SymCC configuration: {:?}", &symcc); 301 | let afl_config = AflConfig::load(options.output_dir.join(&options.fuzzer_name))?; 302 | log::debug!("AFL configuration: {:?}", &afl_config); 303 | let mut state = State::initialize(symcc_dir)?; 304 | 305 | loop { 306 | match afl_config 307 | .best_new_testcase(&state.processed_files) 308 | .context("Failed to check for new test cases")? 309 | { 310 | None => { 311 | log::debug!("Waiting for new test cases..."); 312 | thread::sleep(Duration::from_secs(5)); 313 | } 314 | Some(input) => state.test_input(&input, &symcc, &afl_config)?, 315 | } 316 | 317 | if state.last_stats_output.elapsed().as_secs() > STATS_INTERVAL_SEC { 318 | if let Err(e) = state.stats.log(&mut state.stats_file) { 319 | log::error!("Failed to log run-time statistics: {}", e); 320 | } 321 | state.last_stats_output = Instant::now(); 322 | } 323 | } 324 | } 325 | 326 | /// The possible outcomes of test-case evaluation. 327 | #[derive(Debug, PartialEq, Eq)] 328 | enum TestcaseResult { 329 | Uninteresting, 330 | New, 331 | Hang, 332 | Crash, 333 | } 334 | 335 | /// Check if the given test case provides new coverage, crashes, or times out; 336 | /// copy it to the corresponding location. 
337 | fn process_new_testcase( 338 | testcase: impl AsRef, 339 | parent: impl AsRef, 340 | tmp_dir: impl AsRef, 341 | afl_config: &AflConfig, 342 | state: &mut State, 343 | ) -> Result { 344 | log::debug!("Processing test case {}", testcase.as_ref().display()); 345 | 346 | let testcase_bitmap_path = tmp_dir.as_ref().join("testcase_bitmap"); 347 | match afl_config 348 | .run_showmap(&testcase_bitmap_path, &testcase) 349 | .with_context(|| { 350 | format!( 351 | "Failed to check whether test case {} is interesting", 352 | &testcase.as_ref().display() 353 | ) 354 | })? { 355 | AflShowmapResult::Success(testcase_bitmap) => { 356 | let interesting = state.current_bitmap.merge(*testcase_bitmap)?; 357 | if interesting { 358 | symcc::copy_testcase(&testcase, &mut state.queue, parent).with_context(|| { 359 | format!( 360 | "Failed to enqueue the new test case {}", 361 | testcase.as_ref().display() 362 | ) 363 | })?; 364 | 365 | Ok(TestcaseResult::New) 366 | } else { 367 | Ok(TestcaseResult::Uninteresting) 368 | } 369 | } 370 | AflShowmapResult::Hang => { 371 | log::info!( 372 | "Ignoring new test case {} because afl-showmap timed out on it", 373 | testcase.as_ref().display() 374 | ); 375 | Ok(TestcaseResult::Hang) 376 | } 377 | AflShowmapResult::Crash => { 378 | log::info!( 379 | "Test case {} crashes afl-showmap; it is probably interesting", 380 | testcase.as_ref().display() 381 | ); 382 | symcc::copy_testcase(&testcase, &mut state.crashes, &parent)?; 383 | symcc::copy_testcase(&testcase, &mut state.queue, &parent).with_context(|| { 384 | format!( 385 | "Failed to enqueue the new test case {}", 386 | testcase.as_ref().display() 387 | ) 388 | })?; 389 | Ok(TestcaseResult::Crash) 390 | } 391 | } 392 | } 393 | --------------------------------------------------------------------------------