├── .adacore-gitlab-ci.yml
├── .clang-format
├── .dockerignore
├── .github
└── workflows
│ ├── check_style.yml
│ ├── create_push_docker_image.yml
│ └── run_tests.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── README.md
├── Vagrantfile
├── compiler
├── Main.cpp
├── Pass.cpp
├── Pass.h
├── Runtime.cpp
├── Runtime.h
├── Symbolizer.cpp
├── Symbolizer.h
├── sym++.in
└── symcc.in
├── docs
├── 32-bit.txt
├── C++.txt
├── Concreteness.txt
├── Configuration.txt
├── Experiments.txt
├── Fuzzing.txt
├── Ideas.txt
├── Libc.txt
├── Optimization.txt
└── Testing.txt
├── sample.cpp
├── test
├── CMakeLists.txt
├── README
├── arrays.c
├── arrays.test32
├── bcopy_bcmp_bzero.c
├── bool_cast.c
├── bswap.c
├── bswap.test32
├── concrete_structs.ll
├── file_input.c
├── file_input.test32
├── floats.c
├── floats.test32
├── globals.c
├── globals.test32
├── if.c
├── if.test32
├── integers.c
├── integers.test32
├── large_alloc.c
├── large_alloc.test32
├── lit.cfg
├── lit.site.cfg.in
├── load_store.ll
├── loop.c
├── loop.test32
├── memcpy.c
├── memcpy.test32
├── memory_input.c
├── pointers.c
├── pointers.test32
├── propagation_select.c
├── read.c
├── read.test32
├── regression
│ └── cxa_vector.ll
├── strings.c
├── strings.test32
├── structs.c
├── structs.test32
├── switch.c
├── switch.test32
├── symbolic_structs.ll
├── test_case_handler.c
├── uadd_sat.ll
├── uadd_sat.test32
├── usub_sat.ll
└── usub_sat.test32
└── util
├── pure_concolic_execution.sh
├── quicktest.sh
└── symcc_fuzzing_helper
├── .gitignore
├── Cargo.lock
├── Cargo.toml
└── src
├── main.rs
└── symcc.rs
/.adacore-gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | include:
2 | - component: $CI_SERVER_FQDN/eng/gitlab-templates/build@~latest
3 | inputs:
4 | anod-args: build symcc
5 | generic-anod-ci-args: --add-dep eng/fuzz/qsym
6 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | BasedOnStyle: LLVM
3 | ...
4 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Ignore the third-party software that's shipped with Qsym; we don't use it anyway.
2 | runtime/qsym_backend/qsym/third_party
3 | # Similarly, we don't run its tests.
4 | runtime/qsym_backend/qsym/tests
5 |
6 | # Rust stores build artifacts in the tree
7 | util/symcc_fuzzing_helper/target
8 |
9 | # Some build utilities that we can ignore
10 | TAGS
11 | compile_commands.json
12 |
13 | # The Dockerfile itself doesn't need to be copied
14 | Dockerfile
15 |
16 | # Do not include build directories
17 | build/
18 | cmake-*
19 |
--------------------------------------------------------------------------------
/.github/workflows/check_style.yml:
--------------------------------------------------------------------------------
1 | name: Check coding style
2 | on: [pull_request]
3 | jobs:
4 | coding_style:
5 | runs-on: ubuntu-22.04
6 | steps:
7 | - uses: actions/checkout@v4
8 | with:
9 | fetch-depth: 0
10 | - name: Run clang-format
11 | shell: bash
12 | run: |
13 | format_changes=$(git clang-format-14 --quiet --diff \
14 | ${{ github.event.pull_request.base.sha }} \
15 | ${{ github.event.pull_request.head.sha }} | wc -c)
16 | if [[ $format_changes -ne 0 ]]; then
17 | echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing"
18 | exit 1
19 | fi
20 |
--------------------------------------------------------------------------------
/.github/workflows/create_push_docker_image.yml:
--------------------------------------------------------------------------------
1 | name: Publish SymCC Docker image
2 | on:
3 | push:
4 | branches: ['master']
5 |
6 | jobs:
7 | upload_dockerhub:
8 | if: ${{ (github.repository == 'eurecom-s3/symcc') && (github.ref == 'refs/heads/master') }}
9 | runs-on: ubuntu-latest
10 | steps:
11 | -
12 | name: Checkout project sources
13 | uses: actions/checkout@v4
14 | -
15 | name: Login to Docker Hub
16 | uses: docker/login-action@v3
17 | with:
18 | username: ${{ secrets.DOCKER_USERNAME }}
19 | password: ${{ secrets.DOCKER_PASSWORD }}
20 | -
21 | name: Set up Docker Buildx
22 | uses: docker/setup-buildx-action@v3
23 | -
24 | name: Build and push
25 | uses: docker/build-push-action@v5
26 | with:
27 | context: .
28 | file: ./Dockerfile
29 | push: true
30 | tags: ${{ secrets.DOCKER_USERNAME }}/symcc:latest
31 |
--------------------------------------------------------------------------------
/.github/workflows/run_tests.yml:
--------------------------------------------------------------------------------
1 | name: Compile and test SymCC
2 | on: [pull_request, workflow_dispatch]
3 | jobs:
4 | # Building and running the tests with Dockerfile
5 | build_and_test_symcc:
6 | runs-on: ubuntu-24.04
7 | steps:
8 | - uses: actions/checkout@v4
9 | - name: Setup docker compilation environment
10 | run: docker build --target builder -t symcc .
11 | - name: Build and test SymCC with simple backend
12 | run: docker build --target builder_simple -t symcc .
13 | - name: Build libcxx using SymCC simple backend
14 | run: docker build --target builder_libcxx -t symcc .
15 | - name: Build and test SymCC with Qsym backend
16 | run: docker build --target builder_qsym -t symcc .
17 | - name: Creation of the final SymCC docker image with Qsym backend and libcxx
18 | run: docker build -t symcc .
19 |
20 | # checking compatibility with ubuntu llvm packages
21 | llvm_compatibility:
22 | runs-on: ubuntu-24.04
23 | strategy:
24 | matrix:
25 | llvm_version: [15, 16, 17, 18, 19]
26 | steps:
27 | - uses: actions/checkout@v4
28 | with:
29 | submodules: true
30 | - name: Install dependencies
31 | run: |
32 | sudo apt-get update
33 | sudo apt-get install -y \
34 | llvm-${{ matrix.llvm_version }}-dev \
35 | libz3-dev \
36 | git
37 |
38 | - name: Build SymCC with the QSYM backend
39 | run: |
40 | git submodule update --init --recursive runtime
41 | mkdir build
42 | cd build
43 | cmake \
44 | -DCMAKE_BUILD_TYPE=Release \
45 | -DZ3_TRUST_SYSTEM_VERSION=ON \
46 | -DSYMCC_RT_BACKEND=qsym \
47 | -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \
48 | ..
49 | make
50 |
51 | # TODO Re-enable the section below when LLVM releases a version that isn't
52 | # supported by Ubuntu packages in our runner image.
53 |
54 | # # checking compatibility (compilation only) with more recent packages
55 | # llvm_compatibility_latest_llvm:
56 | # runs-on: ubuntu-22.04
57 | # strategy:
58 | # matrix:
59 | # llvm_version: [16, 17, 18]
60 | # steps:
61 | # - uses: actions/checkout@v4
62 | # with:
63 | # submodules: true
64 | # - name: Add LLVM project deb repository
65 | # uses: myci-actions/add-deb-repo@11
66 | # with:
67 | # repo: deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm_version }} main
68 | # repo-name: llvm
69 | # update: false
70 | # keys-asc: https://apt.llvm.org/llvm-snapshot.gpg.key
71 | # - name: Install dependencies
72 | # run: |
73 | # sudo apt-get update
74 | # sudo apt-get install -y \
75 | # llvm-${{ matrix.llvm_version }}-dev \
76 | # libz3-dev \
77 | # git
78 | # - name: Build SymCC with the QSYM backend
79 | # run: |
80 | # git submodule update --init --recursive runtime
81 | # mkdir build
82 | # cd build
83 | # cmake \
84 | # -DCMAKE_BUILD_TYPE=Release \
85 | # -DZ3_TRUST_SYSTEM_VERSION=ON \
86 | # -DSYMCC_RT_BACKEND=qsym \
87 | # -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \
88 | # ..
89 | # make
90 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 |
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 |
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 |
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 |
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 |
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 |
34 | # Tags
35 | TAGS
36 |
37 | # CLion project
38 | .idea
39 |
40 | # Clang tooling
41 | compile_commands.json
42 | .clangd
43 | .cache
44 |
45 | # Build directories
46 | *build*
47 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "runtime"]
2 | path = runtime
3 | url = https://github.com/eurecom-s3/symcc-rt.git
4 | branch = main
5 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <http://www.gnu.org/licenses/>.
14 |
15 | cmake_minimum_required(VERSION 3.16)
16 | project(SymCC)
17 |
18 | set(LLVM_VERSION "" CACHE STRING "LLVM version to use. The corresponding LLVM dev package must be installed.")
19 | set(SYMCC_RT_BACKEND "qsym" CACHE STRING "The symbolic backend to use. Please check symcc-rt to get a list of the available backends.")
20 | option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF)
21 |
22 | # We need to build the runtime as an external project because CMake otherwise
23 | # doesn't allow us to build it twice with different options (one 32-bit version
24 | # and one 64-bit variant).
25 | include(ExternalProject)
26 |
27 | # Find LLVM
28 | find_package(LLVM ${LLVM_VERSION} REQUIRED CONFIG)
29 |
30 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
31 | message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}")
32 |
33 | if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 18)
34 | message(WARNING "The software has been developed for LLVM 8 through 18; \
35 | it is unlikely to work with other versions!")
36 | endif()
37 |
38 | set(SYM_RUNTIME_BUILD_ARGS
39 | -DCMAKE_AR=${CMAKE_AR}
40 | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
41 | -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
42 | -DCMAKE_C_FLAGS_INIT=${CMAKE_C_FLAGS_INIT}
43 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
44 | -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
45 | -DCMAKE_CXX_FLAGS_INIT=${CMAKE_CXX_FLAGS_INIT}
46 | -DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS}
47 | -DCMAKE_EXE_LINKER_FLAGS_INIT=${CMAKE_EXE_LINKER_FLAGS_INIT}
48 | -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
49 | -DCMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS}
50 | -DCMAKE_MODULE_LINKER_FLAGS_INIT=${CMAKE_MODULE_LINKER_FLAGS_INIT}
51 | -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}
52 | -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT}
53 | -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}
54 | -DCMAKE_SYSROOT=${CMAKE_SYSROOT}
55 | -DSYMCC_RT_BACKEND=${SYMCC_RT_BACKEND}
56 | -DLLVM_VERSION=${LLVM_PACKAGE_VERSION}
57 | -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
58 | -DZ3_TRUST_SYSTEM_VERSION=${Z3_TRUST_SYSTEM_VERSION})
59 |
60 | ExternalProject_Add(SymCCRuntime
61 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime
62 | CMAKE_ARGS
63 | ${SYM_RUNTIME_BUILD_ARGS}
64 | -DCMAKE_EXPORT_COMPILE_COMMANDS=${CMAKE_EXPORT_COMPILE_COMMANDS}
65 | -DZ3_DIR=${Z3_DIR}
66 | -DLLVM_DIR=${LLVM_DIR}
67 | INSTALL_COMMAND ""
68 | BUILD_ALWAYS TRUE)
69 |
70 | ExternalProject_Get_Property(SymCCRuntime BINARY_DIR)
71 | set(SYMCC_RUNTIME_DIR ${BINARY_DIR})
72 |
73 | if (${TARGET_32BIT})
74 | ExternalProject_Add(SymCCRuntime32
75 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime
76 | CMAKE_ARGS
77 | ${SYM_RUNTIME_BUILD_ARGS}
78 | -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS} -m32"
79 | -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -m32"
80 | -DZ3_DIR=${Z3_32BIT_DIR}
81 | -DLLVM_DIR=${LLVM_32BIT_DIR}
82 | INSTALL_COMMAND ""
83 | BUILD_ALWAYS TRUE)
84 |
85 | ExternalProject_Get_Property(SymCCRuntime32 BINARY_DIR)
86 | set(SYMCC_RUNTIME_32BIT_DIR ${BINARY_DIR})
87 | endif()
88 |
89 | find_package(LLVM REQUIRED CONFIG)
90 |
91 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
92 | message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}")
93 |
94 | if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 18)
95 | message(WARNING "The software has been developed for LLVM 8 through 18; \
96 | it is unlikely to work with other versions!")
97 | endif()
98 |
99 | add_definitions(${LLVM_DEFINITIONS})
100 | include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
101 |
102 | set(CMAKE_CXX_STANDARD 17)
103 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \
104 | -Wredundant-decls -Wcast-align -Wmissing-include-dirs -Wswitch-default \
105 | -Wextra -Wall -Winvalid-pch -Wredundant-decls -Wformat=2 \
106 | -Wmissing-format-attribute -Wformat-nonliteral -Werror -Wno-error=deprecated-declarations")
107 |
108 | # Mark nodelete to work around unload bug in upstream LLVM 5.0+
109 | set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete")
110 |
111 | # This is the compiler pass that we later load into clang or opt. If LLVM is
112 | # built without RTTI we have to disable it for our library too, otherwise we'll
113 | # get linker errors.
114 | add_library(SymCC MODULE
115 | compiler/Symbolizer.cpp
116 | compiler/Pass.cpp
117 | compiler/Runtime.cpp
118 | compiler/Main.cpp)
119 |
120 | set_target_properties(SymCC PROPERTIES OUTPUT_NAME "symcc")
121 | if (NOT LLVM_ENABLE_RTTI)
122 | set_target_properties(SymCC PROPERTIES COMPILE_FLAGS "-fno-rtti")
123 | endif()
124 |
125 | find_program(CLANG_BINARY "clang"
126 | HINTS ${LLVM_TOOLS_BINARY_DIR}
127 | DOC "The clang binary to use in the symcc wrapper script.")
128 | find_program(CLANGPP_BINARY "clang++"
129 | HINTS ${LLVM_TOOLS_BINARY_DIR}
130 | DOC "The clang++ binary to use in the sym++ wrapper script.")
131 | if (NOT CLANG_BINARY OR NOT CLANGPP_BINARY)
132 | message(FATAL_ERROR "clang or clang++ not found; please make sure that the version corresponding to your LLVM installation is available.")
133 | endif()
134 |
135 | if (${LLVM_VERSION_MAJOR} LESS 13)
136 | set(CLANG_LOAD_PASS "-Xclang -load -Xclang ")
137 | else()
138 | set(CLANG_LOAD_PASS "-fpass-plugin=")
139 | endif()
140 |
141 | configure_file("compiler/symcc.in" "symcc" @ONLY)
142 | configure_file("compiler/sym++.in" "sym++" @ONLY)
143 |
144 | add_subdirectory(test)
145 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to SymCC
2 |
3 | We encourage everyone to contribute improvements and bug fixes to SymCC. Our
4 | preferred way of accepting contributions is via GitHub pull requests. Please be
5 | sure to run clang-format on any C/C++ code you change; an easy way to do so is
6 | with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can
7 | get `git-clang-format` via `apt install clang-format`.) Ideally, also add a test
8 | to your patch (see the
9 | [docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for
10 | details). Unfortunately, since the project is a bit short on developers at the
11 | moment, we have to ask for your patience while we review your PR.
12 |
13 | Please note that any contributions you make are licensed under the same terms as
14 | the code you're contributing to, as per the GitHub Terms of Service, [section
15 | D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license).
16 | At the time of writing, this means LGPL (version 3 or later) for the SymCC
17 | runtime, and GPL (version 3 or later) for the rest of SymCC.
18 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <http://www.gnu.org/licenses/>.
14 |
15 | #
16 | # The base image
17 | #
18 | FROM ubuntu:22.04 AS builder
19 |
20 | # Install dependencies
21 | RUN apt-get update \
22 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \
23 | cargo \
24 | cmake \
25 | g++ \
26 | git \
27 | libz3-dev \
28 | ninja-build \
29 | python3-pip \
30 | zlib1g-dev \
31 | wget
32 | RUN pip3 install lit
33 |
34 | WORKDIR /
35 |
36 | # Build AFL.
37 | RUN git clone -b v2.56b https://github.com/google/AFL.git afl \
38 | && cd afl \
39 | && make
40 |
41 | # This is passed along to symcc and qsym backend
42 | # Version 15 is buggy https://github.com/eurecom-s3/symcc/issues/164
43 | ARG LLVM_VERSION=12
44 |
45 | # installing/building with the right LLVM version, currently:
46 | # - no plan to support < 11
47 | # - 12 to 15 are in official packages,
48 | # - 16 and 17 provided by llvm.org
49 | # - TODO 18 should be fixed
50 | RUN if [ $LLVM_VERSION -le 11 ]; then echo "LLVM <= 11 not supported" ; false ;fi
51 | RUN if [ $LLVM_VERSION -ge 18 ]; then echo "LLVM >= 18 currently not supported" ; false ;fi
52 | RUN if [ $LLVM_VERSION -eq 12 ] || [ $LLVM_VERSION -eq 13 ] || [ $LLVM_VERSION -eq 14 ] || [ $LLVM_VERSION -eq 15 ]; then \
53 | DEBIAN_FRONTEND=noninteractive apt-get install -y llvm-${LLVM_VERSION} clang-${LLVM_VERSION} ; \
54 | else \
55 | echo "LLVM ${LLVM_VERSION} is not available from the official Ubuntu packages" ; false ; \
56 | fi
57 |
58 | RUN rm -rf /var/lib/apt/lists/*
59 | # Download the LLVM sources already so that we don't need to get them again when
60 | # SymCC changes
61 | RUN git clone -b llvmorg-$LLVM_VERSION.0.0 --depth 1 https://github.com/llvm/llvm-project.git /llvm_source
62 |
63 | # Build a version of SymCC with the simple backend to compile libc++
64 | COPY . /symcc_source
65 |
66 | # Init submodules if they are not initialized yet
67 | WORKDIR /symcc_source
68 | RUN git submodule update --init --recursive
69 |
70 | #
71 | # Build SymCC with the simple backend
72 | #
73 | FROM builder AS builder_simple
74 | WORKDIR /symcc_build_simple
75 | RUN cmake -G Ninja \
76 | -DSYMCC_RT_BACKEND=simple \
77 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \
78 | -DZ3_TRUST_SYSTEM_VERSION=on \
79 | /symcc_source \
80 | && ninja check
81 |
82 | #
83 | # Build libc++ with SymCC using the simple backend
84 | #
85 | FROM builder_simple AS builder_libcxx
86 | WORKDIR /libcxx_symcc
87 | RUN export SYMCC_REGULAR_LIBCXX=yes SYMCC_NO_SYMBOLIC_INPUT=yes \
88 | && mkdir /libcxx_symcc_build \
89 | && cd /libcxx_symcc_build \
90 | && cmake -G Ninja /llvm_source/llvm \
91 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \
92 | -DLLVM_TARGETS_TO_BUILD="X86" \
93 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \
94 | -DCMAKE_BUILD_TYPE=Release \
95 | -DCMAKE_INSTALL_PREFIX=/libcxx_symcc_install \
96 | -DCMAKE_C_COMPILER=/symcc_build_simple/symcc \
97 | -DCMAKE_CXX_COMPILER=/symcc_build_simple/sym++ \
98 | && ninja distribution \
99 | && ninja install-distribution
100 |
101 |
102 | #
103 | # Build SymCC with the Qsym backend
104 | #
105 | FROM builder_libcxx AS builder_qsym
106 | WORKDIR /symcc_build
107 | RUN cmake -G Ninja \
108 | -DSYMCC_RT_BACKEND=qsym \
109 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \
110 | -DZ3_TRUST_SYSTEM_VERSION=on \
111 | /symcc_source \
112 | && ninja check \
113 | && cargo install --path /symcc_source/util/symcc_fuzzing_helper
114 |
115 |
116 | #
117 | # The final image
118 | #
119 | FROM ubuntu:22.04 AS symcc
120 |
121 | RUN apt-get update \
122 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \
123 | build-essential \
124 | g++ \
125 | zlib1g \
126 | sudo \
127 | && useradd -m -s /bin/bash ubuntu \
128 | && echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
129 |
130 | ARG LLVM_VERSION=12
131 |
132 | RUN apt-get update \
133 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \
134 | llvm-$LLVM_VERSION \
135 | clang-$LLVM_VERSION \
136 | && rm -rf /var/lib/apt/lists/*
137 |
138 | COPY --from=builder_qsym /symcc_build /symcc_build
139 | COPY --from=builder_qsym /root/.cargo/bin/symcc_fuzzing_helper /symcc_build/
140 | COPY util/pure_concolic_execution.sh /symcc_build/
141 | COPY --from=builder_qsym /libcxx_symcc_install /libcxx_symcc_install
142 | COPY --from=builder_qsym /afl /afl
143 |
144 | # fix permissions
145 | RUN chmod -R og+rX /symcc_build
146 |
147 | ENV PATH=/symcc_build:$PATH
148 | ENV AFL_PATH=/afl
149 | ENV AFL_CC=clang-$LLVM_VERSION
150 | ENV AFL_CXX=clang++-$LLVM_VERSION
151 | ENV SYMCC_LIBCXX_PATH=/libcxx_symcc_install
152 |
153 | USER ubuntu
154 | WORKDIR /home/ubuntu
155 | COPY --chown=ubuntu:ubuntu sample.cpp /home/ubuntu/
156 |
157 | RUN mkdir /tmp/output
158 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
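1 | [![Compile and test SymCC](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml/badge.svg)](https://github.com/eurecom-s3/symcc/actions/workflows/run_tests.yml)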
2 |
3 | Note: The SymCC project is currently understaffed and therefore maintained in
4 | best-effort mode. In fact, we are hiring: if you are interested in joining
5 | the [S3 group at Eurecom](https://www.s3.eurecom.fr/) to work on this (and other
6 | projects in the group), please [contact us](mailto:aurelien.francillon@eurecom.fr).
7 | We nevertheless appreciate PRs and apologize in advance for the slow processing
8 | of PRs; we will try to merge them when possible.
9 |
10 | # SymCC: efficient compiler-based symbolic execution
11 |
12 | SymCC is a compiler pass which embeds symbolic execution into the program
13 | during compilation, and an associated run-time support library. In essence, the
14 | compiler inserts code that computes symbolic expressions for each value in the
15 | program. The actual computation happens through calls to the support library at
16 | run time.
17 |
18 | To build the pass and the support library, install LLVM (any version between 8
19 | and 18) and Z3 (version 4.5 or later), as well as a C++ compiler with support
20 | for C++17. LLVM lit is only needed to run the tests; if it's not packaged with
21 | your LLVM, you can get it with `pip install lit`.
22 |
23 | Under Ubuntu 22.04 (Jammy) the following one-liner should install all required
24 | packages:
25 |
26 | ```
27 | sudo apt install -y git cargo clang-14 cmake g++ git libz3-dev llvm-14-dev llvm-14-tools ninja-build python3-pip zlib1g-dev && sudo pip3 install lit
28 | ```
29 |
30 | Alternatively, see below for using the provided Dockerfile, or the file
31 | `util/quicktest.sh` for exact steps to perform under Ubuntu (or use with the
32 | provided Vagrant file).
33 |
34 | Make sure to pull the SymCC Runtime:
35 |
36 | ```
37 | $ git submodule update --init --recursive
38 | ```
39 |
40 | Note that it is not necessary or recommended to build the QSYM submodule - our
41 | build system will automatically extract the right source files and include them
42 | in the build.
43 |
44 | Create a build directory somewhere, and execute the following commands inside
45 | it:
46 |
47 | ```
48 | $ cmake -G Ninja -DSYMCC_RT_BACKEND=qsym /path/to/compiler/sources
49 | $ ninja check
50 | ```
51 |
52 | If LLVM is installed in a non-standard location, add the CMake parameter
53 | `-DLLVM_DIR=/path/to/llvm/cmake/module`. Similarly, you can point to a
54 | non-standard Z3 installation with `-DZ3_DIR=/path/to/z3/cmake/module` (which
55 | requires Z3 to be built with CMake).
56 |
57 | The main build artifact from the user's point of view is `symcc`, a wrapper
58 | script around clang that sets the right options to load our pass and link
59 | against the run-time library. (See below for additional C++ support.)
60 |
61 | To try the compiler, take some simple C code like the following:
62 |
63 | ``` c
64 | #include <stdio.h>
65 | #include <stdint.h>
66 | #include <unistd.h>
67 |
68 | int foo(int a, int b) {
69 | if (2 * a < b)
70 | return a;
71 | else if (a % b)
72 | return b;
73 | else
74 | return a + b;
75 | }
76 |
77 | int main(int argc, char* argv[]) {
78 | int x;
79 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
80 | printf("Failed to read x\n");
81 | return -1;
82 | }
83 | printf("%d\n", foo(x, 7));
84 | return 0;
85 | }
86 | ```
87 |
88 | Save the code as `test.c`. To compile it with symbolic execution built in, we
89 | call symcc as we would normally call clang:
90 |
91 | ```
92 | $ ./symcc test.c -o test
93 | ```
94 |
95 | Before starting the analysis, create a directory for the results and tell SymCC
96 | about it:
97 |
98 | ```
99 | $ mkdir results
100 | $ export SYMCC_OUTPUT_DIR=`pwd`/results
101 | ```
102 |
103 | Then run the program like any other binary, providing arbitrary input:
104 |
105 | ```
106 | $ echo 'aaaa' | ./test
107 | ```
108 |
109 | The program will execute the same computations as an uninstrumented version
110 | would, but additionally the injected code will track computations symbolically
111 | and attempt to compute diverging inputs at each branch point. All data that the
112 | program reads from standard input is treated as symbolic; alternatively, you can
113 | set the environment variable SYMCC_INPUT_FILE to the name of a file whose
114 | contents will be treated as symbolic when read.
115 |
116 | Note that due to how the QSYM backend is implemented, all input has to be available
117 | from the start. In particular, when providing symbolic data on standard input
118 | interactively, you need to terminate your input by pressing Ctrl+D before the
119 | program starts to execute.
120 |
121 | When execution is finished, the result directory will contain the new test cases
122 | generated during program execution. Try running the program again on one of
123 | those (or use [util/pure_concolic_execution.sh](util/pure_concolic_execution.sh)
124 | to automate the process). For better results, combine SymCC with a fuzzer (see
125 | [docs/Fuzzing.txt](docs/Fuzzing.txt)).
126 |
127 |
128 | ## Documentation
129 |
130 | The directory [docs](docs) contains documentation on several internal aspects of
131 | SymCC, as well as [building C++ code](docs/C++.txt), [compiling 32-bit binaries
132 | on a 64-bit host](docs/32-bit.txt), and [running SymCC with a
133 | fuzzer](docs/Fuzzing.txt). There is also a [list of all configuration
134 | options](docs/Configuration.txt).
135 |
136 | If you're interested in the research paper that we wrote about SymCC, have a
137 | look at our group's
138 | [website](http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html). It also
139 | contains detailed instructions to replicate our experiments, as well as the raw
140 | results that we obtained.
141 |
142 | ### Video demonstration
143 | On YouTube you can find [a practical introduction to
144 | SymCC](https://www.youtube.com/watch?v=htDrNBiL7Y8) as well as a video on [how
145 | to combine AFL and SymCC](https://www.youtube.com/watch?v=zmC-ptp3W3k)
146 |
147 | ## Building a Docker image
148 |
149 | If you prefer a Docker container over building SymCC natively, just tell Docker
150 | to build the image after pulling the QSYM code as above. (Be warned though: the
151 | Docker image enables optional C++ support from source, so creating the image can
152 | take quite some time!)
153 |
154 | ```
155 | $ docker build -t symcc .
156 | $ docker run -it --rm symcc
157 | ```
158 |
159 | Alternatively, you can pull an existing image (current master branch) from
160 | Docker Hub:
161 |
162 | ```
163 | $ docker pull eurecoms3/symcc
164 | $ docker run -it --rm eurecoms3/symcc
165 | ```
166 |
167 | This will build a Docker image and run an ephemeral container to try out SymCC.
168 | Inside the container, `symcc` is available as a drop-in replacement for `clang`,
169 | using the QSYM backend; similarly, `sym++` can be used instead of `clang++`. Now
170 | try something like the following inside the container:
171 |
172 | ```
173 | container$ cat sample.cpp
174 | (Note that "root" is the input we're looking for.)
175 | container$ sym++ -o sample sample.cpp
176 | container$ echo test | ./sample
177 | ...
178 | container$ cat /tmp/output/000008-optimistic
179 | root
180 | ```
181 |
182 | The Docker image also has AFL and `symcc_fuzzing_helper` preinstalled, so you
183 | can use it to run SymCC with a fuzzer as described in [the
184 | docs](docs/Fuzzing.txt). (The AFL binaries are located in `/afl`.)
185 |
186 | While the Docker image is very convenient for _using_ SymCC, I recommend a local
187 | build outside Docker for _development_. Docker will rebuild most of the image on
188 | every change to SymCC (which is, in principle, the right thing to do), whereas in
189 | many cases it is sufficient to let the build system figure out what to rebuild
190 | (and recompile, e.g., libc++ only when necessary).
191 |
192 | ## FAQ / BUGS / TODOs
193 |
194 | ### Why is SymCC only exploring one path and not all paths?
195 |
196 | SymCC is currently a concolic executor. As such, it follows the concrete
197 | path. In theory, it would be possible to make it a forking executor -
198 | see [issue #14](https://github.com/eurecom-s3/symcc/issues/14)
199 |
200 | ### Why does SymCC not generate some test cases?
201 |
202 | There are multiple possible reasons:
203 |
204 | #### QSym backend performs pruning
205 |
206 | When built with the QSym backend, exploration (e.g., of loops) is
207 | subject to path pruning. This is part of the optimizations that make
208 | SymCC/QSym fast, but it isn't sound. This is not a problem for
209 | hybrid fuzzing, but it may be a problem for other uses. See for
210 | example [issue #88](https://github.com/eurecom-s3/symcc/issues/88).
211 |
212 | When building with the simple backend the paths should be found. If
213 | the paths are not found with the simple backend this may be a bug (or
214 | possibly a limitation of the simple backend).
215 |
216 | #### Incomplete symbolic handling of functions and system interactions
217 |
218 | The current symbolic understanding of libc is incomplete. So when an
219 | unsupported libc function is called, SymCC can't trace the computations
220 | that happen in the function. There are several ways to work around this:
221 |
222 | 1. Add the function to the [collection of wrapped libc
223 | functions](https://github.com/eurecom-s3/symcc-rt/blob/main/src/LibcWrappers.cpp)
224 | and [register the
225 | wrapper](https://github.com/eurecom-s3/symcc/blob/b29dc4db2803830ebf50798e72b336473a567655/compiler/Runtime.cpp#L159)
226 | in the compiler.
227 | 2. Build a fully instrumented libc.
228 | 3. Cherry-pick individual libc functions from a libc implementation (e.g., musl)
229 |
230 | See [issue #23](https://github.com/eurecom-s3/symcc/issues/23) for more details.
231 |
232 |
233 | ### Rust support?
234 |
235 | It would be possible to support Rust; see [issue
236 | #1](https://github.com/eurecom-s3/symcc/issues/1) for tracking this.
237 |
238 | ### Bug reporting
239 |
240 | We appreciate bug reports with test cases and steps to reproduce, as well as PRs with
241 | corresponding test cases. SymCC is currently understaffed, but we hope to
242 | catch up and get back to active development at some point.
243 |
244 | ## Contact
245 |
246 | Feel free to use GitHub issues and pull requests for improvements, bug reports,
247 | etc. Alternatively, you can send an email to Sebastian Poeplau
248 | (sebastian.poeplau@eurecom.fr) and Aurélien Francillon
249 | (aurelien.francillon@eurecom.fr).
250 |
251 |
252 | ## Reference
253 |
254 | To cite SymCC in scientific work, please use the following BibTeX:
255 |
256 | ``` bibtex
257 | @inproceedings {poeplau2020symcc,
258 | author = {Sebastian Poeplau and Aurélien Francillon},
259 | title = {Symbolic execution with {SymCC}: Don't interpret, compile!},
260 | booktitle = {29th {USENIX} Security Symposium ({USENIX} Security 20)},
261 | isbn = {978-1-939133-17-5},
262 | pages = {181--198},
263 | year = 2020,
264 | url = {https://www.usenix.org/conference/usenixsecurity20/presentation/poeplau},
265 | publisher = {{USENIX} Association},
266 | month = aug,
267 | }
268 | ```
269 |
270 | More information on the paper is available
271 | [here](http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html).
272 |
273 |
274 | ## Other projects using SymCC
275 |
276 | [SymQEMU](https://github.com/eurecom-s3/symqemu) relies on SymCC.
277 |
278 | LibAFL supports concolic execution with [SymCC](https://aflplus.plus/libafl-book/advanced_features/concolic/concolic.html),
279 | but this requires external patches (for now).
280 |
281 | [AdaCore](https://www.adacore.com/) published [a paper describing](https://dl.acm.org/doi/10.1145/3631483.3631500)
282 | the integration of SymCC into GNATfuzz for test case generation and [plans to release this
283 | as part of the GNATfuzz beta release](https://docs.adacore.com/live/wave/roadmap/html/roadmap/roadmap_25_GNAT%20Pro.html#symbolic-execution-to-retrieve-input-values).
284 |
285 | ## License
286 |
287 | SymCC is free software: you can redistribute it and/or modify it under the terms
288 | of the GNU General Public License as published by the Free Software Foundation,
289 | either version 3 of the License, or (at your option) any later version.
290 |
291 | SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
292 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
293 | PARTICULAR PURPOSE. See the GNU General Public License for more details.
294 |
295 | You should have received a copy of the GNU General Public License and the GNU
296 | Lesser General Public License along with SymCC. If not, see
297 | <https://www.gnu.org/licenses/>.
298 |
299 | The following pieces of software have additional or alternate copyrights,
300 | licenses, and/or restrictions:
301 |
302 | | Program | Directory |
303 | |---------------|-----------------------------|
304 | | SymCC Runtime | `runtime` |
305 |
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | Vagrant.configure("2") do |config| # "2" selects version 2 of the Vagrant configuration format
5 | config.vm.box = "ubuntu/groovy64" # base box the VM is created from
6 | config.vm.provision "shell", path: "util/quicktest.sh" # provision the VM by running the repository's quick-test script
7 | end
8 |
--------------------------------------------------------------------------------
/compiler/Main.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #include
16 | #if LLVM_VERSION_MAJOR <= 15
17 | #include
18 | #endif
19 | #include
20 | #include
21 |
22 | #if LLVM_VERSION_MAJOR >= 13
23 | #include
24 | #include
25 |
26 | #if LLVM_VERSION_MAJOR >= 14
27 | #include
28 | #else
29 | using OptimizationLevel = llvm::PassBuilder::OptimizationLevel;
30 | #endif
31 | #endif
32 |
33 | #if LLVM_VERSION_MAJOR >= 15
34 | #include
35 | #else
36 | #include
37 | #endif
38 |
39 | #include "Pass.h"
40 |
41 | using namespace llvm;
42 |
43 | //
44 | // Legacy pass registration (up to LLVM 15)
45 | //
46 |
47 | #if LLVM_VERSION_MAJOR <= 15
48 | // Legacy-pass-manager hook: adds the scalarizer and atomic-lowering passes, then the symbolizer itself, to PM.
49 | void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */,
50 | legacy::PassManagerBase &PM) {
51 | PM.add(createScalarizerPass());
52 | PM.add(createLowerAtomicPass());
53 | PM.add(new SymbolizeLegacyPass()); // added last, after the two preparatory passes above
54 | }
55 |
56 | // Make the pass known to opt under the name "symbolize".
57 | static RegisterPass X("symbolize", "Symbolization Pass");
58 | // Tell frontends to run the pass automatically (registered for both EP_VectorizerStart and EP_EnabledOnOptLevel0).
59 | static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart,
60 | addSymbolizeLegacyPass);
61 | static struct RegisterStandardPasses
62 | Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass);
63 |
64 | #endif
65 |
66 | //
67 | // New pass registration (LLVM 13 and above)
68 | //
69 |
70 | #if LLVM_VERSION_MAJOR >= 13
71 | // Builds the new-pass-manager plugin descriptor; its callback registers SymbolizePass at two pipeline extension points.
72 | PassPluginLibraryInfo getSymbolizePluginInfo() {
73 | return {LLVM_PLUGIN_API_VERSION, "Symbolization Pass", LLVM_VERSION_STRING,
74 | [](PassBuilder &PB) {
75 | // We need to act on the entire module as well as on each function.
76 | // Those actions are independent from each other, so we register a
77 | // module pass at the start of the pipeline and a function pass just
78 | // before the vectorizer. (There doesn't seem to be a way to run
79 | // module passes at the start of the vectorizer, hence the split.)
80 | PB.registerPipelineStartEPCallback(
81 | [](ModulePassManager &PM, OptimizationLevel) {
82 | PM.addPass(SymbolizePass()); // module-level instrumentation
83 | });
84 | PB.registerVectorizerStartEPCallback(
85 | [](FunctionPassManager &PM, OptimizationLevel) {
86 | PM.addPass(ScalarizerPass());
87 | PM.addPass(LowerAtomicPass());
88 | PM.addPass(SymbolizePass()); // per-function instrumentation, after scalarization and atomic lowering
89 | });
90 | }};
91 | }
92 | // Weak, C-linkage plugin entry point; simply returns the descriptor built by getSymbolizePluginInfo().
93 | extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() {
94 | return getSymbolizePluginInfo();
95 | }
96 |
97 | #endif
98 |
--------------------------------------------------------------------------------
/compiler/Pass.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #include "Pass.h"
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | #if LLVM_VERSION_MAJOR < 14
29 | #include
30 | #else
31 | #include
32 | #endif
33 |
34 | #include "Runtime.h"
35 | #include "Symbolizer.h"
36 |
37 | using namespace llvm;
38 | // DEBUG(X) executes the statement X only when NDEBUG is not defined; otherwise it expands to a no-op expression.
39 | #ifndef NDEBUG
40 | #define DEBUG(X) \
41 | do { \
42 | X; \
43 | } while (false)
44 | #else
45 | #define DEBUG(X) ((void)0)
46 | #endif
47 |
48 | char SymbolizeLegacyPass::ID = 0;
49 |
50 | namespace {
51 |
52 | static constexpr char kSymCtorName[] = "__sym_ctor";
53 | // Module-level instrumentation: renames intercepted functions and installs a constructor that initializes the runtime. Always returns true.
54 | bool instrumentModule(Module &M) {
55 | DEBUG(errs() << "Symbolizer module instrumentation\n");
56 |
57 | // Redirect calls to external functions to the corresponding wrappers and
58 | // rename internal functions.
59 | for (auto &function : M.functions()) {
60 | auto name = function.getName();
61 | if (isInterceptedFunction(function))
62 | function.setName(name + "_symbolized"); // e.g., "memcpy" becomes "memcpy_symbolized"
63 | }
64 |
65 | // Insert a constructor that initializes the runtime and any globals.
66 | Function *ctor;
67 | std::tie(ctor, std::ignore) = createSanitizerCtorAndInitFunctions(
68 | M, kSymCtorName, "_sym_initialize", {}, {}); // the constructor is named kSymCtorName and calls _sym_initialize
69 | appendToGlobalCtors(M, ctor, 0); // registered with priority 0
70 |
71 | return true;
72 | }
73 | // Returns true if CI directly calls one of the intrinsics listed below (the ones instrumentFunction hands to IntrinsicLowering); false otherwise.
74 | bool canLower(const CallInst *CI) {
75 | const Function *Callee = CI->getCalledFunction();
76 | if (!Callee)
77 | return false; // no statically known callee (e.g., an indirect call)
78 |
79 | switch (Callee->getIntrinsicID()) {
80 | case Intrinsic::expect:
81 | case Intrinsic::ctpop:
82 | case Intrinsic::ctlz:
83 | case Intrinsic::cttz:
84 | case Intrinsic::prefetch:
85 | case Intrinsic::pcmarker:
86 | case Intrinsic::dbg_declare:
87 | case Intrinsic::dbg_label:
88 | case Intrinsic::annotation:
89 | case Intrinsic::ptr_annotation:
90 | case Intrinsic::assume:
91 | #if LLVM_VERSION_MAJOR > 11
92 | case Intrinsic::experimental_noalias_scope_decl:
93 | #endif
94 | case Intrinsic::var_annotation:
95 | case Intrinsic::sqrt:
96 | case Intrinsic::log:
97 | case Intrinsic::log2:
98 | case Intrinsic::log10:
99 | case Intrinsic::exp:
100 | case Intrinsic::exp2:
101 | case Intrinsic::pow:
102 | case Intrinsic::sin:
103 | case Intrinsic::cos:
104 | case Intrinsic::floor:
105 | case Intrinsic::ceil:
106 | case Intrinsic::trunc:
107 | case Intrinsic::round:
108 | #if LLVM_VERSION_MAJOR > 10
109 | case Intrinsic::roundeven:
110 | #endif
111 | case Intrinsic::copysign:
112 | #if LLVM_VERSION_MAJOR < 16
113 | case Intrinsic::flt_rounds:
114 | #else
115 | case Intrinsic::get_rounding:
116 | #endif
117 | case Intrinsic::invariant_start:
118 | case Intrinsic::lifetime_start:
119 | case Intrinsic::invariant_end:
120 | case Intrinsic::lifetime_end:
121 | return true;
122 | default:
123 | return false;
124 | }
125 |
126 | llvm_unreachable("Control cannot reach here"); // every path through the switch returns
127 | }
128 | // Asks the target's lowering hooks to expand the inline assembly called by CI; returns without changes if the target, target machine, subtarget, or lowering info is unavailable.
129 | void liftInlineAssembly(CallInst *CI) {
130 | // TODO When we don't have to worry about the old pass manager anymore, move
131 | // the initialization to the pass constructor. (Currently there are two
132 | // passes, but only if we're on a recent enough LLVM...)
133 |
134 | Function *F = CI->getFunction();
135 | Module *M = F->getParent();
136 | auto triple = M->getTargetTriple();
137 |
138 | std::string error;
139 | auto target = TargetRegistry::lookupTarget(triple, error);
140 | if (!target) {
141 | errs() << "Warning: can't get target info to lift inline assembly\n";
142 | return;
143 | }
144 |
145 | auto cpu = F->getFnAttribute("target-cpu").getValueAsString();
146 | auto features = F->getFnAttribute("target-features").getValueAsString();
147 |
148 | std::unique_ptr<TargetMachine> TM(
149 | target->createTargetMachine(triple, cpu, features, TargetOptions(), {}));
150 | auto subTarget = TM ? TM->getSubtargetImpl(*F) : nullptr; // createTargetMachine may return null; treat that like a missing subtarget
151 | if (subTarget == nullptr)
152 | return;
153 |
154 | auto targetLowering = subTarget->getTargetLowering();
155 | if (targetLowering == nullptr)
156 | return;
157 |
158 | targetLowering->ExpandInlineAsm(CI);
159 | }
160 | // Instruments F for symbolic execution; returns false (nothing changed) only for the generated runtime constructor, true otherwise.
161 | bool instrumentFunction(Function &F) {
162 | auto functionName = F.getName();
163 | if (functionName == kSymCtorName)
164 | return false; // never instrument the constructor inserted by instrumentModule
165 |
166 | DEBUG(errs() << "Symbolizing function ");
167 | DEBUG(errs().write_escaped(functionName) << '\n');
168 |
169 | SmallVector allInstructions;
170 | allInstructions.reserve(F.getInstructionCount());
171 | for (auto &I : instructions(F))
172 | allInstructions.push_back(&I);
173 | // First pass over the snapshot: lower the intrinsics accepted by canLower and lift inline assembly.
174 | IntrinsicLowering IL(F.getParent()->getDataLayout());
175 | for (auto *I : allInstructions) {
176 | if (auto *CI = dyn_cast(I)) {
177 | if (canLower(CI)) {
178 | IL.LowerIntrinsicCall(CI);
179 | } else if (isa(CI->getCalledOperand())) {
180 | liftInlineAssembly(CI);
181 | }
182 | }
183 | }
184 | // Take a fresh snapshot of the instructions (presumably because the lowering above may have replaced some of them).
185 | allInstructions.clear();
186 | for (auto &I : instructions(F))
187 | allInstructions.push_back(&I);
188 |
189 | Symbolizer symbolizer(*F.getParent());
190 | symbolizer.symbolizeFunctionArguments(F);
191 |
192 | for (auto &basicBlock : F)
193 | symbolizer.insertBasicBlockNotification(basicBlock);
194 |
195 | for (auto *instPtr : allInstructions)
196 | symbolizer.visit(instPtr);
197 | // Post-processing steps that run only after every instruction has been visited.
198 | symbolizer.finalizePHINodes();
199 | symbolizer.shortCircuitExpressionUses();
200 |
201 | // DEBUG(errs() << F << '\n');
202 | assert(!verifyFunction(F, &errs()) &&
203 | "SymbolizePass produced invalid bitcode");
204 |
205 | return true;
206 | }
207 |
208 | } // namespace
209 | // Legacy pass manager entry points: thin forwards to the shared instrumentation helpers defined above.
210 | bool SymbolizeLegacyPass::doInitialization(Module &M) {
211 | return instrumentModule(M);
212 | }
213 |
214 | bool SymbolizeLegacyPass::runOnFunction(Function &F) {
215 | return instrumentFunction(F);
216 | }
217 |
218 | #if LLVM_VERSION_MAJOR >= 13
219 | // New pass manager entry points: report that no analyses are preserved whenever the helper says it changed the IR.
220 | PreservedAnalyses SymbolizePass::run(Function &F, FunctionAnalysisManager &) {
221 | return instrumentFunction(F) ? PreservedAnalyses::none()
222 | : PreservedAnalyses::all();
223 | }
224 |
225 | PreservedAnalyses SymbolizePass::run(Module &M, ModuleAnalysisManager &) {
226 | return instrumentModule(M) ? PreservedAnalyses::none()
227 | : PreservedAnalyses::all();
228 | }
229 |
230 | #endif
231 |
--------------------------------------------------------------------------------
/compiler/Pass.h:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #ifndef PASS_H
16 | #define PASS_H
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | #if LLVM_VERSION_MAJOR >= 13
23 | #include
24 | #endif
25 | // Function pass for the legacy pass manager: doInitialization performs the module-level setup, runOnFunction instruments one function.
26 | class SymbolizeLegacyPass : public llvm::FunctionPass {
27 | public:
28 | static char ID; // defined in Pass.cpp; its address is what FunctionPass receives as the pass ID
29 |
30 | SymbolizeLegacyPass() : FunctionPass(ID) {}
31 |
32 | virtual bool doInitialization(llvm::Module &M) override;
33 | virtual bool runOnFunction(llvm::Function &F) override;
34 | };
35 |
36 | #if LLVM_VERSION_MAJOR >= 13
37 | // New-pass-manager pass; it provides both a function-level and a module-level run() so it can be added to either kind of pass manager.
38 | class SymbolizePass : public llvm::PassInfoMixin {
39 | public:
40 | llvm::PreservedAnalyses run(llvm::Function &F,
41 | llvm::FunctionAnalysisManager &);
42 | llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &);
43 |
44 | static bool isRequired() { return true; } // presumably keeps the pass manager from skipping this pass (e.g., for optnone functions) - TODO confirm
45 | };
46 |
47 | #endif
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/compiler/Runtime.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #include "Runtime.h"
16 |
17 | #include
18 | #include
19 | #include
20 |
21 | using namespace llvm;
22 |
23 | namespace {
24 | // Declares (or fetches) the function `name` with the given return and argument types in M and returns it as a SymFnT.
25 | template
26 | SymFnT import(llvm::Module &M, llvm::StringRef name, llvm::Type *ret,
27 | ArgsTy... args) {
28 | #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11
29 | return M.getOrInsertFunction(name, ret, args...).getCallee(); // SymFnT is llvm::Value * for these LLVM versions (see Runtime.h)
30 | #else
31 | return M.getOrInsertFunction(name, ret, args...);
32 | #endif
33 | }
34 |
35 | } // namespace
36 | // Resolves every run-time library function in M via import() and stores the handles in this struct's members and handler tables.
37 | Runtime::Runtime(Module &M) {
38 | IRBuilder<> IRB(M.getContext());
39 | auto *intPtrType = M.getDataLayout().getIntPtrType(M.getContext());
40 | auto *ptrT = IRB.getInt8Ty()->getPointerTo();
41 | auto *int8T = IRB.getInt8Ty();
42 | auto *int1T = IRB.getInt1Ty();
43 | auto *voidT = IRB.getVoidTy();
44 | // Expression builders and conversions; ptrT is the type used for every symbolic-expression argument and result below.
45 | buildInteger = import(M, "_sym_build_integer", ptrT, IRB.getInt64Ty(), int8T);
46 | buildInteger128 = import(M, "_sym_build_integer128", ptrT, IRB.getInt64Ty(),
47 | IRB.getInt64Ty());
48 | buildFloat = import(M, "_sym_build_float", ptrT, IRB.getDoubleTy(), int1T);
49 | buildNullPointer = import(M, "_sym_build_null_pointer", ptrT);
50 | buildTrue = import(M, "_sym_build_true", ptrT);
51 | buildFalse = import(M, "_sym_build_false", ptrT);
52 | buildBool = import(M, "_sym_build_bool", ptrT, int1T);
53 | buildSExt = import(M, "_sym_build_sext", ptrT, ptrT, int8T);
54 | buildZExt = import(M, "_sym_build_zext", ptrT, ptrT, int8T);
55 | buildTrunc = import(M, "_sym_build_trunc", ptrT, ptrT, int8T);
56 | buildBswap = import(M, "_sym_build_bswap", ptrT, ptrT);
57 | buildIntToFloat =
58 | import(M, "_sym_build_int_to_float", ptrT, ptrT, int1T, int1T);
59 | buildFloatToFloat = import(M, "_sym_build_float_to_float", ptrT, ptrT, int1T);
60 | buildBitsToFloat = import(M, "_sym_build_bits_to_float", ptrT, ptrT, int1T);
61 | buildFloatToBits = import(M, "_sym_build_float_to_bits", ptrT, ptrT);
62 | buildFloatToSignedInt =
63 | import(M, "_sym_build_float_to_signed_integer", ptrT, ptrT, int8T);
64 | buildFloatToUnsignedInt =
65 | import(M, "_sym_build_float_to_unsigned_integer", ptrT, ptrT, int8T);
66 | buildFloatAbs = import(M, "_sym_build_fp_abs", ptrT, ptrT);
67 | buildBoolAnd = import(M, "_sym_build_bool_and", ptrT, ptrT, ptrT);
68 | buildBoolOr = import(M, "_sym_build_bool_or", ptrT, ptrT, ptrT);
69 | buildBoolXor = import(M, "_sym_build_bool_xor", ptrT, ptrT, ptrT);
70 | buildBoolToBit = import(M, "_sym_build_bool_to_bit", ptrT, ptrT);
71 | buildBitToBool = import(M, "_sym_build_bit_to_bool", ptrT, ptrT);
72 | buildConcat =
73 | import(M, "_sym_concat_helper", ptrT, ptrT,
74 | ptrT); // doesn't follow naming convention for historic reasons
75 | pushPathConstraint =
76 | import(M, "_sym_push_path_constraint", voidT, ptrT, int1T, intPtrType);
77 |
78 | // Overflow arithmetic
79 | buildAddOverflow =
80 | import(M, "_sym_build_add_overflow", ptrT, ptrT, ptrT, int1T, int1T);
81 | buildSubOverflow =
82 | import(M, "_sym_build_sub_overflow", ptrT, ptrT, ptrT, int1T, int1T);
83 | buildMulOverflow =
84 | import(M, "_sym_build_mul_overflow", ptrT, ptrT, ptrT, int1T, int1T);
85 |
86 | // Saturating arithmetic
87 | buildSAddSat = import(M, "_sym_build_sadd_sat", ptrT, ptrT, ptrT);
88 | buildUAddSat = import(M, "_sym_build_uadd_sat", ptrT, ptrT, ptrT);
89 | buildSSubSat = import(M, "_sym_build_ssub_sat", ptrT, ptrT, ptrT);
90 | buildUSubSat = import(M, "_sym_build_usub_sat", ptrT, ptrT, ptrT);
91 | buildSShlSat = import(M, "_sym_build_sshl_sat", ptrT, ptrT, ptrT);
92 | buildUShlSat = import(M, "_sym_build_ushl_sat", ptrT, ptrT, ptrT);
93 |
94 | buildFshl = import(M, "_sym_build_funnel_shift_left", ptrT, ptrT, ptrT, ptrT);
95 | buildFshr =
96 | import(M, "_sym_build_funnel_shift_right", ptrT, ptrT, ptrT, ptrT);
97 | buildAbs = import(M, "_sym_build_abs", ptrT, ptrT);
98 | // Parameter and return expression accessors
99 | setParameterExpression =
100 | import(M, "_sym_set_parameter_expression", voidT, int8T, ptrT);
101 | getParameterExpression =
102 | import(M, "_sym_get_parameter_expression", ptrT, int8T);
103 | setReturnExpression = import(M, "_sym_set_return_expression", voidT, ptrT);
104 | getReturnExpression = import(M, "_sym_get_return_expression", ptrT);
105 |
106 | #define LOAD_BINARY_OPERATOR_HANDLER(constant, name) \
107 | binaryOperatorHandlers[Instruction::constant] = \
108 | import(M, "_sym_build_" #name, ptrT, ptrT, ptrT);
109 | // Integer and bitwise binary operators, indexed by LLVM opcode
110 | LOAD_BINARY_OPERATOR_HANDLER(Add, add)
111 | LOAD_BINARY_OPERATOR_HANDLER(Sub, sub)
112 | LOAD_BINARY_OPERATOR_HANDLER(Mul, mul)
113 | LOAD_BINARY_OPERATOR_HANDLER(UDiv, unsigned_div)
114 | LOAD_BINARY_OPERATOR_HANDLER(SDiv, signed_div)
115 | LOAD_BINARY_OPERATOR_HANDLER(URem, unsigned_rem)
116 | LOAD_BINARY_OPERATOR_HANDLER(SRem, signed_rem)
117 | LOAD_BINARY_OPERATOR_HANDLER(Shl, shift_left)
118 | LOAD_BINARY_OPERATOR_HANDLER(LShr, logical_shift_right)
119 | LOAD_BINARY_OPERATOR_HANDLER(AShr, arithmetic_shift_right)
120 | LOAD_BINARY_OPERATOR_HANDLER(And, and)
121 | LOAD_BINARY_OPERATOR_HANDLER(Or, or)
122 | LOAD_BINARY_OPERATOR_HANDLER(Xor, xor)
123 |
124 | // Floating-point arithmetic
125 | LOAD_BINARY_OPERATOR_HANDLER(FAdd, fp_add)
126 | LOAD_BINARY_OPERATOR_HANDLER(FSub, fp_sub)
127 | LOAD_BINARY_OPERATOR_HANDLER(FMul, fp_mul)
128 | LOAD_BINARY_OPERATOR_HANDLER(FDiv, fp_div)
129 | LOAD_BINARY_OPERATOR_HANDLER(FRem, fp_rem)
130 |
131 | #undef LOAD_BINARY_OPERATOR_HANDLER
132 |
133 | #define LOAD_UNARY_OPERATOR_HANDLER(constant, name) \
134 | unaryOperatorHandlers[Instruction::constant] = \
135 | import(M, "_sym_build_" #name, ptrT, ptrT);
136 |
137 | LOAD_UNARY_OPERATOR_HANDLER(FNeg, fp_neg)
138 |
139 | #undef LOAD_UNARY_OPERATOR_HANDLER
140 |
141 | #define LOAD_COMPARISON_HANDLER(constant, name) \
142 | comparisonHandlers[CmpInst::constant] = \
143 | import(M, "_sym_build_" #name, ptrT, ptrT, ptrT);
144 | // Integer comparisons, indexed by CmpInst predicate
145 | LOAD_COMPARISON_HANDLER(ICMP_EQ, equal)
146 | LOAD_COMPARISON_HANDLER(ICMP_NE, not_equal)
147 | LOAD_COMPARISON_HANDLER(ICMP_UGT, unsigned_greater_than)
148 | LOAD_COMPARISON_HANDLER(ICMP_UGE, unsigned_greater_equal)
149 | LOAD_COMPARISON_HANDLER(ICMP_ULT, unsigned_less_than)
150 | LOAD_COMPARISON_HANDLER(ICMP_ULE, unsigned_less_equal)
151 | LOAD_COMPARISON_HANDLER(ICMP_SGT, signed_greater_than)
152 | LOAD_COMPARISON_HANDLER(ICMP_SGE, signed_greater_equal)
153 | LOAD_COMPARISON_HANDLER(ICMP_SLT, signed_less_than)
154 | LOAD_COMPARISON_HANDLER(ICMP_SLE, signed_less_equal)
155 |
156 | // Floating-point comparisons
157 | LOAD_COMPARISON_HANDLER(FCMP_OGT, float_ordered_greater_than)
158 | LOAD_COMPARISON_HANDLER(FCMP_OGE, float_ordered_greater_equal)
159 | LOAD_COMPARISON_HANDLER(FCMP_OLT, float_ordered_less_than)
160 | LOAD_COMPARISON_HANDLER(FCMP_OLE, float_ordered_less_equal)
161 | LOAD_COMPARISON_HANDLER(FCMP_OEQ, float_ordered_equal)
162 | LOAD_COMPARISON_HANDLER(FCMP_ONE, float_ordered_not_equal)
163 | LOAD_COMPARISON_HANDLER(FCMP_ORD, float_ordered)
164 | LOAD_COMPARISON_HANDLER(FCMP_UNO, float_unordered)
165 | LOAD_COMPARISON_HANDLER(FCMP_UGT, float_unordered_greater_than)
166 | LOAD_COMPARISON_HANDLER(FCMP_UGE, float_unordered_greater_equal)
167 | LOAD_COMPARISON_HANDLER(FCMP_ULT, float_unordered_less_than)
168 | LOAD_COMPARISON_HANDLER(FCMP_ULE, float_unordered_less_equal)
169 | LOAD_COMPARISON_HANDLER(FCMP_UEQ, float_unordered_equal)
170 | LOAD_COMPARISON_HANDLER(FCMP_UNE, float_unordered_not_equal)
171 |
172 | #undef LOAD_COMPARISON_HANDLER
173 | // Memory operations
174 | memcpy = import(M, "_sym_memcpy", voidT, ptrT, ptrT, intPtrType);
175 | memset = import(M, "_sym_memset", voidT, ptrT, ptrT, intPtrType);
176 | memmove = import(M, "_sym_memmove", voidT, ptrT, ptrT, intPtrType);
177 | readMemory =
178 | import(M, "_sym_read_memory", ptrT, intPtrType, intPtrType, int1T);
179 | writeMemory = import(M, "_sym_write_memory", voidT, intPtrType, intPtrType,
180 | ptrT, int1T);
181 | buildZeroBytes = import(M, "_sym_build_zero_bytes", ptrT, intPtrType);
182 | buildInsert =
183 | import(M, "_sym_build_insert", ptrT, ptrT, ptrT, IRB.getInt64Ty(), int1T);
184 | buildExtract = import(M, "_sym_build_extract", ptrT, ptrT, IRB.getInt64Ty(),
185 | IRB.getInt64Ty(), int1T);
186 | // Notifications for calls, returns, and basic blocks
187 | notifyCall = import(M, "_sym_notify_call", voidT, intPtrType);
188 | notifyRet = import(M, "_sym_notify_ret", voidT, intPtrType);
189 | notifyBasicBlock = import(M, "_sym_notify_basic_block", voidT, intPtrType);
190 | }
191 |
192 | /// Decide whether a function is called symbolically, i.e., whether its name appears in the fixed list below.
193 | bool isInterceptedFunction(const Function &f) {
194 | static const StringSet<> kInterceptedFunctions = {
195 | "malloc", "calloc", "mmap", "mmap64", "open", "read",
196 | "lseek", "lseek64", "fopen", "fopen64", "fread", "fseek",
197 | "fseeko", "rewind", "fseeko64", "getc", "ungetc", "memcpy",
198 | "memset", "strncpy", "strchr", "memcmp", "memmove", "ntohl",
199 | "fgets", "fgetc", "getchar", "bcopy", "bcmp", "bzero"};
200 |
201 | return (kInterceptedFunctions.count(f.getName()) > 0); // exact name match only
202 | }
203 |
--------------------------------------------------------------------------------
/compiler/Runtime.h:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #ifndef RUNTIME_H
16 | #define RUNTIME_H
17 |
18 | #include
19 | #include
20 |
21 | #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11
22 | using SymFnT = llvm::Value *;
23 | #else
24 | using SymFnT = llvm::FunctionCallee;
25 | #endif
26 |
27 | /// Runtime functions: handles to the `_sym_*` run-time library functions, resolved for one module by the constructor.
28 | struct Runtime {
29 | Runtime(llvm::Module &M);
30 |
31 | SymFnT buildInteger{};
32 | SymFnT buildInteger128{};
33 | SymFnT buildFloat{};
34 | SymFnT buildNullPointer{};
35 | SymFnT buildTrue{};
36 | SymFnT buildFalse{};
37 | SymFnT buildBool{};
38 | SymFnT buildSExt{};
39 | SymFnT buildZExt{};
40 | SymFnT buildTrunc{};
41 | SymFnT buildBswap{};
42 | SymFnT buildIntToFloat{};
43 | SymFnT buildFloatToFloat{};
44 | SymFnT buildBitsToFloat{};
45 | SymFnT buildFloatToBits{};
46 | SymFnT buildFloatToSignedInt{};
47 | SymFnT buildFloatToUnsignedInt{};
48 | SymFnT buildFloatAbs{};
49 | SymFnT buildBoolAnd{};
50 | SymFnT buildBoolOr{};
51 | SymFnT buildBoolXor{};
52 | SymFnT buildBoolToBit{};
53 | SymFnT buildBitToBool{};
54 | SymFnT buildAddOverflow{};
55 | SymFnT buildSubOverflow{};
56 | SymFnT buildMulOverflow{};
57 | SymFnT buildSAddSat{};
58 | SymFnT buildUAddSat{};
59 | SymFnT buildSSubSat{};
60 | SymFnT buildUSubSat{};
61 | SymFnT buildSShlSat{};
62 | SymFnT buildUShlSat{};
63 | SymFnT buildFshl{};
64 | SymFnT buildFshr{};
65 | SymFnT buildAbs{};
66 | SymFnT buildConcat{};
67 | SymFnT pushPathConstraint{};
68 | SymFnT getParameterExpression{};
69 | SymFnT setParameterExpression{};
70 | SymFnT setReturnExpression{};
71 | SymFnT getReturnExpression{};
72 | SymFnT memcpy{};
73 | SymFnT memset{};
74 | SymFnT memmove{};
75 | SymFnT readMemory{};
76 | SymFnT writeMemory{};
77 | SymFnT buildZeroBytes{};
78 | SymFnT buildInsert{};
79 | SymFnT buildExtract{};
80 | SymFnT notifyCall{};
81 | SymFnT notifyRet{};
82 | SymFnT notifyBasicBlock{};
83 |
84 | /// Mapping from comparison predicates (both icmp and fcmp) to the functions
85 | /// that build the corresponding symbolic expressions.
86 | std::array comparisonHandlers{};
87 |
88 | /// Mapping from binary operators to the functions that build the
89 | /// corresponding symbolic expressions.
90 | std::array binaryOperatorHandlers{};
91 |
92 | /// Mapping from unary operators to the functions that build the
93 | /// corresponding symbolic expressions.
94 | std::array unaryOperatorHandlers{};
95 | };
96 |
97 | bool isInterceptedFunction(const llvm::Function &f);
98 |
99 | #endif
100 |
--------------------------------------------------------------------------------
/compiler/Symbolizer.h:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #ifndef SYMBOLIZE_H
16 | #define SYMBOLIZE_H
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 |
25 | #include "Runtime.h"
26 |
27 | class Symbolizer : public llvm::InstVisitor {
28 | public:
29 | explicit Symbolizer(llvm::Module &M)
30 | : runtime(M), dataLayout(M.getDataLayout()),
31 | ptrBits(M.getDataLayout().getPointerSizeInBits()),
32 | intPtrType(M.getDataLayout().getIntPtrType(M.getContext())) {}
33 |
34 | /// Insert code to obtain the symbolic expressions for the function arguments.
35 | void symbolizeFunctionArguments(llvm::Function &F);
36 |
37 | /// Insert a call to the run-time library to notify it of the basic block
38 | /// entry.
39 | void insertBasicBlockNotification(llvm::BasicBlock &B);
40 |
41 | /// Finish the processing of PHI nodes.
42 | ///
43 | /// This assumes that there is a dummy PHI node for each such instruction in
44 | /// the function, and that we have recorded all PHI nodes in the member
45 | /// phiNodes. In other words, the function has to be called after all
46 | /// instructions have been processed in order to fix up PHI nodes. See the
47 | /// documentation of member phiNodes for why we process PHI nodes in two
48 | /// steps.
49 | ///
50 | /// Important! Calling this function invalidates symbolicExpressions.
51 | void finalizePHINodes();
52 |
53 | /// Rewrite symbolic computation to only occur if some operand is symbolic.
54 | ///
55 | /// We don't want to build up formulas for symbolic computation if all
56 | /// operands are concrete. Therefore, this function rewrites all places that
57 | /// build up formulas (as recorded during the main pass) to skip formula
58 | /// construction if all operands are concrete. Additionally, it inserts code
59 | /// that constructs formulas for concrete operands if necessary.
60 | ///
61 | /// The basic idea is to transform code like this...
62 | ///
63 | /// res_expr = call _sym_some_computation(expr1, expr2, ...)
64 | /// res = some_computation(val1, val2, ...)
65 | ///
66 | /// ...into this:
67 | ///
68 | /// start:
69 | /// expr1_symbolic = icmp ne 0, expr1
70 | /// ...
71 | /// some_symbolic = or expr1_symbolic, ...
72 | /// br some_symbolic, check_arg1, end
73 | ///
74 | /// check_arg1:
75 | /// need_expr1 = icmp eq 0, expr1
76 | /// br need_expr1, create_expr1, check_arg2
77 | ///
78 | /// create_expr1:
79 | /// new_expr1 = ... (based on val1)
80 | /// br check_arg2
81 | ///
82 | /// check_arg2:
83 | /// good_expr1 = phi [expr1, check_arg1], [new_expr1, create_expr1]
84 | /// need_expr2 = ...
85 | /// ...
86 | ///
87 | /// sym_computation:
88 | /// sym_expr = call _sym_some_computation(good_expr1, good_expr2, ...)
89 | /// br end
90 | ///
91 | /// end:
92 | /// final_expr = phi [null, start], [sym_expr, sym_computation]
93 | ///
94 | /// The resulting code is much longer but avoids solver calls for all
95 | /// operations without symbolic data.
96 | void shortCircuitExpressionUses();
97 |
98 | void handleIntrinsicCall(llvm::CallBase &I);
99 | void handleInlineAssembly(llvm::CallInst &I);
100 | void handleFunctionCall(llvm::CallBase &I, llvm::Instruction *returnPoint);
101 |
102 | //
103 | // Implementation of InstVisitor
104 | //
105 | void visitBinaryOperator(llvm::BinaryOperator &I);
106 | void visitUnaryOperator(llvm::UnaryOperator &I);
107 | void visitSelectInst(llvm::SelectInst &I);
108 | void visitCmpInst(llvm::CmpInst &I);
109 | void visitReturnInst(llvm::ReturnInst &I);
110 | void visitBranchInst(llvm::BranchInst &I);
111 | void visitIndirectBrInst(llvm::IndirectBrInst &I);
112 | void visitCallInst(llvm::CallInst &I);
113 | void visitInvokeInst(llvm::InvokeInst &I);
114 | void visitAllocaInst(llvm::AllocaInst &);
115 | void visitLoadInst(llvm::LoadInst &I);
116 | void visitStoreInst(llvm::StoreInst &I);
117 | void visitGetElementPtrInst(llvm::GetElementPtrInst &I);
118 | void visitBitCastInst(llvm::BitCastInst &I);
119 | void visitTruncInst(llvm::TruncInst &I);
120 | void visitIntToPtrInst(llvm::IntToPtrInst &I);
121 | void visitPtrToIntInst(llvm::PtrToIntInst &I);
122 | void visitSIToFPInst(llvm::SIToFPInst &I);
123 | void visitUIToFPInst(llvm::UIToFPInst &I);
124 | void visitFPExtInst(llvm::FPExtInst &I);
125 | void visitFPTruncInst(llvm::FPTruncInst &I);
126 | void visitFPToSI(llvm::FPToSIInst &I);
127 | void visitFPToUI(llvm::FPToUIInst &I);
128 | void visitCastInst(llvm::CastInst &I);
129 | void visitPHINode(llvm::PHINode &I);
130 | void visitInsertValueInst(llvm::InsertValueInst &I);
131 | void visitExtractValueInst(llvm::ExtractValueInst &I);
132 | void visitSwitchInst(llvm::SwitchInst &I);
133 | void visitUnreachableInst(llvm::UnreachableInst &);
134 | void visitInstruction(llvm::Instruction &I);
135 |
136 | private:
137 | static constexpr unsigned kExpectedMaxPHINodesPerFunction = 16;
138 | static constexpr unsigned kExpectedSymbolicArgumentsPerComputation = 2;
139 |
140 | /// A symbolic input: one operand slot of an instruction plus the concrete value associated with it.
141 | struct Input {
142 | llvm::Value *concreteValue; ///< Concrete value associated with this input.
143 | unsigned operandIndex; ///< Index of the symbolic operand within user.
144 | llvm::Instruction *user; ///< Instruction whose operand is the expression.
145 |
146 | Input() = default;
147 |
148 | Input(llvm::Value *concrete, unsigned idx, llvm::Instruction *user)
149 | : concreteValue(concrete), operandIndex(idx), user(user) {
150 | assert(getSymbolicOperand()->getType() == // operand must be an i8 pointer
151 | llvm::Type::getInt8Ty(user->getContext())->getPointerTo());
152 | }
153 |
154 | llvm::Value *getSymbolicOperand() const { // operand currently stored in user
155 | return user->getOperand(operandIndex);
156 | }
157 |
158 | void replaceOperand(llvm::Value *newOperand) { // overwrite that operand in user
159 | user->setOperand(operandIndex, newOperand);
160 | }
161 | };
162 |
163 | /// A symbolic computation (a first/last instruction pair) with its inputs.
164 | struct SymbolicComputation {
165 | llvm::Instruction *firstInstruction = nullptr, *lastInstruction = nullptr;
166 | llvm::SmallVector<Input, kExpectedSymbolicArgumentsPerComputation> inputs;
167 |
168 | SymbolicComputation() = default;
169 |
170 | SymbolicComputation(llvm::Instruction *first, llvm::Instruction *last,
171 | llvm::ArrayRef<Input> in)
172 | : firstInstruction(first), lastInstruction(last),
173 | inputs(in.begin(), in.end()) {}
174 |
175 | /// Append another symbolic computation to this one.
176 | ///
177 | /// The computation that is to be appended must occur after the one that
178 | /// this method is called on. Merging a computation with itself is a no-op.
179 | void merge(const SymbolicComputation &other) {
180 | if (&other == this)
181 | return;
182 |
183 | if (firstInstruction == nullptr) // empty so far: adopt other's start
184 | firstInstruction = other.firstInstruction;
185 | lastInstruction = other.lastInstruction;
186 |
187 | for (const auto &input : other.inputs)
188 | inputs.push_back(input);
189 | }
190 |
191 | friend llvm::raw_ostream & // dereferences both instruction pointers
192 | operator<<(llvm::raw_ostream &out,
193 | const Symbolizer::SymbolicComputation &computation) {
194 | out << "\nComputation starting at " << *computation.firstInstruction
195 | << "\n...ending at " << *computation.lastInstruction
196 | << "\n...with inputs:\n";
197 | for (const auto &input : computation.inputs) {
198 | out << '\t' << *input.concreteValue << " => " << *input.user << '\n';
199 | }
200 | return out;
201 | }
202 | };
203 |
204 | /// Create an expression that represents the concrete value.
205 | llvm::Instruction *createValueExpression(llvm::Value *V,
206 | llvm::IRBuilder<> &IRB);
207 |
208 | /// Look up the symbolic expression recorded for V; nullptr if there is none.
209 | llvm::Value *getSymbolicExpression(llvm::Value *V) const {
210 | const auto found = symbolicExpressions.find(V);
211 | return (found == symbolicExpressions.end()) ? nullptr : found->second;
212 | }
213 |
214 | /// Like getSymbolicExpression, but a missing entry yields a null i8* constant.
215 | llvm::Value *getSymbolicExpressionOrNull(llvm::Value *V) const {
216 | if (auto *known = getSymbolicExpression(V))
217 | return known;
218 | return llvm::ConstantPointerNull::get(
219 | llvm::IntegerType::getInt8Ty(V->getContext())->getPointerTo());
220 | }
221 |
222 | bool isLittleEndian(llvm::Type *type) { // always false for aggregate types
223 | return type->isAggregateType() ? false : dataLayout.isLittleEndian();
224 | }
225 |
226 | /// Like buildRuntimeCall, but the call is always generated.
227 | SymbolicComputation forceBuildRuntimeCall(
228 | llvm::IRBuilder<> &IRB, SymFnT function,
229 | llvm::ArrayRef<std::pair<llvm::Value *, bool>> args) const;
230 |
231 | /// Create a call to the specified function in the run-time library.
232 | ///
233 | /// Each argument is specified as a pair of Value and Boolean. The Boolean
234 | /// specifies whether the Value is a symbolic argument, in which case the
235 | /// corresponding symbolic expression will be passed to the run-time function.
236 | /// Moreover, the use of symbolic expressions will be recorded in the
237 | /// resulting SymbolicComputation. If all symbolic arguments are known to be
238 | /// concrete (e.g., because they are compile-time constants), no call
239 | /// instruction is emitted and the function returns an empty optional.
240 | std::optional<SymbolicComputation>
241 | buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function,
242 | llvm::ArrayRef<std::pair<llvm::Value *, bool>> args) const {
243 | if (std::all_of(args.begin(), args.end(),
244 | [this](std::pair<llvm::Value *, bool> arg) {
245 | return (getSymbolicExpression(arg.first) == nullptr); // flag not consulted
246 | })) {
247 | return {};
248 | }
249 |
250 | return forceBuildRuntimeCall(IRB, function, args);
251 | }
252 |
253 | /// Convenience overload that treats all arguments as symbolic.
254 | std::optional<SymbolicComputation>
255 | buildRuntimeCall(llvm::IRBuilder<> &IRB, SymFnT function,
256 | llvm::ArrayRef<llvm::Value *> symbolicArgs) const {
257 | std::vector<std::pair<llvm::Value *, bool>> args;
258 | for (const auto &arg : symbolicArgs) {
259 | args.emplace_back(arg, true); // pair each value with "is symbolic"
260 | }
261 |
262 | return buildRuntimeCall(IRB, function, args);
263 | }
264 |
265 | /// Record the computation for later short-circuiting and, when a concrete
266 | /// value is given, register the computation's last instruction as that
267 | /// value's symbolic expression.
268 | void registerSymbolicComputation(const SymbolicComputation &computation,
269 | llvm::Value *concrete = nullptr) {
270 | if (concrete)
271 | symbolicExpressions[concrete] = computation.lastInstruction;
272 | expressionUses.emplace_back(computation);
273 | }
274 |
275 | /// Convenience overload for chaining with buildRuntimeCall; an empty optional is ignored.
276 | void registerSymbolicComputation(
277 | const std::optional<SymbolicComputation> &computation,
278 | llvm::Value *concrete = nullptr) {
279 | if (computation)
280 | registerSymbolicComputation(*computation, concrete);
281 | }
282 |
283 | /// Generate code that makes the solver try an alternative value for V.
284 | void tryAlternative(llvm::IRBuilder<> &IRB, llvm::Value *V);
285 |
286 | /// Helper to use a pointer to a host object as integer (truncating!).
287 | ///
288 | /// Note that the conversion will truncate the most significant bits of the
289 | /// pointer if the host uses larger addresses than the target. Therefore, use
290 | /// this function only when such loss is acceptable (e.g., when generating
291 | /// site identifiers to be passed to the backend, where collisions of the
292 | /// least significant bits are reasonably unlikely).
293 | ///
294 | /// Why not do a lossless conversion and make the backend accept 64-bit
295 | /// integers?
296 | ///
297 | /// 1. Performance: 32-bit architectures will process 32-bit values faster
298 | /// than 64-bit values.
299 | ///
300 | /// 2. Pragmatism: Changing the backend to accept and process 64-bit values
301 | /// would require modifying code that we don't control (in the case of Qsym).
302 | llvm::ConstantInt *getTargetPreferredInt(void *pointer) {
303 | return llvm::ConstantInt::get(intPtrType,
304 | reinterpret_cast<uint64_t>(pointer)); // host address as integer
305 | }
306 |
307 | /// Compute the offset of a member in a (possibly nested) aggregate.
308 | uint64_t aggregateMemberOffset(llvm::Type *aggregateType,
309 | llvm::ArrayRef<unsigned> indices) const;
310 |
311 | /// Emit code that converts the bit-vector expression represented by I to an
312 | /// expression that is appropriate for T; return the instruction that computes
313 | /// the result (which may be I if no conversion is needed).
314 | ///
315 | /// The solver doesn't represent all values as bit vectors. For example,
316 | /// floating-point values and Booleans are of separate kinds, so we emit code
317 | /// that changes the solver kind of the expression to whatever is needed.
318 | llvm::Instruction *convertBitVectorExprForType(llvm::IRBuilder<> &IRB,
319 | llvm::Instruction *I,
320 | llvm::Type *T) const;
321 |
322 | /// Emit code that converts the expression Expr for V to a bit-vector
323 | /// expression. Return the SymbolicComputation representing the conversion
324 | /// (if a conversion is necessary); the last instruction computes the result.
325 | ///
326 | /// This is the inverse operation of convertBitVectorExprForType (see details
327 | /// there).
328 | std::optional<SymbolicComputation>
329 | convertExprForTypeToBitVectorExpr(llvm::IRBuilder<> &IRB, llvm::Value *V,
330 | llvm::Value *Expr) const;
331 |
332 | const Runtime runtime;
333 |
334 | /// The data layout of the currently processed module.
335 | const llvm::DataLayout &dataLayout;
336 |
337 | /// The width in bits of pointers in the module.
338 | unsigned ptrBits;
339 |
340 | /// An integer type at least as wide as a pointer.
341 | llvm::IntegerType *intPtrType;
342 |
343 | /// Mapping from SSA values to symbolic expressions.
344 | ///
345 | /// For pointer values, the stored value is an expression describing the value
346 | /// of the pointer itself (i.e., the address, not the referenced value). For
347 | /// structure values, the expression is a single large bit vector.
348 | ///
349 | /// TODO This member adds a lot of complexity: various methods rely on it, and
350 | /// finalizePHINodes invalidates it. We may want to pass the map around
351 | /// explicitly.
352 | llvm::ValueMap<llvm::Value *, llvm::Value *> symbolicExpressions;
353 |
354 | /// A record of all PHI nodes in this function.
355 | ///
356 | /// PHI nodes may refer to themselves, in which case we run into an infinite
357 | /// loop when trying to generate symbolic expressions recursively. Therefore,
358 | /// we only insert a dummy symbolic expression for each PHI node and fix it
359 | /// after all instructions have been processed.
360 | llvm::SmallVector<llvm::PHINode *, kExpectedMaxPHINodesPerFunction> phiNodes;
361 |
362 | /// A record of expression uses that can be short-circuited.
363 | ///
364 | /// Most values in a program are concrete, even if they're not constant (in
365 | /// which case we would know that they're concrete at compile time already).
366 | /// There is no point in building up formulas if all values involved in a
367 | /// computation are concrete, so we short-circuit those cases. Since this
368 | /// process requires splitting basic blocks, we can't do it during the main
369 | /// analysis phase (because InstVisitor gets out of step if we try).
370 | /// Therefore, we keep a record of all the places that construct expressions
371 | /// and insert the fast path later.
372 | std::vector<SymbolicComputation> expressionUses;
373 | };
374 |
375 | #endif
376 |
--------------------------------------------------------------------------------
/compiler/sym++.in:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYMCC_RUNTIME_DIR@}"
18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYMCC_RUNTIME_32BIT_DIR@}"
19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libsymcc.so"
20 | libcxx_var=SYMCC_LIBCXX_PATH
21 | compiler="${SYMCC_CLANGPP:-@CLANGPP_BINARY@}"
22 |
23 | # Find out if we're cross-compiling for a 32-bit architecture
24 | runtime_dir="$runtime_64bit_dir"
25 | for arg in "$@"; do
26 | if [[ $arg == "-m32" ]]; then
27 | if [ -z "$runtime_32bit_dir" ]; then
28 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2
29 | exit 255
30 | else
31 | runtime_dir="$runtime_32bit_dir"
32 | libcxx_var=SYMCC_LIBCXX_32BIT_PATH
33 | break
34 | fi
35 | fi
36 | done
37 |
38 | if [[ -v SYMCC_REGULAR_LIBCXX ]]; then
39 | stdlib_cflags=() # arrays keep a libc++ path containing spaces in one word
40 | stdlib_ldflags=()
41 | elif [[ ! -v $libcxx_var ]]; then
42 | >&2 echo "Please set $libcxx_var to the directory containing libc++ or confirm usage of the system library by setting SYMCC_REGULAR_LIBCXX!"
43 | exit 255
44 | else
45 | # It is important that the resulting binaries load libstdc++ before libc++;
46 | # otherwise our backend calls the instrumented library in cases where
47 | # exported names collide.
48 | stdlib_cflags=(-isystem "${!libcxx_var}/include/c++/v1" -nostdlib++)
49 | stdlib_ldflags=(-L"${!libcxx_var}/lib" -Wl,-rpath,"${!libcxx_var}/lib" -lstdc++ -lc++ -stdlib=libc++)
50 | fi
51 |
52 | if [ $# -eq 0 ]; then
53 | echo "Use sym++ as a drop-in replacement for clang++, e.g., sym++ -O2 -o foo foo.cpp" >&2
54 | exit 1
55 | fi
56 |
57 | exec "$compiler" \
58 | @CLANG_LOAD_PASS@"$pass" \
59 | "${stdlib_cflags[@]}" \
60 | "$@" \
61 | "${stdlib_ldflags[@]}" \
62 | -L"$runtime_dir" \
63 | -lsymcc-rt \
64 | -Wl,-rpath,"$runtime_dir" \
65 | -Qunused-arguments
66 |
--------------------------------------------------------------------------------
/compiler/symcc.in:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYMCC_RUNTIME_DIR@}"
18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYMCC_RUNTIME_32BIT_DIR@}"
19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libsymcc.so"
20 | compiler="${SYMCC_CLANG:-@CLANG_BINARY@}"
21 |
22 | # Find out if we're cross-compiling for a 32-bit architecture
23 | runtime_dir="$runtime_64bit_dir"
24 | for arg in "$@"; do
25 | if [[ $arg == "-m32" ]]; then
26 | if [ -z "$runtime_32bit_dir" ]; then
27 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2
28 | exit 255
29 | else
30 | runtime_dir="$runtime_32bit_dir"
31 | break
32 | fi
33 | fi
34 | done
35 |
36 | if [ $# -eq 0 ]; then
37 | echo "Use symcc as a drop-in replacement for clang, e.g., symcc -O2 -o foo foo.c" >&2
38 | exit 1
39 | fi
40 |
41 | exec "$compiler" \
42 | @CLANG_LOAD_PASS@"$pass" \
43 | "$@" \
44 | -L"$runtime_dir" \
45 | -lsymcc-rt \
46 | -Wl,-rpath,"$runtime_dir" \
47 | -Qunused-arguments
48 |
--------------------------------------------------------------------------------
/docs/32-bit.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Compiling 32-bit programs on a 64-bit host
4 | (and cross-compilation in general)
5 |
6 |
7 | In theory, SymCC can use clang to cross-compile programs for any architecture
8 | that LLVM supports. Note, however, that this requires cross-compiling the
9 | symbolic backend and its dependencies as well, and then linking the right
10 | backend into the target programs. We currently provide automatic handling only
11 | for the common case of compiling 32-bit libraries and programs on a 64-bit host
12 | machine. In all other cases, we recommend building SymCC on the target machine,
13 | so that cross-compilation is not needed.
14 |
15 | Let's assume that you're running SymCC on a 64-bit x86 machine, wanting to
16 | create 32-bit binaries from some code under test. First of all, make sure that
17 | 32-bit versions of the backend's dependencies are available: LLVM (usually
18 | obtainable via packages of your Linux distribution) and Z3 (which is reasonably
19 | easy to build following its CMake instructions and exporting CFLAGS="-m32" and
20 | CXXFLAGS="-m32"). Beware of Z3 version 4.8.7 - it contains a bug that breaks the
21 | 32-bit build with an error related to "__builtin_ctz" (see
22 | https://github.com/Z3Prover/z3/issues/2727). If you build Z3 yourself, note that
23 | it is sufficient to point SymCC to the build directory - there is no need to
24 | install the 32-bit version of Z3 in your system.
25 |
26 | Once the dependencies for 32-bit SymCC are available (as well as the 64-bit
27 | dependencies mentioned in the main README), configure and build SymCC as usual
28 | but add "-DTARGET_32BIT=ON" to the CMake invocation. If the build system doesn't
29 | find your 32-bit versions of LLVM and Z3, specify their locations with
30 | "-DLLVM_32BIT_DIR=/some/path" and "-DZ3_32BIT_DIR=/some/other/path",
31 | respectively - analogously to how you would hint at the 64-bit versions.
32 |
33 | The resulting "symcc" and "sym++" scripts work like regular SymCC, but they
34 | additionally understand the "-m32" switch, which tells Clang to build 32-bit
35 | artifacts. If you build anything with "-m32", SymCC will make sure that the
36 | 32-bit version of the symbolic backend is linked to it instead of the 64-bit
37 | variant that would normally be used. Note that, in order to compile C++ code
38 | with "sym++" in 32-bit mode, you also need to build a 32-bit version of libc++
39 | (following the instructions for the 64-bit build from docs/C++.txt but
40 | additionally passing "-DLLVM_BUILD_32_BITS=ON" to CMake); communicate its
41 | location to SymCC via the environment variable SYMCC_LIBCXX_32BIT_PATH.
42 |
43 | If your 64-bit host is equipped to run 32-bit binaries, you can simply execute
44 | the instrumented programs produced by SymCC like any other program.
45 |
--------------------------------------------------------------------------------
/docs/C++.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Compiling C++
4 |
5 |
6 | SymCC has full support for C++ code and provides a wrapper "sym++" around
7 | clang++. Since C++ programs typically depend on the C++ standard library, we
8 | have two options when building them with SymCC:
9 |
10 | 1. Use the C++ standard library provided by the system. This is the easiest,
11 | requiring no additional effort, but it has an important drawback: data that
12 | passes through the standard library will be concretized, i.e., we lose track
13 | of the corresponding symbolic expressions.
14 | 2. The alternative is to build an instrumented C++ standard library. This means
15 | that we can track data through the library, but it requires building the
16 | library and compiling all code against it.
17 |
18 | We discuss both approaches in more detail below.
19 |
20 |
21 | Building against the system's C++ standard library
22 |
23 |
24 | In order to use the regular (uninstrumented) C++ standard library that the
25 | system provides, just call sym++ as a drop-in replacement for clang++:
26 |
27 | $ export SYMCC_REGULAR_LIBCXX=yes
28 | $ sym++ -o myprogram mysource.cpp
29 | $ ./myprogram
30 |
31 | The program will execute and produce alternative outputs as usual with SymCC,
32 | but it will not be able to trace operations that happen in C++ standard classes,
33 | such as std::vector.
34 |
35 |
36 | Instrumenting the C++ standard library
37 |
38 |
39 | Building an instrumented C++ standard library is a one-time effort; the library
40 | can then be used in all subsequent C++ compilations. We use "libc++", the LLVM
41 | project's implementation of the standard library. First, get the source code:
42 |
43 | $ git clone --depth 1 https://github.com/llvm/llvm-project.git
44 |
45 | Then build the library with SymCC:
46 |
47 | $ mkdir libcxx_symcc
48 | $ cd libcxx_symcc
49 | $ export SYMCC_REGULAR_LIBCXX=yes
50 | $ export SYMCC_NO_SYMBOLIC_INPUT=yes
51 | $ cmake -G Ninja /path-to-llvm-project/llvm \
52 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \
53 | -DLLVM_TARGETS_TO_BUILD="X86" \
54 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \
55 | -DCMAKE_BUILD_TYPE=Release \
56 | -DCMAKE_INSTALL_PREFIX=/some/convenient/location \
57 | -DCMAKE_C_COMPILER=/path-to-symcc-with-simple-backend/symcc \
58 | -DCMAKE_CXX_COMPILER=/path-to-symcc-with-simple-backend/sym++
59 | $ ninja distribution
60 | $ ninja install-distribution
61 | $ unset SYMCC_REGULAR_LIBCXX SYMCC_NO_SYMBOLIC_INPUT
62 |
63 | Note the two environment variables: SYMCC_REGULAR_LIBCXX avoids a
64 | chicken-and-egg problem - without it, SymCC would expect to compile against the
65 | instrumented C++ standard library. SYMCC_NO_SYMBOLIC_INPUT disables symbolic
66 | handling of input data - the build process of libc++ involves the creation of
67 | helper programs that are subsequently run, and we do not want them to perform
68 | symbolic analysis.
69 |
70 | A word on the choice of backends: While the instrumented libc++ will work with
71 | both backends, building it currently doesn't work with the QSYM backend. Just
72 | use the simple backend for the build process - there is no problem in using the
73 | library with the QSYM backend later. For very interested readers, here is an
74 | explanation of the problem: libc++ is an LLVM project and as such uses LLVM
75 | support code. During the build process, it builds a code-generation tool that is
76 | subsequently invoked (hence the recommendation to set SYMCC_NO_SYMBOLIC_INPUT).
77 | At run-time, the tool loads code built from the LLVM sources we obtained via git
78 | above. Why is this a problem for the QSYM backend? QSYM uses support code from
79 | LLVM as well, which means that the QSYM backend is linked against your system's
80 | LLVM libraries. If we build libc++ with the QSYM backend, the code-generation
81 | tool loads the QSYM code at run time and, via dependency resolution, also the
82 | system's LLVM installation. The end result is that we have two versions of LLVM
83 | support code in the same process - the system version and the one built from git
84 | - which will most likely collide. Using the simple backend avoids the problem
85 | because it doesn't depend on the system installation of LLVM.
86 |
87 | Once the library is ready, tell SymCC where to find it and compile C++ code as
88 | usual:
89 |
90 | $ export SYMCC_LIBCXX_PATH=/path-provided-as-cmake-install-prefix-for-libcxx
91 | $ sym++ -o myprogram mysource.cpp
92 | $ ./myprogram
93 |
94 | Now the program will use the instrumented C++ standard library, which enables it
95 | to trace computations inside the library. Note that you need to compile all code
96 | against the instrumented standard library - attempts to mix it with code
97 | compiled against the system's C++ standard library will lead to linker errors.
98 | And if you're so brave as to mix it with code compiled against an uninstrumented
99 | libc++, a run-time crash is the best you can hope for...
100 |
--------------------------------------------------------------------------------
/docs/Concreteness.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Concreteness Checks
4 |
5 |
6 | If we do not distinguish between symbolic and concrete values in the program
7 | under test, then we end up passing every computation to the solver, including
8 | concrete ones. Since all parameters are known in the purely concrete case, the
9 | solver will just repeat the computation (most likely in a less efficient way)
10 | and conclude that there is no way to change its outcome. We can avoid such
11 | wasted effort by only passing symbolic computations to the solver.
12 |
13 | There are two stages at which data can be identified as concrete:
14 | 1. At compile time, if a value is known to be a constant we can conclude that it
15 | will always be concrete at run time.
16 | 2. At run time, a value that is not a constant may still turn out to be
17 | concrete. For example, data read from memory can be symbolic or concrete.
18 |
19 | If we detect in the compiler pass that a value is a compile-time constant (case
20 | 1 above), we do not emit code for symbolic handling at all. However, for any
21 | other type of data, we need to generate code that handles the case of it being
22 | symbolic at run time. Concretely (no pun intended), we mark concrete values at
23 | run time by setting their corresponding symbolic expression in shadow memory to
24 | null. This makes it very cheap to check concreteness during execution: just run
25 | a null check on the symbolic expression.
26 |
27 | The code that we inject into the program under test performs concreteness checks
28 | on the arguments of each instruction. For example, when the program adds two
29 | values, the generated code performs the addition and additionally represents it
30 | symbolically according to the concreteness of the two addends. There are
31 | multiple cases to distinguish:
32 | 1. If all arguments of a computation are concrete, we can skip symbolic
33 | processing altogether and just set the result expression to null, indicating
34 | that the result is a concrete value.
35 | 2. If at least one argument is symbolic, we need to generate an expression
36 | representing the result. Therefore, we generate expressions for all arguments
37 | (since the concrete arguments will have null expressions) and call into the
38 | run-time support library to produce an expression according to the performed
39 | computation. There are several opportunities for optimization, e.g., when
40 | the computation only has a single argument that is not a compile-time
41 | constant we do not need to check it for concreteness again.
42 |
43 | It is important to note that these checks cannot be performed by the compiler
44 | because the concreteness of non-constant data is not known at compile time.
45 | Instead, the compiler emits code that performs the required checks at run time
46 | and acts accordingly.
47 |
--------------------------------------------------------------------------------
/docs/Configuration.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Configuration options
4 |
5 |
6 | SymCC is configured at two different stages:
7 |
8 | 1. At compile time, you decide which features to enable, which mainly affects
9 | compilation time and the set of dependencies. This is done via arguments to
10 | CMake.
11 |
12 | 2. When you run programs that have been compiled with SymCC, environment
13 | variables control various aspects of the execution and analysis.
14 |
15 | We list all available options for each stage in turn.
16 |
17 |
18 | Compile-time options
19 |
20 |
21 | Each of these is passed to CMake with "-D" when configuring the build:
22 |
23 | - QSYM_BACKEND=ON/OFF (default OFF): Compile either the QSYM backend or our
24 | simple Z3 wrapper (see docs/Backends.txt for details). Note that binaries
25 | produced by the SymCC compiler are backend-agnostic; you can use
26 | LD_LIBRARY_PATH to switch between backends per execution.
27 |
28 | - TARGET_32BIT=ON/OFF (default OFF): Enable support for 32-bit compilation on
29 | 64-bit hosts. This will essentially make the compiler switch "-m32" work as
30 | expected; see docs/32-bit.txt for details.
31 |
32 | - LLVM_DIR/LLVM_32BIT_DIR (default empty): Hints for the build system to find
33 | LLVM if it's in a non-standard location.
34 |
35 | - Z3_DIR/Z3_32BIT_DIR (default empty): Hints for the build system to find Z3 if
36 | it's in a non-standard location.
37 |
38 | - Z3_TRUST_SYSTEM_VERSION (default OFF): Trust that the system provides a
39 | suitable version of Z3 if the corresponding CMake module can't be found. Use
40 | this with Linux distributions that don't package the CMake module but still
41 | ship an otherwise usable development setup (e.g., Fedora before F33). Note
42 | that we can't check the Z3 version for compatibility in this case, so prepare
43 | for compiler errors if the system-wide installation of Z3 is too old.
44 |
45 |
46 | Run-time options
47 |
48 |
49 | "Run time" refers to the time when you run programs compiled with SymCC, not
50 | when you run SymCC itself. In other words, these are settings that you can
51 | change on every execution of an instrumented program. They are specified via
52 | environment variables.
53 |
54 | - SYMCC_NO_SYMBOLIC_INPUT=0/1 (default 0): When set to 1, input is never marked
55 | as symbolic; in other words, instrumented programs will run just like their
56 | uninstrumented counterparts.
57 |
58 | - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC
59 | will store new inputs (QSYM backend only). If you prefer to handle them
60 | programmatically, make your program call symcc_set_test_case_handler; the
61 | handler will be called instead of the default handler each time the backend
62 | generates a new input.
63 |
64 | - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from
65 | standard input as symbolic; when set to a file name, any data read from that
66 | file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1.
67 |
68 | - SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under
69 | test to communicate symbolic inputs with one or more calls to
70 | symcc_make_symbolic. Can't be combined with SYMCC_INPUT_FILE. Ignored if
71 | SYMCC_NO_SYMBOLIC_INPUT is set to 1.
72 |
73 | - SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the
74 | file (or overwrites any existing file!) and uses it to log backend activity
75 | including solver output (simple backend only).
76 |
77 | - SYMCC_ENABLE_LINEARIZATION=0/1 (default 0): Enable QSYM's basic-block pruning,
78 | a call-stack-aware strategy to reduce solver queries when executing code
79 | repeatedly (QSYM backend only). See the QSYM paper for details; highly
80 | recommended for fuzzing and enabled automatically by the fuzzing helper.
81 |
82 | - SYMCC_AFL_COVERAGE_MAP (default empty): When set to the file name of an
83 | AFL-style coverage map, load the map before executing the target program and
84 | use it to skip solver queries for paths that have already been covered (QSYM
85 | backend only). The map is updated in place, so beware of races when running
86 | multiple instances of SymCC! The fuzzing helper uses this to remember the
87 | state of exploration across multiple executions of the target program.
88 | Warning: This setting has a misleading name - while the format of the map
89 | follows (classic) AFL, the variable isn't meant to point at a map file that
90 | AFL uses too!
91 |
92 | (Most people should stop reading here.)
93 |
94 |
95 | Advanced options
96 |
97 |
98 | There is actually a third category of options: when compiling with SymCC, you
99 | can specify the location of its various components via environment variables.
100 | This is not necessary in most cases because the build system makes sure that all
101 | components know about each other; however, in some advanced setups you may need
102 | to move files around after building them, and in that case, you can use the
103 | variables documented below to communicate the new locations:
104 |
105 | - SYMCC_RUNTIME_DIR and SYMCC_RUNTIME32_DIR: The directory that contains the
106 | run-time support library (i.e., libSymRuntime.so).
107 |
108 | - SYMCC_PASS_DIR: The directory containing the compiler pass (i.e.,
109 | libSymbolize.so).
110 |
111 | - SYMCC_CLANG and SYMCC_CLANGPP: The clang and clang++ binaries to use during
112 | compilation. Be very careful with this one: if the version of the compiler you
113 | specify here doesn't match the one you built SymCC against, you'll most likely
114 | get linker errors.
115 |
--------------------------------------------------------------------------------
/docs/Experiments.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Experiments
4 |
5 |
6 | Here we document how to reproduce the experiments that we show in the paper
7 | "Symbolic execution with SymCC: Don't interpret, compile!" The same instructions
8 | are available on our website [1], which also provides our raw results. Feel free
9 | to reach out to us if you encounter problems with reproducing the benchmarks.
10 |
11 | The datasets are also archived on figshare [10].
12 |
13 | In the paper, we describe two sets of experiments: we first benchmark SymCC on
14 | the CGC programs, then we run it on real-world software.
15 |
16 |
17 | CGC experiments
18 |
19 | We used the Linux port of the CGC programs by Trail of Bits [2]. SymCC needs to
20 | be built with support for 32-bit compilation (see docs/32-bit.txt; this is not
21 | part of the Dockerfile because it would double the build time of the container
22 | while providing value to just a few users). Then you can simply export
23 | CC=/path/to/symcc, CXX=/path/to/sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, and build
24 | the CGC programs as usual (i.e., by invoking their build.sh script).
25 |
26 | Run the programs on the raw PoV inputs [3] with SYMCC_NO_SYMBOLIC_INPUT=1 to
27 | measure pure execution time, and unset the environment variable for symbolic
28 | execution. To assess coverage, we ran afl-showmap with the AFL-instrumented CGC
29 | programs on each generated input and accumulated the resulting coverage maps per
30 | program, resulting in a set of covered map entries for each CGC program. The
31 | sizes of those sets can then be fed to the scoring formula presented in the
32 | paper.
33 |
34 | For KLEE and QSYM, we used the setup described in our IR study [3] but with the
35 | regular 32-bit binaries built by cb-multios.
36 |
37 |
38 | Real-world software
39 |
40 | The analysis of real-world software always follows the same procedure. Assuming
41 | you have exported CC=symcc, CXX=sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, first
42 | download the code, then build it using its own build system, and finally unset
43 | SYMCC_NO_SYMBOLIC_INPUT and analyze the program in concert with AFL (which
44 | requires building a second time for AFL, see docs/Fuzzing.txt). We used AFL
45 | 2.56b and built the targets with AFL_USE_ASAN=1. Note that the fuzzing helper is
46 | already installed in the Docker container.
47 |
48 | OpenJPEG [4]: we used revision 1f1e9682, built with CMake as described in the
49 | project's INSTALL.md (adding "-DBUILD_THIRDPARTY=ON" to make sure that
50 | third-party libraries are compiled with SymCC as well), and analyzed
51 | "bin/opj_decompress -i @@ -o /tmp/image.pgm"; the corpus consisted of test
52 | files file1.jp2 and file8.jp2 [5].
53 |
54 | libarchive [6]: we used revision 9ebb2484, built with CMake as described in the
55 | project's INSTALL (but adding "-DCMAKE_BUILD_TYPE=Release"), and analyzed
56 | "bin/bsdtar tf @@"; the corpus consisted of just a single dummy file
57 | containing the character "A".
58 |
59 | tcpdump: we built both tcpdump [7] and libpcap [8]; in order to make the former
60 | find the latter, just place the source directories next to each other in the
61 | same folder. We used revision d615abec of libpcap and revision d57927e1 of
62 | tcpdump. We built first libpcap and then tcpdump with "./configure && make",
63 | and analyzed "tcpdump/tcpdump -e -r @@"; the corpus consisted of just a single
64 | dummy file containing the character "A".
65 |
66 | All experiments used one AFL master process, one secondary AFL process, and one
67 | SymCC process. We let them run for 24 hours and repeated each of them 30 times
68 | to create the graphs in the paper; AFL map density was extracted from the
69 | secondary AFL process' "plot_data" file, column "map_size".
70 |
71 | The QSYM experiments used an analogous setup, replacing SymCC with QSYM and
72 | running it with AFL according to the QSYM authors' instructions [9].
73 |
74 | [1] http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html
75 | [2] https://github.com/trailofbits/cb-multios
76 | [3] http://www.s3.eurecom.fr/tools/symbolic_execution/ir_study.html
77 | [4] https://github.com/uclouvain/openjpeg.git
78 | [5] https://github.com/uclouvain/openjpeg-data/blob/master/input/conformance
79 | [6] https://github.com/libarchive/libarchive.git
80 | [7] https://github.com/the-tcpdump-group/tcpdump.git
81 | [8] https://github.com/the-tcpdump-group/libpcap.git
82 | [9] https://github.com/sslab-gatech/qsym#run-hybrid-fuzzing-with-afl
83 | [10] https://doi.org/10.6084/m9.figshare.24270709.v1 or https://figshare.com/articles/dataset/SymCC_evaluation_data/24270709
84 |
--------------------------------------------------------------------------------
/docs/Fuzzing.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Combining SymCC with a fuzzer
4 |
5 |
6 | Programs instrumented with SymCC generate new test inputs on every run. This is
7 | the core building block for program testing, but a full analysis requires
8 | additional components: new test cases need to be checked for whether they
9 | trigger vulnerabilities in the target program, we have to sort them by relevance
10 | and feed them back to symbolic execution. These tasks are essentially the same
11 | as in fuzzing, except that we use a smarter (yet more expensive) strategy to
12 | generate new inputs. Here we show how to reuse an existing fuzzer for the
13 | management tasks but additionally generate new inputs with SymCC.
14 |
15 |
16 | Setup
17 |
18 |
19 | We use AFL, a popular gray-box fuzzer, in its parallel mode. See AFL's
20 | documentation on parallel fuzzing for details on this mode - the basic idea is
21 | that SymCC and the fuzzer periodically exchange new inputs. SymCC comes with a
22 | helper that coordinates the collaboration with the fuzzer. It is written in
23 | Rust, so rustc and cargo (the Rust package manager) have to be installed. On
24 | Debian-based distributions, for example, a simple "apt install rustc cargo" is
25 | all you need. Build the tool by executing the following command in the root of
26 | SymCC's source repository:
27 |
28 | $ cargo install --path util/symcc_fuzzing_helper
29 |
30 | Afterwards, you should have a self-contained binary
31 | ~/.cargo/bin/symcc_fuzzing_helper. If you are interested in the tool's
32 | internals, you can render documentation as follows:
33 |
34 | $ cargo doc --manifest-path util/symcc_fuzzing_helper/Cargo.toml \
35 | --document-private-items --open
36 |
37 | This is all on the SymCC side. Now just make sure that AFL is installed - we've
38 | tested with version 2.56b.
39 |
40 |
41 | Testing an example program
42 |
43 |
44 | Suppose we wanted to search for memory-related vulnerabilities in tcpdump's
45 | link-layer parsers. The program can be instructed to read from a pcap and print
46 | relevant headers like so:
47 |
48 | $ tcpdump -e -r <pcap file>
49 |
50 | Compile tcpdump and libpcap, the library it uses for pcap reading, once with
51 | SymCC and once with one of AFL's compiler wrappers (e.g., afl-clang). In order
52 | to detect memory corruptions, enable address sanitizer in the AFL-instrumented
53 | version by exporting AFL_USE_ASAN=1 before compiling:
54 |
55 | $ git clone https://github.com/the-tcpdump-group/libpcap.git
56 | $ git clone https://github.com/the-tcpdump-group/tcpdump.git
57 |
58 | $ mkdir symcc_build; cd symcc_build
59 | $ cp -r ../{libpcap,tcpdump} .
60 | $ cd libpcap
61 | $ CC=/path/to/symcc ./configure
62 | $ make
63 | $ cd ../tcpdump
64 | $ CC=/path/to/symcc ./configure
65 | $ make
66 | $ cd ..
67 |
68 | $ mkdir afl_build; cd afl_build
69 | $ export AFL_USE_ASAN=1
70 | $ cp -r ../{libpcap,tcpdump} .
71 | $ cd libpcap
72 | $ CC=/path/to/afl-clang ./configure
73 | $ make
74 | $ cd ../tcpdump
75 | $ CC=/path/to/afl-clang ./configure
76 | $ make
77 | $ cd ..
78 |
79 | Note that we need two copies of the source code because the projects build
80 | in-tree. Also, it is important to place the source code directories next to each
81 | other, so that tcpdump's build system can find and statically link the
82 | previously built libpcap.
83 |
84 | Create a corpus of dummy files somewhere (say, in a directory called "corpus");
85 | for tcpdump, we just start with a minimal corpus containing only a dummy file for
86 | AFL:
87 |
88 | $ mkdir corpus
89 | $ echo A > corpus/dummy
90 |
91 | Then launch one AFL master and one AFL secondary instance, both writing their
92 | outputs to the arbitrarily named directory "afl_out":
93 |
94 | $ afl-fuzz -M afl-master -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@
95 | $ afl-fuzz -S afl-secondary -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@
96 |
97 | For simplicity, we disable memory limits (with "-m none"); be sure to read AFL's
98 | notes on address sanitizer to learn about the implications. Alternatively, you
99 | can compile the target program without address sanitizer, in which case you
100 | don't need to disable the memory limit.
101 |
102 | Finally, we can run SymCC using the helper:
103 |
104 | $ ~/.cargo/bin/symcc_fuzzing_helper -o afl_out -a afl-secondary -n symcc -- symcc_build/tcpdump/tcpdump -e -r @@
105 |
106 | It will run SymCC on the most promising inputs generated by the secondary AFL
107 | instance and feed any interesting results back to AFL. In AFL's status screen,
108 | you should see the counter "imported" in the "path geometry" section increase
109 | after a short time - this means that the fuzzer instances and SymCC are
110 | exchanging inputs. Crashes will be stored in afl_out/*/crashes as usual.
111 |
112 | It is possible to run SymCC with only an AFL master or only a secondary AFL
113 | instance; see the AFL docs for the implications. Moreover, the number of fuzzer
114 | and SymCC instances can be increased - just make sure that each has a unique
115 | name.
116 |
117 | Note that there are currently a few gotchas with the fuzzing helper:
118 |
119 | 1. It expects afl-showmap to be in the same directory as afl-fuzz (which is
120 | usually the case), and it finds that directory via your afl-fuzz command. If
121 | afl-fuzz is on your PATH (as we assumed in the example above), all is good
122 | and you can ignore this point. Otherwise, you need to either call afl-fuzz
123 | with an absolute path (e.g., /afl/afl-fuzz in the Docker image) or, if you
124 | use a relative path, start afl-fuzz from the same working directory as the
125 | fuzzing helper.
126 |
127 | 2. The helper needs to know how to call the AFL-instrumented version of the
128 | target, and it finds that information by scanning your afl-fuzz command. To
129 | this end, it _requires_ the double dash that we used in the example above to
130 | separate afl-fuzz options from the target command; if you omit it, you'll
131 | likely get errors from the helper when it tries to run afl-showmap.
132 |
--------------------------------------------------------------------------------
/docs/Ideas.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Ideas for future work
4 |
5 |
6 | Here we collect ideas around improvements and new features that could be
7 | interesting to implement.
8 |
9 |
10 | Position in the optimizer pipeline
11 |
12 | Intuitively, we should run towards the end of the pipeline, so that the target
13 | program has been simplified as much as possible. However, SymCC currently runs
14 | just before the vectorizer - a position later in the pipeline would require
15 | supporting LLVM vector instructions, so for now we choose implementation
16 | simplicity over potential performance gains. Still, it would be very interesting
17 | to check whether moving to the end of the pipeline accelerates the system
18 | significantly, and how much it would cost in terms of complexity.
19 |
20 |
21 | Optimize injected code
22 |
23 | We should schedule a few optimization passes after inserting our
24 | instrumentation, so that the instrumentation code gets optimized as well. This
25 | becomes more important the further we move our pass to the end of the pipeline.
26 | We could take inspiration from popular sanitizers like ASan and MSan regarding
27 | the concrete passes to run, and their order. Also, we should enable link-time
28 | optimization to inline some simple run-time support functions.
29 |
30 |
31 | Free symbolic expressions in memory
32 |
33 | SymCC currently doesn't free symbolic expressions. This is fine most of the time
34 | because intermediate values are rarely computed without being used: typically,
35 | they end up being inputs to future computations, so we couldn't free the
36 | corresponding expressions anyway. A notable exception is the computation of
37 | values only for output - the expressions for such values could be freed after
38 | the value is output, which would reduce memory consumption, especially with
39 | output-heavy target programs.
40 |
41 |
42 | Better fuzzer integration
43 |
44 | Our current coordination with the fuzzer is very crude: we use AFL's distributed
45 | mode to make it periodically pull new inputs from SymCC, and we try to
46 | prioritize the most interesting inputs from AFL's queue for execution in SymCC.
47 | However, a better integration would consider the trade-offs of symbolic
48 | execution: it's expensive but uses more sophisticated reasoning. As long as the
49 | fuzzer makes good progress (for some progress metric), CPU power should be
50 | allocated only to the fuzzer; the price of symbolic execution should be paid
51 | only when necessary. Moreover, a faster synchronization mechanism than AFL's
52 | file-system based approach would be nice.
53 |
54 |
55 | Work with other fuzzers
56 |
57 | Integrating with AFL is easy because its distributed mode only requires working
58 | with files and directories. Other fuzzers might not provide such easy
59 | mechanisms, but by integrating with them we would gain whatever performance
60 | improvements they have made over AFL (e.g., AFL++ or Honggfuzz).
61 |
62 |
63 | Forking version
64 |
65 | Instead of working with a fuzzer, we could also implement forking and some
66 | scheduling strategies ourselves. Georgia Tech has developed some OS-level
67 | primitives that could help to implement such a feature:
68 | https://github.com/sslab-gatech/perf-fuzz.
69 |
--------------------------------------------------------------------------------
/docs/Libc.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Libc Call Interception
4 |
5 |
6 | While we can, in principle, compile all code with instrumentation, it is unclear
7 | how difficult this is for the C standard library. The LLVM sanitizers don't try,
8 | even if MSan otherwise requires all code to be instrumented, and we take this as
9 | a sign that there may be unforeseen challenges. For now, we take the same route
10 | as the sanitizers and intercept calls to libc functions, wrapping them with
11 | symbolic handling. For example, the wrapper for "memset" obtains the symbolic
12 | expression for the value to be written in memory and pushes it to the shadow
13 | region of the destination memory. In the future, we may experiment with
14 | compiling (parts of) the libc to avoid the effort of manually defining wrappers.
15 |
16 | Initially, we tried the interception mechanism that the LLVM sanitizers use,
17 | implemented in the compiler-rt library. The Linux version basically just defines
18 | a function with the name of the libc function. The dynamic loader resolves
19 | symbols to the first function with the right name that it finds; given an
20 | appropriate link order, the wrapper (or "interceptor" in compiler-rt parlance)
21 | will be called instead of the libc function. Calling the real function is just a
22 | matter of asking the loader for alternative resolutions (i.e., calling "dlsym"
23 | with flag "RTLD_NEXT"). The problem for us is that this approach *globally*
24 | replaces a given libc function, in the executable and in all libraries that it
25 | loads. However, our run-time support library is loaded into the same process and
26 | makes heavy use of libc, so we need the ability to use wrappers in one part of
27 | the program and concrete functions in another. This turned out to complicate the
28 | compiler-rt-based implementation so much that we eventually abandoned the
29 | approach.
30 |
31 | Function renaming provided a convenient alternative: we control all code that is
32 | supposed to call wrappers rather than the libc functions proper, so we just rename
33 | the targets of their calls. For example, a call to "memset" in the program under
34 | test is turned into a call to "memset_symbolized", which we can easily define as
35 | a regular function wrapping "memset". Calls from our run-time library, on the
36 | other hand, use the regular function names and thus end up in libc as usual.
37 |
--------------------------------------------------------------------------------
/docs/Optimization.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Optimization
4 |
5 |
6 | A popular technique for experimenting with compiler passes is to produce bitcode
7 | with "clang -emit-llvm" and run the pass on the resulting bitcode with opt. Note
8 | that this approach does not mix well with optimization: simply running "opt -O3"
9 | on the instrumented bitcode yields inferior results. Why? In principle, the
10 | instrumentation that adds symbolic execution capabilities does not interfere
11 | with the compiler's regular optimization. However, while "opt -O3" runs the same
12 | middle-end optimizations as clang does internally, "clang -O3" performs
13 | additional analysis before invoking the middle end. In particular, type-based
14 | alias analysis (TBAA) adds metadata to the bitcode that enables the SROA pass to
15 | promote a lot of stack-allocated variables into SSA values.
16 |
17 | In order to produce bitcode that can later be properly optimized with opt, pass
18 | the desired optimization flag at each stage of the workflow:
19 |
20 | $ clang -O3 -Xclang -disable-llvm-passes -emit-llvm -S test.c -o test.ll
21 | $ opt -load ./libSymbolize.so -symbolize < test.ll > test_instrumented.bc
22 | $ opt -O3 < test_instrumented.bc > test_instrumented_optimized.bc
23 | $ clang -O3 test_instrumented_optimized.bc -o test
24 | $ ./test
25 |
--------------------------------------------------------------------------------
/docs/Testing.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Testing
4 |
5 |
6 | The short version:
7 |
8 | $ ninja check
9 |
10 |
11 | The slightly longer version:
12 |
13 | We use the LLVM tools "lit" and "FileCheck", integrated with CMake. Since there
14 | is little documentation on setting up the combination of those tools (apart from
15 | the LLVM source code), here is an overview of the setup:
16 |
17 | 1. Tests are C or C++ source files in the "test" subdirectory of the project.
18 | They contain instructions for FileCheck in comments; see that tool's
19 | documentation.
20 |
21 | 2. We run the individual tests through lit, LLVM's configurable test runner. It
22 | finds each test file, performs some variable substitutions (see below), and
23 | runs the tests. The main source of configuration is "test/lit.cfg".
24 |
25 | 3. At configuration time, CMake creates an additional config file for lit,
26 | containing site-specific configuration such as the output directory of the
27 | build. The template is "test/lit.site.cfg.in".
28 |
29 | 4. CMake adds the "check" target, which invokes lit on the test suite. (It would
30 | be nice to call the target "test", but this is a reserved name in some
31 | versions of CMake, and the built-in test mechanism that it is reserved for
32 | doesn't track dependencies the way we need.)
33 |
34 | Test files can use the following patterns:
35 |
36 | %s The test file itself.
37 | %t A temporary file.
38 | %symcc Invocation of clang with our custom pass loaded.
39 | %filecheck Invocation of FileCheck with the right arguments for the backend.
40 |
41 | Since we support multiple symbolic backends, the tests must account for
42 | different output from different backends. To this end, we rely on FileCheck's
43 | prefix mechanism: test files use different prefixes to specify requirements on
44 | different backends. The following prefixes are supported:
45 |
46 | SIMPLE: Active when we test with our own backend.
47 | QSYM: Active when we test with the QSYM backend.
48 | ANY: Always active.
49 |
50 | The build system makes sure that "%filecheck" always expands to an invocation of
51 | FileCheck that activates the right prefixes for the current build configuration.
52 |
53 | Note that we run the tests only with the backend selected at configuration time,
54 | so a full test requires building the project in multiple configurations. Also,
55 | be aware that the backends write all log messages to standard error; therefore,
56 | checks should not depend on the relative ordering of backend logs and messages
57 | that the test program writes to standard output (use stderr instead).
58 |
59 |
60 | Regression tests
61 |
62 | In addition to the hand-written tests that exercise compiler functionality via C
63 | code, we have a directory "test/regression" where we can collect LLVM bitcode
64 | files that triggered bugs in real SymCC use. Generate the bitcode by running the
65 | crashing compiler command with additional arguments "-emit-llvm -S -o-", pipe
66 | the result through "opt -S -instnamer", and add a comment at the top to tell lit
67 | how to compile it. The instruction naming is necessary because different LLVM
68 | versions treat numbered (i.e., unnamed) instructions differently and may
69 | complain if the numbering sequence doesn't match expectations.
70 |
--------------------------------------------------------------------------------
/sample.cpp:
--------------------------------------------------------------------------------
1 | #include <iostream>
2 |
3 | int main(int argc, char *argv[]) { // Asks for a name on stdout, reads it from stdin and answers; argc/argv are unused.
4 | std::cout << "What's your name?" << std::endl;
5 | std::string name;
6 | std::cin >> name; // operator>> extracts a single whitespace-delimited token
7 |
8 | if (name == "root") // the only branch that depends on the input
9 | std::cout << "What is your command?" << std::endl;
10 | else
11 | std::cout << "Hello, " << name << "!" << std::endl;
12 |
13 | return 0; // exit status is 0 on both paths
14 | }
15 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | if (SYMCC_RT_BACKEND STREQUAL "qsym") # Pick the FileCheck prefixes that match the configured backend.
16 | set(SYM_TEST_FILECHECK_ARGS "--check-prefix=QSYM --check-prefix=ANY")
17 | elseif (SYMCC_RT_BACKEND STREQUAL "simple")
18 | set(SYM_TEST_FILECHECK_ARGS "--check-prefix=SIMPLE --check-prefix=ANY")
19 | else()
20 | message(FATAL_ERROR "Unknown backend to test: ${SYMCC_RT_BACKEND}") # Any other backend value aborts configuration.
21 | endif()
22 |
23 | if (${LLVM_VERSION_MAJOR} VERSION_GREATER_EQUAL 14)
24 | # FileCheck used to be fine with unused prefixes when more than one prefix was
25 | # defined. This changed in LLVM version 14, requiring the new option
26 | # "--allow-unused-prefixes" (added in LLVM 13) to restore the old behavior.
27 | set(SYM_TEST_FILECHECK_ARGS "${SYM_TEST_FILECHECK_ARGS} --allow-unused-prefixes")
28 | endif()
29 |
30 | configure_file("lit.site.cfg.in" "lit.site.cfg") # Instantiate lit's site configuration from the template.
31 |
32 | add_custom_target(check # The "check" target runs lit on this directory's build tree.
33 | lit
34 | --verbose
35 | --path=${LLVM_TOOLS_BINARY_DIR}
36 | ${CMAKE_CURRENT_BINARY_DIR}
37 | COMMENT "Testing the system..."
38 | USES_TERMINAL)
39 |
40 | add_dependencies(check SymCCRuntime SymCC) # Build the runtime and SymCC before running the tests.
41 | if (TARGET SymCCRuntime32) # Applies only when a SymCCRuntime32 target has been defined.
42 | add_dependencies(check SymCCRuntime32 SymCC)
43 | endif()
44 |
--------------------------------------------------------------------------------
/test/README:
--------------------------------------------------------------------------------
1 | ../docs/Testing.txt
--------------------------------------------------------------------------------
/test/arrays.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x01\x02\x03\x04\x00" | %t 2>&1 | %filecheck %s
17 | #include <stdint.h>
18 | #include <stdio.h>
19 | #include <unistd.h>
20 |
21 | int main(int argc, char* argv[]) { // Reads 4 data bytes and a 1-byte offset from stdin, then branches on values reached through input + offset.
22 | uint8_t input[4];
23 | if (read(STDIN_FILENO, input, sizeof(input)) != sizeof(input)) {
24 | fprintf(stderr, "Failed to read the input\n");
25 | return -1; // a short read ends the test early
26 | }
27 |
28 | uint8_t offset;
29 | if (read(STDIN_FILENO, &offset, sizeof(offset)) != sizeof(offset)) {
30 | fprintf(stderr, "Failed to read the offset\n");
31 | return -1;
32 | }
33 |
34 | // This is just to make the base pointer symbolic.
35 | uint8_t *p = input + offset; // the test's run command sends \x00 as fifth byte, so p equals input concretely
36 |
37 | fprintf(stderr, "%s\n", (p[0] == 1) ? "yes" : "no");
38 | // SIMPLE: Trying to solve
39 | // QSYM-COUNT-2: SMT
40 | // ANY: yes
41 |
42 | // If our GetElementPtr computations are incorrect, this will create
43 | // path constraints that conflict with those generated by the previous array
44 | // access.
45 | fprintf(stderr, "%s\n", (p[2] == 3) ? "yes" : "no");
46 | // SIMPLE: Trying to solve
47 | // QSYM-COUNT-2: SMT
48 | // ANY: yes
49 |
50 | // Use the pointer in a condition to see if contradicting constraints have
51 | // been created. The QSYM backend will log an error in this case (see
52 | // below), the simple backend just aborts.
53 | fprintf(stderr, "%s\n", (p == input) ? "yes" : "no");
54 | // SIMPLE: Trying to solve
55 | // QSYM-NOT: Incorrect constraints are inserted
56 | // QSYM-COUNT-2: SMT
57 | // ANY: yes
58 |
59 | return 0;
60 | }
61 |
--------------------------------------------------------------------------------
/test/arrays.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/arrays.c -o %t_32
2 | RUN: echo -ne "\x01\x02\x03\x04\x00" | %t_32 2>&1 | %filecheck %S/arrays.c
3 |
--------------------------------------------------------------------------------
/test/bcopy_bcmp_bzero.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s
17 | //
18 | // Test symbolic handling of bcopy, bcmp, and bzero. We copy symbolic data with
19 | // bcopy, then compare it with bcmp, expecting the solver to be triggered
20 | // (indicating that the two functions are represented correctly); then we bzero
21 | // the region and perform another comparison, which should not result in a
22 | // solver query (indicating that bzero concretized as expected).
23 |
24 | #include
25 | #include
26 | #include
27 | #include
28 |
29 | void symcc_make_symbolic(const void *start, size_t byte_length);
30 | typedef void (*TestCaseHandler)(const void *, size_t);
31 | void symcc_set_test_case_handler(TestCaseHandler handler);
32 |
33 | int solved = 0;
34 |
35 | void handle_test_case(const void *data, size_t data_length) { // Test-case handler: validates the generated input and records that one arrived.
36 | assert(data_length == 4);
37 | assert(bcmp(data, "bar", 4) == 0); // all 4 bytes must match "bar", including its NUL terminator
38 | solved = 1; // global flag printed at the end of main
39 | }
40 |
41 | int main(int argc, char *argv[]) { // Marks "foo" as symbolic, then exercises bcopy, bcmp and bzero on a copy of it.
42 | symcc_set_test_case_handler(handle_test_case);
43 |
44 | const char input[] = "foo";
45 | symcc_make_symbolic(input, 4); // 4 bytes: "foo" plus the terminating NUL
46 |
47 | // Make a copy and compare it in order to trigger the solver.
48 | char copy[4];
49 | bcopy(input, copy, 4); // bcopy takes (source, destination, length)
50 | int bcmp_result = bcmp(copy, "bar", 4);
51 | assert(bcmp_result != 0); // concretely "foo" differs from "bar"
52 |
53 | // Zero out the symbolic data and compare again (which should not trigger the
54 | // solver this time).
55 | bzero(copy, 4);
56 | bcmp_result = bcmp(copy, "abc", 4);
57 | assert(bcmp_result != 0); // four zero bytes differ from "abc"
58 |
59 | // The simple backend doesn't support test-case handlers, so we only expect a
60 | // solution with the QSYM backend.
61 | printf("Solved: %d\n", solved);
62 | // SIMPLE: Solved: 0
63 | // QSYM: Solved: 1
64 | return 0;
65 | }
66 |
--------------------------------------------------------------------------------
/test/bool_cast.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O1 %s -o %t
16 | // RUN: echo b | %t 2>&1 | %filecheck %s
17 | //
18 | // Check that bool cast is handled correctly (Issue #108)
19 |
20 | #include
21 | #include
22 | #include
23 |
24 | int bar(unsigned char a) { // Returns -1 when a equals 0xCA and 0 otherwise.
25 | if (a == 0xCA) return -1;
26 | else return 0;
27 | }
28 |
29 | int main() { // Reads one byte from file descriptor 0 and reports whether bar() flagged it.
30 | unsigned char input = 0;
31 | read(0, &input, sizeof(input)); // Return value is ignored; input keeps its initial 0 if nothing is read.
32 | int r = bar(input);
33 | // SIMPLE: Trying to solve
34 | // SIMPLE: Found diverging input
35 | // SIMPLE: stdin0 -> #xca
36 | // QSYM-COUNT-2: SMT
37 | // QSYM: New testcase
38 | if (r == -1) printf("Bingo!\n");
39 | else printf("Ok\n");
40 | // ANY: Ok
41 | return r; // The exit status mirrors bar's result.
42 | }
43 |
--------------------------------------------------------------------------------
/test/bswap.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc %s -o %t
16 | // RUN: echo -ne "\x04\x03\x02\x01" | %t 2>&1 | %filecheck %s
17 | // RUN: %symcc %s -S -emit-llvm -o - | FileCheck --check-prefix=BITCODE %s
18 | //
19 | // Here we test that the "bswap" intrinsic is handled correctly.
20 |
21 | #include
22 | #include
23 |
24 | #include
25 | #include
26 |
27 | int main(int argc, char* argv[]) { // Reads a 32-bit value, byte-swaps it and compares the result with 0xcafebeef.
28 | uint32_t x;
29 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
30 | fprintf(stderr, "Failed to read x\n");
31 | return -1;
32 | }
33 | x = ntohl(x); // Interpret the four input bytes as a big-endian (network order) integer.
34 |
35 | // BITCODE: llvm.bswap.i32
36 | uint32_t y = __builtin_bswap32(x);
37 |
38 | // ANY: 0x04030201 0x01020304
39 | fprintf(stderr, "0x%08x 0x%08x\n", x, y);
40 |
41 | // SIMPLE: Trying to solve
42 | // SIMPLE: Found diverging input
43 | // SIMPLE-DAG: stdin0 -> #xef
44 | // SIMPLE-DAG: stdin1 -> #xbe
45 | // SIMPLE-DAG: stdin2 -> #xfe
46 | // SIMPLE-DAG: stdin3 -> #xca
47 | // QSYM-COUNT-2: SMT
48 | // ANY: Not quite.
49 | if (y == 0xcafebeef)
50 | fprintf(stderr, "Correct test input.\n");
51 | else
52 | fprintf(stderr, "Not quite.\n");
53 |
54 | return 0;
55 | }
56 |
--------------------------------------------------------------------------------
/test/bswap.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 %S/bswap.c -o %t_32
2 | RUN: echo -ne "\x04\x03\x02\x01" | %t_32 2>&1 | %filecheck %S/bswap.c
3 | RUN: %symcc %S/bswap.c -m32 -S -emit-llvm -o - | FileCheck --check-prefix=BITCODE %S/bswap.c
4 |
--------------------------------------------------------------------------------
/test/concrete_structs.ll:
--------------------------------------------------------------------------------
1 | ; This file is part of SymCC.
2 | ;
3 | ; SymCC is free software: you can redistribute it and/or modify it under the
4 | ; terms of the GNU General Public License as published by the Free Software
5 | ; Foundation, either version 3 of the License, or (at your option) any later
6 | ; version.
7 | ;
8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | ;
12 | ; You should have received a copy of the GNU General Public License along with
13 | ; SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | ; Verify that we create correct expressions from struct values. For each kind of
16 | ; value, we trigger expression creation by inserting a symbolic value into the
17 | ; struct. Compiling this code with SymCC and verifying that the resulting binary
18 | ; exits cleanly shows that SymCC's instrumentation doesn't break the execution
19 | ; of the program. Moreover, we store a struct value to memory, load one of its
20 | ; elements back into a register, and branch based on it in order to trigger the
21 | ; solver; by checking the generated test case we can verify that the expression
22 | ; was correct.
23 | ;
24 | ; This test reproduces a bug where creating expressions for some structs would
25 | ; lead to a program crash.
26 | ;
27 | ; Since the bitcode is written by hand, we first run llc on it because it
28 | ; performs a validity check, whereas Clang doesn't.
29 |
30 | ; RUN: llc %s -o /dev/null
31 | ; RUN: %symcc %s -o %t
32 | ; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s
33 |
34 | target triple = "x86_64-pc-linux-gnu"
35 |
36 | ; The struct type which we'll create expressions for. Include a floating-point
37 | ; value and a Boolean because they're represented with non-bitvector solver
38 | ; variables (reproducing eurecom-s3/symcc#138).
39 | %struct_type = type { i8, i32, i8, float, i1 }
40 |
41 | ; Global variable to record whether we've found a solution. Since the simple
42 | ; backend doesn't support test-case handlers, we start with "true".
43 | @solved = global i1 1
44 |
45 | ; Our test-case handler verifies that the new test case is a 32-bit integer
46 | ; with the value 42.
47 | define void @test_case_handler(i8* %data, i64 %data_length) {
48 | %correct_length = icmp eq i64 %data_length, 4 ; The new input must be exactly four bytes long.
49 | br i1 %correct_length, label %check_data, label %failed
50 |
51 | check_data:
52 | %value_pointer = bitcast i8* %data to i32* ; Reinterpret the byte buffer as a single 32-bit integer.
53 | %value = load i32, i32* %value_pointer
54 | %correct_value = icmp eq i32 %value, 42
55 | br i1 %correct_value, label %all_good, label %failed
56 |
57 | all_good:
58 | store i1 1, i1* @solved
59 | ret void
60 |
61 | failed:
62 | store i1 0, i1* @solved ; A cleared flag makes @main return a non-zero exit code.
63 | ret void
64 | }
65 |
66 | define i32 @main(i32 %argc, i8** %argv) {
67 | ; Register our test-case handler.
68 | call void @symcc_set_test_case_handler(void (i8*, i64)* @test_case_handler)
69 | ; SIMPLE: Warning: test-case handlers
70 |
71 | ; Create a symbolic value that we can use to trigger the creation of struct
72 | ; expressions.
73 | %symbolic_value_mem = alloca i32
74 | store i32 1, i32* %symbolic_value_mem ; The concrete value of the symbolic integer is 1.
75 | call void @symcc_make_symbolic(i32* %symbolic_value_mem, i64 4)
76 | %symbolic_value = load i32, i32* %symbolic_value_mem
77 | %symbolic_byte = trunc i32 %symbolic_value to i8
78 |
79 | ; Undef struct
80 | insertvalue %struct_type undef, i32 %symbolic_value, 1 ; The result is unused; only the expression creation matters.
81 |
82 | ; Struct with concrete value
83 | insertvalue %struct_type { i8 1, i32 undef, i8 2, float undef, i1 undef }, i32 %symbolic_value, 1 ; The result is unused here as well.
84 |
85 | ; Write a struct to memory and load one of its elements back into a register.
86 | ; It's important to also insert a symbolic value into the struct, so that we
87 | ; generate an expression in the first place.
88 | %struct_mem = alloca %struct_type
89 | %struct_value = insertvalue %struct_type { i8 0, i32 42, i8 undef, float undef, i1 undef }, i8 %symbolic_byte, 2
90 | store %struct_type %struct_value, %struct_type* %struct_mem
91 | %value_address = getelementptr %struct_type, %struct_type* %struct_mem, i32 0, i32 1 ; Address of the i32 field that holds 42.
92 | %value_loaded = load i32, i32* %value_address
93 | %is_forty_two = icmp eq i32 %value_loaded, %symbolic_value ; Compares the loaded 42 with the symbolic input (concretely 1).
94 | br i1 %is_forty_two, label %never_executed, label %done
95 | ; QSYM: SMT
96 |
97 | never_executed:
98 | br label %done
99 |
100 | done:
101 | %solved = load i1, i1* @solved
102 | %result = select i1 %solved, i32 0, i32 1 ; Exit code 0 when @solved is true, 1 otherwise.
103 | ret i32 %result
104 | }
105 |
106 | declare void @symcc_make_symbolic(i32*, i64)
107 | declare void @symcc_set_test_case_handler(void (i8*, i64)*)
108 |
--------------------------------------------------------------------------------
/test/file_input.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see .
14 |
15 | // RUN: /bin/echo -ne "\x00\x00\x00\x05aaaa" > %T/%basename_t.input
16 | // RUN: %symcc -O2 %s -o %t
17 | // RUN: env SYMCC_INPUT_FILE=%T/%basename_t.input %t %T/%basename_t.input 2>&1 | %filecheck %s
18 |
19 | #include
20 |
21 | #include
22 | #include
23 | #include
24 | #include
25 |
26 | int main(int argc, char* argv[]) {  // argv[1] is the path of the input file to read.
27 |   //
28 |   // Read from the input file using Unix primitives.
29 |   //
30 |
31 |   // ANY-NOT: Warning
32 |   int fd = open(argv[1], O_RDONLY);
33 |   if (fd < 0) {
34 |     perror("failed to open the input file");
35 |     return -1;
36 |   }
37 |
38 |   int input;
39 |   if (read(fd, &input, sizeof(input)) != 4) {
40 |     perror("failed to read from the input file");
41 |     return -1;
42 |   }
43 |   input = ntohl(input);  // The first four bytes are a big-endian integer.
44 |
45 |   int four_as;
46 |   if (read(fd, &four_as, sizeof(four_as)) != 4) {
47 |     perror("failed to read from the input file");
48 |     return -1;
49 |   }
50 |
51 |   int eof = 42;
52 |   if (read(fd, &eof, sizeof(eof)) != 0) {
53 |     perror("this should be exactly the end of the file");
54 |     return -1;
55 |   }
56 |
57 |   // Make sure that we haven't created a symbolic expression
58 |   if (eof == 42)
59 |     fprintf(stderr, "All is good.\n");
60 |   else
61 |     fprintf(stderr, "Why was the variable overwritten?\n");
62 |   // SIMPLE-NOT: Trying to solve
63 |   // QSYM-NOT: SMT
64 |   // ANY: All is good.
65 |
66 |   // SIMPLE: Trying to solve
67 |   // SIMPLE: Found diverging input
68 |   // QSYM-COUNT-2: SMT
69 |   // QSYM: New testcase
70 |   // ANY: Not sure
71 |   if (input >= 42)
72 |     fprintf(stderr, "This may be the answer.\n");
73 |   else
74 |     fprintf(stderr, "Not sure this is correct...\n");
75 |
76 |   //
77 |   // Rewind and read again.
78 |   //
79 |
80 |   if (lseek(fd, 4, SEEK_SET) != 4) {
81 |     perror("failed to rewind the file");
82 |     return -1;
83 |   }
84 |
85 |   if (read(fd, &four_as, sizeof(four_as)) < 0) {
86 |     perror("failed to read from the input file");
87 |     return -1;
88 |   }
89 |
90 |   // SIMPLE: Trying to solve
91 |   // SIMPLE: Found diverging input
92 |   // QSYM-COUNT-2: SMT
93 |   // QSYM: New testcase
94 |   // ANY: No.
95 |   if (four_as != (int)0x61616161)
96 |     fprintf(stderr, "The matrix has changed.\n");
97 |   else
98 |     fprintf(stderr, "No.\n");
99 |
100 |   //
101 |   // Read with the C standard library.
102 |   //
103 |
104 |   // ANY: Warning
105 |   FILE *file = fopen(argv[1], "r");
106 |   if (file == NULL) {
107 |     perror("failed to open the input file");
108 |     return -1;
109 |   }
110 |
111 |   int same_input;
112 |   if (fread(&same_input, sizeof(same_input), 1, file) != 1) {  // fread returns the unsigned item count, so "< 0" could never detect a failure.
113 |     perror("failed to read from the input file");
114 |     return -1;
115 |   }
116 |   same_input = ntohl(same_input);
117 |
118 |   // SIMPLE: Trying to solve
119 |   // QSYM-COUNT-2: SMT
120 |   // ANY: Yep
121 |   if (same_input == 5)
122 |     fprintf(stderr, "Yep, it's the test input.\n");
123 |   else
124 |     fprintf(stderr, "Not the test input!\n");
125 |
126 |   //
127 |   // Rewind and read again.
128 |   //
129 |
130 |   // fseek doesn't return the current offset (unlike lseek) - it just returns 0
131 |   // on success!
132 |   if (fseek(file, 4, SEEK_SET) != 0) {
133 |     perror("failed to rewind the file");
134 |     return -1;
135 |   }
136 |
137 |   int same_four_as;
138 |   if (fread(&same_four_as, sizeof(same_four_as), 1, file) != 1) {  // Exactly one complete item must be read.
139 |     perror("failed to read from the input file");
140 |     return -1;
141 |   }
142 |
143 |   // SIMPLE: Trying to solve
144 |   // QSYM-COUNT-2: SMT
145 |   // ANY: Still
146 |   if (same_four_as == (int)0x61616161)
147 |     fprintf(stderr, "Still the test input.\n");
148 |   else
149 |     fprintf(stderr, "Not the test input!\n");
150 |
151 |   return 0;
152 | }
153 |
--------------------------------------------------------------------------------
/test/file_input.test32:
--------------------------------------------------------------------------------
1 | RUN: /bin/echo -ne "\x00\x00\x00\x05aaaa" > %T/%basename_t.input
2 | RUN: %symcc -m32 -O2 %S/file_input.c -o %t_32
3 | RUN: env SYMCC_INPUT_FILE=%T/%basename_t.input %t_32 %T/%basename_t.input 2>&1 | %filecheck %S/file_input.c
4 |
--------------------------------------------------------------------------------
/test/floats.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s
17 |
18 | #include
19 | #include
20 |
21 | #include
22 | #include
23 |
24 | float g_value = 0.1234;
25 |
26 | int main(int argc, char *argv[]) { // Adds an integer read from standard input to the float g_value and tests the sum's range.
27 | int x;
28 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
29 | fprintf(stderr, "Failed to read x\n");
30 | return -1;
31 | }
32 | x = ntohl(x); // The input bytes are interpreted as a big-endian integer.
33 |
34 | g_value += x; // Integer x is converted to floating point for the addition.
35 | fprintf(stderr, "%f\n", g_value);
36 | // ANY: 5.1234
37 |
38 | fprintf(stderr, "%s\n", ((g_value < 7) && (g_value > 6)) ? "yes" : "no");
39 | // SIMPLE: Trying to solve
40 | // SIMPLE: Found diverging input
41 | // SIMPLE: #x06
42 | // Qsym doesn't support symbolic floats!
43 | // QSYM-NOT: SMT
44 | // ANY: no
45 |
46 | return 0;
47 | }
48 |
--------------------------------------------------------------------------------
/test/floats.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/floats.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/floats.c
3 |
--------------------------------------------------------------------------------
/test/globals.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s
17 | //
18 | // Test that global variables are handled correctly. The special challenge is
19 | // that we need to initialize the symbolic expression corresponding to any
20 | // global variable that has an initial value.
21 |
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | int g_increment = 17;
29 | int g_uninitialized;
30 | int g_more_than_one_byte_int = 512;
31 |
32 | char g_values[] = {1, 2, 3};
33 | int g_non_char_values[] = {300, 400, 500};
34 |
35 | int increment(int x) { // Returns x + g_increment when that sum is below 30, otherwise 42.
36 | int result = x + g_increment;
37 | if (result < 30)
38 | return result;
39 | else
40 | return 42;
41 | }
42 |
43 | void sum(int x) { // Prints "foo" when the sum of g_values is below x and "bar" otherwise.
44 | int result = 0;
45 | for (size_t i = 0; i < (sizeof(g_values) / sizeof(g_values[0])); i++) { // Iterates over every element of the char array.
46 | result += g_values[i];
47 | }
48 |
49 | fprintf(stderr, "%s\n", (result < x) ? "foo" : "bar");
50 | }
51 |
52 | void sum_ints(int x) { // Same as sum(), but over the int array g_non_char_values.
53 | int result = 0;
54 | for (size_t i = 0; i < (sizeof(g_non_char_values) / sizeof(g_non_char_values[0])); i++) { // Iterates over every element of the int array.
55 | result += g_non_char_values[i];
56 | }
57 |
58 | fprintf(stderr, "%s\n", (result < x) ? "foo" : "bar");
59 | }
60 |
61 | int main(int argc, char* argv[]) { // Compares an integer read from standard input against several global variables.
62 | int x;
63 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
64 | fprintf(stderr, "Failed to read x\n");
65 | return -1;
66 | }
67 | x = ntohl(x); // The input bytes are interpreted as a big-endian integer.
68 |
69 | fprintf(stderr, "%d\n", increment(x)); // Uses the initial g_increment value of 17.
70 | // SIMPLE: Trying to solve
71 | // SIMPLE: (bvadd #x{{0*}}11
72 | // SIMPLE: Found diverging input
73 | // QSYM-COUNT-2: SMT
74 | // QSYM: New testcase
75 | // ANY: 22
76 |
77 | g_increment = 18; // Overwrite the global at run time and call increment() again.
78 | fprintf(stderr, "%d\n", increment(x));
79 | // SIMPLE: Trying to solve
80 | // SIMPLE: Found diverging input
81 | // We can't check for 0x12 here because with some versions of clang we end
82 | // up in a situation where (x + 18) >= 30 is folded into x >= 12.
83 | // QSYM-COUNT-2: SMT
84 | // QSYM: New testcase
85 | // ANY: 23
86 |
87 | g_uninitialized = 101; // This global has no initializer; it receives its first value here.
88 | fprintf(stderr, "%s\n", (x < g_uninitialized) ? "smaller" : "greater or equal");
89 | // SIMPLE: Trying to solve
90 | // SIMPLE: (bvsle #x{{0*}}65
91 | // QSYM-COUNT-2: SMT
92 | // QSYM: New testcase
93 | // ANY: smaller
94 |
95 | sum(x);
96 | // SIMPLE: Trying to solve
97 | // SIMPLE-NOT: Can't find
98 | // SIMPLE: Found diverging input
99 | // QSYM-COUNT-2: SMT
100 | // QSYM: New testcase
101 | // ANY: bar
102 |
103 | fprintf(stderr, "%s\n", (x < g_more_than_one_byte_int) ? "true" : "false"); // The global holds 512 (0x200).
104 | // SIMPLE: Trying to solve
105 | // SIMPLE: #x{{0*}}200
106 | // SIMPLE: Can't find
107 | // QSYM-COUNT-2: SMT
108 | // ANY: true
109 |
110 | sum_ints(x); // The array sums to 300 + 400 + 500 = 1200 (0x4b0).
111 | // SIMPLE: Trying to solve
112 | // SIMPLE: #x{{0*}}4b0
113 | // SIMPLE: Can't find
114 | // QSYM-COUNT-2: SMT
115 | // ANY: bar
116 |
117 | return 0;
118 | }
119 |
--------------------------------------------------------------------------------
/test/globals.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/globals.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/globals.c
3 |
--------------------------------------------------------------------------------
/test/if.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s
17 | // This test is disabled until we can move the pass behind the optimizer in the pipeline:
18 | // RUN-disabled: %symcc -O2 -emit-llvm -S %s -o - | FileCheck --check-prefix=BITCODE %s
19 | //
20 | // Here we test two things:
21 | // 1. We can compile the file, and executing it symbolically results in solving
22 | // path constraints.
23 | // 2. The bitcode is optimized, i.e., the instrumentation we insert does not
24 | // break compiler optimizations.
25 | #include
26 | #include
27 | #include
28 |
29 | int foo(int a, int b) { // Returns a if 2*a < b, else b if a % b is non-zero, else a + b.
30 | // BITCODE-NOT: alloca
31 | // BITCODE-NOT: load
32 | // BITCODE-NOT: store
33 | // SIMPLE: Trying to solve
34 | // QSYM-COUNT-2: SMT
35 | // BITCODE: shl
36 | if (2 * a < b)
37 | return a;
38 | // SIMPLE: Trying to solve
39 | // QSYM-COUNT-2: SMT
40 | else if (a % b)
41 | return b;
42 | else
43 | return a + b;
44 | }
45 |
46 | int main(int argc, char* argv[]) { // Reads an int from standard input and prints it along with foo(x, 7).
47 | int x;
48 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
49 | fprintf(stderr, "Failed to read x\n");
50 | return -1;
51 | }
52 | fprintf(stderr, "%d\n", x); // x is used in host byte order; there is no ntohl conversion in this test.
53 | fprintf(stderr, "%d\n", foo(x, 7)); // For x == 5: 2*5 is not below 7 and 5 % 7 is non-zero, so foo returns 7.
54 | // ANY: 7
55 | return 0;
56 | }
57 |
--------------------------------------------------------------------------------
/test/if.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/if.c -o %t_32
2 | RUN: echo -ne "\x05\x00\x00\x00" | %t_32 2>&1 | %filecheck %S/if.c
3 |
--------------------------------------------------------------------------------
/test/integers.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t 2>&1 | %filecheck %s
17 | #include
18 | #include
19 | #include
20 |
21 | uint64_t g_value = 0xaaaabbbbccccdddd;
22 |
23 | int main(int argc, char *argv[]) { // Compares a 64-bit value read from standard input with the global g_value.
24 | uint64_t x;
25 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
26 | fprintf(stderr, "Failed to read x\n");
27 | return -1;
28 | }
29 |
30 | fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); // g_value is 0xaaaabbbbccccdddd, so all eight bytes matter.
31 | // SIMPLE: Trying to solve
32 | // SIMPLE: Found diverging input
33 | // Make sure that we don't truncate integers.
34 | // SIMPLE-DAG: #xaa
35 | // SIMPLE-DAG: #xbb
36 | // SIMPLE-DAG: #xcc
37 | // SIMPLE-DAG: #xdd
38 | // QSYM-COUNT-2: SMT
39 | // ANY: no
40 |
41 | return 0;
42 | }
43 |
--------------------------------------------------------------------------------
/test/integers.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/integers.c -o %t_32
2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %S/integers.c
3 |
--------------------------------------------------------------------------------
/test/large_alloc.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x2a" | %t 2>&1 | %filecheck %s
17 | //
18 | // Make sure that we can handle large allocations symbolically. Also, test
19 | // memory-related library functions.
20 |
21 | #include
22 | #include
23 | #include
24 |
25 | #include
26 | #include
27 |
28 | int main(int argc, char *argv[]) { // Exercises memset, memcpy and memmove on a 10000-byte heap buffer using symbolic input.
29 | int x;
30 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
31 | fprintf(stderr, "Failed to read x\n");
32 | return -1;
33 | }
34 | int netlongX = x; // Keep the value exactly as read; it is byte-swapped further below.
35 | x = ntohl(x);
36 |
37 | char *largeAllocation = malloc(10000); // NOTE(review): the result is neither checked for NULL nor freed.
38 | memset(largeAllocation, (char)x, 10000); // Fill the whole buffer with the low byte of x.
39 |
40 | fprintf(stderr, "%s\n", (largeAllocation[9999] < 100) ? "worked" : "error");
41 | // SIMPLE: Trying to solve
42 | // SIMPLE: Found diverging input
43 | // QSYM-COUNT-2: SMT
44 | // QSYM: New testcase
45 | // ANY: worked
46 |
47 | memset(largeAllocation, 'A', 10000); // Overwrite the buffer with a constant byte.
48 | fprintf(stderr, "%s\n", (largeAllocation[5000] == 17) ? "true" : "false");
49 | // SIMPLE-NOT: Trying to solve
50 | // QSYM-NOT: SMT
51 | // ANY: false
52 |
53 | memset(largeAllocation, x, 10000);
54 | fprintf(stderr, "%s\n", (largeAllocation[5000] > 100) ? "true" : "false");
55 | // SIMPLE: Trying to solve
56 | // SIMPLE: Can't find a diverging input at this point
57 | // QSYM-COUNT-2: SMT
58 | // (Qsym finds a new test case with the optimistic strategy.)
59 | // ANY: false
60 |
61 | memcpy(largeAllocation + x, &x, sizeof(x)); // The destination offset itself depends on the input value.
62 | // SIMPLE: Trying to solve
63 | // SIMPLE: Found diverging input
64 | // QSYM-COUNT-2: SMT
65 | // QSYM: New testcase
66 |
67 | // Make x little-endian.
68 | x = __builtin_bswap32(netlongX);
69 |
70 | memcpy(largeAllocation, &x, sizeof(x));
71 | // SIMPLE-NOT: Trying to solve
72 | // QSYM-NOT: SMT
73 |
74 | memmove(largeAllocation + 1, largeAllocation, sizeof(x)); // Overlapping ranges: shifts the four bytes up by one position.
75 | fprintf(stderr, "%s\n", (largeAllocation[0] == largeAllocation[2]) ? "true" : "false");
76 | // SIMPLE: Trying to solve
77 | // QSYM-COUNT-2: SMT
78 | // TODO should find new inputs
79 | // ANY: false
80 |
81 | return 0;
82 | }
83 |
--------------------------------------------------------------------------------
/test/large_alloc.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 %S/large_alloc.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x2a" | %t_32 2>&1 | %filecheck %S/large_alloc.c
3 |
--------------------------------------------------------------------------------
/test/lit.cfg:
--------------------------------------------------------------------------------
1 | # -*- Python -*-
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | import lit.formats.shtest
18 |
19 | config.name = "compiler"  # Name of this test suite.
20 | config.test_format = lit.formats.shtest.ShTest()  # Use lit's ShTest format for every test file.
21 | config.suffixes = [".c", ".cpp", ".ll"]  # File extensions that are picked up as tests.
22 | config.substitutions += [
23 | ("%symcc", config.test_exec_root + "/../symcc"),  # %symcc expands to the symcc file one level above the test build directory.
24 | ]
25 |
--------------------------------------------------------------------------------
/test/lit.site.cfg.in:
--------------------------------------------------------------------------------
1 | # -*- Python -*-
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see .
16 |
17 | import os
18 | from os import path
19 |
20 | # Used by lit to locate tests and output locations
21 | config.test_source_root = "@CMAKE_CURRENT_SOURCE_DIR@"
22 | config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@"
23 |
24 | outputDir = "@CMAKE_CURRENT_BINARY_DIR@/SymccOutput"
25 | try:
26 | os.mkdir(outputDir)
27 | except OSError:
28 | # Directory exists - fine
29 | pass
30 |
31 | config.environment["SYMCC_OUTPUT_DIR"] = outputDir
32 |
33 | # Delegate to the generic configuration file
34 | lit_config.load_config(config, path.join(config.test_source_root, "lit.cfg"))
35 |
36 | # Depending on the backend, the tests have to look for different output
37 | config.substitutions += [
38 | ("%filecheck", "FileCheck @SYM_TEST_FILECHECK_ARGS@"),
39 | ]
40 |
41 | if "@TARGET_32BIT@" == "ON":
42 | config.suffixes.add(".test32")
43 |
--------------------------------------------------------------------------------
/test/load_store.ll:
--------------------------------------------------------------------------------
1 | ; This file is part of SymCC.
2 | ;
3 | ; SymCC is free software: you can redistribute it and/or modify it under the
4 | ; terms of the GNU General Public License as published by the Free Software
5 | ; Foundation, either version 3 of the License, or (at your option) any later
6 | ; version.
7 | ;
8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | ;
12 | ; You should have received a copy of the GNU General Public License along with
13 | ; SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | ; Verify that loading and storing concrete values of various types works. For
16 | ; each type, we allocate space on the stack, then store a constant value into
17 | ; it, and finally load it back. Compiling this code with SymCC and verifying
18 | ; that the resulting binary exits cleanly shows that SymCC's instrumentation
19 | ; doesn't break the load/store operations.
20 | ;
21 | ; This test reproduces a bug where loading a concrete Boolean would lead to a
22 | ; program crash.
23 | ;
24 | ; Since the bitcode is written by hand, we first run llc on it because it
25 | ; performs a validity check, whereas Clang doesn't.
26 | ;
27 | ; RUN: llc %s -o /dev/null
28 | ; RUN: %symcc %s -o %t
29 | ; RUN: %t 2>&1
30 |
31 | target triple = "x86_64-pc-linux-gnu"
32 |
33 | define i32 @main(i32 %argc, i8** %argv) {
34 | ; Load and store a Boolean.
35 | %stack_bool = alloca i1
36 | store i1 0, i1* %stack_bool
37 | %copy_of_stack_bool = load i1, i1* %stack_bool ; The loaded value is unused; only the load itself is under test.
38 |
39 | ; Load and store a float.
40 | %stack_float = alloca float
41 | store float 0.0, float* %stack_float
42 | %copy_of_stack_float = load float, float* %stack_float ; Unused as well.
43 |
44 | ret i32 0 ; Always exits with status 0.
45 | }
46 |
--------------------------------------------------------------------------------
/test/loop.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s
17 | //
18 | // Make sure that our instrumentation works with back-jumps. Also, test support
19 | // for 128-bit integers (if available).
20 |
21 | #include <stdint.h>
22 | #include <stdio.h>
23 |
24 | #include <arpa/inet.h>
25 | #include <unistd.h>
26 |
27 | #ifdef __SIZEOF_INT128__ // defined by the compiler when a 128-bit integer type is available
28 | #define MYINT __int128 // exercise 128-bit integer support where possible
29 | #else
30 | #define MYINT int64_t // fallback when no 128-bit type exists
31 | #endif
32 |
33 | int fac(int x) { // Returns the product 2 * 3 * ... * x (1 when x < 2), accumulated in MYINT and narrowed to int on return.
34 | MYINT result = 1;
35 |
36 | // SIMPLE-COUNT-5: Found diverging input
37 | // SIMPLE-NOT: Found diverging input
38 | // QSYM-COUNT-5: New testcase
39 | for (MYINT i = 2; i <= x; i++) // loop with a back-jump; the bound is the caller-supplied x
40 | result *= i;
41 |
42 | return result;
43 | }
44 |
45 | int main(int argc, char* argv[]) { // Reads one 4-byte integer from stdin and prints fac(x) to stderr.
46 | int x;
47 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) { // a short read is treated as failure
48 | fprintf(stderr, "Failed to read x\n");
49 | return -1;
50 | }
51 | x = ntohl(x); // the RUN line supplies the value in network (big-endian) byte order
52 | fprintf(stderr, "%d\n", fac(x));
53 | // ANY: 120
54 | return 0;
55 | }
56 |
--------------------------------------------------------------------------------
/test/loop.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/loop.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/loop.c
3 |
--------------------------------------------------------------------------------
/test/memcpy.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03" | %t 2>&1 | %filecheck %s
17 | //
18 | // Test that we generate alternative inputs for the parameters to memcpy (which
19 | // should assert that the concept works for other functions as well). Also, make
20 | // sure that we handle the different parameter sizes for mmap correctly.
21 |
22 | #include <stdint.h>
23 | #include <stdio.h>
24 | #include <string.h>
25 |
26 | #include <arpa/inet.h>
27 | #include <sys/mman.h>
28 | #include <unistd.h>
29 |
30 | int main(int argc, char *argv[]) { // Reads dest offset, src offset and length from stdin, passes them to memcpy, then calls mmap.
31 | char values[] = {1, 2, 3};
32 | char values_copy[3];
33 |
34 | int dest_offset;
35 | if (read(STDIN_FILENO, &dest_offset, sizeof(dest_offset)) !=
36 | sizeof(dest_offset)) {
37 | fprintf(stderr, "Failed to read dest_offset\n");
38 | return -1;
39 | }
40 | dest_offset = ntohl(dest_offset); // stdin carries the value in network byte order
41 | int src_offset;
42 | if (read(STDIN_FILENO, &src_offset, sizeof(src_offset)) !=
43 | sizeof(src_offset)) {
44 | fprintf(stderr, "Failed to read src_offset\n");
45 | return -1;
46 | }
47 | src_offset = ntohl(src_offset);
48 | int length;
49 | if (read(STDIN_FILENO, &length, sizeof(length)) != sizeof(length)) {
50 | fprintf(stderr, "Failed to read length\n");
51 | return -1;
52 | }
53 | length = ntohl(length);
54 |
55 | memcpy(values_copy + dest_offset, values + src_offset, length); // destination, source and length all derive from stdin
56 | fprintf(stderr, "%d\n", values_copy[0]); // with the RUN input (offsets 0, length 3) this prints values[0]
57 | // SIMPLE: Trying to solve
58 | // SIMPLE: Found diverging input
59 | // SIMPLE: stdin{{[0-3]}}
60 | // QSYM-COUNT-2: SMT
61 | // QSYM: New testcase
62 | // SIMPLE: Trying to solve
63 | // SIMPLE: Found diverging input
64 | // SIMPLE-DAG: stdin{{[0-3]}} -> #x00
65 | // SIMPLE-DAG: stdin{{[4-7]}} -> #x{{.?[^0].?}}
66 | // QSYM-COUNT-2: SMT
67 | // QSYM: New testcase
68 | // SIMPLE: Trying to solve
69 | // SIMPLE: Found diverging input
70 | // SIMPLE-DAG: stdin{{[0-7]}} -> #x00
71 | // QSYM-COUNT-2: SMT
72 | // QSYM: New testcase
73 | // ANY: 1
74 |
75 | void *pointer = mmap(NULL, 8, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); // exercises mmap's differently sized parameters (see file header); the mapping is never unmapped
76 | puts(pointer == MAP_FAILED ? "failed" : "succeeded");
77 | // ANY: succeeded
78 |
79 | return 0;
80 | }
81 |
--------------------------------------------------------------------------------
/test/memcpy.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/memcpy.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03" | %t_32 2>&1 | %filecheck %S/memcpy.c
3 |
--------------------------------------------------------------------------------
/test/memory_input.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s
17 | #include <stdint.h>
18 | #include <stdio.h>
19 | #include <stdlib.h>
20 |
21 | void symcc_make_symbolic(const void *start, size_t byte_length); // declared only; defined outside this file (presumably by the SymCC runtime - confirm)
22 |
23 | uint64_t g_value = 0xaaaabbbbccccdddd; // comparison target for x in main; the checks there look for its bytes (#xaa .. #xdd)
24 |
25 | int main(int argc, char *argv[]) { // Marks two locals as symbolic in memory and compares each against a fixed value.
26 | uint64_t x = 10;
27 | uint8_t y = 0;
28 |
29 | symcc_make_symbolic(&x, sizeof(x)); // x is registered first ...
30 | symcc_make_symbolic(&y, sizeof(y)); // ... then y, which the check below expects at input offset 8
31 |
32 | fprintf(stderr, "%s\n", (x == g_value) ? "yes" : "no"); // concretely 10 != g_value, so this prints "no"
33 | // SIMPLE: Trying to solve
34 | // SIMPLE: Found diverging input
35 | // SIMPLE-DAG: #xaa
36 | // SIMPLE-DAG: #xbb
37 | // SIMPLE-DAG: #xcc
38 | // SIMPLE-DAG: #xdd
39 | // QSYM-COUNT-2: SMT
40 | // ANY: no
41 |
42 | fprintf(stderr, "%s\n", (y == 10) ? "yes" : "no"); // concretely 0 != 10, so this prints "no"
43 | // SIMPLE: Trying to solve
44 | // SIMPLE: Found diverging input
45 | // y should be part of the input, just after x
46 | // SIMPLE: stdin8 -> #x0a
47 | // QSYM-COUNT-2: SMT
48 | // ANY: no
49 |
50 | return 0;
51 | }
52 |
--------------------------------------------------------------------------------
/test/pointers.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05\x12\x34\x56\x78\x90\xab\xcd\xef" | %t 2>&1 | %filecheck %s
17 |
18 | #include <stdint.h>
19 | #include <stdio.h>
20 |
21 | #include <arpa/inet.h>
22 | #include <unistd.h>
23 |
24 | volatile int g_value; // global whose individual bytes main reads through a uint8_t pointer
25 |
26 | int main(int argc, char* argv[]) { // Reads an int and a pointer from stdin, then compares single bytes of a global and a local against the int.
27 | int x;
28 | void *ptr;
29 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
30 | fprintf(stderr, "Failed to read x\n");
31 | return -1;
32 | }
33 | x = ntohl(x); // stdin carries x in network byte order
34 | if (read(STDIN_FILENO, &ptr, sizeof(ptr)) != sizeof(ptr)) { // pointer-sized read: 8 bytes on 64-bit, 4 with -m32
35 | fprintf(stderr, "Failed to read ptr\n");
36 | return -1;
37 | }
38 | g_value = htonl(0x1200ab00); // htonl fixes the in-memory byte order to 12 00 ab 00 regardless of host endianness
39 | uint8_t *charPtr = (uint8_t*)&g_value;
40 |
41 | charPtr += 2; // third byte of g_value, expected to be 0xab
42 | fprintf(stderr, "%x\n", *charPtr);
43 | // ANY: ab
44 |
45 | fprintf(stderr, "%s\n", (*charPtr == x) ? "equal" : "different");
46 | // SIMPLE: Trying to solve
47 | // SIMPLE: Found diverging input
48 | // SIMPLE: #xab
49 | // QSYM-COUNT-2: SMT
50 | // QSYM: New testcase
51 | // ANY: different
52 |
53 | volatile int local = 0x12345678;
54 | charPtr = (uint8_t*)&local;
55 | charPtr++; // second byte of local in memory; the #x56 check below assumes a little-endian host
56 | fprintf(stderr, "%s\n", (*charPtr == x) ? "equal" : "different");
57 | // SIMPLE: Trying to solve
58 | // SIMPLE: Found diverging input
59 | // SIMPLE: #x56
60 | // QSYM-COUNT-2: SMT
61 | // QSYM: New testcase
62 | // ANY: different
63 |
64 | fprintf(stderr, "%s\n", !ptr ? "null" : "not null"); // branches on the pointer value read from stdin
65 | // SIMPLE: Trying to solve
66 | // SIMPLE: Found diverging input
67 | //
68 | // We expect a null pointer, but since pointer length varies between 32 and
69 | // 64-bit architectures we can't just expect N times #x00. Instead, we use a
70 | // regular expression that disallows nonzero values for anything but stdin0
71 | // to stdin3 (which are part of x, not ptr).
72 | //
73 | // SIMPLE-NOT: stdin{{[4-9]|1[0-9]}} -> #x{{.?[^0].?}}
74 | // QSYM-COUNT-2: SMT
75 | // QSYM: New testcase
76 | // ANY: not null
77 |
78 | return 0;
79 | }
80 |
--------------------------------------------------------------------------------
/test/pointers.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/pointers.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05\x12\x34\x56\x78" | %t_32 2>&1 | %filecheck %S/pointers.c
3 |
--------------------------------------------------------------------------------
/test/propagation_select.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O1 %s -o %t
16 | // RUN: echo xxx | %t 2>&1 | %filecheck %s
17 | //
18 | // Check that select instruction is propagating the symbolic value (issue #109)
19 |
20 | #include <stdio.h>
21 | #include <stdlib.h>
22 | #include <unistd.h>
23 |
24 | char bar(char a, char b, char c) { return (a == 0xA) ? b : c; } // Returns b when a equals 0x0A, otherwise c; this conditional is the value selection the test header refers to.
25 |
26 | int main() { // Reads up to three bytes from stdin, routes them through bar() and branches on the result.
27 | char input[3] = {0};
28 | read(0, &input, sizeof(input)); // return value is not checked; input stays zero-filled for any bytes not read
29 | // SIMPLE: Trying to solve
30 | // SIMPLE: Found diverging input
31 | // SIMPLE: stdin0 -> #x0a
32 | // QSYM-COUNT-2: SMT
33 | // QSYM: New testcase
34 | char r = bar(input[0], input[1], input[2]);
35 | // SIMPLE: Trying to solve
36 | // SIMPLE: Found diverging input
37 | // SIMPLE-DAG: stdin2 -> #x0b
38 | // SIMPLE-DAG: stdin0 -> #x00
39 | // QSYM-COUNT-2: SMT
40 | // QSYM: New testcase
41 | // ANY: KO
42 | if (r == 0xB) // the checks above expect this comparison to constrain input[2], i.e. the value chosen inside bar()
43 | printf("OK!\n");
44 | else
45 | printf("KO\n");
46 | return 0;
47 | }
48 |
--------------------------------------------------------------------------------
/test/read.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo b | %t 2>&1 | %filecheck %s
17 | //
18 | // Check the symbolic handling of "read"
19 |
20 | #include <stdio.h>
21 | #include <unistd.h>
22 |
23 | int main(int argc, char* argv[]) { // Reads one byte from stdin and reports whether it equals 'a'.
24 | char c;
25 |
26 | ssize_t nbytes = read(STDIN_FILENO, &c, 1);
27 | if (nbytes != 1) // exit with status 1 unless exactly one byte was read
28 | return 1;
29 |
30 | // SIMPLE: Trying to solve
31 | // SIMPLE: Found diverging input
32 | // SIMPLE: stdin0 -> #x61
33 | // QSYM-COUNT-2: SMT
34 | // QSYM: New testcase
35 | if (c == 'a') // 'a' is 0x61, the byte the check above expects the solver to propose
36 | fprintf(stderr, "Correct\n");
37 | else
38 | fprintf(stderr, "Next time...\n");
39 | // ANY: Next time...
40 | return 0;
41 | }
42 |
--------------------------------------------------------------------------------
/test/read.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/read.c -o %t_32
2 | RUN: echo b | %t_32 2>&1 | %filecheck %S/read.c
3 |
--------------------------------------------------------------------------------
/test/strings.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -n test | %t 2>&1 | %filecheck %s
17 | //
18 | // Test the symbolic versions of string functions.
19 |
20 | #include <stdio.h>
21 | #include <string.h>
22 | #include <unistd.h>
23 |
24 | int main(int argc, char *argv[]) { // Reads four bytes from stdin and calls strchr with each mix of input-derived and constant arguments.
25 | char buffer[5];
26 |
27 | if (read(STDIN_FILENO, buffer, sizeof(buffer) - 1) !=
28 | sizeof(buffer) - 1) {
29 | fprintf(stderr, "Failed to read the input\n");
30 | return -1;
31 | }
32 |
33 | buffer[4] = '\0'; // terminate the four input bytes so buffer is a valid C string
34 |
35 | // Fully concrete
36 | fputs(strchr("foobar", 'o') != NULL ? "found" : "nope", stderr);
37 | // SIMPLE-NOT: Trying to solve
38 | // QSYM-NOT: SMT
39 | // ANY: found
40 |
41 | // Symbolic buffer, concrete char
42 | fputs(strchr(buffer, 'x') != NULL ? "found" : "nope", stderr);
43 | // SIMPLE-COUNT-4: Found diverging input
44 | // QSYM: SMT
45 | // ANY: nope
46 |
47 | // Concrete buffer, symbolic char
48 | fputs(strchr("test", buffer[0]) != NULL ? "found" : "nope", stderr);
49 | // SIMPLE: Trying to solve
50 | //
51 | // QSYM's back-off mechanism kicks in because we're generating too many
52 | // queries; let's not check them anymore.
53 | //
54 | // ANY: found
55 |
56 | // Symbolic buffer, symbolic char
57 | fputs(strchr(buffer, buffer[1]) != NULL ? "found" : "nope", stderr);
58 | // SIMPLE-COUNT-2: Trying to solve
59 | // ANY: found
60 |
61 | return 0;
62 | }
63 |
--------------------------------------------------------------------------------
/test/strings.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/strings.c -o %t_32
2 | RUN: echo -n test | %t_32 2>&1 | %filecheck %S/strings.c
3 |
--------------------------------------------------------------------------------
/test/structs.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s
17 |
18 | #include <stdint.h>
19 | #include <stdio.h>
20 |
21 | #include <arpa/inet.h>
22 | #include <unistd.h>
23 |
24 | struct point { // plain pair of ints used as the basic struct under test
25 | int x;
26 | int y;
27 | };
28 |
29 | struct line { // struct of structs, used for the nested-struct check in main
30 | struct point start;
31 | struct point end;
32 | };
33 |
34 | static struct point g_point = {1, 2}; // global struct compared against the local p in main
35 | static struct point g_point_array[] = {{1, 2}, {3, 4}, {5, 6}}; // global array of structs; main reads element 1
36 |
37 | int main(int argc, char* argv[]) { // Reads an int from stdin and compares it with fields of local, global and nested structs.
38 | int x;
39 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
40 | fprintf(stderr, "Failed to read x\n");
41 | return -1;
42 | }
43 | x = ntohl(x); // stdin carries x in network byte order (5 in the RUN line)
44 |
45 | struct point p = {x, 17}; // p.x derives from the input, p.y is a constant
46 |
47 | fprintf(stderr, "%s\n", (p.x < 100) ? "yes" : "no");
48 | // SIMPLE: Trying to solve
49 | // SIMPLE: Found diverging input
50 | // QSYM-COUNT-2: SMT
51 | // QSYM: New testcase
52 | // ANY: yes
53 |
54 | fprintf(stderr, "%s\n", (p.y < 100) ? "yes" : "no"); // constant field only: no solver activity expected
55 | // SIMPLE-NOT: Trying to solve
56 | // QSYM-NOT: SMT
57 | // ANY: yes
58 |
59 | fprintf(stderr, "%s\n", (p.x < p.y) ? "yes" : "no");
60 | // SIMPLE: Trying to solve
61 | // SIMPLE: Found diverging input
62 | // QSYM-COUNT-2: SMT
63 | // QSYM: New testcase
64 | // ANY: yes
65 |
66 | fprintf(stderr, "%s\n", ((p.x < g_point.x) || (p.y < g_point.y)) ? "yes" : "no"); // local struct against the global struct
67 | // SIMPLE: Trying to solve
68 | // SIMPLE: Found diverging input
69 | // QSYM-COUNT-2: SMT
70 | // QSYM: New testcase
71 | // ANY: no
72 |
73 | fprintf(stderr, "%s\n", (g_point_array[1].x < x) ? "yes" : "no"); // element of the global struct array (x field is 3)
74 | // SIMPLE: Trying to solve
75 | // SIMPLE: Found diverging input
76 | // QSYM-COUNT-2: SMT
77 | // QSYM: New testcase
78 | // ANY: yes
79 |
80 | // Nested structs
81 |
82 | struct line l = {{0, 0}, {5, 5}};
83 |
84 | fprintf(stderr, "%s\n", (l.end.x > x) ? "yes" : "no"); // 5 > 5 is false for the RUN input
85 | // SIMPLE: Trying to solve
86 | // SIMPLE: Found diverging input
87 | // QSYM-COUNT-2: SMT
88 | // QSYM: New testcase
89 | // ANY: no
90 |
91 | return 0;
92 | }
93 |
--------------------------------------------------------------------------------
/test/structs.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/structs.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/structs.c
3 |
--------------------------------------------------------------------------------
/test/switch.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: echo -ne "\x00\x00\x00\x05" | %t 2>&1 | %filecheck %s
17 | //
18 | // Check the symbolic handling of "switch" statements, for both symbolic and concrete operands.
19 |
20 | #include <stdint.h>
21 | #include <stdio.h>
22 |
23 | #include <arpa/inet.h>
24 | #include <unistd.h>
25 |
26 | int main(int argc, char* argv[]) { // Switches once on an int read from stdin and once on a constant.
27 | int x;
28 | if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
29 | fprintf(stderr, "Failed to read x\n");
30 | return -1;
31 | }
32 | x = ntohl(x); // stdin carries x in network byte order (5 in the RUN line)
33 |
34 | int foo = 0;
35 | switch (x) { // operand derives from the input
36 | case 3:
37 | foo = 0;
38 | fprintf(stderr, "x is 3\n");
39 | break;
40 | case 4:
41 | foo = 1;
42 | // Deliberately not printing anything here, which will generate a direct
43 | // jump to the block after the switch statement.
44 | break;
45 | case 5:
46 | foo = 2;
47 | fprintf(stderr, "x is 5\n");
48 | break;
49 | default:
50 | foo = 3;
51 | fprintf(stderr, "x is something else\n");
52 | break;
53 | }
54 | // SIMPLE: Trying to solve
55 | // SIMPLE: Found diverging input
56 | // QSYM-COUNT-2: SMT
57 | // QSYM: New testcase
58 | // ANY: x is 5
59 |
60 | fprintf(stderr, "%d\n", foo); // foo records which case ran (2 for case 5)
61 | // ANY: 2
62 |
63 | // When the value to branch on is concrete there should be no solver
64 | // interaction.
65 | volatile int y = 17; // volatile, presumably so the switch is not folded away at -O2
66 | switch (y) {
67 | case 3:
68 | fprintf(stderr, "y is 3\n");
69 | break;
70 | case 4:
71 | fprintf(stderr, "y is 4\n");
72 | break;
73 | case 5:
74 | fprintf(stderr, "y is 5\n");
75 | break;
76 | default:
77 | fprintf(stderr, "y is something else\n");
78 | break;
79 | }
80 | // SIMPLE-NOT: Trying to solve
81 | // QSYM-NOT: SMT
82 | // ANY: y is something else
83 |
84 | return 0;
85 | }
86 |
--------------------------------------------------------------------------------
/test/switch.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/switch.c -o %t_32
2 | RUN: echo -ne "\x00\x00\x00\x05" | %t_32 2>&1 | %filecheck %S/switch.c
3 |
--------------------------------------------------------------------------------
/test/symbolic_structs.ll:
--------------------------------------------------------------------------------
1 | ; This file is part of SymCC.
2 | ;
3 | ; SymCC is free software: you can redistribute it and/or modify it under the
4 | ; terms of the GNU General Public License as published by the Free Software
5 | ; Foundation, either version 3 of the License, or (at your option) any later
6 | ; version.
7 | ;
8 | ; SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | ; WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | ; A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | ;
12 | ; You should have received a copy of the GNU General Public License along with
13 | ; SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | ; Verify that we correctly insert into symbolic struct values. We insert values
16 | ; of various types into a symbolic struct, thus triggering expression updates.
17 | ; Compiling this code with SymCC and verifying that the resulting binary exits
18 | ; cleanly shows that SymCC's instrumentation doesn't break the execution of the
19 | ; program.
20 | ;
21 | ; This test reproduces a bug where inserting a concrete floating-point value
22 | ; into a symbolic struct would lead to a program crash (eurecom-s3/symcc#138).
23 | ;
24 | ; Since the bitcode is written by hand, we first run llc on it because it
25 | ; performs a validity check, whereas Clang doesn't.
26 |
27 | ; RUN: llc %s -o /dev/null
28 | ; RUN: %symcc %s -o %t
29 | ; RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1
30 |
31 | target triple = "x86_64-pc-linux-gnu"
32 |
33 | ; The struct type of our symbolic value. Include a floating-point value and a
34 | ; Boolean because they're represented with non-bitvector solver variables
35 | ; (reproducing eurecom-s3/symcc#138).
36 | %struct_type = type { i8, i32, i8, float, i1 }
37 |
38 | define i32 @main(i32 %argc, i8** %argv) { ; Makes a stack struct symbolic, inserts constants into it, returns 0.
39 | ; Create a symbolic struct value that we can subsequently insert values into.
40 | %struct_value_mem = alloca %struct_type
41 | call void @symcc_make_symbolic(%struct_type* %struct_value_mem, i64 20) ; 20 is the byte length passed for the struct (presumably its padded size on x86_64 - confirm)
42 | %symbolic_struct = load %struct_type, %struct_type* %struct_value_mem
43 |
44 | ; Insert values of various types, triggering the creation of new expressions.
45 | insertvalue %struct_type %symbolic_struct, i32 5, 1 ; field 1 (i32); result is unnamed and unused
46 | insertvalue %struct_type %symbolic_struct, float 42.0, 3 ; field 3 (float), the case from eurecom-s3/symcc#138
47 | insertvalue %struct_type %symbolic_struct, i1 1, 4 ; field 4 (i1)
48 |
49 | ret i32 0
50 | }
51 |
52 | declare void @symcc_make_symbolic(%struct_type*, i64)
53 |
--------------------------------------------------------------------------------
/test/test_case_handler.c:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | // RUN: %symcc -O2 %s -o %t
16 | // RUN: env SYMCC_MEMORY_INPUT=1 %t 2>&1 | %filecheck %s
17 | #include <stdint.h>
18 | #include <stdio.h>
19 | #include <stdlib.h>
20 |
21 | #define MAGIC 0xab // byte value main compares the input against and the handler looks for
22 |
23 | void symcc_make_symbolic(const void *start, size_t byte_length); // declared only; defined outside this file
24 | typedef void (*TestCaseHandler)(const void *, size_t); // callback receiving a test case's bytes and its length
25 | void symcc_set_test_case_handler(TestCaseHandler handler); // declared only; defined outside this file
26 |
27 | int solved = 0; // set to 1 by handle_test_case when a one-byte test case equals MAGIC
28 | int num_test_cases = 0; // number of handle_test_case invocations
29 |
30 | void handle_test_case(const void *data, size_t data_length) { // Counts every call and records whether a one-byte test case equal to MAGIC was seen.
31 | num_test_cases++;
32 | if (data_length == 1 && ((const uint8_t *)data)[0] == MAGIC) // length is checked before the first byte is read
33 | solved = 1; // never reset afterwards
34 | }
35 |
36 | int main(int argc, char *argv[]) { // Registers a test-case handler, branches on a symbolic byte, then prints the handler's counters.
37 | symcc_set_test_case_handler(handle_test_case);
38 | // SIMPLE: Warning: test-case handlers
39 |
40 | uint8_t input = 0;
41 | symcc_make_symbolic(&input, sizeof(input));
42 |
43 | fprintf(stderr, "%s\n", (input == MAGIC) ? "yes" : "no"); // concretely 0 != MAGIC, so this prints "no"
44 | // SIMPLE: Trying to solve
45 | // SIMPLE: Found diverging input
46 | // SIMPLE: stdin0 -> #xab
47 | // QSYM: SMT
48 | // ANY: no
49 |
50 | fprintf(stderr, "%d\n", solved); // per the checks below: 1 with the QSYM backend, 0 with the simple backend
51 | // QSYM: 1
52 | // SIMPLE: 0
53 |
54 | fprintf(stderr, "%d\n", num_test_cases); // per the checks below: 1 with the QSYM backend, 0 with the simple backend
55 | // QSYM: 1
56 | // SIMPLE: 0
57 |
58 | return 0;
59 | }
60 |
--------------------------------------------------------------------------------
/test/uadd_sat.ll:
--------------------------------------------------------------------------------
1 | ; RUN: %symcc -O2 %s -o %t
2 | ; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s
3 |
4 | %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
5 | %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
6 |
7 | @g_value = dso_local local_unnamed_addr global i16 40, align 2
8 | @stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
9 | @.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1
10 | @.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
11 | @.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1
12 | @.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1
13 |
14 | ; Function Attrs: nofree nounwind uwtable
15 | define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { ; Reads an i16 from stdin and tests uadd.sat(x, @g_value) against a constant.
16 | entry:
17 | %x = alloca i16, align 2
18 | %0 = bitcast i16* %x to i8*
19 | %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 ; read 2 bytes from fd 0 into %x
20 | %cmp.not = icmp eq i64 %call, 2
21 | %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
22 | br i1 %cmp.not, label %if.end, label %if.then
23 |
24 | if.then: ; preds = %entry
25 | %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6
26 | br label %cleanup
27 |
28 | if.end: ; preds = %entry
29 | %3 = load i16, i16* %x, align 2
30 | %4 = load i16, i16* @g_value, align 2
31 | %add = call i16 @llvm.uadd.sat.i16(i16 %3, i16 %4) ; saturating unsigned add of the input and @g_value (initialised to 40)
32 | %cmp = icmp eq i16 %add, 43981 ; 43981 = 0xABCD
33 | %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0)
34 | ; SIMPLE: Trying to solve
35 | ; SIMPLE: Found diverging input
36 | ; SIMPLE-DAG: stdin0 -> #xa5
37 | ; SIMPLE-DAG: stdin1 -> #xab
38 | ; ANY: no
39 | %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6
40 | br label %cleanup
41 |
42 | cleanup: ; preds = %if.end, %if.then
43 | %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] ; -1 after a failed read, 0 otherwise
44 | ret i32 %retval.0
45 | }
46 |
47 | declare i64 @read(i32, i8* nocapture, i64)
48 | declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...)
49 | declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture)
50 | declare i16 @llvm.uadd.sat.i16(i16, i16)
51 |
--------------------------------------------------------------------------------
/test/uadd_sat.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/uadd_sat.ll -o %t_32
2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %S/uadd_sat.ll
3 |
--------------------------------------------------------------------------------
/test/usub_sat.ll:
--------------------------------------------------------------------------------
1 | ; RUN: %symcc -O2 %s -o %t
2 | ; RUN: echo -ne "\x05\x00\x00\x00" | %t 2>&1 | %filecheck %s
3 |
4 | %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
5 | %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
6 |
7 | @g_value = dso_local local_unnamed_addr global i16 40, align 2
8 | @stderr = external dso_local local_unnamed_addr global %struct._IO_FILE*, align 8
9 | @.str = private unnamed_addr constant [18 x i8] c"Failed to read x\0A\00", align 1
10 | @.str.1 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1
11 | @.str.2 = private unnamed_addr constant [4 x i8] c"yes\00", align 1
12 | @.str.3 = private unnamed_addr constant [3 x i8] c"no\00", align 1
13 |
14 | ; Function Attrs: nofree nounwind uwtable
15 | define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { ; Reads an i16 from stdin and tests usub.sat(x, @g_value) against a constant.
16 | entry:
17 | %x = alloca i16, align 2
18 | %0 = bitcast i16* %x to i8*
19 | %call = call i64 @read(i32 0, i8* nonnull %0, i64 2) #5 ; read 2 bytes from fd 0 into %x
20 | %cmp.not = icmp eq i64 %call, 2
21 | %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
22 | br i1 %cmp.not, label %if.end, label %if.then
23 |
24 | if.then: ; preds = %entry
25 | %2 = call i64 @fwrite(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str, i64 0, i64 0), i64 17, i64 1, %struct._IO_FILE* %1) #6
26 | br label %cleanup
27 |
28 | if.end: ; preds = %entry
29 | %3 = load i16, i16* %x, align 2
30 | %4 = load i16, i16* @g_value, align 2
31 | %add = call i16 @llvm.usub.sat.i16(i16 %3, i16 %4) ; NOTE: despite its name, %add holds the saturating unsigned difference input - @g_value
32 | %cmp = icmp eq i16 %add, 43981 ; 43981 = 0xABCD
33 | %cond = select i1 %cmp, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.3, i64 0, i64 0)
34 | ; SIMPLE: Trying to solve
35 | ; SIMPLE: Found diverging input
36 | ; SIMPLE-DAG: stdin0 -> #xf5
37 | ; SIMPLE-DAG: stdin1 -> #xab
38 | ; ANY: no
39 | %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i8* %cond) #6
40 | br label %cleanup
41 |
42 | cleanup: ; preds = %if.end, %if.then
43 | %retval.0 = phi i32 [ -1, %if.then ], [ 0, %if.end ] ; -1 after a failed read, 0 otherwise
44 | ret i32 %retval.0
45 | }
46 |
47 | declare i64 @read(i32, i8* nocapture, i64)
48 | declare i32 @fprintf(%struct._IO_FILE* nocapture , i8* nocapture readonly, ...)
49 | declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture)
50 | declare i16 @llvm.usub.sat.i16(i16, i16)
51 |
--------------------------------------------------------------------------------
/test/usub_sat.test32:
--------------------------------------------------------------------------------
1 | RUN: %symcc -m32 -O2 %S/usub_sat.ll -o %t_32
2 | RUN: echo -ne "\x05\x00\x00\x00\x00\x00\x00\x00" | %t_32 2>&1 | %filecheck %s
3 |
--------------------------------------------------------------------------------
/util/pure_concolic_execution.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -u
4 |
# Print the command-line help text to standard output.
function usage() {
    # One argument per output line; an empty argument yields a blank line.
    printf '%s\n' \
        "Usage: $0 -i INPUT_DIR [-o OUTPUT_DIR] [-f FAILED_DIR] TARGET..." \
        "" \
        "Run SymCC-instrumented TARGET in a loop, feeding newly generated inputs back " \
        "into it. Initial inputs are expected in INPUT_DIR, and new inputs are " \
        "continuously read from there. If OUTPUT_DIR is specified, a copy of the corpus " \
        "and of each generated input is preserved there. TARGET may contain the special " \
        "string \"@@\", which is replaced with the name of the current input file." \
        "If FAILED_DIR is specified, a copy of the failing test cases is preserved there." \
        "" \
        "Note that SymCC never changes the length of the input, so be sure that the " \
        "initial inputs cover all required input lengths."
}
18 |
# Parse the command line: -i INPUT_DIR is mandatory, -o OUTPUT_DIR and
# -f FAILED_DIR are optional. Everything after the options is the target
# command line.
while getopts "i:o:f:" opt; do
    case "$opt" in
        i)
            in=$OPTARG
            ;;
        o)
            out=$OPTARG
            ;;
        f)
            failed_dir=$OPTARG
            ;;
        *)
            usage
            exit 1
            ;;
    esac
done
shift $((OPTIND-1))

if [[ ! -v in ]]; then
    echo "Please specify the input directory!"
    usage
    exit 1
fi

# Without this check, "set -u" aborts with an unbound-variable error below.
if [[ $# -eq 0 ]]; then
    echo "Please specify the target!"
    usage
    exit 1
fi

# Resolve the program to an absolute path and flatten the command line into a
# single string. The intermediate array has its own name on purpose: assigning
# a string to an array variable only overwrites element 0, so the remaining
# elements would survive and be passed a second time whenever the variable is
# expanded with [@].
target_args=("$@")
target_args[0]=$(realpath "${target_args[0]}")
target="${target_args[*]}"
timeout="timeout -k 5 90"

# Create the work environment
work_dir=$(mktemp -d)
mkdir "$work_dir"/{next,symcc_out}
touch "$work_dir/analyzed_inputs"
if [[ -v out ]]; then
    mkdir -p "$out"
fi
if [[ -v failed_dir ]]; then
    mkdir -p "$failed_dir"
fi
58 |
# Remove the temporary work directory; installed as an EXIT trap so that it
# runs however the script terminates.
function cleanup() {
    # Quoted so that a TMPDIR containing whitespace cannot make rm operate on
    # unrelated paths.
    rm -rf --preserve-root -- "$work_dir"
}

trap cleanup EXIT
64 |
# Copy a single file ($1) into the directory $2; the copy is named after the
# SHA-256 digest of the file's content.
function copy_file_with_unique_name() {
    local file_name="$1"
    local dest_dir="$2"

    # sha256sum prints "<digest>  <name>"; keep only the digest.
    local digest
    digest=$(sha256sum "$file_name" | cut -d' ' -f1)
    cp "$file_name" "$dest_dir/$digest"
}
74 |
# Copy all files in the source directory ($1) to the destination directory
# ($2), renaming them according to their hash.
function copy_with_unique_name() {
    local source_dir="$1"
    local dest_dir="$2"

    if [ -n "$(ls -A "$source_dir")" ]; then
        local f
        for f in "$source_dir"/*; do
            # "ls -A" also counts dotfiles, which the glob does not match; in
            # that case the pattern stays literal and must not be copied.
            [ -e "$f" ] || continue
            copy_file_with_unique_name "$f" "$dest_dir"
        done
    fi
}
88 |
# Queue every file found in the directory $1 for the next generation by
# copying it, under its hash name, into the work directory's "next" folder.
function add_to_next_generation() {
    copy_with_unique_name "$1" "$work_dir/next"
}
94 |
# Preserve the files of the directory $1 in the output directory; does nothing
# when no output directory was requested on the command line.
function maybe_export() {
    local source_dir="$1"
    [[ -v out ]] || return 0
    copy_with_unique_name "$source_dir" "$out"
}
102 |
# Remove input files from the directory $1 that have already been analysed.
# Used to prevent an infinite loop.
function remove_analysed() {
    local source_dir="$1"
    local f
    for f in "$source_dir"/*; do
        # An unmatched glob stays literal; there is nothing to remove then.
        [ -e "$f" ] || continue
        # -x -F: compare whole lines literally. A plain "grep -q" treats the
        # name as a regular expression and also matches substrings, so a short
        # name would be reported as analysed whenever it occurs inside a
        # longer recorded name.
        if grep -qxF -- "$(basename "$f")" "$work_dir/analyzed_inputs"; then
            rm -- "$f"
        fi
    done
}
113 |
# Copy those files from the input directory to the next generation that haven't
# been analyzed yet and are not already queued.
function maybe_import() {
    if [ -n "$(ls -A "$in")" ]; then
        local f name
        for f in "$in"/*; do
            # Skip the literal pattern left behind by an unmatched glob.
            [ -e "$f" ] || continue
            name=$(basename "$f")

            # Whole-line, fixed-string comparison: with a plain "grep -q" an
            # input called "a" would count as analysed as soon as any recorded
            # name contains the letter "a", and would never be imported.
            if grep -qxF -- "$name" "$work_dir/analyzed_inputs"; then
                continue
            fi

            if [ -e "$work_dir/next/$name" ]; then
                continue
            fi

            echo "Importing $f from the input directory"
            cp "$f" "$work_dir/next"
        done
    fi
}
133 |
# Archive the input file $2 in FAILED_DIR when the target's return code $1 is
# non-zero and a FAILED_DIR was given on the command line.
function save_failed() {
    local ret_code=$1
    local input_file="$2"
    if [ $ret_code -ne 0 ]; then
        if [[ -v failed_dir ]]; then
            copy_file_with_unique_name "$input_file" "$failed_dir"
        fi
    fi
}
142 |
# Set up the shell environment
export SYMCC_OUTPUT_DIR="$work_dir/symcc_out"
export SYMCC_ENABLE_LINEARIZATION=1
# export SYMCC_AFL_COVERAGE_MAP=$work_dir/map

# Run generation after generation until we don't generate new inputs anymore
gen_count=0
while true; do
    # Initialize the generation
    maybe_import
    mv "$work_dir"/{next,cur}
    mkdir "$work_dir/next"

    # Run it (or wait if there's nothing to run on)
    if [ -n "$(ls -A "$work_dir/cur")" ]; then
        echo "Generation $gen_count..."

        for f in "$work_dir"/cur/*; do
            echo "Running on $f"
            if [[ "$target " =~ " @@ " ]]; then
                # $target carries the complete command line as one string.
                # Substitute the placeholder in that string only; expanding
                # the variable with [@] would also pick up any further array
                # elements and hand the arguments to the target twice.
                env SYMCC_INPUT_FILE="$f" $timeout ${target/@@/$f} >/dev/null 2>&1
                ret_code=$?
            else
                $timeout $target <"$f" >/dev/null 2>&1
                ret_code=$?
            fi

            # Make the new test cases part of the next generation
            add_to_next_generation "$work_dir/symcc_out"
            maybe_export "$work_dir/symcc_out"
            remove_analysed "$work_dir/next"
            save_failed $ret_code "$f"
            # Remember the input so that it is not queued again.
            basename "$f" >> "$work_dir/analyzed_inputs"
            rm -f "$f"
        done

        rm -rf "$work_dir/cur"
        gen_count=$((gen_count+1))
    else
        echo "Waiting for more input..."
        rmdir "$work_dir/cur"
        sleep 5
    fi
done
187 |
--------------------------------------------------------------------------------
/util/quicktest.sh:
--------------------------------------------------------------------------------
#!/bin/sh

# This script is to make a quick test of SymCC with Qsym backend, it
# is supposed to work on ubuntu groovy, e.g., after running:

# vagrant init ubuntu/groovy64
# vagrant up
# vagrant ssh

# exit when any command fails
set -e

if ! uname -a | grep -q ubuntu-groovy; then
    echo "** Warning: Script known to work for ubuntu groovy only **"
fi

# update system
sudo apt-get update
sudo apt-get upgrade -y

# install requirements
sudo apt-get install -y cargo clang-10 cmake g++ git libz3-dev llvm-10-dev llvm-10-tools ninja-build python3-pip zlib1g-dev
sudo pip3 install lit

# Clone project
git clone https://github.com/eurecom-s3/symcc.git
cd symcc

# init/update submodules
git submodule update --init --recursive

# build
mkdir ../symcc-build
cd ../symcc-build
cmake -G Ninja -DSYMCC_RT_BACKEND=qsym -DZ3_TRUST_SYSTEM_VERSION=on ../symcc
ninja check

# create a test case
cat > test.c << 'EOF'
#include <stdio.h>
#include <stdint.h>
#include <unistd.h>

int foo(int a, int b) {
    if (2 * a < b)
        return a;
    else if (a % b)
        return b;
    else
        return a + b;
}

int main(int argc, char* argv[]) {
    int x;
    if (read(STDIN_FILENO, &x, sizeof(x)) != sizeof(x)) {
        printf("Failed to read x\n");
        return -1;
    }
    printf("%d\n", foo(x, 7));
    return 0;
}

EOF

# test it
./symcc test.c -o test.out
mkdir -p results
SYMCC_OUTPUT_DIR="$(pwd)/results"
export SYMCC_OUTPUT_DIR
echo 'aaaa' | ./test.out
./test.out < "${SYMCC_OUTPUT_DIR}/000000"

# TODO: this is not a very precise regression test, generated testcase
# may be incorrect, but binding to a specific test case may be too
# narrow (fail if test isn't exactly the expected result, but a
# different valid one), this should be improved.
if [ -f "${SYMCC_OUTPUT_DIR}/000001" ]; then
    echo "SUCCESS: looks like this build of SymCC is working, type vagrant ssh to interact with it !"
else
    # Make the failure visible instead of ending silently with status 0.
    echo "FAILURE: SymCC did not generate the expected test case ${SYMCC_OUTPUT_DIR}/000001" >&2
    exit 1
fi
79 |
--------------------------------------------------------------------------------
/util/symcc_fuzzing_helper/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | /target/
4 |
5 | # These are backup files generated by rustfmt
6 | **/*.rs.bk
--------------------------------------------------------------------------------
/util/symcc_fuzzing_helper/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 3
4 |
5 | [[package]]
6 | name = "aho-corasick"
7 | version = "0.7.8"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "743ad5a418686aad3b87fd14c43badd828cf26e214a00f92a384291cf22e1811"
10 | dependencies = [
11 | "memchr",
12 | ]
13 |
14 | [[package]]
15 | name = "anyhow"
16 | version = "1.0.26"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c"
19 |
20 | [[package]]
21 | name = "atty"
22 | version = "0.2.14"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
25 | dependencies = [
26 | "hermit-abi",
27 | "libc",
28 | "winapi",
29 | ]
30 |
31 | [[package]]
32 | name = "autocfg"
33 | version = "1.1.0"
34 | source = "registry+https://github.com/rust-lang/crates.io-index"
35 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
36 |
37 | [[package]]
38 | name = "bitflags"
39 | version = "1.2.1"
40 | source = "registry+https://github.com/rust-lang/crates.io-index"
41 | checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
42 |
43 | [[package]]
44 | name = "c2-chacha"
45 | version = "0.2.3"
46 | source = "registry+https://github.com/rust-lang/crates.io-index"
47 | checksum = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
48 | dependencies = [
49 | "ppv-lite86",
50 | ]
51 |
52 | [[package]]
53 | name = "cfg-if"
54 | version = "0.1.10"
55 | source = "registry+https://github.com/rust-lang/crates.io-index"
56 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
57 |
58 | [[package]]
59 | name = "clap"
60 | version = "3.1.18"
61 | source = "registry+https://github.com/rust-lang/crates.io-index"
62 | checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b"
63 | dependencies = [
64 | "atty",
65 | "bitflags",
66 | "clap_derive",
67 | "clap_lex",
68 | "indexmap",
69 | "lazy_static",
70 | "strsim",
71 | "termcolor",
72 | "textwrap",
73 | ]
74 |
75 | [[package]]
76 | name = "clap_derive"
77 | version = "3.1.18"
78 | source = "registry+https://github.com/rust-lang/crates.io-index"
79 | checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c"
80 | dependencies = [
81 | "heck",
82 | "proc-macro-error",
83 | "proc-macro2",
84 | "quote",
85 | "syn",
86 | ]
87 |
88 | [[package]]
89 | name = "clap_lex"
90 | version = "0.2.0"
91 | source = "registry+https://github.com/rust-lang/crates.io-index"
92 | checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
93 | dependencies = [
94 | "os_str_bytes",
95 | ]
96 |
97 | [[package]]
98 | name = "env_logger"
99 | version = "0.7.1"
100 | source = "registry+https://github.com/rust-lang/crates.io-index"
101 | checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
102 | dependencies = [
103 | "atty",
104 | "humantime",
105 | "log",
106 | "regex",
107 | "termcolor",
108 | ]
109 |
110 | [[package]]
111 | name = "getrandom"
112 | version = "0.1.14"
113 | source = "registry+https://github.com/rust-lang/crates.io-index"
114 | checksum = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb"
115 | dependencies = [
116 | "cfg-if",
117 | "libc",
118 | "wasi",
119 | ]
120 |
121 | [[package]]
122 | name = "hashbrown"
123 | version = "0.11.2"
124 | source = "registry+https://github.com/rust-lang/crates.io-index"
125 | checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
126 |
127 | [[package]]
128 | name = "heck"
129 | version = "0.4.0"
130 | source = "registry+https://github.com/rust-lang/crates.io-index"
131 | checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
132 |
133 | [[package]]
134 | name = "hermit-abi"
135 | version = "0.1.6"
136 | source = "registry+https://github.com/rust-lang/crates.io-index"
137 | checksum = "eff2656d88f158ce120947499e971d743c05dbcbed62e5bd2f38f1698bbc3772"
138 | dependencies = [
139 | "libc",
140 | ]
141 |
142 | [[package]]
143 | name = "humantime"
144 | version = "1.3.0"
145 | source = "registry+https://github.com/rust-lang/crates.io-index"
146 | checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f"
147 | dependencies = [
148 | "quick-error",
149 | ]
150 |
151 | [[package]]
152 | name = "indexmap"
153 | version = "1.8.2"
154 | source = "registry+https://github.com/rust-lang/crates.io-index"
155 | checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a"
156 | dependencies = [
157 | "autocfg",
158 | "hashbrown",
159 | ]
160 |
161 | [[package]]
162 | name = "lazy_static"
163 | version = "1.4.0"
164 | source = "registry+https://github.com/rust-lang/crates.io-index"
165 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
166 |
167 | [[package]]
168 | name = "libc"
169 | version = "0.2.66"
170 | source = "registry+https://github.com/rust-lang/crates.io-index"
171 | checksum = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558"
172 |
173 | [[package]]
174 | name = "log"
175 | version = "0.4.8"
176 | source = "registry+https://github.com/rust-lang/crates.io-index"
177 | checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
178 | dependencies = [
179 | "cfg-if",
180 | ]
181 |
182 | [[package]]
183 | name = "memchr"
184 | version = "2.3.0"
185 | source = "registry+https://github.com/rust-lang/crates.io-index"
186 | checksum = "3197e20c7edb283f87c071ddfc7a2cca8f8e0b888c242959846a6fce03c72223"
187 |
188 | [[package]]
189 | name = "os_str_bytes"
190 | version = "6.1.0"
191 | source = "registry+https://github.com/rust-lang/crates.io-index"
192 | checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa"
193 |
194 | [[package]]
195 | name = "ppv-lite86"
196 | version = "0.2.6"
197 | source = "registry+https://github.com/rust-lang/crates.io-index"
198 | checksum = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
199 |
200 | [[package]]
201 | name = "proc-macro-error"
202 | version = "1.0.4"
203 | source = "registry+https://github.com/rust-lang/crates.io-index"
204 | checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
205 | dependencies = [
206 | "proc-macro-error-attr",
207 | "proc-macro2",
208 | "quote",
209 | "syn",
210 | "version_check",
211 | ]
212 |
213 | [[package]]
214 | name = "proc-macro-error-attr"
215 | version = "1.0.4"
216 | source = "registry+https://github.com/rust-lang/crates.io-index"
217 | checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
218 | dependencies = [
219 | "proc-macro2",
220 | "quote",
221 | "version_check",
222 | ]
223 |
224 | [[package]]
225 | name = "proc-macro2"
226 | version = "1.0.39"
227 | source = "registry+https://github.com/rust-lang/crates.io-index"
228 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f"
229 | dependencies = [
230 | "unicode-ident",
231 | ]
232 |
233 | [[package]]
234 | name = "quick-error"
235 | version = "1.2.3"
236 | source = "registry+https://github.com/rust-lang/crates.io-index"
237 | checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
238 |
239 | [[package]]
240 | name = "quote"
241 | version = "1.0.18"
242 | source = "registry+https://github.com/rust-lang/crates.io-index"
243 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
244 | dependencies = [
245 | "proc-macro2",
246 | ]
247 |
248 | [[package]]
249 | name = "rand"
250 | version = "0.7.3"
251 | source = "registry+https://github.com/rust-lang/crates.io-index"
252 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
253 | dependencies = [
254 | "getrandom",
255 | "libc",
256 | "rand_chacha",
257 | "rand_core",
258 | "rand_hc",
259 | ]
260 |
261 | [[package]]
262 | name = "rand_chacha"
263 | version = "0.2.1"
264 | source = "registry+https://github.com/rust-lang/crates.io-index"
265 | checksum = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853"
266 | dependencies = [
267 | "c2-chacha",
268 | "rand_core",
269 | ]
270 |
271 | [[package]]
272 | name = "rand_core"
273 | version = "0.5.1"
274 | source = "registry+https://github.com/rust-lang/crates.io-index"
275 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
276 | dependencies = [
277 | "getrandom",
278 | ]
279 |
280 | [[package]]
281 | name = "rand_hc"
282 | version = "0.2.0"
283 | source = "registry+https://github.com/rust-lang/crates.io-index"
284 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
285 | dependencies = [
286 | "rand_core",
287 | ]
288 |
289 | [[package]]
290 | name = "redox_syscall"
291 | version = "0.1.56"
292 | source = "registry+https://github.com/rust-lang/crates.io-index"
293 | checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
294 |
295 | [[package]]
296 | name = "regex"
297 | version = "1.3.4"
298 | source = "registry+https://github.com/rust-lang/crates.io-index"
299 | checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8"
300 | dependencies = [
301 | "aho-corasick",
302 | "memchr",
303 | "regex-syntax",
304 | "thread_local",
305 | ]
306 |
307 | [[package]]
308 | name = "regex-syntax"
309 | version = "0.6.14"
310 | source = "registry+https://github.com/rust-lang/crates.io-index"
311 | checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06"
312 |
313 | [[package]]
314 | name = "remove_dir_all"
315 | version = "0.5.2"
316 | source = "registry+https://github.com/rust-lang/crates.io-index"
317 | checksum = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
318 | dependencies = [
319 | "winapi",
320 | ]
321 |
322 | [[package]]
323 | name = "strsim"
324 | version = "0.10.0"
325 | source = "registry+https://github.com/rust-lang/crates.io-index"
326 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
327 |
328 | [[package]]
329 | name = "symcc_fuzzing_helper"
330 | version = "0.1.0"
331 | dependencies = [
332 | "anyhow",
333 | "clap",
334 | "env_logger",
335 | "log",
336 | "regex",
337 | "tempfile",
338 | ]
339 |
340 | [[package]]
341 | name = "syn"
342 | version = "1.0.96"
343 | source = "registry+https://github.com/rust-lang/crates.io-index"
344 | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf"
345 | dependencies = [
346 | "proc-macro2",
347 | "quote",
348 | "unicode-ident",
349 | ]
350 |
351 | [[package]]
352 | name = "tempfile"
353 | version = "3.1.0"
354 | source = "registry+https://github.com/rust-lang/crates.io-index"
355 | checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9"
356 | dependencies = [
357 | "cfg-if",
358 | "libc",
359 | "rand",
360 | "redox_syscall",
361 | "remove_dir_all",
362 | "winapi",
363 | ]
364 |
365 | [[package]]
366 | name = "termcolor"
367 | version = "1.1.3"
368 | source = "registry+https://github.com/rust-lang/crates.io-index"
369 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
370 | dependencies = [
371 | "winapi-util",
372 | ]
373 |
374 | [[package]]
375 | name = "textwrap"
376 | version = "0.15.0"
377 | source = "registry+https://github.com/rust-lang/crates.io-index"
378 | checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
379 |
380 | [[package]]
381 | name = "thread_local"
382 | version = "1.0.1"
383 | source = "registry+https://github.com/rust-lang/crates.io-index"
384 | checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
385 | dependencies = [
386 | "lazy_static",
387 | ]
388 |
389 | [[package]]
390 | name = "unicode-ident"
391 | version = "1.0.0"
392 | source = "registry+https://github.com/rust-lang/crates.io-index"
393 | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee"
394 |
395 | [[package]]
396 | name = "version_check"
397 | version = "0.9.4"
398 | source = "registry+https://github.com/rust-lang/crates.io-index"
399 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
400 |
401 | [[package]]
402 | name = "wasi"
403 | version = "0.9.0+wasi-snapshot-preview1"
404 | source = "registry+https://github.com/rust-lang/crates.io-index"
405 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
406 |
407 | [[package]]
408 | name = "winapi"
409 | version = "0.3.8"
410 | source = "registry+https://github.com/rust-lang/crates.io-index"
411 | checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
412 | dependencies = [
413 | "winapi-i686-pc-windows-gnu",
414 | "winapi-x86_64-pc-windows-gnu",
415 | ]
416 |
417 | [[package]]
418 | name = "winapi-i686-pc-windows-gnu"
419 | version = "0.4.0"
420 | source = "registry+https://github.com/rust-lang/crates.io-index"
421 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
422 |
423 | [[package]]
424 | name = "winapi-util"
425 | version = "0.1.3"
426 | source = "registry+https://github.com/rust-lang/crates.io-index"
427 | checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80"
428 | dependencies = [
429 | "winapi",
430 | ]
431 |
432 | [[package]]
433 | name = "winapi-x86_64-pc-windows-gnu"
434 | version = "0.4.0"
435 | source = "registry+https://github.com/rust-lang/crates.io-index"
436 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
437 |
--------------------------------------------------------------------------------
/util/symcc_fuzzing_helper/Cargo.toml:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | [package]
16 | name = "symcc_fuzzing_helper"
17 | version = "0.1.0"
18 | authors = ["Sebastian Poeplau "]
19 | edition = "2018"
20 | license = "GPL-3.0-or-later"
21 |
22 | [dependencies]
23 | clap = { version = "3", features = ["derive"] }
24 | tempfile = "3.1"
25 | anyhow = "1.0"
26 | log = "0.4.0"
27 | env_logger = "0.7.1"
28 | regex = "1"
29 |
--------------------------------------------------------------------------------
/util/symcc_fuzzing_helper/src/main.rs:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | mod symcc;
16 |
17 | use anyhow::{Context, Result};
18 | use clap::{self, StructOpt};
19 | use std::collections::HashSet;
20 | use std::fs;
21 | use std::fs::File;
22 | use std::io::Write;
23 | use std::path::{Path, PathBuf};
24 | use std::thread;
25 | use std::time::{Duration, Instant};
26 | use symcc::{AflConfig, AflMap, AflShowmapResult, SymCC, TestcaseDir};
27 | use tempfile::tempdir;
28 |
/// Number of seconds that must have elapsed since the last statistics output
/// before the main loop writes the statistics again.
const STATS_INTERVAL_SEC: u64 = 60;
30 |
31 | // TODO extend timeout when idle? Possibly reprocess previously timed-out
32 | // inputs.
33 |
34 | #[derive(Debug, StructOpt)]
35 | #[clap(about = "Make SymCC collaborate with AFL.")]
36 | struct CLI {
37 | /// The name of the fuzzer to work with
38 | #[clap(short = 'a')]
39 | fuzzer_name: String,
40 |
41 | /// The AFL output directory
42 | #[clap(short = 'o')]
43 | output_dir: PathBuf,
44 |
45 | /// Name to use for SymCC
46 | #[clap(short = 'n')]
47 | name: String,
48 |
49 | /// Enable verbose logging
50 | #[clap(short = 'v')]
51 | verbose: bool,
52 |
53 | /// Program under test
54 | command: Vec,
55 | }
56 |
/// Execution statistics.
///
/// All counters start at zero (`Default`); they are updated once per SymCC
/// run and periodically written to the statistics file.
#[derive(Debug, Default)]
struct Stats {
    /// Number of successful executions.
    total_count: u32,

    /// Time spent in successful executions of SymCC.
    total_time: Duration,

    /// Time spent in the solver as part of successfully running SymCC.
    ///
    /// `None` until the first execution reports a solver time.
    solver_time: Option<Duration>,

    /// Number of failed executions.
    failed_count: u32,

    /// Time spent in failed SymCC executions.
    failed_time: Duration,
}
75 |
76 | impl Stats {
77 | fn add_execution(&mut self, result: &symcc::SymCCResult) {
78 | if result.killed {
79 | self.failed_count += 1;
80 | self.failed_time += result.time;
81 | } else {
82 | self.total_count += 1;
83 | self.total_time += result.time;
84 | self.solver_time = match (self.solver_time, result.solver_time) {
85 | (None, None) => None,
86 | (Some(t), None) => Some(t), // no queries in this execution
87 | (None, Some(t)) => Some(t),
88 | (Some(a), Some(b)) => Some(a + b),
89 | };
90 | }
91 | }
92 |
93 | fn log(&self, out: &mut impl Write) -> Result<()> {
94 | writeln!(out, "Successful executions: {}", self.total_count)?;
95 | writeln!(
96 | out,
97 | "Time in successful executions: {}ms",
98 | self.total_time.as_millis()
99 | )?;
100 |
101 | if self.total_count > 0 {
102 | writeln!(
103 | out,
104 | "Avg time per successful execution: {}ms",
105 | (self.total_time / self.total_count).as_millis()
106 | )?;
107 | }
108 |
109 | if let Some(st) = self.solver_time {
110 | writeln!(
111 | out,
112 | "Solver time (successful executions): {}ms",
113 | st.as_millis()
114 | )?;
115 |
116 | if self.total_time.as_secs() > 0 {
117 | let solver_share =
118 | st.as_millis() as f64 / self.total_time.as_millis() as f64 * 100_f64;
119 | writeln!(
120 | out,
121 | "Solver time share (successful executions): {:.2}% (-> {:.2}% in execution)",
122 | solver_share,
123 | 100_f64 - solver_share
124 | )?;
125 | writeln!(
126 | out,
127 | "Avg solver time per successful execution: {}ms",
128 | (st / self.total_count).as_millis()
129 | )?;
130 | }
131 | }
132 |
133 | writeln!(out, "Failed executions: {}", self.failed_count)?;
134 | writeln!(
135 | out,
136 | "Time spent on failed executions: {}ms",
137 | self.failed_time.as_millis()
138 | )?;
139 |
140 | if self.failed_count > 0 {
141 | writeln!(
142 | out,
143 | "Avg time in failed executions: {}ms",
144 | (self.failed_time / self.failed_count).as_millis()
145 | )?;
146 | }
147 |
148 | writeln!(
149 | out,
150 | "--------------------------------------------------------------------------------"
151 | )?;
152 |
153 | Ok(())
154 | }
155 | }
156 |
157 | /// Mutable run-time state.
158 | ///
159 | /// This is a collection of the state we update during execution.
160 | struct State {
161 | /// The cumulative coverage of all test cases generated so far.
162 | current_bitmap: AflMap,
163 |
164 | /// The AFL test cases that have been analyzed so far.
165 | processed_files: HashSet,
166 |
167 | /// The place to put new and useful test cases.
168 | queue: TestcaseDir,
169 |
170 | /// The place for new test cases that time out.
171 | hangs: TestcaseDir,
172 |
173 | /// The place for new test cases that crash.
174 | crashes: TestcaseDir,
175 |
176 | /// Run-time statistics.
177 | stats: Stats,
178 |
179 | /// When did we last output the statistics?
180 | last_stats_output: Instant,
181 |
182 | /// Write statistics to this file.
183 | stats_file: File,
184 | }
185 |
186 | impl State {
187 | /// Initialize the run-time environment in the given output directory.
188 | ///
189 | /// This involves creating the output directory and all required
190 | /// subdirectories.
191 | fn initialize(output_dir: impl AsRef) -> Result {
192 | let symcc_dir = output_dir.as_ref();
193 |
194 | fs::create_dir(&symcc_dir).with_context(|| {
195 | format!("Failed to create SymCC's directory {}", symcc_dir.display())
196 | })?;
197 | let symcc_queue =
198 | TestcaseDir::new(symcc_dir.join("queue")).context("Failed to create SymCC's queue")?;
199 | let symcc_hangs = TestcaseDir::new(symcc_dir.join("hangs"))?;
200 | let symcc_crashes = TestcaseDir::new(symcc_dir.join("crashes"))?;
201 | let stats_file = File::create(symcc_dir.join("stats"))?;
202 |
203 | Ok(State {
204 | current_bitmap: AflMap::new(),
205 | processed_files: HashSet::new(),
206 | queue: symcc_queue,
207 | hangs: symcc_hangs,
208 | crashes: symcc_crashes,
209 | stats: Default::default(), // Is this bad style?
210 | last_stats_output: Instant::now(),
211 | stats_file,
212 | })
213 | }
214 |
215 | /// Run a single input through SymCC and process the new test cases it
216 | /// generates.
217 | fn test_input(
218 | &mut self,
219 | input: impl AsRef,
220 | symcc: &SymCC,
221 | afl_config: &AflConfig,
222 | ) -> Result<()> {
223 | log::info!("Running on input {}", input.as_ref().display());
224 |
225 | let tmp_dir = tempdir()
226 | .context("Failed to create a temporary directory for this execution of SymCC")?;
227 |
228 | let mut num_interesting = 0u64;
229 | let mut num_total = 0u64;
230 |
231 | let symcc_result = symcc
232 | .run(&input, tmp_dir.path().join("output"))
233 | .context("Failed to run SymCC")?;
234 | for new_test in symcc_result.test_cases.iter() {
235 | let res = process_new_testcase(&new_test, &input, &tmp_dir, &afl_config, self)?;
236 |
237 | num_total += 1;
238 | if res == TestcaseResult::New {
239 | log::debug!("Test case is interesting");
240 | num_interesting += 1;
241 | }
242 | }
243 |
244 | log::info!(
245 | "Generated {} test cases ({} new)",
246 | num_total,
247 | num_interesting
248 | );
249 |
250 | if symcc_result.killed {
251 | log::info!(
252 | "The target process was killed (probably timeout or out of memory); \
253 | archiving to {}",
254 | self.hangs.path.display()
255 | );
256 | symcc::copy_testcase(&input, &mut self.hangs, &input)
257 | .context("Failed to archive the test case")?;
258 | }
259 |
260 | self.processed_files.insert(input.as_ref().to_path_buf());
261 | self.stats.add_execution(&symcc_result);
262 | Ok(())
263 | }
264 | }
265 |
266 | fn main() -> Result<()> {
267 | let options = CLI::parse();
268 | env_logger::builder()
269 | .filter_level(if options.verbose {
270 | log::LevelFilter::Debug
271 | } else {
272 | log::LevelFilter::Info
273 | })
274 | .init();
275 |
276 | if !options.output_dir.is_dir() {
277 | log::error!(
278 | "The directory {} does not exist!",
279 | options.output_dir.display()
280 | );
281 | return Ok(());
282 | }
283 |
284 | let afl_queue = options.output_dir.join(&options.fuzzer_name).join("queue");
285 | if !afl_queue.is_dir() {
286 | log::error!("The AFL queue {} does not exist!", afl_queue.display());
287 | return Ok(());
288 | }
289 |
290 | let symcc_dir = options.output_dir.join(&options.name);
291 | if symcc_dir.is_dir() {
292 | log::error!(
293 | "{} already exists; we do not currently support resuming",
294 | symcc_dir.display()
295 | );
296 | return Ok(());
297 | }
298 |
299 | let symcc = SymCC::new(symcc_dir.clone(), &options.command);
300 | log::debug!("SymCC configuration: {:?}", &symcc);
301 | let afl_config = AflConfig::load(options.output_dir.join(&options.fuzzer_name))?;
302 | log::debug!("AFL configuration: {:?}", &afl_config);
303 | let mut state = State::initialize(symcc_dir)?;
304 |
305 | loop {
306 | match afl_config
307 | .best_new_testcase(&state.processed_files)
308 | .context("Failed to check for new test cases")?
309 | {
310 | None => {
311 | log::debug!("Waiting for new test cases...");
312 | thread::sleep(Duration::from_secs(5));
313 | }
314 | Some(input) => state.test_input(&input, &symcc, &afl_config)?,
315 | }
316 |
317 | if state.last_stats_output.elapsed().as_secs() > STATS_INTERVAL_SEC {
318 | if let Err(e) = state.stats.log(&mut state.stats_file) {
319 | log::error!("Failed to log run-time statistics: {}", e);
320 | }
321 | state.last_stats_output = Instant::now();
322 | }
323 | }
324 | }
325 |
/// The possible outcomes of test-case evaluation.
///
/// The type is a plain tag without payload, so it is `Copy` and can be
/// compared and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TestcaseResult {
    /// afl-showmap succeeded, but merging the test case's bitmap into the
    /// current bitmap did not mark it as interesting.
    Uninteresting,
    /// afl-showmap succeeded and merging the bitmap marked the test case as
    /// interesting; it has been copied to the queue.
    New,
    /// afl-showmap timed out on the test case; it is ignored.
    Hang,
    /// The test case crashed afl-showmap; it has been copied to both the
    /// crashes directory and the queue.
    Crash,
}
334 |
335 | /// Check if the given test case provides new coverage, crashes, or times out;
336 | /// copy it to the corresponding location.
337 | fn process_new_testcase(
338 | testcase: impl AsRef,
339 | parent: impl AsRef,
340 | tmp_dir: impl AsRef,
341 | afl_config: &AflConfig,
342 | state: &mut State,
343 | ) -> Result {
344 | log::debug!("Processing test case {}", testcase.as_ref().display());
345 |
346 | let testcase_bitmap_path = tmp_dir.as_ref().join("testcase_bitmap");
347 | match afl_config
348 | .run_showmap(&testcase_bitmap_path, &testcase)
349 | .with_context(|| {
350 | format!(
351 | "Failed to check whether test case {} is interesting",
352 | &testcase.as_ref().display()
353 | )
354 | })? {
355 | AflShowmapResult::Success(testcase_bitmap) => {
356 | let interesting = state.current_bitmap.merge(*testcase_bitmap)?;
357 | if interesting {
358 | symcc::copy_testcase(&testcase, &mut state.queue, parent).with_context(|| {
359 | format!(
360 | "Failed to enqueue the new test case {}",
361 | testcase.as_ref().display()
362 | )
363 | })?;
364 |
365 | Ok(TestcaseResult::New)
366 | } else {
367 | Ok(TestcaseResult::Uninteresting)
368 | }
369 | }
370 | AflShowmapResult::Hang => {
371 | log::info!(
372 | "Ignoring new test case {} because afl-showmap timed out on it",
373 | testcase.as_ref().display()
374 | );
375 | Ok(TestcaseResult::Hang)
376 | }
377 | AflShowmapResult::Crash => {
378 | log::info!(
379 | "Test case {} crashes afl-showmap; it is probably interesting",
380 | testcase.as_ref().display()
381 | );
382 | symcc::copy_testcase(&testcase, &mut state.crashes, &parent)?;
383 | symcc::copy_testcase(&testcase, &mut state.queue, &parent).with_context(|| {
384 | format!(
385 | "Failed to enqueue the new test case {}",
386 | testcase.as_ref().display()
387 | )
388 | })?;
389 | Ok(TestcaseResult::Crash)
390 | }
391 | }
392 | }
393 |
--------------------------------------------------------------------------------