├── .adacore-gitlab-ci.yml
├── .clang-format
├── .dockerignore
├── .github
└── workflows
│ ├── check_style.yml
│ └── run_tests.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── LICENSE.lgpl
├── README.md
├── Vagrantfile
├── cmake
└── FindFilesystem.cmake
├── compiler
├── Main.cpp
├── Pass.cpp
├── Pass.h
├── Runtime.cpp
├── Runtime.h
├── Symbolizer.cpp
├── Symbolizer.h
├── sym++.in
└── symcc.in
├── docs
├── 32-bit.txt
├── Backends.txt
├── C++.txt
├── Concreteness.txt
├── Configuration.txt
├── Experiments.txt
├── Fuzzing.txt
├── Ideas.txt
├── Libc.txt
├── Optimization.txt
└── Testing.txt
├── runtime
├── CMakeLists.txt
├── Config.cpp
├── Config.h
├── GarbageCollection.cpp
├── GarbageCollection.h
├── LibcWrappers.cpp
├── LibcWrappers.h
├── RuntimeCommon.cpp
├── RuntimeCommon.h
├── Shadow.cpp
├── Shadow.h
├── bindings
│ ├── README
│ └── ada
│ │ ├── README
│ │ ├── symcc.ads
│ │ └── symcc.gpr
├── qsym_backend
│ ├── CMakeLists.txt
│ ├── Runtime.cpp
│ ├── Runtime.h
│ └── pin.H
├── rust_backend
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── Runtime.cpp
│ ├── Runtime.h
│ └── RustRuntime.h
└── simple_backend
│ ├── CMakeLists.txt
│ ├── Runtime.cpp
│ └── Runtime.h
├── sample.cpp
├── test
├── CMakeLists.txt
├── README
├── arrays.c
├── arrays.test32
├── bcopy_bcmp_bzero.c
├── bool_cast.c
├── bswap.c
├── bswap.test32
├── concrete_structs.ll
├── file_input.c
├── file_input.test32
├── floats.c
├── floats.test32
├── globals.c
├── globals.test32
├── if.c
├── if.test32
├── integers.c
├── integers.test32
├── large_alloc.c
├── large_alloc.test32
├── lit.cfg
├── lit.site.cfg.in
├── load_store.ll
├── loop.c
├── loop.test32
├── memcpy.c
├── memcpy.test32
├── memory_input.c
├── pointers.c
├── pointers.test32
├── propagation_select.c
├── read.c
├── read.test32
├── regression
│ └── cxa_vector.ll
├── strings.c
├── strings.test32
├── structs.c
├── structs.test32
├── switch.c
├── switch.test32
├── symbolic_structs.ll
├── test_case_handler.c
├── uadd_sat.ll
├── uadd_sat.test32
├── usub_sat.ll
└── usub_sat.test32
└── util
├── pure_concolic_execution.sh
├── quicktest.sh
└── symcc_fuzzing_helper
├── .gitignore
├── Cargo.lock
├── Cargo.toml
└── src
├── main.rs
└── symcc.rs
/.adacore-gitlab-ci.yml:
--------------------------------------------------------------------------------
1 | workflow:
2 | rules:
3 | - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
4 |
5 | anod_build:
6 | services:
7 | - image:sandbox
8 | - cpu:8
9 | - mem:16
10 | stage: build
11 | script:
12 | - export ANOD_DEFAULT_SANDBOX_DIR=/it/wave
13 |
14 | # Check out QSYM
15 | - cd runtime/qsym_backend
16 | - git clone -b symcc https://gitlab-ci-token:${CI_JOB_TOKEN}@${CI_SERVER_HOST}:${CI_SERVER_PORT}/eng/fuzz/qsym
17 |
18 | # Use our repositories
19 | - anod vcs --add-repo symcc $CI_PROJECT_DIR
20 | - anod vcs --add-repo qsym $CI_PROJECT_DIR/runtime/qsym_backend/qsym
21 |
22 | # Build SymCC
23 | - anod source symcc
24 | - anod build symcc
25 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | BasedOnStyle: LLVM
3 | ...
4 |
--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
1 | # Ignore the third-party software that's shipped with Qsym; we don't use it anyway.
2 | runtime/qsym_backend/qsym/third_party
3 | # Similarly, we don't run its tests.
4 | runtime/qsym_backend/qsym/tests
5 |
6 | # Rust stores build artifacts in the tree
7 | util/symcc_fuzzing_helper/target
8 |
9 | # Some build utilities that we can ignore
10 | TAGS
11 | compile_commands.json
12 |
13 | # The Dockerfile itself doesn't need to be copied
14 | Dockerfile
--------------------------------------------------------------------------------
/.github/workflows/check_style.yml:
--------------------------------------------------------------------------------
1 | name: Check coding style
2 | on: [pull_request]
3 | jobs:
4 | coding_style:
5 | runs-on: ubuntu-22.04
6 | steps:
7 | - uses: actions/checkout@v3
8 | with:
9 | fetch-depth: 0
10 | - name: Run clang-format
11 | shell: bash
12 | run: |
13 | format_changes=$(git clang-format-14 --quiet --diff \
14 | ${{ github.event.pull_request.base.sha }} \
15 | ${{ github.event.pull_request.head.sha }} | wc -c)
16 | if [[ $format_changes -ne 0 ]]; then
17 | echo "Please format your changes with clang-format using the LLVM style, e.g., git clang-format --style LLVM before committing"
18 | exit 1
19 | fi
20 |
--------------------------------------------------------------------------------
/.github/workflows/run_tests.yml:
--------------------------------------------------------------------------------
1 | name: Compile and test SymCC
2 | on: [pull_request, workflow_dispatch]
3 | jobs:
4 | build_and_test_symcc: # builds each Dockerfile stage in turn so a failure is attributed to the right stage
5 | runs-on: ubuntu-22.04 # same runner image as the other jobs in this workflow
6 | steps:
7 | - uses: actions/checkout@v3 # same checkout version as the other jobs in this workflow
8 | - name: Setup docker compilation environment
9 | run: docker build --target builder -t symcc .
10 | - name: Build and test SymCC with simple backend
11 | run: docker build --target builder_simple -t symcc .
12 | - name: Build libcxx using SymCC simple backend
13 | run: docker build --target builder_libcxx -t symcc .
14 | - name: Build and test SymCC with Qsym backend
15 | run: docker build --target builder_qsym -t symcc .
16 | - name: Creation of the final SymCC docker image with Qsym backend and libcxx
17 | run: docker build -t symcc .
18 | llvm_compatibility:
19 | runs-on: ubuntu-22.04
20 | strategy:
21 | matrix:
22 | llvm_version: [11, 12, 13, 14, 15]
23 | steps:
24 | - uses: actions/checkout@v3
25 | with:
26 | submodules: true
27 | - name: Install dependencies
28 | run: |
29 | sudo apt-get update
30 | sudo apt-get install -y \
31 | llvm-${{ matrix.llvm_version }}-dev \
32 | libz3-dev \
33 | - name: Build SymCC with the QSYM backend
34 | run: |
35 | mkdir build
36 | cd build
37 | cmake \
38 | -DCMAKE_BUILD_TYPE=Release \
39 | -DZ3_TRUST_SYSTEM_VERSION=ON \
40 | -DQSYM_BACKEND=ON \
41 | -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \
42 | ..
43 | make
44 | llvm_compatibility_latest_llvm:
45 | runs-on: ubuntu-22.04
46 | strategy:
47 | matrix:
48 | llvm_version: [16, 17, 18]
49 | steps:
50 | - uses: actions/checkout@v3
51 | with:
52 | submodules: true
53 | - name: Add LLVM project deb repository
54 | uses: myci-actions/add-deb-repo@11
55 | with:
56 | repo: deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-${{ matrix.llvm_version }} main
57 | repo-name: llvm
58 | update: false
59 | keys-asc: https://apt.llvm.org/llvm-snapshot.gpg.key
60 | - name: Install dependencies
61 | run: |
62 | sudo apt-get update
63 | sudo apt-get install -y \
64 | llvm-${{ matrix.llvm_version }}-dev \
65 | libz3-dev \
66 | - name: Build SymCC with the QSYM backend
67 | run: |
68 | mkdir build
69 | cd build
70 | cmake \
71 | -DCMAKE_BUILD_TYPE=Release \
72 | -DZ3_TRUST_SYSTEM_VERSION=ON \
73 | -DQSYM_BACKEND=ON \
74 | -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm_version }}/cmake \
75 | ..
76 | make
77 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Prerequisites
2 | *.d
3 |
4 | # Compiled Object files
5 | *.slo
6 | *.lo
7 | *.o
8 | *.obj
9 |
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 |
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 |
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 |
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 |
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 |
34 | # Tags
35 | TAGS
36 |
37 | # CLion project
38 | .idea
39 |
40 | # Clang tooling
41 | compile_commands.json
42 | .clangd
43 | .cache
44 |
45 | # Build directories
46 | build*
47 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "qsym_backend/qsym"]
2 | path = runtime/qsym_backend/qsym
3 | url = https://github.com/eurecom-s3/qsym.git
4 | branch = symcc
5 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | cmake_minimum_required(VERSION 3.5)
16 | project(SymbolicCompiler)
17 |
18 | list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
19 |
20 | option(QSYM_BACKEND "Use the Qsym backend instead of our own" OFF)
21 | option(RUST_BACKEND "Build the support code required for a Rust backend as a static archive." OFF)
22 | option(TARGET_32BIT "Make the compiler work correctly with -m32" OFF)
23 |
24 | # We need to build the runtime as an external project because CMake otherwise
25 | # doesn't allow us to build it twice with different options (one 32-bit version
26 | # and one 64-bit variant).
27 | include(ExternalProject)
28 |
29 | set(SYM_RUNTIME_BUILD_ARGS
30 | -DCMAKE_AR=${CMAKE_AR}
31 | -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
32 | -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
33 | -DCMAKE_C_FLAGS_INIT=${CMAKE_C_FLAGS_INIT}
34 | -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
35 | -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
36 | -DCMAKE_CXX_FLAGS_INIT=${CMAKE_CXX_FLAGS_INIT}
37 | -DCMAKE_EXE_LINKER_FLAGS=${CMAKE_EXE_LINKER_FLAGS}
38 | -DCMAKE_EXE_LINKER_FLAGS_INIT=${CMAKE_EXE_LINKER_FLAGS_INIT}
39 | -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
40 | -DCMAKE_MODULE_LINKER_FLAGS=${CMAKE_MODULE_LINKER_FLAGS}
41 | -DCMAKE_MODULE_LINKER_FLAGS_INIT=${CMAKE_MODULE_LINKER_FLAGS_INIT}
42 | -DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}
43 | -DRUST_BACKEND=${RUST_BACKEND}
44 | -DCMAKE_SHARED_LINKER_FLAGS_INIT=${CMAKE_SHARED_LINKER_FLAGS_INIT}
45 | -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH}
46 | -DCMAKE_SYSROOT=${CMAKE_SYSROOT}
47 | -DQSYM_BACKEND=${QSYM_BACKEND}
48 | -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
49 | -DZ3_TRUST_SYSTEM_VERSION=${Z3_TRUST_SYSTEM_VERSION})
50 |
51 | ExternalProject_Add(SymRuntime
52 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime
53 | CMAKE_ARGS
54 | ${SYM_RUNTIME_BUILD_ARGS}
55 | -DCMAKE_EXPORT_COMPILE_COMMANDS=${CMAKE_EXPORT_COMPILE_COMMANDS}
56 | -DZ3_DIR=${Z3_DIR}
57 | -DLLVM_DIR=${LLVM_DIR}
58 | INSTALL_COMMAND ""
59 | BUILD_ALWAYS TRUE)
60 |
61 | ExternalProject_Get_Property(SymRuntime BINARY_DIR)
62 | set(SYM_RUNTIME_DIR ${BINARY_DIR})
63 |
64 | if (${TARGET_32BIT})
65 | ExternalProject_Add(SymRuntime32
66 | SOURCE_DIR ${CMAKE_SOURCE_DIR}/runtime
67 | CMAKE_ARGS
68 | ${SYM_RUNTIME_BUILD_ARGS}
69 | -DCMAKE_C_FLAGS="${CMAKE_C_FLAGS} -m32"
70 | -DCMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -m32"
71 | -DZ3_DIR=${Z3_32BIT_DIR}
72 | -DLLVM_DIR=${LLVM_32BIT_DIR}
73 | INSTALL_COMMAND ""
74 | BUILD_ALWAYS TRUE)
75 |
76 | ExternalProject_Get_Property(SymRuntime32 BINARY_DIR)
77 | set(SYM_RUNTIME_32BIT_DIR ${BINARY_DIR})
78 | endif()
79 |
80 | find_package(LLVM REQUIRED CONFIG)
81 |
82 | message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
83 | message(STATUS "Using LLVMConfig.cmake from ${LLVM_DIR}")
84 |
85 | if (${LLVM_VERSION_MAJOR} LESS 8 OR ${LLVM_VERSION_MAJOR} GREATER 18)
86 | message(WARNING "The software has been developed for LLVM 8 through 18; \
87 | it is unlikely to work with other versions!")
88 | endif()
89 |
90 | add_definitions(${LLVM_DEFINITIONS})
91 | include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
92 |
93 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 \
94 | -Wredundant-decls -Wcast-align -Wmissing-include-dirs -Wswitch-default \
95 | -Wextra -Wall -Winvalid-pch -Wredundant-decls -Wformat=2 \
96 | -Wmissing-format-attribute -Wformat-nonliteral -Werror -Wno-error=deprecated-declarations")
97 |
98 | # Mark nodelete to work around unload bug in upstream LLVM 5.0+
99 | set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,-z,nodelete")
100 |
101 | # This is the compiler pass that we later load into clang or opt. If LLVM is
102 | # built without RTTI we have to disable it for our library too, otherwise we'll
103 | # get linker errors.
104 | add_library(Symbolize MODULE
105 | compiler/Symbolizer.cpp
106 | compiler/Pass.cpp
107 | compiler/Runtime.cpp
108 | compiler/Main.cpp)
109 | if (NOT LLVM_ENABLE_RTTI)
110 | set_target_properties(Symbolize PROPERTIES COMPILE_FLAGS "-fno-rtti")
111 | endif()
112 |
113 | find_program(CLANG_BINARY "clang" # C compiler substituted into the symcc wrapper
114 | HINTS ${LLVM_TOOLS_BINARY_DIR}
115 | DOC "The clang binary to use in the symcc wrapper script.")
116 | find_program(CLANGPP_BINARY "clang++" # C++ compiler substituted into the sym++ wrapper
117 | HINTS ${LLVM_TOOLS_BINARY_DIR}
118 | DOC "The clang++ binary to use in the sym++ wrapper script.")
119 | if (NOT CLANG_BINARY OR NOT CLANGPP_BINARY) # fail at configure time if either compiler is missing
120 | message(FATAL_ERROR "Clang (clang and clang++) not found; please make sure that the version corresponding to your LLVM installation is available.")
121 | endif()
122 |
123 | if (${LLVM_VERSION_MAJOR} LESS 13)
124 | set(CLANG_LOAD_PASS "-Xclang -load -Xclang ")
125 | else()
126 | set(CLANG_LOAD_PASS "-fpass-plugin=")
127 | endif()
128 |
129 | configure_file("compiler/symcc.in" "symcc" @ONLY)
130 | configure_file("compiler/sym++.in" "sym++" @ONLY)
131 |
132 | add_subdirectory(test)
133 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to SymCC
2 |
3 | We encourage everyone to contribute improvements and bug fixes to SymCC. Our
4 | preferred way of accepting contributions is via GitHub pull requests. Please be
5 | sure to run clang-format on any C/C++ code you change; an easy way to do so is
6 | with `git clang-format --style LLVM` just before committing. (On Ubuntu, you can
7 | get `git-clang-format` via `apt install clang-format`.) Ideally, also add a test
8 | to your patch (see the
9 | [docs](https://github.com/eurecom-s3/symcc/blob/master/docs/Testing.txt) for
10 | details). Unfortunately, since the project is a bit short on developers at the
11 | moment, we have to ask for your patience while we review your PR.
12 |
13 | Please note that any contributions you make are licensed under the same terms as
14 | the code you're contributing to, as per the GitHub Terms of Service, [section
15 | D.6](https://docs.github.com/en/site-policy/github-terms/github-terms-of-service#6-contributions-under-repository-license).
16 | At the time of writing, this means LGPL (version 3 or later) for the SymCC
17 | runtime, and GPL (version 3 or later) for the rest of SymCC.
18 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # This file is part of SymCC.
2 | #
3 | # SymCC is free software: you can redistribute it and/or modify it under the
4 | # terms of the GNU General Public License as published by the Free Software
5 | # Foundation, either version 3 of the License, or (at your option) any later
6 | # version.
7 | #
8 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | #
12 | # You should have received a copy of the GNU General Public License along with
13 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #
16 | # The base image
17 | #
18 | FROM ubuntu:22.04 AS builder
19 |
20 | # Install dependencies
21 | RUN apt-get update \
22 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \
23 | cargo \
24 | clang-15 \
25 | cmake \
26 | g++ \
27 | git \
28 | libz3-dev \
29 | llvm-15-dev \
30 | llvm-15-tools \
31 | ninja-build \
32 | python3-pip \
33 | zlib1g-dev \
34 | && rm -rf /var/lib/apt/lists/*
35 | RUN pip3 install lit
36 |
37 | # Build AFL.
38 | RUN git clone -b v2.56b https://github.com/google/AFL.git afl \
39 | && cd afl \
40 | && make
41 |
42 | # Download the LLVM sources already so that we don't need to get them again when
43 | # SymCC changes
44 | RUN git clone -b llvmorg-15.0.0 --depth 1 https://github.com/llvm/llvm-project.git /llvm_source
45 |
46 | # Build a version of SymCC with the simple backend to compile libc++
47 | COPY . /symcc_source
48 |
49 | # Init submodules if they are not initialized yet
50 | WORKDIR /symcc_source
51 | RUN if git submodule status | grep "^-">/dev/null ; then \
52 | echo "Initializing submodules"; \
53 | git submodule init; \
54 | git submodule update; \
55 | fi
56 |
57 |
58 | #
59 | # Build SymCC with the simple backend
60 | #
61 | FROM builder AS builder_simple
62 | WORKDIR /symcc_build_simple
63 | RUN cmake -G Ninja \
64 | -DQSYM_BACKEND=OFF \
65 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \
66 | -DZ3_TRUST_SYSTEM_VERSION=on \
67 | /symcc_source \
68 | && ninja check
69 |
70 | #
71 | # Build libc++ with SymCC using the simple backend
72 | #
73 | FROM builder_simple AS builder_libcxx
74 | WORKDIR /libcxx_symcc
75 | RUN export SYMCC_REGULAR_LIBCXX=yes SYMCC_NO_SYMBOLIC_INPUT=yes \
76 | && mkdir /libcxx_symcc_build \
77 | && cd /libcxx_symcc_build \
78 | && cmake -G Ninja /llvm_source/llvm \
79 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \
80 | -DLLVM_TARGETS_TO_BUILD="X86" \
81 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \
82 | -DCMAKE_BUILD_TYPE=Release \
83 | -DCMAKE_INSTALL_PREFIX=/libcxx_symcc_install \
84 | -DCMAKE_C_COMPILER=/symcc_build_simple/symcc \
85 | -DCMAKE_CXX_COMPILER=/symcc_build_simple/sym++ \
86 | && ninja distribution \
87 | && ninja install-distribution
88 |
89 |
90 | #
91 | # Build SymCC with the Qsym backend
92 | #
93 | FROM builder_libcxx AS builder_qsym
94 | WORKDIR /symcc_build
95 | RUN cmake -G Ninja \
96 | -DQSYM_BACKEND=ON \
97 | -DCMAKE_BUILD_TYPE=RelWithDebInfo \
98 | -DZ3_TRUST_SYSTEM_VERSION=on \
99 | /symcc_source \
100 | && ninja check \
101 | && cargo install --path /symcc_source/util/symcc_fuzzing_helper
102 |
103 |
104 | #
105 | # The final image
106 | #
107 | FROM ubuntu:22.04
108 |
109 | RUN apt-get update \
110 | && DEBIAN_FRONTEND=noninteractive apt-get install -y \
111 | build-essential \
112 | clang-15 \
113 | g++ \
114 | libllvm15 \
115 | zlib1g \
116 | sudo \
117 | && rm -rf /var/lib/apt/lists/* \
118 | && useradd -m -s /bin/bash ubuntu \
119 | && echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu
120 |
121 | COPY --from=builder_qsym /symcc_build /symcc_build
122 | COPY --from=builder_qsym /root/.cargo/bin/symcc_fuzzing_helper /symcc_build/
123 | COPY util/pure_concolic_execution.sh /symcc_build/
124 | COPY --from=builder_qsym /libcxx_symcc_install /libcxx_symcc_install
125 | COPY --from=builder_qsym /afl /afl
126 | # Environment of the final image; key=value form throughout (the space-separated ENV form is legacy syntax)
127 | ENV PATH=/symcc_build:$PATH
128 | ENV AFL_PATH=/afl
129 | ENV AFL_CC=clang-15
130 | ENV AFL_CXX=clang++-15
131 | ENV SYMCC_LIBCXX_PATH=/libcxx_symcc_install
132 |
133 | USER ubuntu
134 | WORKDIR /home/ubuntu
135 | COPY sample.cpp /home/ubuntu/
136 | RUN mkdir /tmp/output
137 |
--------------------------------------------------------------------------------
/LICENSE.lgpl:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | Vagrant.configure("2") do |config|
5 | config.vm.box = "ubuntu/groovy64"
6 | config.vm.provision "shell", path: "util/quicktest.sh"
7 | end
8 |
--------------------------------------------------------------------------------
/cmake/FindFilesystem.cmake:
--------------------------------------------------------------------------------
1 | # Distributed under the OSI-approved BSD 3-Clause License. See accompanying
2 | # file Copyright.txt or https://cmake.org/licensing for details.
3 |
4 | #[=======================================================================[.rst:
5 |
6 | FindFilesystem
7 | ##############
8 |
9 | This module supports the C++17 standard library's filesystem utilities. Use the
10 | :imp-target:`std::filesystem` imported target to link against it.
11 |
12 | Options
13 | *******
14 |
15 | The ``COMPONENTS`` argument to this module supports the following values:
16 |
17 | .. find-component:: Experimental
18 | :name: fs.Experimental
19 |
20 | Allows the module to find the "experimental" Filesystem TS version of the
21 | Filesystem library. This is the library that should be used with the
22 | ``std::experimental::filesystem`` namespace.
23 |
24 | .. find-component:: Final
25 | :name: fs.Final
26 |
27 | Finds the final C++17 standard version of the filesystem library.
28 |
29 | If no components are provided, behaves as if the
30 | :find-component:`fs.Final` component was specified.
31 |
32 | If both :find-component:`fs.Experimental` and :find-component:`fs.Final` are
33 | provided, first looks for ``Final``, and falls back to ``Experimental`` in case
34 | of failure. If ``Final`` is found, :imp-target:`std::filesystem` and all
35 | :ref:`variables <fs.variables>` will refer to the ``Final`` version.
36 |
37 |
38 | Imported Targets
39 | ****************
40 |
41 | .. imp-target:: std::filesystem
42 |
43 | The ``std::filesystem`` imported target is defined when any requested
44 | version of the C++ filesystem library has been found, whether it is
45 | *Experimental* or *Final*.
46 |
47 | If no version of the filesystem library is available, this target will not
48 | be defined.
49 |
50 | .. note::
51 | This target has ``cxx_std_17`` as an ``INTERFACE``
52 | :ref:`compile language standard feature <req-lang-standards>`. Linking
53 | to this target will automatically enable C++17 if no later standard
54 | version is already required on the linking target.
55 |
56 |
57 | .. _fs.variables:
58 |
59 | Variables
60 | *********
61 |
62 | .. variable:: CXX_FILESYSTEM_IS_EXPERIMENTAL
63 |
64 | Set to ``TRUE`` when the :find-component:`fs.Experimental` version of C++
65 | filesystem library was found, otherwise ``FALSE``.
66 |
67 | .. variable:: CXX_FILESYSTEM_HAVE_FS
68 |
69 | Set to ``TRUE`` when a filesystem header was found.
70 |
71 | .. variable:: CXX_FILESYSTEM_HEADER
72 |
73 | Set to either ``filesystem`` or ``experimental/filesystem`` depending on
74 | whether :find-component:`fs.Final` or :find-component:`fs.Experimental` was
75 | found.
76 |
77 | .. variable:: CXX_FILESYSTEM_NAMESPACE
78 |
79 | Set to either ``std::filesystem`` or ``std::experimental::filesystem``
80 | depending on whether :find-component:`fs.Final` or
81 | :find-component:`fs.Experimental` was found.
82 |
83 |
84 | Examples
85 | ********
86 |
87 | Using `find_package(Filesystem)` with no component arguments:
88 |
89 | .. code-block:: cmake
90 |
91 | find_package(Filesystem REQUIRED)
92 |
93 | add_executable(my-program main.cpp)
94 | target_link_libraries(my-program PRIVATE std::filesystem)
95 |
96 |
97 | #]=======================================================================]
98 |
99 |
100 | if(TARGET std::filesystem)
101 | # This module has already been processed. Don't do it again.
102 | return()
103 | endif()
104 |
105 | cmake_minimum_required(VERSION 3.10)
106 |
107 | include(CMakePushCheckState)
108 | include(CheckIncludeFileCXX)
109 |
110 | # If we're not cross-compiling, try to run test executables.
111 | # Otherwise, assume that compile + link is a sufficient check.
112 | if(CMAKE_CROSSCOMPILING)
113 | include(CheckCXXSourceCompiles)
114 | macro(_cmcm_check_cxx_source code var)
115 | check_cxx_source_compiles("${code}" ${var})
116 | endmacro()
117 | else()
118 | include(CheckCXXSourceRuns)
119 | macro(_cmcm_check_cxx_source code var)
120 | check_cxx_source_runs("${code}" ${var})
121 | endmacro()
122 | endif()
123 |
124 | cmake_push_check_state()
125 |
126 | set(CMAKE_REQUIRED_QUIET ${Filesystem_FIND_QUIETLY})
127 |
128 | # All of our tests require C++17 or later
129 | set(CMAKE_CXX_STANDARD 17)
130 |
131 | # Normalize and check the component list we were given
132 | set(want_components ${Filesystem_FIND_COMPONENTS})
133 | if(Filesystem_FIND_COMPONENTS STREQUAL "")
134 | set(want_components Final)
135 | endif()
136 |
137 | # Warn on any unrecognized components
138 | set(extra_components ${want_components})
139 | list(REMOVE_ITEM extra_components Final Experimental)
140 | foreach(component IN LISTS extra_components)
141 | message(WARNING "Extraneous find_package component for Filesystem: ${component}")
142 | endforeach()
143 |
144 | # Detect which of Experimental and Final we should look for
145 | set(find_experimental TRUE)
146 | set(find_final TRUE)
147 | if(NOT "Final" IN_LIST want_components)
148 | set(find_final FALSE)
149 | endif()
150 | if(NOT "Experimental" IN_LIST want_components)
151 | set(find_experimental FALSE)
152 | endif()
153 |
154 | if(find_final)
155 | check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
156 | mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)
157 | if(_CXX_FILESYSTEM_HAVE_HEADER)
158 | # We found the non-experimental header. Don't bother looking for the
159 | # experimental one.
160 | set(find_experimental FALSE)
161 | endif()
162 | else()
163 | set(_CXX_FILESYSTEM_HAVE_HEADER FALSE)
164 | endif()
165 |
166 | if(find_experimental)
167 | check_include_file_cxx("experimental/filesystem" _CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER)
168 | mark_as_advanced(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER)
169 | else()
170 | set(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER FALSE)
171 | endif()
172 |
173 | if(_CXX_FILESYSTEM_HAVE_HEADER)
174 | set(_have_fs TRUE)
175 | set(_fs_header filesystem)
176 | set(_fs_namespace std::filesystem)
177 | set(_is_experimental FALSE)
178 | elseif(_CXX_FILESYSTEM_HAVE_EXPERIMENTAL_HEADER)
179 | set(_have_fs TRUE)
180 | set(_fs_header experimental/filesystem)
181 | set(_fs_namespace std::experimental::filesystem)
182 | set(_is_experimental TRUE)
183 | else()
184 | set(_have_fs FALSE)
185 | endif()
186 |
187 | set(CXX_FILESYSTEM_HAVE_FS ${_have_fs} CACHE BOOL "TRUE if we have the C++ filesystem headers")
188 | set(CXX_FILESYSTEM_HEADER ${_fs_header} CACHE STRING "The header that should be included to obtain the filesystem APIs")
189 | set(CXX_FILESYSTEM_NAMESPACE ${_fs_namespace} CACHE STRING "The C++ namespace that contains the filesystem APIs")
190 | set(CXX_FILESYSTEM_IS_EXPERIMENTAL ${_is_experimental} CACHE BOOL "TRUE if the C++ filesystem library is the experimental version")
191 |
192 | set(_found FALSE)
193 |
194 | if(CXX_FILESYSTEM_HAVE_FS)
195 | # We have some filesystem library available. Do link checks
196 | string(CONFIGURE [[
197 | #include <cstdlib>
198 | #include <@CXX_FILESYSTEM_HEADER@>
199 | #include <cstdio>
200 | int main() {
201 | auto cwd = @CXX_FILESYSTEM_NAMESPACE@::current_path();
202 | printf("%s", cwd.c_str());
203 | return EXIT_SUCCESS;
204 | }
205 | ]] code @ONLY)
206 |
207 | # Check a simple filesystem program without any linker flags
208 | _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED)
209 |
210 | set(can_link ${CXX_FILESYSTEM_NO_LINK_NEEDED})
211 |
212 | if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED)
213 | set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES})
214 | # Add the libstdc++ flag
215 | set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs)
216 | _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED)
217 | set(can_link ${CXX_FILESYSTEM_STDCPPFS_NEEDED})
218 | if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED)
219 | # Try the libc++ flag
220 | set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs)
221 | _cmcm_check_cxx_source("${code}" CXX_FILESYSTEM_CPPFS_NEEDED)
222 | set(can_link ${CXX_FILESYSTEM_CPPFS_NEEDED})
223 | endif()
224 | endif()
225 |
226 | if(can_link)
227 | add_library(std::filesystem INTERFACE IMPORTED)
228 | set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17)
229 | set(_found TRUE)
230 |
231 | if(CXX_FILESYSTEM_NO_LINK_NEEDED)
232 | # Nothing to add...
233 | elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
234 | set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lstdc++fs)
235 | elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
236 | set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_LINK_LIBRARIES -lc++fs)
237 | endif()
238 | endif()
239 | endif()
240 |
241 | cmake_pop_check_state()
242 |
243 | set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE)
244 |
245 | if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND)
246 | message(FATAL_ERROR "Cannot run simple program using std::filesystem")
247 | endif()
248 |
--------------------------------------------------------------------------------
/compiler/Main.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #include
16 | #if LLVM_VERSION_MAJOR <= 15
17 | #include
18 | #endif
19 | #include
20 | #include
21 |
22 | #if LLVM_VERSION_MAJOR >= 13
23 | #include
24 | #include
25 |
26 | #if LLVM_VERSION_MAJOR >= 14
27 | #include
28 | #else
29 | using OptimizationLevel = llvm::PassBuilder::OptimizationLevel;
30 | #endif
31 | #endif
32 |
33 | #if LLVM_VERSION_MAJOR >= 15
34 | #include
35 | #else
36 | #include
37 | #endif
38 |
39 | #if LLVM_VERSION_MAJOR >= 13
40 | #include
41 | #include
42 |
43 | #if LLVM_VERSION_MAJOR >= 14
44 | #include
45 | #else
46 | using OptimizationLevel = llvm::PassBuilder::OptimizationLevel;
47 | #endif
48 | #endif
49 |
50 | #include "Pass.h"
51 |
52 | using namespace llvm;
53 |
54 | //
55 | // Legacy pass registration (up to LLVM 15)
56 | //
57 |
58 | #if LLVM_VERSION_MAJOR <= 15
59 |
60 | void addSymbolizeLegacyPass(const PassManagerBuilder & /* unused */,
61 | legacy::PassManagerBase &PM) {
62 | PM.add(createScalarizerPass());
63 | PM.add(createLowerAtomicPass());
64 | PM.add(new SymbolizeLegacyPass());
65 | }
66 |
67 | // Make the pass known to opt.
68 | static RegisterPass X("symbolize", "Symbolization Pass");
69 | // Tell frontends to run the pass automatically.
70 | static struct RegisterStandardPasses Y(PassManagerBuilder::EP_VectorizerStart,
71 | addSymbolizeLegacyPass);
72 | static struct RegisterStandardPasses
73 | Z(PassManagerBuilder::EP_EnabledOnOptLevel0, addSymbolizeLegacyPass);
74 |
75 | #endif
76 |
77 | //
78 | // New pass registration (LLVM 13 and above)
79 | //
80 |
81 | #if LLVM_VERSION_MAJOR >= 13
82 |
83 | PassPluginLibraryInfo getSymbolizePluginInfo() {
84 | return {LLVM_PLUGIN_API_VERSION, "Symbolization Pass", LLVM_VERSION_STRING,
85 | [](PassBuilder &PB) {
86 | // We need to act on the entire module as well as on each function.
87 | // Those actions are independent from each other, so we register a
88 | // module pass at the start of the pipeline and a function pass just
89 | // before the vectorizer. (There doesn't seem to be a way to run
90 | // module passes at the start of the vectorizer, hence the split.)
91 | PB.registerPipelineStartEPCallback(
92 | [](ModulePassManager &PM, OptimizationLevel) {
93 | PM.addPass(SymbolizePass());
94 | });
95 | PB.registerVectorizerStartEPCallback(
96 | [](FunctionPassManager &PM, OptimizationLevel) {
97 | PM.addPass(ScalarizerPass());
98 | PM.addPass(LowerAtomicPass());
99 | PM.addPass(SymbolizePass());
100 | });
101 | }};
102 | }
103 |
104 | extern "C" LLVM_ATTRIBUTE_WEAK PassPluginLibraryInfo llvmGetPassPluginInfo() {
105 | return getSymbolizePluginInfo();
106 | }
107 |
108 | #endif
109 |
--------------------------------------------------------------------------------
/compiler/Pass.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #include "Pass.h"
16 |
17 | #include
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 | #include
25 | #include
26 | #include
27 |
28 | #if LLVM_VERSION_MAJOR < 14
29 | #include
30 | #else
31 | #include
32 | #endif
33 |
34 | #include "Runtime.h"
35 | #include "Symbolizer.h"
36 |
37 | using namespace llvm;
38 |
39 | #ifndef NDEBUG
40 | #define DEBUG(X) \
41 | do { \
42 | X; \
43 | } while (false)
44 | #else
45 | #define DEBUG(X) ((void)0)
46 | #endif
47 |
48 | char SymbolizeLegacyPass::ID = 0;
49 |
50 | namespace {
51 |
52 | static constexpr char kSymCtorName[] = "__sym_ctor";
53 |
54 | bool instrumentModule(Module &M) {
55 | DEBUG(errs() << "Symbolizer module instrumentation\n");
56 |
57 | // Redirect calls to external functions to the corresponding wrappers and
58 | // rename internal functions.
59 | for (auto &function : M.functions()) {
60 | auto name = function.getName();
61 | if (isInterceptedFunction(function))
62 | function.setName(name + "_symbolized");
63 | }
64 |
65 | // Insert a constructor that initializes the runtime and any globals.
66 | Function *ctor;
67 | std::tie(ctor, std::ignore) = createSanitizerCtorAndInitFunctions(
68 | M, kSymCtorName, "_sym_initialize", {}, {});
69 | appendToGlobalCtors(M, ctor, 0);
70 |
71 | return true;
72 | }
73 |
74 | bool canLower(const CallInst *CI) {
75 | const Function *Callee = CI->getCalledFunction();
76 | if (!Callee)
77 | return false;
78 |
79 | switch (Callee->getIntrinsicID()) {
80 | case Intrinsic::expect:
81 | case Intrinsic::ctpop:
82 | case Intrinsic::ctlz:
83 | case Intrinsic::cttz:
84 | case Intrinsic::prefetch:
85 | case Intrinsic::pcmarker:
86 | case Intrinsic::dbg_declare:
87 | case Intrinsic::dbg_label:
88 | case Intrinsic::eh_typeid_for:
89 | case Intrinsic::annotation:
90 | case Intrinsic::ptr_annotation:
91 | case Intrinsic::assume:
92 | #if LLVM_VERSION_MAJOR > 11
93 | case Intrinsic::experimental_noalias_scope_decl:
94 | #endif
95 | case Intrinsic::var_annotation:
96 | case Intrinsic::sqrt:
97 | case Intrinsic::log:
98 | case Intrinsic::log2:
99 | case Intrinsic::log10:
100 | case Intrinsic::exp:
101 | case Intrinsic::exp2:
102 | case Intrinsic::pow:
103 | case Intrinsic::sin:
104 | case Intrinsic::cos:
105 | case Intrinsic::floor:
106 | case Intrinsic::ceil:
107 | case Intrinsic::trunc:
108 | case Intrinsic::round:
109 | #if LLVM_VERSION_MAJOR > 10
110 | case Intrinsic::roundeven:
111 | #endif
112 | case Intrinsic::copysign:
113 | #if LLVM_VERSION_MAJOR < 16
114 | case Intrinsic::flt_rounds:
115 | #else
116 | case Intrinsic::get_rounding:
117 | #endif
118 | case Intrinsic::invariant_start:
119 | case Intrinsic::lifetime_start:
120 | case Intrinsic::invariant_end:
121 | case Intrinsic::lifetime_end:
122 | return true;
123 | default:
124 | return false;
125 | }
126 |
127 | llvm_unreachable("Control cannot reach here");
128 | }
129 |
130 | void liftInlineAssembly(CallInst *CI) {
131 | // Expand the inline assembly called by CI using the target's lowering, if any.
132 | // TODO When we don't have to worry about the old pass manager anymore, move
133 | // the initialization to the pass constructor. (Currently there are two
134 | // passes, but only if we're on a recent enough LLVM...)
135 | Function *F = CI->getFunction();
136 | Module *M = F->getParent();
137 | auto triple = M->getTargetTriple();
138 |
139 | std::string error;
140 | auto target = TargetRegistry::lookupTarget(triple, error);
141 | if (!target) {
142 | errs() << "Warning: can't get target info to lift inline assembly\n";
143 | return;
144 | }
145 |
146 | auto cpu = F->getFnAttribute("target-cpu").getValueAsString();
147 | auto features = F->getFnAttribute("target-features").getValueAsString();
148 | // createTargetMachine can return null, so check TM before dereferencing it.
149 | std::unique_ptr<TargetMachine> TM(
150 | target->createTargetMachine(triple, cpu, features, TargetOptions(), {}));
151 | auto subTarget = TM ? TM->getSubtargetImpl(*F) : nullptr;
152 | if (subTarget == nullptr)
153 | return;
154 |
155 | auto targetLowering = subTarget->getTargetLowering();
156 | if (targetLowering == nullptr)
157 | return;
158 |
159 | targetLowering->ExpandInlineAsm(CI);
160 | }
161 |
162 | bool instrumentFunction(Function &F) {
163 | auto functionName = F.getName();
164 | if (functionName == kSymCtorName)
165 | return false;
166 |
167 | DEBUG(errs() << "Symbolizing function ");
168 | DEBUG(errs().write_escaped(functionName) << '\n');
169 |
170 | SmallVector allInstructions;
171 | allInstructions.reserve(F.getInstructionCount());
172 | for (auto &I : instructions(F))
173 | allInstructions.push_back(&I);
174 |
175 | IntrinsicLowering IL(F.getParent()->getDataLayout());
176 | for (auto *I : allInstructions) {
177 | if (auto *CI = dyn_cast(I)) {
178 | if (canLower(CI)) {
179 | IL.LowerIntrinsicCall(CI);
180 | } else if (isa(CI->getCalledOperand())) {
181 | liftInlineAssembly(CI);
182 | }
183 | }
184 | }
185 |
186 | allInstructions.clear();
187 | for (auto &I : instructions(F))
188 | allInstructions.push_back(&I);
189 |
190 | Symbolizer symbolizer(*F.getParent());
191 | symbolizer.symbolizeFunctionArguments(F);
192 |
193 | for (auto &basicBlock : F)
194 | symbolizer.insertBasicBlockNotification(basicBlock);
195 |
196 | for (auto *instPtr : allInstructions)
197 | symbolizer.visit(instPtr);
198 |
199 | symbolizer.finalizePHINodes();
200 | symbolizer.shortCircuitExpressionUses();
201 |
202 | // DEBUG(errs() << F << '\n');
203 | assert(!verifyFunction(F, &errs()) &&
204 | "SymbolizePass produced invalid bitcode");
205 |
206 | return true;
207 | }
208 |
209 | } // namespace
210 |
211 | bool SymbolizeLegacyPass::doInitialization(Module &M) {
212 | return instrumentModule(M);
213 | }
214 |
215 | bool SymbolizeLegacyPass::runOnFunction(Function &F) {
216 | return instrumentFunction(F);
217 | }
218 |
219 | #if LLVM_VERSION_MAJOR >= 13
220 |
221 | PreservedAnalyses SymbolizePass::run(Function &F, FunctionAnalysisManager &) {
222 | return instrumentFunction(F) ? PreservedAnalyses::none()
223 | : PreservedAnalyses::all();
224 | }
225 |
226 | PreservedAnalyses SymbolizePass::run(Module &M, ModuleAnalysisManager &) {
227 | return instrumentModule(M) ? PreservedAnalyses::none()
228 | : PreservedAnalyses::all();
229 | }
230 |
231 | #endif
232 |
--------------------------------------------------------------------------------
/compiler/Pass.h:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #ifndef PASS_H
16 | #define PASS_H
17 |
18 | #include
19 | #include
20 | #include
21 |
22 | #if LLVM_VERSION_MAJOR >= 13
23 | #include
24 | #endif
25 |
26 | class SymbolizeLegacyPass : public llvm::FunctionPass {
27 | public:
28 | static char ID;
29 |
30 | SymbolizeLegacyPass() : FunctionPass(ID) {}
31 |
32 | virtual bool doInitialization(llvm::Module &M) override;
33 | virtual bool runOnFunction(llvm::Function &F) override;
34 | };
35 |
36 | #if LLVM_VERSION_MAJOR >= 13
37 |
38 | class SymbolizePass : public llvm::PassInfoMixin {
39 | public:
40 | llvm::PreservedAnalyses run(llvm::Function &F,
41 | llvm::FunctionAnalysisManager &);
42 | llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &);
43 |
44 | static bool isRequired() { return true; }
45 | };
46 |
47 | #endif
48 |
49 | #endif
50 |
--------------------------------------------------------------------------------
/compiler/Runtime.h:
--------------------------------------------------------------------------------
1 | // This file is part of SymCC.
2 | //
3 | // SymCC is free software: you can redistribute it and/or modify it under the
4 | // terms of the GNU General Public License as published by the Free Software
5 | // Foundation, either version 3 of the License, or (at your option) any later
6 | // version.
7 | //
8 | // SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
9 | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
10 | // A PARTICULAR PURPOSE. See the GNU General Public License for more details.
11 | //
12 | // You should have received a copy of the GNU General Public License along with
13 | // SymCC. If not, see <https://www.gnu.org/licenses/>.
14 |
15 | #ifndef RUNTIME_H
16 | #define RUNTIME_H
17 |
18 | #include
19 | #include
20 |
21 | #if LLVM_VERSION_MAJOR >= 9 && LLVM_VERSION_MAJOR < 11
22 | using SymFnT = llvm::Value *;
23 | #else
24 | using SymFnT = llvm::FunctionCallee;
25 | #endif
26 |
27 | /// Runtime functions
28 | struct Runtime {
29 | Runtime(llvm::Module &M);
30 |
31 | SymFnT buildInteger{};
32 | SymFnT buildInteger128{};
33 | SymFnT buildFloat{};
34 | SymFnT buildNullPointer{};
35 | SymFnT buildTrue{};
36 | SymFnT buildFalse{};
37 | SymFnT buildBool{};
38 | SymFnT buildSExt{};
39 | SymFnT buildZExt{};
40 | SymFnT buildTrunc{};
41 | SymFnT buildBswap{};
42 | SymFnT buildIntToFloat{};
43 | SymFnT buildFloatToFloat{};
44 | SymFnT buildBitsToFloat{};
45 | SymFnT buildFloatToBits{};
46 | SymFnT buildFloatToSignedInt{};
47 | SymFnT buildFloatToUnsignedInt{};
48 | SymFnT buildFloatAbs{};
49 | SymFnT buildBoolAnd{};
50 | SymFnT buildBoolOr{};
51 | SymFnT buildBoolXor{};
52 | SymFnT buildBoolToBit{};
53 | SymFnT buildBitToBool{};
54 | SymFnT buildAddOverflow{};
55 | SymFnT buildSubOverflow{};
56 | SymFnT buildMulOverflow{};
57 | SymFnT buildSAddSat{};
58 | SymFnT buildUAddSat{};
59 | SymFnT buildSSubSat{};
60 | SymFnT buildUSubSat{};
61 | SymFnT buildSShlSat{};
62 | SymFnT buildUShlSat{};
63 | SymFnT buildFshl{};
64 | SymFnT buildFshr{};
65 | SymFnT buildAbs{};
66 | SymFnT buildConcat{};
67 | SymFnT pushPathConstraint{};
68 | SymFnT getParameterExpression{};
69 | SymFnT setParameterExpression{};
70 | SymFnT setReturnExpression{};
71 | SymFnT getReturnExpression{};
72 | SymFnT memcpy{};
73 | SymFnT memset{};
74 | SymFnT memmove{};
75 | SymFnT readMemory{};
76 | SymFnT writeMemory{};
77 | SymFnT buildZeroBytes{};
78 | SymFnT buildInsert{};
79 | SymFnT buildExtract{};
80 | SymFnT notifyCall{};
81 | SymFnT notifyRet{};
82 | SymFnT notifyBasicBlock{};
83 |
84 | /// Mapping from icmp predicates to the functions that build the corresponding
85 | /// symbolic expressions.
86 | std::array comparisonHandlers{};
87 |
88 | /// Mapping from binary operators to the functions that build the
89 | /// corresponding symbolic expressions.
90 | std::array binaryOperatorHandlers{};
91 |
92 | /// Mapping from unary operators to the functions that build the
93 | /// corresponding symbolic expressions.
94 | std::array unaryOperatorHandlers{};
95 | };
96 |
97 | bool isInterceptedFunction(const llvm::Function &f);
98 |
99 | #endif
100 |
--------------------------------------------------------------------------------
/compiler/sym++.in:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYM_RUNTIME_DIR@}"
18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYM_RUNTIME_32BIT_DIR@}"
19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libSymbolize.so"
20 | libcxx_var=SYMCC_LIBCXX_PATH
21 | compiler="${SYMCC_CLANGPP:-@CLANGPP_BINARY@}"
22 |
23 | # Find out if we're cross-compiling for a 32-bit architecture
24 | runtime_dir="$runtime_64bit_dir"
25 | for arg in "$@"; do
26 | if [[ $arg == "-m32" ]]; then
27 | if [ -z "$runtime_32bit_dir" ]; then
28 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2
29 | exit 255
30 | else
31 | runtime_dir="$runtime_32bit_dir"
32 | libcxx_var=SYMCC_LIBCXX_32BIT_PATH
33 | break
34 | fi
35 | fi
36 | done
37 |
38 | if [[ -v SYMCC_REGULAR_LIBCXX ]]; then
39 | stdlib_cflags=
40 | stdlib_ldflags=
41 | elif [[ ! -v $libcxx_var ]]; then
42 | >&2 echo "Please set $libcxx_var to the directory containing libc++ or confirm usage of the system library by setting SYMCC_REGULAR_LIBCXX!"
43 | exit 255
44 | else
45 | # It is important that the resulting binaries load libstdc++ before libc++;
46 | # otherwise our backend calls the instrumented library in cases where
47 | # exported names collide.
48 | stdlib_cflags="-isystem ${!libcxx_var}/include/c++/v1 -nostdlib++"
49 | stdlib_ldflags="-L${!libcxx_var}/lib -Wl,-rpath,${!libcxx_var}/lib -lstdc++ -lc++ -stdlib=libc++"
50 | fi
51 |
52 | if [ $# -eq 0 ]; then
53 | echo "Use sym++ as a drop-in replacement for clang++, e.g., sym++ -O2 -o foo foo.cpp" >&2
54 | exit 1
55 | fi
56 |
57 | exec $compiler \
58 | @CLANG_LOAD_PASS@"$pass" \
59 | $stdlib_cflags \
60 | "$@" \
61 | $stdlib_ldflags \
62 | -L"$runtime_dir" \
63 | -lSymRuntime \
64 | -Wl,-rpath,"$runtime_dir" \
65 | -Qunused-arguments
66 |
--------------------------------------------------------------------------------
/compiler/symcc.in:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # This file is part of SymCC.
4 | #
5 | # SymCC is free software: you can redistribute it and/or modify it under the
6 | # terms of the GNU General Public License as published by the Free Software
7 | # Foundation, either version 3 of the License, or (at your option) any later
8 | # version.
9 | #
10 | # SymCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 | # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
12 | # A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License along with
15 | # SymCC. If not, see <https://www.gnu.org/licenses/>.
16 |
17 | runtime_64bit_dir="${SYMCC_RUNTIME_DIR:-@SYM_RUNTIME_DIR@}"
18 | runtime_32bit_dir="${SYMCC_RUNTIME32_DIR:-@SYM_RUNTIME_32BIT_DIR@}"
19 | pass="${SYMCC_PASS_DIR:-@CMAKE_CURRENT_BINARY_DIR@}/libSymbolize.so"
20 | compiler="${SYMCC_CLANG:-@CLANG_BINARY@}"
21 |
22 | # Find out if we're cross-compiling for a 32-bit architecture
23 | runtime_dir="$runtime_64bit_dir"
24 | for arg in "$@"; do
25 | if [[ $arg == "-m32" ]]; then
26 | if [ -z "$runtime_32bit_dir" ]; then
27 | echo "SymCC: 32-bit compilation requested but SymCC was not built with TARGET_32BIT=ON" >&2
28 | exit 255
29 | else
30 | runtime_dir="$runtime_32bit_dir"
31 | break
32 | fi
33 | fi
34 | done
35 |
36 | if [ $# -eq 0 ]; then
37 | echo "Use symcc as a drop-in replacement for clang, e.g., symcc -O2 -o foo foo.c" >&2
38 | exit 1
39 | fi
40 |
41 | exec $compiler \
42 | @CLANG_LOAD_PASS@"$pass" \
43 | "$@" \
44 | -L"$runtime_dir" \
45 | -lSymRuntime \
46 | -Wl,-rpath,"$runtime_dir" \
47 | -Qunused-arguments
48 |
--------------------------------------------------------------------------------
/docs/32-bit.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Compiling 32-bit programs on a 64-bit host
4 | (and cross-compilation in general)
5 |
6 |
7 | In theory, SymCC can use clang to cross-compile programs for any architecture
8 | that LLVM supports. Note, however, that this requires cross-compiling the
9 | symbolic backend and its dependencies as well, and then linking the right
10 | backend into the target programs. We currently provide automatic handling only
11 | for the common case of compiling 32-bit libraries and programs on a 64-bit host
12 | machine. In all other cases, we recommend building SymCC on the target machine,
13 | so that cross-compilation is not needed.
14 |
15 | Let's assume that you're running SymCC on a 64-bit x86 machine, wanting to
16 | create 32-bit binaries from some code under test. First of all, make sure that
17 | 32-bit versions of the backend's dependencies are available: LLVM (usually
18 | obtainable via packages of your Linux distribution) and Z3 (which is reasonably
19 | easy to build following its CMake instructions and exporting CFLAGS="-m32" and
20 | CXXFLAGS="-m32"). Beware of Z3 version 4.8.7 - it contains a bug that breaks the
21 | 32-bit build with an error related to "__builtin_ctz" (see
22 | https://github.com/Z3Prover/z3/issues/2727). If you build Z3 yourself, note that
23 | it is sufficient to point SymCC to the build directory - there is no need to
24 | install the 32-bit version of Z3 in your system.
25 |
26 | Once the dependencies for 32-bit SymCC are available (as well as the 64-bit
27 | dependencies mentioned in the main README), configure and build SymCC as usual
28 | but add "-DTARGET_32BIT=ON" to the CMake invocation. If the build system doesn't
29 | find your 32-bit versions of LLVM and Z3, and specify their locations with
30 | "-DLLVM_32BIT_DIR=/some/path" and "-DZ3_32BIT_DIR=/some/other/path",
31 | respectively - analogously to how you would hint at the 64-bit versions.
32 |
33 | The resulting "symcc" and "sym++" scripts work like regular SymCC, but they
34 | additionally understand the "-m32" switch, which tells Clang to build 32-bit
35 | artifacts. If you build anything with "-m32", SymCC will make sure that the
36 | 32-bit version of the symbolic backend is linked to it instead of the 64-bit
37 | variant that would normally be used. Note that, in order to compile C++ code
38 | with "sym++" in 32-bit mode, you also need to build a 32-bit version of libc++
39 | (following the instructions for the 64-bit build from docs/C++.txt but
40 | additionally passing "-DLLVM_BUILD_32_BITS=ON" to CMake); communicate its
41 | location to SymCC via the environment variable SYMCC_LIBCXX_32BIT_PATH.
42 |
43 | If your 64-bit host is equipped to run 32-bit binaries, you can simply execute
44 | the instrumented programs produced by SymCC like any other program.
45 |
--------------------------------------------------------------------------------
/docs/Backends.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Symbolic backends
4 |
5 |
6 | We support different symbolic backends; currently, we have our own backend,
7 | which is a custom thin wrapper around Z3, and the QSYM backend. Users choose
8 | with a build option which backend to use. This file documents the internals of
9 | this mechanism.
10 |
11 | At compile time, we always insert the same calls, no matter which backend is
12 | used. Also, we always link against "libSymRuntime.so", so the choice of backend
13 | is deferred until run time. From the target program's point of view, the only
14 | requirement on a backend is that it be a shared library with the expected name
15 | that implements the interface defined in runtime/RuntimeCommon.h (where the type
16 | "SymExpr" is defined to be something of pointer width).
17 |
18 | Depending on the build option QSYM_BACKEND we build either our own backend or
19 | parts of QSYM (which are pulled in via a git submodule) and a small translation
20 | layer. The code used by both backends is in the directory "runtime", while the
21 | specific parts are in "runtime/simple_backend" and "runtime/qsym_backend".
22 |
23 | The QSYM backend expects to be passed the program counter at each jump
24 | instruction, which is used to uniquely identify the jump site and implement a
25 | per-site back-off mechanism. However, there is no reason for the supplied value
26 | to be the program counter as long as it is a unique identifier. Since it is
27 | somewhat challenging to obtain the current program counter in our
28 | compilation-based setting, we follow an alternative approach: in the compiler
29 | pass, we identify each jump site by the address of the LLVM object that
30 | represents the instruction; experiments suggest that it is a good enough
31 | identifier. This value is embedded into the target program as a constant and
32 | passed to the backend at run time.
33 |
34 | Before compiling the QSYM code, we are expected to execute two Python scripts
35 | that the QSYM authors use for code generation; two custom CMake targets take
36 | care of running the scripts and tracking changes to the relevant source files.
37 |
--------------------------------------------------------------------------------
/docs/C++.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Compiling C++
4 |
5 |
6 | SymCC has full support for C++ code and provides a wrapper "sym++" around
7 | clang++. Since C++ programs typically depend on the C++ standard library, we
8 | have two options when building them with SymCC:
9 |
10 | 1. Use the C++ standard library provided by the system. This is the easiest,
11 | requiring no additional effort, but it has an important drawback: data that
12 | passes through the standard library will be concretized, i.e., we lose track
13 | of the corresponding symbolic expressions.
14 | 2. The alternative is to build an instrumented C++ standard library. This means
15 | that we can track data through the library, but it requires building the
16 | library and compiling all code against it.
17 |
18 | We discuss both approaches in more detail below.
19 |
20 |
21 | Building against the system's C++ standard library
22 |
23 |
24 | In order to use the regular (uninstrumented) C++ standard library that the
25 | system provides, just call sym++ as a drop-in replacement for clang++:
26 |
27 | $ export SYMCC_REGULAR_LIBCXX=yes
28 | $ sym++ -o myprogram mysource.cpp
29 | $ ./myprogram
30 |
31 | The program will execute and produce alternative outputs as usual with SymCC,
32 | but it will not be able to trace operations that happen in C++ standard classes,
33 | such as std::vector.
34 |
35 |
36 | Instrumenting the C++ standard library
37 |
38 |
39 | Building an instrumented C++ standard library is a one-time effort; the library
40 | can then be used in all subsequent C++ compilations. We use "libc++", the LLVM
41 | project's implementation of the standard library. First, get the source code:
42 |
43 | $ git clone --depth 1 https://github.com/llvm/llvm-project.git
44 |
45 | Then build the library with SymCC:
46 |
47 | $ mkdir libcxx_symcc
48 | $ cd libcxx_symcc
49 | $ export SYMCC_REGULAR_LIBCXX=yes
50 | $ export SYMCC_NO_SYMBOLIC_INPUT=yes
51 | $ cmake -G Ninja /path-to-llvm-project/llvm \
52 | -DLLVM_ENABLE_PROJECTS="libcxx;libcxxabi" \
53 | -DLLVM_TARGETS_TO_BUILD="X86" \
54 | -DLLVM_DISTRIBUTION_COMPONENTS="cxx;cxxabi;cxx-headers" \
55 | -DCMAKE_BUILD_TYPE=Release \
56 | -DCMAKE_INSTALL_PREFIX=/some/convenient/location \
57 | -DCMAKE_C_COMPILER=/path-to-symcc-with-simple-backend/symcc \
58 | -DCMAKE_CXX_COMPILER=/path-to-symcc-with-simple-backend/sym++
59 | $ ninja distribution
60 | $ ninja install-distribution
61 | $ unset SYMCC_REGULAR_LIBCXX SYMCC_NO_SYMBOLIC_INPUT
62 |
63 | Note the two environment variables: SYMCC_REGULAR_LIBCXX avoids a
64 | chicken-and-egg problem - without it, SymCC would expect to compile against the
65 | instrumented C++ standard library. SYMCC_NO_SYMBOLIC_INPUT disables symbolic
66 | handling of input data - the build process of libc++ involves the creation of
67 | helper programs that are subsequently run, and we do not want them to perform
68 | symbolic analysis.
69 |
70 | A word on the choice of backends: While the instrumented libc++ will work with
71 | both backends, building it currently doesn't work with the QSYM backend. Just
72 | use the simple backend for the build process - there is no problem in using the
73 | library with the QSYM backend later. For very interested readers, here is an
74 | explanation of the problem: libc++ is an LLVM project and as such uses LLVM
75 | support code. During the build process, it builds a code-generation tool that is
76 | subsequently invoked (hence the recommendation to set SYMCC_NO_SYMBOLIC_INPUT).
77 | At run-time, the tool loads code built from the LLVM sources we obtained via git
78 | above. Why is this a problem for the QSYM backend? QSYM uses support code from
79 | LLVM as well, which means that the QSYM backend is linked against your system's
80 | LLVM libraries. If we build libc++ with the QSYM backend, the code-generation
81 | tool loads the QSYM code at run time and, via dependency resolution, also the
82 | system's LLVM installation. The end result is that we have two versions of LLVM
83 | support code in the same process - the system version and the one built from git
84 | - which will most likely collide. Using the simple backend avoids the problem
85 | because it doesn't depend on the system installation of LLVM.
86 |
87 | Once the library is ready, tell SymCC where to find it and compile C++ code as
88 | usual:
89 |
90 | $ export SYMCC_LIBCXX_PATH=/path-provided-as-cmake-install-prefix-for-libcxx
91 | $ sym++ -o myprogram mysource.cpp
92 | $ ./myprogram
93 |
94 | Now the program will use the instrumented C++ standard library, which enables it
95 | to trace computations inside the library. Note that you need to compile all code
96 | against the instrumented standard library - attempts to mix it with code
97 | compiled against the system's C++ standard library will lead to linker errors.
98 | And if you're so brave as to mix it with code compiled against an uninstrumented
99 | libc++, a run-time crash is the best you can hope for...
100 |
--------------------------------------------------------------------------------
/docs/Concreteness.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Concreteness Checks
4 |
5 |
6 | If we do not distinguish between symbolic and concrete values in the program
7 | under test, then we end up passing every computation to the solver, including
8 | concrete ones. Since all parameters are known in the purely concrete case, the
9 | solver will just repeat the computation (most likely in a less efficient way)
10 | and conclude that there is no way to change its outcome. We can avoid such
11 | wasted effort by only passing symbolic computations to the solver.
12 |
13 | There are two stages at which data can be identified as concrete:
14 | 1. At compile time, if a value is known to be a constant we can conclude that it
15 | will always be concrete at run time.
16 | 2. At run time, a value that is not a constant may still turn out to be
17 | concrete. For example, data read from memory can be symbolic or concrete.
18 |
19 | If we detect in the compiler pass that a value is a compile-time constant (case
20 | 1 above), we do not emit code for symbolic handling at all. However, for any
21 | other type of data, we need to generate code that handles the case of it being
22 | symbolic at run time. Concretely (no pun intended), we mark concrete values at
23 | run time by setting their corresponding symbolic expression in shadow memory to
24 | null. This makes it very cheap to check concreteness during execution: just run
25 | a null check on the symbolic expression.
26 |
27 | The code that we inject into the program under test performs concreteness checks
28 | on the arguments of each instruction. For example, when the program adds two
29 | values, the generated code performs the addition and additionally represents it
30 | symbolically according to the concreteness of the two addends. There are
31 | multiple cases to distinguish:
32 | 1. If all arguments of a computation are concrete, we can skip symbolic
33 | processing altogether and just set the result expression to null, indicating
34 | that the result is a concrete value.
35 | 2. If at least one argument is symbolic, we need to generate an expression
36 | representing the result. Therefore, we create expressions for all arguments that
37 | lack one (concrete arguments have null expressions so far) and call into the
38 | run-time support library to produce an expression according to the performed
39 | computation. There are several opportunities for optimization, e.g., when
40 | the computation only has a single argument that is not a compile-time
41 | constant we do not need to check it for concreteness again.
42 |
43 | It is important to note that these checks cannot be performed by the compiler
44 | because the concreteness of non-constant data is not known at compile time.
45 | Instead, the compiler emits code that performs the required checks at run time
46 | and acts accordingly.
47 |
--------------------------------------------------------------------------------
/docs/Configuration.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Configuration options
4 |
5 |
6 | SymCC is configured at two different stages:
7 |
8 | 1. At compile time, you decide which features to enable, which mainly affects
9 | compilation time and the set of dependencies. This is done via arguments to
10 | CMake.
11 |
12 | 2. When you run programs that have been compiled with SymCC, environment
13 | variables control various aspects of the execution and analysis.
14 |
15 | We list all available options for each stage in turn.
16 |
17 |
18 | Compile-time options
19 |
20 |
21 | Each of these is passed to CMake with "-D" when configuring the build:
22 |
23 | - QSYM_BACKEND=ON/OFF (default OFF): Compile either the QSYM backend or our
24 | simple Z3 wrapper (see docs/Backends.txt for details). Note that binaries
25 | produced by the SymCC compiler are backend-agnostic; you can use
26 | LD_LIBRARY_PATH to switch between backends per execution.
27 |
28 | - TARGET_32BIT=ON/OFF (default OFF): Enable support for 32-bit compilation on
29 | 64-bit hosts. This will essentially make the compiler switch "-m32" work as
30 | expected; see docs/32-bit.txt for details.
31 |
32 | - LLVM_DIR/LLVM_32BIT_DIR (default empty): Hints for the build system to find
33 | LLVM if it's in a non-standard location.
34 |
35 | - Z3_DIR/Z3_32BIT_DIR (default empty): Hints for the build system to find Z3 if
36 | it's in a non-standard location.
37 |
38 | - Z3_TRUST_SYSTEM_VERSION (default OFF): Trust that the system provides a
39 | suitable version of Z3 if the corresponding CMake module can't be found. Use
40 | this with Linux distributions that don't package the CMake module but still
41 | ship an otherwise usable development setup (e.g., Fedora before F33). Note
42 | that we can't check the Z3 version for compatibility in this case, so prepare
43 | for compiler errors if the system-wide installation of Z3 is too old.
44 |
45 |
46 | Run-time options
47 |
48 |
49 | "Run time" refers to the time when you run programs compiled with SymCC, not
50 | when you run SymCC itself. In other words, these are settings that you can
51 | change on every execution of an instrumented program. They are specified via
52 | environment variables.
53 |
54 | - SYMCC_NO_SYMBOLIC_INPUT=0/1 (default 0): When set to 1, input is never marked
55 | as symbolic; in other words, instrumented programs will run just like their
56 | uninstrumented counterparts.
57 |
58 | - SYMCC_OUTPUT_DIR (default "/tmp/output"): This is the directory where SymCC
59 | will store new inputs (QSYM backend only). If you prefer to handle them
60 | programmatically, make your program call symcc_set_test_case_handler; the
61 | handler will be called instead of the default handler each time the backend
62 | generates a new input.
63 |
64 | - SYMCC_INPUT_FILE (default empty): When empty, SymCC treats data read from
65 | standard input as symbolic; when set to a file name, any data read from that
66 | file is considered symbolic. Ignored if SYMCC_NO_SYMBOLIC_INPUT is set to 1.
67 |
68 | - SYMCC_MEMORY_INPUT=0/1 (default 0): When set to 1, expect the program under
69 | test to communicate symbolic inputs with one or more calls to
70 | symcc_make_symbolic. Can't be combined with SYMCC_INPUT_FILE. Ignored if
71 | SYMCC_NO_SYMBOLIC_INPUT is set to 1.
72 |
73 | - SYMCC_LOG_FILE (default empty): When set to a file name, SymCC creates the
74 | file (or overwrites any existing file!) and uses it to log backend activity
75 | including solver output (simple backend only).
76 |
77 | - SYMCC_ENABLE_LINEARIZATION=0/1 (default 0): Enable QSYM's basic-block pruning,
78 | a call-stack-aware strategy to reduce solver queries when executing code
79 | repeatedly (QSYM backend only). See the QSYM paper for details; highly
80 | recommended for fuzzing and enabled automatically by the fuzzing helper.
81 |
82 | - SYMCC_AFL_COVERAGE_MAP (default empty): When set to the file name of an
83 | AFL-style coverage map, load the map before executing the target program and
84 | use it to skip solver queries for paths that have already been covered (QSYM
85 | backend only). The map is updated in place, so beware of races when running
86 | multiple instances of SymCC! The fuzzing helper uses this to remember the
87 | state of exploration across multiple executions of the target program.
88 | Warning: This setting has a misleading name - while the format of the map
89 | follows (classic) AFL, the variable isn't meant to point at a map file that
90 | AFL uses too!
91 |
92 | (Most people should stop reading here.)
93 |
94 |
95 | Advanced options
96 |
97 |
98 | There is actually a third category of options: when compiling with SymCC, you
99 | can specify the location of its various components via environment variables.
100 | This is not necessary in most cases because the build system makes sure that all
101 | components know about each other; however, in some advanced setups you may need
102 | to move files around after building them, and in that case, you can use the
103 | variables documented below to communicate the new locations:
104 |
105 | - SYMCC_RUNTIME_DIR and SYMCC_RUNTIME32_DIR: The directory that contains the
106 | run-time support library (i.e., libSymRuntime.so).
107 |
108 | - SYMCC_PASS_DIR: The directory containing the compiler pass (i.e.,
109 | libSymbolize.so).
110 |
111 | - SYMCC_CLANG and SYMCC_CLANGPP: The clang and clang++ binaries to use during
112 | compilation. Be very careful with this one: if the version of the compiler you
113 | specify here doesn't match the one you built SymCC against, you'll most likely
114 | get linker errors.
115 |
--------------------------------------------------------------------------------
/docs/Experiments.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Experiments
4 |
5 |
6 | Here we document how to reproduce the experiments that we show in the paper
7 | "Symbolic execution with SymCC: Don't interpret, compile!" The same instructions
8 | are available on our website [1], which also provides our raw results. Feel free
9 | to reach out to us if you encounter problems with reproducing the benchmarks.
10 |
11 | The datasets are also archived on figshare [10].
12 |
13 | In the paper, we describe two sets of experiments: we first benchmark SymCC on
14 | the CGC programs, then we run it on real-world software.
15 |
16 |
17 | CGC experiments
18 |
19 | We used the Linux port of the CGC programs by Trail of Bits [2]. SymCC needs to
20 | be built with support for 32-bit compilation (see docs/32-bit.txt; this is not
21 | part of the Dockerfile because it would double the build time of the container
22 | while providing value to just a few users). Then you can simply export
23 | CC=/path/to/symcc, CXX=/path/to/sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, and build
24 | the CGC programs as usual (i.e., by invoking their build.sh script).
25 |
26 | Run the programs on the raw PoV inputs [3] with SYMCC_NO_SYMBOLIC_INPUT=1 to
27 | measure pure execution time, and unset the environment variable for symbolic
28 | execution. To assess coverage, we ran afl-showmap with the AFL-instrumented CGC
29 | programs on each generated input and accumulated the resulting coverage maps per
30 | program, resulting in a set of covered map entries for each CGC program. The
31 | sizes of those sets can then be fed to the scoring formula presented in the
32 | paper.
33 |
34 | For KLEE and QSYM, we used the setup described in our IR study [3] but with the
35 | regular 32-bit binaries built by cb-multios.
36 |
37 |
38 | Real-world software
39 |
40 | The analysis of real-world software always follows the same procedure. Assuming
41 | you have exported CC=symcc, CXX=sym++ and SYMCC_NO_SYMBOLIC_INPUT=1, first
42 | download the code, then build it using its own build system, and finally unset
43 | SYMCC_NO_SYMBOLIC_INPUT and analyze the program in concert with AFL (which
44 | requires building a second time for AFL, see docs/Fuzzing.txt). We used AFL
45 | 2.56b and built the targets with AFL_USE_ASan=1. Note that the fuzzing helper is
46 | already installed in the Docker container.
47 |
48 | OpenJPEG [4]: we used revision 1f1e9682, built with CMake as described in the
49 | project's INSTALL.md (adding "-DBUILD_THIRDPARTY=ON" to make sure that
50 | third-party libraries are compiled with SymCC as well), and analyzed
51 | "bin/opj_decompress -i @@ -o /tmp/image.pgm"; the corpus consisted of test
52 | files file1.jp2 and file8.jp2 [5].
53 |
54 | libarchive [6]: we used revision 9ebb2484, built with CMake as described in the
55 | project's INSTALL (but adding "-DCMAKE_BUILD_TYPE=Release"), and analyzed
56 | "bin/bsdtar tf @@"; the corpus consisted of just a single dummy file
57 | containing the character "A".
58 |
59 | tcpdump: we built both tcpdump [7] and libpcap [8]; in order to make the former
60 | find the latter, just place the source directories next to each other in the
61 | same folder. We used revision d615abec of libpcap and revision d57927e1 of
62 | tcpdump. We built first libpcap and then tcpdump with "./configure && make",
63 | and analyzed "tcpdump/tcpdump -e -r @@"; the corpus consisted of just a single
64 | dummy file containing the character "A".
65 |
66 | All experiments used one AFL main process, one secondary AFL process, and one
67 | SymCC process. We let them run for 24 hours and repeated each of them 30 times
68 | to create the graphs in the paper; AFL map density was extracted from the
69 | secondary AFL process' "plot_data" file, column "map_size".
70 |
71 | The QSYM experiments used an analogous setup, replacing SymCC with QSYM and
72 | running it with AFL according to the QSYM authors' instructions [9].
73 |
74 | [1] http://www.s3.eurecom.fr/tools/symbolic_execution/symcc.html
75 | [2] https://github.com/trailofbits/cb-multios
76 | [3] http://www.s3.eurecom.fr/tools/symbolic_execution/ir_study.html
77 | [4] https://github.com/uclouvain/openjpeg.git
78 | [5] https://github.com/uclouvain/openjpeg-data/blob/master/input/conformance
79 | [6] https://github.com/libarchive/libarchive.git
80 | [7] https://github.com/the-tcpdump-group/tcpdump.git
81 | [8] https://github.com/the-tcpdump-group/libpcap.git
82 | [9] https://github.com/sslab-gatech/qsym#run-hybrid-fuzzing-with-afl
83 | [10] https://doi.org/10.6084/m9.figshare.24270709.v1 or https://figshare.com/articles/dataset/SymCC_evaluation_data/24270709
84 |
--------------------------------------------------------------------------------
/docs/Fuzzing.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Combining SymCC with a fuzzer
4 |
5 |
6 | Programs instrumented with SymCC generate new test inputs on every run. This is
7 | the core building block for program testing, but a full analysis requires
8 | additional components: new test cases need to be checked for whether they
9 | trigger vulnerabilities in the target program, and we have to sort them by relevance
10 | and feed them back to symbolic execution. These tasks are essentially the same
11 | as in fuzzing, except that we use a smarter (yet more expensive) strategy to
12 | generate new inputs. Here we show how to reuse an existing fuzzer for the
13 | management tasks but additionally generate new inputs with SymCC.
14 |
15 |
16 | Setup
17 |
18 |
19 | We use AFL, a popular gray-box fuzzer, in its parallel mode. See AFL's
20 | documentation on parallel fuzzing for details on this mode - the basic idea is
21 | that SymCC and the fuzzer periodically exchange new inputs. SymCC comes with a
22 | helper that coordinates the collaboration with the fuzzer. It is written in
23 | Rust, so rustc and cargo (the Rust package manager) have to be installed. On
24 | Debian-based distributions, for example, a simple "apt install rustc cargo" is
25 | all you need. Build the tool by executing the following command in the root of
26 | SymCC's source repository:
27 |
28 | $ cargo install --path util/symcc_fuzzing_helper
29 |
30 | Afterwards, you should have a self-contained binary
31 | ~/.cargo/bin/symcc_fuzzing_helper. If you are interested in the tool's
32 | internals, you can render documentation as follows:
33 |
34 | $ cargo doc --manifest-path util/symcc_fuzzing_helper/Cargo.toml \
35 | --document-private-items --open
36 |
37 | This is all on the SymCC side. Now just make sure that AFL is installed - we've
38 | tested with version 2.56b.
39 |
40 |
41 | Testing an example program
42 |
43 |
44 | Suppose we wanted to search for memory-related vulnerabilities in tcpdump's
45 | link-layer parsers. The program can be instructed to read from a pcap and print
46 | relevant headers like so:
47 |
48 | $ tcpdump -e -r /path/to/capture.pcap
49 |
50 | Compile tcpdump and libpcap, the library it uses for pcap reading, once with
51 | SymCC and once with one of AFL's compiler wrappers (e.g., afl-clang). In order
52 | to detect memory corruptions, enable address sanitizer in the AFL-instrumented
53 | version by exporting AFL_USE_ASan=1 before compiling:
54 |
55 | $ git clone https://github.com/the-tcpdump-group/libpcap.git
56 | $ git clone https://github.com/the-tcpdump-group/tcpdump.git
57 |
58 | $ mkdir symcc_build; cd symcc_build
59 | $ cp -r ../{libpcap,tcpdump} .
60 | $ cd libpcap
61 | $ CC=/path/to/symcc ./configure
62 | $ make
63 | $ cd ../tcpdump
64 | $ CC=/path/to/symcc ./configure
65 | $ make
66 | $ cd ..
67 |
68 | $ mkdir afl_build; cd afl_build
69 | $ export AFL_USE_ASan=1
70 | $ cp -r ../{libpcap,tcpdump} .
71 | $ cd libpcap
72 | $ CC=/path/to/afl-clang ./configure
73 | $ make
74 | $ cd ../tcpdump
75 | $ CC=/path/to/afl-clang ./configure
76 | $ make
77 | $ cd ..
78 |
79 | Note that we need two copies of the source code because the projects build
80 | in-tree. Also, it is important to place the source code directories next to each
81 | other, so that tcpdump's build system can find and statically link the
82 | previously built libpcap.
83 |
84 | Create a corpus of dummy files somewhere (say, in a directory called "corpus");
85 | for tcpdump, we just start with a minimal corpus containing only a dummy file for
86 | AFL:
87 |
88 | $ mkdir corpus
89 | $ echo A > corpus/dummy
90 |
91 | Then launch one AFL main and one AFL secondary instance, both writing their
92 | outputs to the arbitrarily named directory "afl_out":
93 |
94 | $ afl-fuzz -M afl-main -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@
95 | $ afl-fuzz -S afl-secondary -i corpus -o afl_out -m none -- afl_build/tcpdump/tcpdump -e -r @@
96 |
97 | For simplicity, we disable memory limits (with "-m none"); be sure to read AFL's
98 | notes on address sanitizer to learn about the implications. Alternatively, you
99 | can compile the target program without address sanitizer, in which case you
100 | don't need to disable the memory limit.
101 |
102 | Finally, we can run SymCC using the helper:
103 |
104 | $ ~/.cargo/bin/symcc_fuzzing_helper -o afl_out -a afl-secondary -n symcc -- symcc_build/tcpdump/tcpdump -e -r @@
105 |
106 | It will run SymCC on the most promising inputs generated by the secondary AFL
107 | instance and feed any interesting results back to AFL. In AFL's status screen,
108 | you should see the counter "imported" in the "path geometry" section increase
109 | after a short time - this means that the fuzzer instances and SymCC are
110 | exchanging inputs. Crashes will be stored in afl_out/*/crashes as usual.
111 |
112 | It is possible to run SymCC with only an AFL main or only a secondary AFL
113 | instance; see the AFL docs for the implications. Moreover, the number of fuzzer
114 | and SymCC instances can be increased - just make sure that each has a unique
115 | name.
116 |
117 | Note that there are currently a few gotchas with the fuzzing helper:
118 |
119 | 1. It expects afl-showmap to be in the same directory as afl-fuzz (which is
120 | usually the case), and it finds that directory via your afl-fuzz command. If
121 | afl-fuzz is on your PATH (as we assumed in the example above), all is good
122 | and you can ignore this point. Otherwise, you need to either call afl-fuzz
123 | with an absolute path (e.g., /afl/afl-fuzz in the Docker image) or, if you
124 | use a relative path, start afl-fuzz from the same working directory as the
125 | fuzzing helper.
126 |
127 | 2. The helper needs to know how to call the AFL-instrumented version of the
128 | target, and it finds that information by scanning your afl-fuzz command. To
129 | this end, it _requires_ the double dash that we used in the example above to
130 | separate afl-fuzz options from the target command; if you omit it, you'll
131 | likely get errors from the helper when it tries to run afl-showmap.
132 |
--------------------------------------------------------------------------------
/docs/Ideas.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Ideas for future work
4 |
5 |
6 | Here we collect ideas around improvements and new features that could be
7 | interesting to implement.
8 |
9 |
10 | Position in the optimizer pipeline
11 |
12 | Intuitively, we should run towards the end of the pipeline, so that the target
13 | program has been simplified as much as possible. However, SymCC currently runs
14 | just before the vectorizer - a position later in the pipeline would require
15 | supporting LLVM vector instructions, so for now we choose implementation
16 | simplicity over potential performance gains. Still, it would be very interesting
17 | to check whether moving to the end of the pipeline accelerates the system
18 | significantly, and how much it would cost in terms of complexity.
19 |
20 |
21 | Optimize injected code
22 |
23 | We should schedule a few optimization passes after inserting our
24 | instrumentation, so that the instrumentation code gets optimized as well. This
25 | becomes more important the further we move our pass to the end of the pipeline.
26 | We could take inspiration from popular sanitizers like ASan and MSan regarding
27 | the concrete passes to run, and their order. Also, we should enable link-time
28 | optimization to inline some simple run-time support functions.
29 |
30 |
31 | Free symbolic expressions in memory
32 |
33 | SymCC currently doesn't free symbolic expressions. This is fine most of the time
34 | because intermediate values are rarely computed without being used: typically,
35 | they end up being inputs to future computations, so we couldn't free the
36 | corresponding expressions anyway. A notable exception is the computation of
37 | values only for output - the expressions for such values could be freed after
38 | the value is output, which would reduce memory consumption, especially with
39 | output-heavy target programs.
40 |
41 |
42 | Better fuzzer integration
43 |
44 | Our current coordination with the fuzzer is very crude: we use AFL's distributed
45 | mode to make it periodically pull new inputs from SymCC, and we try to
46 | prioritize the most interesting inputs from AFL's queue for execution in SymCC.
47 | However, a better integration would consider the trade-offs of symbolic
48 | execution: it's expensive but uses more sophisticated reasoning. As long as the
49 | fuzzer makes good progress (for some progress metric), CPU power should be
50 | allocated only to the fuzzer; the price of symbolic execution should be paid
51 | only when necessary. Moreover, a faster synchronization mechanism than AFL's
52 | file-system based approach would be nice.
53 |
54 |
55 | Work with other fuzzers
56 |
57 | Integrating with AFL is easy because its distributed mode only requires working
58 | with files and directories. Other fuzzers might not provide such easy
59 | mechanisms, but by integrating with them we would gain whatever performance
60 | improvements they have made over AFL (e.g., AFL++ or Honggfuzz).
61 |
62 |
63 | Forking version
64 |
65 | Instead of working with a fuzzer, we could also implement forking and some
66 | scheduling strategies ourselves. Georgia Tech has developed some OS-level
67 | primitives that could help to implement such a feature:
68 | https://github.com/sslab-gatech/perf-fuzz.
69 |
--------------------------------------------------------------------------------
/docs/Libc.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Libc Call Interception
4 |
5 |
6 | While we can, in principle, compile all code with instrumentation, it is unclear
7 | how difficult this is for the C standard library. The LLVM sanitizers don't try,
8 | even though MSan otherwise requires all code to be instrumented, and we take this as
9 | a sign that there may be unforeseen challenges. For now, we take the same route
10 | as the sanitizers and intercept calls to libc functions, wrapping them with
11 | symbolic handling. For example, the wrapper for "memset" obtains the symbolic
12 | expression for the value to be written in memory and pushes it to the shadow
13 | region of the destination memory. In the future, we may experiment with
14 | compiling (parts of) the libc to avoid the effort of manually defining wrappers.
15 |
16 | Initially, we tried the interception mechanism that the LLVM sanitizers use,
17 | implemented in the compiler-rt library. The Linux version basically just defines
18 | a function with the name of the libc function. The dynamic loader resolves
19 | symbols to the first function with the right name that it finds; given an
20 | appropriate link order, the wrapper (or "interceptor" in compiler-rt parlance)
21 | will be called instead of the libc function. Calling the real function is just a
22 | matter of asking the loader for alternative resolutions (i.e., calling "dlsym"
23 | with flag "RTLD_NEXT"). The problem for us is that this approach *globally*
24 | replaces a given libc function, in the executable and in all libraries that it
25 | loads. However, our run-time support library is loaded into the same process and
26 | makes heavy use of libc, so we need the ability to use wrappers in one part of
27 | the program and concrete functions in another. This turned out to complicate the
28 | compiler-rt-based implementation so much that we eventually abandoned the
29 | approach.
30 |
31 | Function renaming provided a convenient alternative: we control all code that is
32 | supposed to call wrappers rather than the libc functions proper, so we just rename
33 | the targets of their calls. For example, a call to "memset" in the program under
34 | test is turned into a call to "memset_symbolized", which we can easily define as
35 | a regular function wrapping "memset". Calls from our run-time library, on the
36 | other hand, use the regular function names and thus end up in libc as usual.
37 |
--------------------------------------------------------------------------------
/docs/Optimization.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Optimization
4 |
5 |
6 | A popular technique for experimenting with compiler passes is to produce bitcode
7 | with "clang -emit-llvm" and run the pass on the resulting bitcode with opt. Note
8 | that this approach does not mix well with optimization: simply running "opt -O3"
9 | on the instrumented bitcode yields inferior results. Why? In principle, the
10 | instrumentation that adds symbolic execution capabilities does not interfere
11 | with the compiler's regular optimization. However, while "opt -O3" runs the same
12 | middle-end optimizations as clang does internally, "clang -O3" performs
13 | additional analysis before invoking the middle end. In particular, type-based
14 | alias analysis (TBAA) adds metadata to the bitcode that enables the SROA pass to
15 | promote a lot of stack-allocated variables into SSA values.
16 |
17 | In order to produce bitcode that can later be properly optimized with opt, pass
18 | the desired optimization flag at each stage of the workflow:
19 |
20 | $ clang -O3 -Xclang -disable-llvm-passes -emit-llvm -S test.c -o test.ll
21 | $ opt -load ./libSymbolize.so -symbolize < test.ll > test_instrumented.bc
22 | $ opt -O3 < test_instrumented.bc > test_instrumented_optimized.bc
23 | $ clang -O3 test_instrumented_optimized.bc -o test
24 | $ ./test
25 |
--------------------------------------------------------------------------------
/docs/Testing.txt:
--------------------------------------------------------------------------------
1 |
2 |
3 | Testing
4 |
5 |
6 | The short version:
7 |
8 | $ ninja check
9 |
10 |
11 | The slightly longer version:
12 |
13 | We use the LLVM tools "lit" and "FileCheck", integrated with CMake. Since there
14 | is little documentation on setting up the combination of those tools (apart from
15 | the LLVM source code), here is an overview of the setup:
16 |
17 | 1. Tests are C or C++ source files in the "test" subdirectory of the project.
18 | They contain instructions for FileCheck in comments; see that tool's
19 | documentation.
20 |
21 | 2. We run the individual tests through lit, LLVM's configurable test runner. It
22 | finds each test file, performs some variable substitutions (see below), and
23 | runs the tests. The main source of configuration is "test/lit.cfg".
24 |
25 | 3. At configuration time, CMake creates an additional config file for lit,
26 | containing site-specific configuration such as the output directory of the
27 | build. The template is "test/lit.site.cfg.in".
28 |
29 | 4. CMake adds the "check" target, which invokes lit on the test suite. (It would
30 | be nice to call the target "test", but this is a reserved name in some
31 | versions of CMake, and the built-in test mechanism that it is reserved for
32 | doesn't track dependencies the way we need.)
33 |
34 | Test files can use the following patterns:
35 |
36 | %s The test file itself.
37 | %t A temporary file.
38 | %symcc Invocation of clang with our custom pass loaded.
39 | %filecheck Invocation of FileCheck with the right arguments for the backend.
40 |
41 | Since we support multiple symbolic backends, the tests must account for
42 | different output from different backends. To this end, we rely on FileCheck's
43 | prefix mechanism: test files use different prefixes to specify requirements on
44 | different backends. The following prefixes are supported:
45 |
46 | SIMPLE: Active when we test with our own backend.
47 | QSYM: Active when we test with the QSYM backend.
48 | ANY: Always active.
49 |
50 | The build system makes sure that "%filecheck" always expands to an invocation of
51 | FileCheck that activates the right prefixes for the current build configuration.
52 |
53 | Note that we run the tests only with the backend selected at configuration time,
54 | so a full test requires building the project in multiple configurations. Also,
55 | be aware that the backends write all log messages to standard error; therefore,
56 | checks should not depend on the relative ordering of backend logs and messages
57 | that the test program writes to standard output (use stderr instead).
58 |
59 |
60 | Regression tests
61 |
62 | In addition to the hand-written tests that exercise compiler functionality via C
63 | code, we have a directory "test/regression" where we can collect LLVM bitcode
64 | files that triggered bugs in real SymCC use. Generate the bitcode by running the
65 | crashing compiler command with additional arguments "-emit-llvm -S -o-", pipe
66 | the result through "opt -S -instnamer", and add a comment at the top to tell lit
67 | how to compile it. The instruction naming is necessary because different LLVM
68 | versions treat numbered (i.e., unnamed) instructions differently and may
69 | complain if the numbering sequence doesn't match expectations.
70 |
--------------------------------------------------------------------------------
/runtime/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # This file is part of the SymCC runtime.
2 | #
3 | # The SymCC runtime is free software: you can redistribute it and/or modify it
4 | # under the terms of the GNU Lesser General Public License as published by the
5 | # Free Software Foundation, either version 3 of the License, or (at your option)
6 | # any later version.
7 | #
8 | # The SymCC runtime is distributed in the hope that it will be useful, but
9 | # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | # for more details.
12 | #
13 | # You should have received a copy of the GNU Lesser General Public License along
14 | # with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | cmake_minimum_required(VERSION 3.5)
17 | project(SymRuntime)
18 | # Compile as C++17 and enable an extended set of compiler warnings.
19 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17 \
20 | -Wredundant-decls -Wcast-align \
21 | -Wmissing-include-dirs -Wswitch-default \
22 | -Wextra -Wall -Winvalid-pch -Wformat=2 \
23 | -Wmissing-format-attribute -Wformat-nonliteral")
24 |
25 | option(QSYM_BACKEND "Use the Qsym backend instead of our own" OFF)
26 | option(RUST_BACKEND "Build the support code required for a Rust backend as a static archive." OFF)
27 | option(Z3_TRUST_SYSTEM_VERSION "Use the system-provided Z3 without a version check" OFF)
28 |
29 | # Place the final product in the top-level output directory
30 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR})
31 |
32 | # There is list(TRANSFORM ... PREPEND ...), but it's not available before CMake 3.12.
33 | set(SHARED_RUNTIME_SOURCES
34 | ${CMAKE_CURRENT_SOURCE_DIR}/Config.cpp
35 | ${CMAKE_CURRENT_SOURCE_DIR}/RuntimeCommon.cpp
36 | ${CMAKE_CURRENT_SOURCE_DIR}/LibcWrappers.cpp
37 | ${CMAKE_CURRENT_SOURCE_DIR}/Shadow.cpp
38 | ${CMAKE_CURRENT_SOURCE_DIR}/GarbageCollection.cpp)
39 | # Build exactly one backend: RUST_BACKEND wins over QSYM_BACKEND; the simple backend is the default.
40 | if (RUST_BACKEND)
41 | add_subdirectory(rust_backend)
42 | elseif (QSYM_BACKEND)
43 | add_subdirectory(qsym_backend)
44 | else()
45 | add_subdirectory(simple_backend)
46 | endif()
47 |
--------------------------------------------------------------------------------
/runtime/Config.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #include "Config.h"
17 |
18 | #include
19 | #include
20 | #include
21 | #include
22 | #include
23 | #include
24 |
25 | namespace {
26 |
27 | /// Interpret a flag string case-insensitively; unrecognized text raises std::runtime_error.
28 | bool checkFlagString(std::string value) {
29 | for (char &ch : value)
30 | ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
31 | const bool enabled = value == "1" || value == "on" || value == "yes";
32 | const bool disabled = value.empty() || value == "0" || value == "off" || value == "no";
33 | if (!enabled && !disabled) {
34 | std::stringstream problem; // The message reports the lower-cased value.
35 | problem << "Unknown flag value " << value;
36 | throw std::runtime_error(problem.str());
37 | }
38 | return enabled;
39 | }
40 |
41 | } // namespace
42 |
43 | Config g_config;
44 |
45 | void loadConfig() { // Every SYMCC_* variable that is set overrides the matching Config default.
46 | auto *outputDir = getenv("SYMCC_OUTPUT_DIR");
47 | if (outputDir != nullptr)
48 | g_config.outputDir = outputDir;
49 |
50 | auto *inputFile = getenv("SYMCC_INPUT_FILE"); // Any value, even an empty one, selects file input.
51 | if (inputFile != nullptr)
52 | g_config.input = FileInput{inputFile};
53 |
54 | auto *memoryInput = getenv("SYMCC_MEMORY_INPUT"); // Parsed as a flag; see checkFlagString.
55 | if (memoryInput != nullptr && checkFlagString(memoryInput)) {
56 | if (std::holds_alternative<FileInput>(g_config.input))
57 | throw std::runtime_error{
58 | "Can't enable file and memory input at the same time"};
59 |
60 | g_config.input = MemoryInput{};
61 | }
62 |
63 | auto *fullyConcrete = getenv("SYMCC_NO_SYMBOLIC_INPUT"); // Checked last, so it overrides file and memory input.
64 | if (fullyConcrete != nullptr && checkFlagString(fullyConcrete))
65 | g_config.input = NoInput{};
66 |
67 | auto *logFile = getenv("SYMCC_LOG_FILE");
68 | if (logFile != nullptr)
69 | g_config.logFile = logFile;
70 |
71 | auto *pruning = getenv("SYMCC_ENABLE_LINEARIZATION"); // Despite the variable's name, this sets the pruning flag.
72 | if (pruning != nullptr)
73 | g_config.pruning = checkFlagString(pruning);
74 |
75 | auto *aflCoverageMap = getenv("SYMCC_AFL_COVERAGE_MAP");
76 | if (aflCoverageMap != nullptr)
77 | g_config.aflCoverageMap = aflCoverageMap;
78 |
79 | auto *garbageCollectionThreshold = getenv("SYMCC_GC_THRESHOLD");
80 | if (garbageCollectionThreshold != nullptr) {
81 | try { // Translate std::stoul's exceptions into the std::runtime_error that Config.h documents.
82 | g_config.garbageCollectionThreshold =
83 | std::stoul(garbageCollectionThreshold);
84 | } catch (std::invalid_argument &) {
85 | std::stringstream msg;
86 | msg << "Can't convert " << garbageCollectionThreshold << " to an integer";
87 | throw std::runtime_error(msg.str());
88 | } catch (std::out_of_range &) {
89 | std::stringstream msg;
90 | msg << "The GC threshold must be between 0 and "
91 | << std::numeric_limits<size_t>::max();
92 | throw std::runtime_error(msg.str());
93 | }
94 | }
95 | }
96 |
--------------------------------------------------------------------------------
/runtime/Config.h:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #ifndef CONFIG_H
17 | #define CONFIG_H
18 |
19 | #include <string>
20 | #include <variant>
21 |
22 | /// Marker struct for fully concrete execution (selected via SYMCC_NO_SYMBOLIC_INPUT).
23 | struct NoInput {};
24 |
25 | /// Marker struct for symbolic input from stdin; this is the default value of Config::input.
26 | struct StdinInput {};
27 |
28 | /// Marker struct for symbolic input via _sym_make_symbolic (selected via SYMCC_MEMORY_INPUT).
29 | struct MemoryInput {};
30 |
31 | /// Configuration for symbolic input from a file (selected via SYMCC_INPUT_FILE).
32 | struct FileInput {
33 | /// The name of the input file.
34 | std::string fileName;
35 | };
36 |
37 | struct Config { // Runtime settings; loadConfig() overrides these defaults from SYMCC_* environment variables.
38 | using InputConfig = std::variant<NoInput, StdinInput, MemoryInput, FileInput>;
39 |
40 | /// The configuration for our symbolic input (standard input unless configured otherwise).
41 | InputConfig input = StdinInput{};
42 |
43 | /// The directory for storing new outputs (SYMCC_OUTPUT_DIR).
44 | std::string outputDir = "/tmp/output";
45 |
46 | /// The file to log constraint solving information to (SYMCC_LOG_FILE).
47 | std::string logFile = "";
48 |
49 | /// Do we prune expressions on hot paths? (SYMCC_ENABLE_LINEARIZATION)
50 | bool pruning = false;
51 |
52 | /// The AFL coverage map to initialize with (SYMCC_AFL_COVERAGE_MAP).
53 | ///
54 | /// Specifying a file name here allows us to track already covered program
55 | /// locations across multiple program executions.
56 | std::string aflCoverageMap = "";
57 |
58 | /// The garbage collection threshold (SYMCC_GC_THRESHOLD).
59 | ///
60 | /// We will start collecting unused symbolic expressions if the total number
61 | /// of allocated expressions in the target program exceeds this number.
62 | ///
63 | /// Collecting too often hurts performance, whereas delaying garbage
64 | /// collection for too long might make us run out of memory. The goal of this
65 | /// empirically determined constant is to keep peak memory consumption below
66 | /// 2GB on most workloads because requiring that amount of memory per core
67 | /// participating in the analysis seems reasonable.
68 | size_t garbageCollectionThreshold = 5'000'000;
69 | };
70 |
71 | /// The global configuration object.
72 | ///
73 | /// It should be initialized once before we start executing the program and
74 | /// never changed afterwards.
75 | extern Config g_config;
76 |
77 | /// Populate g_config from the environment.
78 | ///
79 | /// The function will throw std::runtime_error if the value of an environment
80 | /// variable used for configuration cannot be interpreted.
81 | void loadConfig();
82 |
83 | #endif
84 |
--------------------------------------------------------------------------------
/runtime/GarbageCollection.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with SymCC. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #include "GarbageCollection.h"
17 |
18 | #include
19 |
20 | #include
21 | #include
22 |
23 | /// A list of memory regions that are known to contain symbolic expressions.
24 | std::vector<ExpressionRegion> expressionRegions;
25 |
26 | void registerExpressionRegion(ExpressionRegion r) { // Remember r so that collectReachableExpressions() scans it.
27 | expressionRegions.emplace_back(std::move(r));
28 | }
29 |
30 | std::set<SymExpr> collectReachableExpressions() { // Gather every non-null expression stored in a known region.
31 | std::set<SymExpr> reachableExpressions;
32 | auto scanRegion = [&](ExpressionRegion r) { // r is (first slot, number of slots).
33 | auto *end = r.first + r.second;
34 | for (SymExpr *expr_ptr = r.first; expr_ptr < end; expr_ptr++) {
35 | if (*expr_ptr != nullptr) {
36 | reachableExpressions.insert(*expr_ptr);
37 | }
38 | }
39 | };
40 |
41 | for (auto &r : expressionRegions) { // Regions added through registerExpressionRegion().
42 | scanRegion(r);
43 | }
44 |
45 | for (const auto &mapping : g_shadow_pages) { // Each shadow page is scanned as kPageSize slots.
46 | scanRegion({mapping.second, kPageSize});
47 | }
48 |
49 | return reachableExpressions;
50 | }
51 |
--------------------------------------------------------------------------------
/runtime/GarbageCollection.h:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #ifndef GARBAGECOLLECTION_H
17 | #define GARBAGECOLLECTION_H
18 |
19 | #include
20 | #include
21 |
22 | #include
23 |
24 | /// An imitation of std::span (which is not available before C++20) for symbolic
25 | /// expressions: a pointer to the first slot and the number of slots.
26 | using ExpressionRegion = std::pair<SymExpr *, size_t>;
27 |
28 | /// Add the specified region to the list of places to search for symbolic
29 | /// expressions.
30 | void registerExpressionRegion(ExpressionRegion r);
31 |
32 | /// Return the set of currently reachable symbolic expressions.
33 | std::set<SymExpr> collectReachableExpressions();
34 |
35 | #endif
36 |
--------------------------------------------------------------------------------
/runtime/LibcWrappers.h:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #ifndef LIBCWRAPPERS_H
17 | #define LIBCWRAPPERS_H
18 |
19 | /// Initialize the libc wrappers.
20 | ///
21 | /// The configuration needs to be loaded so that we can apply settings related
22 | /// to symbolic input.
23 | void initLibcWrappers();
24 |
25 | #endif
26 |
--------------------------------------------------------------------------------
/runtime/Shadow.cpp:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with SymCC. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #include "Shadow.h"
17 |
18 | std::map<uintptr_t, SymExpr *> g_shadow_pages; // collectReachableExpressions() scans each mapped value as kPageSize slots.
19 |
--------------------------------------------------------------------------------
/runtime/Shadow.h:
--------------------------------------------------------------------------------
1 | // This file is part of the SymCC runtime.
2 | //
3 | // The SymCC runtime is free software: you can redistribute it and/or modify it
4 | // under the terms of the GNU Lesser General Public License as published by the
5 | // Free Software Foundation, either version 3 of the License, or (at your
6 | // option) any later version.
7 | //
8 | // The SymCC runtime is distributed in the hope that it will be useful, but
9 | // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 | // FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
11 | // for more details.
12 | //
13 | // You should have received a copy of the GNU Lesser General Public License
14 | // along with the SymCC runtime. If not, see <https://www.gnu.org/licenses/>.
15 |
16 | #ifndef SHADOW_H
17 | #define SHADOW_H
18 |
19 | #include
20 | #include
21 | #include
22 | #include